<a href="https://colab.research.google.com/github/yecatstevir/teambrainiac/blob/main/source/helper/hyperparameters_and_normalization_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Grid Search Results
- Go to 'Runtime' in Colab browser bar, select 'Change Runtime Type', select 'High-RAM' from 'Runtime Shape'. 
- load local pickle file containing all GridSearchCV objects (single_subject_znorm_cv.pkl)
- Check results from GridSearchCV for each patient

### Mount Google Drive and clone repository
- open to source directory

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')#, force_remount = True)

Mounted at /content/gdrive


In [2]:

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# Clone the entire repo.
!git clone -l -s https://github.com/yecatstevir/teambrainiac.git
# Change directory into cloned repo
%cd teambrainiac/source
!ls


Cloning into 'teambrainiac'...
remote: Enumerating objects: 588, done.[K
remote: Counting objects: 100% (588/588), done.[K
remote: Compressing objects: 100% (409/409), done.[K
remote: Total 588 (delta 356), reused 335 (delta 163), pack-reused 0[K
Receiving objects: 100% (588/588), 61.73 MiB | 41.86 MiB/s, done.
Resolving deltas: 100% (356/356), done.
/content/teambrainiac/source
AccuracyMeasures.ipynb	  __init__.py
cross_validation.py	  models
data			  process.py
DL			  SingleSubjectSVM.ipynb
Explore_data.ipynb	  SVM_Group_Child_Whole_Brain.ipynb
explore.py		  SVM_Group_YA_Whole_brain.ipynb
Group_All_MASK_SVM.ipynb  utils.py
helper			  VisualizationPlayground.ipynb
Images			  Visualize_Data.ipynb


### Load path_config.py 
- we are already in source so we can just load this file without chanding directory

In [4]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving path_config.py to path_config.py
User uploaded file "path_config.py" with length 196 bytes


### Load the individual clf's from the gridsearchcv
- filename single_subject_znorm_cv.pkl

### Import libraries


In [38]:

# Import libraries
!pip install boto3 nilearn nibabel
from sklearn.model_selection import GridSearchCV
import pickle
from utils import data_to_nib, load_mat, open_pickle, access_load_data, save_data, create_mask, labels_mask_binary, masking_data, masked_data_n_labels
from sklearn.svm import SVC
import numpy as np
import pandas as pd
import random
import cv2 as cv
import altair as alt



### Load Data from AWS

In [9]:
%%time
pkl_file = "single_subject_znorm_cv.pkl"
bool_mat = False
data = access_load_data(pkl_file, bool_mat)

CPU times: user 25 s, sys: 28.3 s, total: 53.3 s
Wall time: 1min 26s


In [25]:
# Create dataframe from cross validation results
index = 0

cv_columns = ['user_id', 'score', 'C', 'kernel', 'random_state']
results = {}

for id in data.keys():
  user_clf = data[id]['model']
  ids = [id for x in range(len(user_clf.cv_results_['params']))]
  score = user_clf.cv_results_['mean_test_score']
  C = [x['C'] for x in user_clf.cv_results_['params']]
  kernel = [x['kernel'] for x in user_clf.cv_results_['params']]
  random_state = [x['random_state'] for x in user_clf.cv_results_['params']]
  for row in zip(ids, score, C, kernel, random_state):
    results[index] = row
    index += 1

df_results = pd.DataFrame.from_dict(results, orient='index', columns=cv_columns)
df_results.head() 

Unnamed: 0,user_id,score,C,kernel,random_state
0,10004_08693,0.702941,0.7,linear,111
1,10004_08693,0.702941,0.7,linear,222
2,10004_08693,0.832353,0.7,rbf,111
3,10004_08693,0.832353,0.7,rbf,222
4,10004_08693,0.702941,1.0,linear,111


## Looking at Cross Validation Results


In [30]:
# Note that changing the random state did not have an effect on the score
# Deleting random state duplicates to simplify dataframe
df_results = df_results[df_results['random_state']==111]
df_results = df_results.drop(['random_state'], axis=1)
df_results.head()

Unnamed: 0,user_id,score,C,kernel
0,10004_08693,0.702941,0.7,linear
2,10004_08693,0.832353,0.7,rbf
4,10004_08693,0.702941,1.0,linear
6,10004_08693,0.869118,1.0,rbf
8,10004_08693,0.702941,5.0,linear


AttributeError: ignored

In [73]:
df_results['c_kernel'] = ['C: ' + str(x[0]) + ',   kernel: ' + x[1] for x in zip(df_results['C'], df_results['kernel'])]

stripplot =  alt.Chart(df_results, width=40).mark_circle(size=25).encode(
    x=alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    ),
    y=alt.Y('score:Q'),
    color=alt.Color('c_kernel:N', legend=None),
    column=alt.Column(
        'c_kernel:N',
        header=alt.Header(
            labelAngle=90,
            titleOrient='bottom',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
            labelFontSize=12
        ),
    ),
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).properties(
    width = 70,
    height = 400
)

stripplot

In [91]:
box_plot = alt.Chart(df_results).mark_boxplot(extent='min-max').encode(
    x=alt.X(
        'c_kernel:O',
        sort=alt.EncodingSortFieldFieldName(field='score', op='mean', order='descending')
    ),
    y='score:Q'
).properties(height=600, width=800)

box_plot

## Opening Second Pickle File

In [86]:
# %%time
pkl_file = "single_subj_acc_measures.pkl"
bool_mat = False
data = access_load_data(pkl_file, bool_mat)

In [88]:
data.keys()

dict_keys(['acc_df_psconly', 'acc_df_znorm_only', 'acc_df_znorm_psc', 'acc_df_nonorm'])

In [92]:
acc_df_psconly = pd.DataFrame(data['acc_df_znorm_only'])
acc_df_psconly

Unnamed: 0.1,Unnamed: 0,Subject,TrainAcc,ValAcc,ValAUC,ValPrecision,ValRecall,ValF1,TestAcc,TestAUC,TestPrecision,TestRecall,TestF1,NormBy,DataType
0,0,10004_08693,1.0,0.714286,0.714286,0.704545,0.738095,0.72093,0.833333,0.833333,0.85,0.809524,0.829268,RUNS,PSCNORM
1,1,10008_09924,1.0,0.654762,0.654762,0.644444,0.690476,0.666667,0.738095,0.738095,0.794118,0.642857,0.710526,RUNS,PSCNORM
2,2,10009_08848,0.97619,0.821429,0.821429,0.846154,0.785714,0.814815,0.738095,0.738095,0.75,0.714286,0.731707,RUNS,PSCNORM
3,3,10016_09694,1.0,0.892857,0.892857,1.0,0.785714,0.88,0.785714,0.785714,0.8,0.761905,0.780488,RUNS,PSCNORM
4,4,10017_08894,1.0,0.630952,0.630952,0.612245,0.714286,0.659341,0.547619,0.547619,0.55,0.52381,0.536585,RUNS,PSCNORM
5,5,10018_08907,1.0,0.571429,0.571429,0.568182,0.595238,0.581395,0.666667,0.666667,0.659091,0.690476,0.674419,RUNS,PSCNORM
6,6,10021_08839,1.0,0.892857,0.892857,0.971429,0.809524,0.883117,0.583333,0.583333,0.6,0.5,0.545455,RUNS,PSCNORM
7,7,10022_08854,1.0,0.809524,0.809524,0.825,0.785714,0.804878,0.714286,0.714286,0.704545,0.738095,0.72093,RUNS,PSCNORM
8,8,10023_09126,0.97619,0.642857,0.642857,0.6875,0.52381,0.594595,0.595238,0.595238,0.617647,0.5,0.552632,RUNS,PSCNORM
9,9,10027_09455,1.0,0.654762,0.654762,0.685714,0.571429,0.623377,0.75,0.75,0.769231,0.714286,0.740741,RUNS,PSCNORM


In [None]:
##