<a href="https://colab.research.google.com/github/yecatstevir/teambrainiac/blob/main/source/SVM_Group_Whole_Brain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Group Whole Brain Support Vector Machine Training
- Go to 'Runtime' in Colab browser bar, select 'Change Runtime Type', select 'High-RAM' from 'Runtime Shape'. 
- load local pickle file containing all masked, normalized Whole Brain subject data in numpy matrix format
- SVM training all subjects
- SVM training per subject

### Mount Google Drive and clone repository
- change into the repository's `source` directory

In [None]:
# Mount Google Drive so files stored there are reachable from this Colab runtime.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
# If the mount ever goes stale, call drive.mount(..., force_remount=True) instead.
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Clone the entire repo.
!git clone -l -s https://github.com/yecatstevir/teambrainiac.git
# Change directory into cloned repo
%cd teambrainiac/source
!ls


fatal: destination path 'teambrainiac' already exists and is not an empty directory.
/content/teambrainiac/source
 Access_Load_Data.ipynb		    path_config.py
 All_subject_masked_labeled.ipynb   percent_signal_change.ipynb
 data				    __pycache__
 __init__.py			    SVM.ipynb
 Masking.ipynb			    SVM_Whole_Brain.ipynb
 Mat_to_Numpy.ipynb		    utils.py
'path_config (1).py'		    Visualize_Data.ipynb


### Load path_config.py 
- we are already in source so we can just load this file without changing directory

In [None]:
# Open Colab's upload dialog; the result maps each uploaded file name to its contents.
from google.colab import files

uploaded = files.upload()

# Report the name and size of every uploaded file.
for fn, contents in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(contents)))

Saving path_config.py to path_config (2).py
User uploaded file "path_config.py" with length 228 bytes


### Load the Whole brain normalized masked all subject 2d pickle file 
- go to Drive outside of this notebook
  - create a folder named data
  - upload 'whole_brain_all_norm_2d.pkl' - will take 5 hours but should remain on system without need to upload again
  - once uploaded, drag the file to the repo source/data directory

In [None]:
# List the current directory to check which notebooks and helper modules are present.
!ls

Access_Load_Data.ipynb		  path_config.py
All_subject_masked_labeled.ipynb  percent_signal_change.ipynb
data				  SVM.ipynb
__init__.py			  SVM_Whole_Brain.ipynb
Masking.ipynb			  utils.py
Mat_to_Numpy.ipynb		  Visualize_Data.ipynb


#### Import libraries


In [None]:
# Import libraries
!pip install boto3 nilearn
import pickle
from utils import *
from sklearn.svm import SVC
import numpy as np
import random
import cv2 as cv



### Load Data from AWS and create Train/Val/Test splits

In [None]:
%%time
pkl_file = "whole_brain_all_norm_2d.pkl" # normalized
#pkl_file = 'all_data_dictionary.pkl' #Unnormalized
bool_mat = False
data = access_load_data(pkl_file, bool_mat)

CPU times: user 51.4 s, sys: 55.7 s, total: 1min 47s
Wall time: 2min 28s


In [None]:
 #open path dictionary file to get subject ids
path = "data/data_path_dictionary.pkl"
# open_pickle comes from utils; the loaded dictionary keeps the subject IDs
# under the 'subject_ID' key.
data_path_dict = open_pickle(path)
subject_ids = data_path_dict['subject_ID']

# Randomly shuffle ids for train test val splits.
# Seeding first makes the shuffle, and therefore the splits, reproducible.
# random.shuffle reorders subject_ids in place.
random.seed(42)
random.shuffle(subject_ids)

In [None]:
# Split the shuffled subject IDs into train / validation / test groups.
# The boundaries are named so the split is defined in one place. The test group
# takes every remaining subject rather than a hard-coded upper index
# (the previous 44:53 slice pointed past the 52 available subjects).
N_TRAIN_SUBJECTS = 36
N_VAL_SUBJECTS = 8
val_end = N_TRAIN_SUBJECTS + N_VAL_SUBJECTS

train_ids = subject_ids[:N_TRAIN_SUBJECTS]
val_ids = subject_ids[N_TRAIN_SUBJECTS:val_end]
test_ids = subject_ids[val_end:]

# Fail early if there are too few subjects to fill all three groups.
assert train_ids and val_ids and test_ids, "not enough subjects for a train/val/test split"

print("Number of training examples: ", len(train_ids))
print("Number of validation examples: ", len(val_ids))
print("Number of testing examples: ", len(test_ids))
len(subject_ids)

36
8
8


52

In [None]:
def _collect_runs(data, ids):
    """Gather the matrices and label arrays stored for the given subjects.

    Parameters
    ----------
    data : mapping
        Maps each subject ID to an iterable of matrices, and the key
        f"{subject_id}_rt_labels" to the matching iterable of label arrays.
    ids : iterable
        Subject IDs to collect, in order.

    Returns
    -------
    tuple of (list, list)
        The matrices and the label arrays, both ordered subject by subject
        in the order they are stored for each subject.
    """
    X = [matrix for id_ in ids for matrix in data[id_]]
    y = [label for id_ in ids for label in data[f"{id_}_rt_labels"]]
    return X, y


# Build the X / y lists for each split with the same helper, so the three
# splits cannot drift apart through copy-paste edits.
X_train, y_train = _collect_runs(data, train_ids)
X_val, y_val = _collect_runs(data, val_ids)
X_test, y_test = _collect_runs(data, test_ids)
        
      

In [None]:
# Sanity check: every split must hold as many label arrays as data matrices.
# With 4 runs per subject, 52 subjects give 208 runs across the three splits.
print(
    f"length of Xtrain data: {len(X_train)} and length of ytrain data: {len(y_train)}",
    f"length of Xval data: {len(X_val)} and length of yval data: {len(y_val)}",
    f"length of Xtest data: {len(X_test)} and length of ytest data: {len(y_test)}",
    sep="\n",
)

length of Xtrain data: 144 and length of ytrain data: 144
length of Xval data: 32 and length of yval data: 32
length of Xtest data: 32 and length of ytest data: 32


In [None]:
%%time
X_train = np.array(X_train)
y_train = np.array(y_train)
print( "Xtrain data shape ", X_train.shape)
print( "ytrain data shape ", y_train.shape)


Xtrain data shape  (144, 84, 237979)
ytrain data shape  (144, 84)
CPU times: user 1.47 s, sys: 2.37 s, total: 3.84 s
Wall time: 3.82 s


In [None]:
%%time
X_val = np.array(X_val)
y_val = np.array(y_val)
print( "Xtrain data shape ", X_val.shape)
print( "ytrain data shape ", y_val.shape)

Xtrain data shape  (32, 84, 237979)
ytrain data shape  (32, 84)
CPU times: user 545 ms, sys: 572 ms, total: 1.12 s
Wall time: 1.11 s


In [None]:
%%time
X_test = np.array(X_test)
y_test = np.array(y_test)
print( "Xtrain data shape ", X_test.shape)
print( "ytrain data shape ", y_test.shape)

Xtrain data shape  (32, 84, 237979)
ytrain data shape  (32, 84)
CPU times: user 565 ms, sys: 520 ms, total: 1.08 s
Wall time: 1.08 s


In [None]:
%%time
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)
print( "Xtrain data shape after concantenation ", X_train.shape)
print( "ytrain data shape after concantenation ", y_train.shape)

Xtrain data shape after concantenation  (12096, 237979)
ytrain data shape after concantenation  (12096,)
CPU times: user 753 ms, sys: 2.72 s, total: 3.47 s
Wall time: 3.45 s


In [None]:
%%time
X_val = np.concatenate(X_val)
y_val = np.concatenate(y_val)
print( "Xtrain data shape after concantenation ", X_val.shape)
print( "ytrain data shape after concantenation ", y_val.shape)

Xtrain data shape after concantenation  (2688, 237979)
ytrain data shape after concantenation  (2688,)
CPU times: user 519 ms, sys: 507 ms, total: 1.03 s
Wall time: 1.02 s


In [None]:
%%time
X_test = np.concatenate(X_test)
y_test = np.concatenate(y_test)
print( "Xtrain data shape after concantenation ", X_test.shape)
print( "ytrain data shape after concantenation ", y_test.shape)

Xtrain data shape after concantenation  (2688, 237979)
ytrain data shape after concantenation  (2688,)
CPU times: user 513 ms, sys: 517 ms, total: 1.03 s
Wall time: 1.02 s


## Set up SVM Model

#### SKlearn model

In [None]:
%%time
clf = SVC()
clf.fit(X_test, y_test)

CPU times: user 36min 23s, sys: 8min 45s, total: 45min 8s
Wall time: 5min 46s


In [None]:
%%time
# Predict a label for every row of the validation split with the fitted classifier.
y_pred = clf.predict(X_val)


In [None]:
from sklearn.metrics import accuracy_score

# Fraction of validation rows whose predicted label matches the true label.
val_accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", val_accuracy)

#### LibSVM

#### OpenCV library
- https://docs.opencv.org/3.4/d1/d73/tutorial_introduction_to_svm.html


In [None]:
%%time

# Train the SVM using openCV
svm = cv.ml.SVM_create()
svm.setType(cv.ml.SVM_C_SVC)
svm.setKernel(cv.ml.SVM_LINEAR)
svm.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
svm.train(X_train, cv.ml.ROW_SAMPLE, y_train)
