# **Decoding hand shapes from fMRI data using different ML Models**

Decoding hand shapes from fMRI data

This project involves the implementation of classification models that classify different motor actions from fMRI data.

In this implementation, we have trained the decoding model and tested it.

We have also tested different machine learning architectures: logistic regression, support vector machines, random forests, and k-nearest neighbors.

Tasks done:
1. Loaded data using bdpy

2. Explored the metadata (different regions in the brain)

3. Built classification models (logistic regression, SVM, KNN and RF)

4. Tried cross-validation technique



## Description of the task

Can you decode the motor movement from brain activation data?
In this project, you will be implementing the classification models to classify different motor actions from fMRI data. You will need to expand on the model to accommodate for classifying between more than two classes.
The data is available [here](https://figshare.com/articles/dataset/Hand_shape_decoding_rock_paper_scissors_/6698780). In this implementation, you will learn how to properly train the decoding model and test it statistically. You can also test different machine learning architectures.





In [None]:
#Loading data and installing libraries
!wget -O data.h5 https://figshare.com/ndownloader/files/12227786

!pip install bdpy
!pip install seaborn_image

In [None]:
#Importing required libraries
import bdpy
from bdpy.util import get_refdata
import numpy as np
import pandas as pd
import seaborn_image as isns
import seaborn as sns
import ipywidgets as widgets  # interactive display
import matplotlib.pyplot as plt
from random import seed
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split, cross_val_score
import urllib
from PIL import Image
from torchvision import transforms
import torch


In [None]:
# Regions of interest: short ROI code -> selection expression handed to
# BData.select() when the voxels of that region are extracted.
# NOTE(review): the M1R expression starts with a space; it is kept verbatim
# here -- confirm that the selector ignores leading whitespace.
rois = dict(
    SMAR='VOX_SMA_RHand = 1',
    SMAL='VOX_SMA_LHand = 1',
    M1R=' VOX_M1_RHand = 1',
    M1L='VOX_M1_LHand = 1',
    CBR='VOX_CB_RHand = 1',
    CBL='VOX_CB_LHand = 1',
)

# Worker-initialisation hook that makes NumPy and Python's `random` follow the
# PyTorch seed, so that runs are reproducible.
def seed_worker(worker_id):
    """Seed NumPy's and Python's random generators from PyTorch's seed.

    Intended to be passed as ``worker_init_fn`` to a ``torch.utils.data.DataLoader``
    (this follows the recipe in the PyTorch reproducibility notes).

    Args:
        worker_id: Index of the worker, supplied by the DataLoader. It is not
            used directly: the seed is derived from ``torch.initial_seed()``
            instead, so the resulting streams depend on the seed given to
            PyTorch rather than always being 0, 1, 2, ...

    Returns:
        None. The function only has the side effect of re-seeding the global
        NumPy and ``random`` generators.
    """
    # torch.initial_seed() may be wider than 32 bits, while np.random.seed()
    # only accepts values in [0, 2**32 - 1], hence the modulo.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)  # previously seeded with worker_id, leaving worker_seed unused
    seed(worker_seed)  # `seed` is random.seed, imported at the top of the file

In [None]:
# Load the dataset that was downloaded to data.h5 and print its metadata
filename = 'data.h5'
voxel_data = bdpy.BData(filename)
voxel_data.show_metadata()

In [None]:
# Show the distinct values stored under 'Label' (the classes to decode)
np.unique(voxel_data.get('Label'))

array([2., 3., 4.])

In [None]:
# Display the full 'Label' array as stored in the dataset
voxel_data.get('Label')

In [None]:
# Pull the 'Label' data out of the loaded dataset on its own
data_labels = voxel_data.select('Label')

In [None]:
def extract_data(roi, number_of_categories=3, filename='data.h5'):
    """Load the voxel data of one region of interest together with its labels.

    Args:
        roi: Key of the module-level ``rois`` dictionary (e.g. ``'M1L'``).
        number_of_categories: Currently unused; kept so that existing calls
            which pass it keep working.
        filename: Path of the bdpy HDF5 file to read. Defaults to the
            ``'data.h5'`` file this function has always read.

    Returns:
        A ``(voxel_data, data_labels)`` tuple: the result of selecting the
        ROI's voxels, and the 'Label' selection floored and cast to ``int``.
        For the M1L ROI the notebook reports shapes (60, 569) and (60, 1),
        i.e. the labels come back as a column, not a flat vector.

    Raises:
        KeyError: If ``roi`` is not a key of ``rois``.
    """
    # Resolve the ROI first so a typo fails fast, before the file is read.
    try:
        selector = rois[roi]
    except KeyError:
        raise KeyError(
            f"Unknown ROI {roi!r}; expected one of {sorted(rois)}"
        ) from None

    data = bdpy.BData(filename)
    voxel_data = data.select(selector)
    # Labels are stored as floats (2., 3., 4. in this dataset); convert them to
    # integer class ids.
    data_labels = np.floor(data.select('Label')).astype(int)
    return voxel_data, data_labels

In [None]:
# Extract voxels and labels for the M1L ROI and report the shape of each array
X, y = extract_data(roi='M1L')
print(f"Data size: {X.shape}")
print(f"Label size: {y.shape}")

Data size: (60, 569)
Label size: (60, 1)


In [None]:
# Display the labels, which extract_data has floored and cast to integers
y

**ML Models**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit

**Logistic Regression**

In [None]:
# Logistic regression with an L2 penalty (shrinks the coefficients so that no
# single correlated feature dominates the prediction), scored on one 75/25 split.
# extract_data returns the labels as a column; np.ravel() hands scikit-learn the
# 1-D label vector it expects and avoids the DataConversionWarning.
X_train, X_test, y_train, y_test = train_test_split(X, np.ravel(y), test_size=0.25, random_state=42)
pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2'))

pipe.fit(X_train, y_train)  # the scaler's statistics are learned from the training split only

pipe.score(X_test, y_test)  # test data is scaled with the training statistics, so nothing leaks

  y = column_or_1d(y, warn=True)


0.8

**Logistic regression - CV**

In [None]:
# Mean accuracy over 5 random 75/25 shuffle splits for the ROI currently held
# in X, y. The labels are flattened because they are stored as a column, which
# would otherwise raise a DataConversionWarning in every fold.
pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2'))
cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
scores = cross_val_score(pipe, X, np.ravel(y), cv=cv).mean()
scores

In [None]:
# Hold-out accuracy of logistic regression for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy = pd.DataFrame(index=roi_list, columns=['accuracy'])
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2'))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    pipe.fit(X_train, y_train)
    accuracy.loc[roi, 'accuracy'] = pipe.score(X_test, y_test)

accuracy.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('Logistic Regression')
plt.show()  # the original referenced plt.show without calling it

**Logistic regression - CV**

In [None]:
# Cross-validated accuracy of logistic regression for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy_cv = pd.DataFrame(index=roi_list, columns=['accuracy'])
# The splitter has a fixed integer random_state, so one instance can be shared
# by all ROIs and still produce the same splits each time.
cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning in every fold.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2'))
    scores = cross_val_score(pipe, X, y, cv=cv).mean()
    accuracy_cv.loc[roi, 'accuracy'] = scores

# Plot the cross-validated scores (the original plotted the hold-out `accuracy`
# table from the previous cell by mistake).
accuracy_cv.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('Logistic Regression - CV')
plt.show()  # the original referenced plt.show without calling it

In [None]:
# Print the per-ROI hold-out accuracies, then their mean.
# accuracy.mean() reduces the whole DataFrame, so the f-string renders a Series
# (column name plus a dtype line) rather than a bare number.
print(accuracy)
print(f'the mean accuracy for logistic regression is: {accuracy.mean()}')

      accuracy
SMAR       0.8
SMAL       0.8
M1R        1.0
M1L        0.8
CBR   0.733333
CBL        0.8
the mean accuracy for logistic regression is: accuracy    0.822222
dtype: float64


In [None]:
# Print the per-ROI cross-validated accuracies, then their mean.
# accuracy_cv.mean() reduces the whole DataFrame, so the f-string renders a
# Series (column name plus a dtype line) rather than a bare number.
print(accuracy_cv)
print(f'the mean accuracy for logistic regression with CV is: {accuracy_cv.mean()}')

      accuracy
SMAR  0.586667
SMAL  0.586667
M1R   0.813333
M1L       0.72
CBR   0.613333
CBL   0.573333
the mean accuracy for logistic regression with CV is: accuracy    0.648889
dtype: float64





---


*   SMAR= Supplementary Motor Area Rt side
*   SMAL= Supplementary Motor Area Lt side
*   M1R=Primary Motor Cortex Rt side
*   M1L=Primary Motor Cortex Lt side
*   CBR=Cerebellum Rt side
*   CBL=Cerebellum Lt side



---



**KNN Classifier**

In [None]:
# Hold-out accuracy of a 7-nearest-neighbour classifier for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy = pd.DataFrame(index=roi_list, columns=['accuracy'])
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=7))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    pipe.fit(X_train, y_train)
    accuracy.loc[roi, 'accuracy'] = pipe.score(X_test, y_test)

accuracy.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('KNN Classifier')
plt.show()  # the original referenced plt.show without calling it

**KNN - CV**

In [None]:
# Cross-validated accuracy of a 7-nearest-neighbour classifier for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy_cv = pd.DataFrame(index=roi_list, columns=['accuracy'])
# The splitter has a fixed integer random_state, so one instance can be shared
# by all ROIs and still produce the same splits each time.
cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning in every fold.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=7))
    scores = cross_val_score(pipe, X, y, cv=cv).mean()
    accuracy_cv.loc[roi, 'accuracy'] = scores

# Plot the cross-validated scores (the original plotted the hold-out `accuracy`
# table and titled the figure 'Logistic Regression').
accuracy_cv.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('KNN Classifier - CV')
plt.show()  # the original referenced plt.show without calling it

In [None]:
# Print the per-ROI hold-out accuracies, then their mean.
# accuracy.mean() reduces the whole DataFrame, so the f-string renders a Series
# (column name plus a dtype line) rather than a bare number.
print(accuracy)
print(f'the mean accuracy for KNN is: {accuracy.mean()}')

     accuracy
SMAR      0.4
SMAL      0.4
M1R       0.8
M1L       0.6
CBR       0.6
CBL       0.6
the mean accuracy for KNN is: accuracy    0.566667
dtype: float64


In [None]:
# Print the per-ROI cross-validated accuracies, then their mean.
# accuracy_cv.mean() reduces the whole DataFrame, so the f-string renders a
# Series rather than a bare number.
print(accuracy_cv)
print(f'the mean accuracy for KNN classifier with CV is: {accuracy_cv.mean()}')

**Support Vector Classification**

In [None]:
# Hold-out accuracy of a linear SVM (C=2) for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy = pd.DataFrame(index=roi_list, columns=['accuracy'])
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), SVC(C=2, kernel='linear'))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    pipe.fit(X_train, y_train)
    accuracy.loc[roi, 'accuracy'] = pipe.score(X_test, y_test)

accuracy.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('SVM Classifier')
plt.show()  # the original referenced plt.show without calling it

**Support Vector Classification - CV**

In [None]:
# Cross-validated accuracy of a linear SVM (C=2) for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy_cv = pd.DataFrame(index=roi_list, columns=['accuracy'])
# The splitter has a fixed integer random_state, so one instance can be shared
# by all ROIs and still produce the same splits each time.
cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning in every fold.
    y = y.ravel()
    pipe = make_pipeline(StandardScaler(), SVC(C=2, kernel='linear'))
    scores = cross_val_score(pipe, X, y, cv=cv).mean()
    accuracy_cv.loc[roi, 'accuracy'] = scores

# Plot the cross-validated scores (the original plotted the hold-out `accuracy`
# table and titled the figure 'Logistic Regression').
accuracy_cv.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('SVM Classifier - CV')
plt.show()  # the original referenced plt.show without calling it

In [None]:
# Print the per-ROI hold-out accuracies, then their mean.
# accuracy.mean() reduces the whole DataFrame, so the f-string renders a Series
# (column name plus a dtype line) rather than a bare number.
print(accuracy)
print(f'the mean accuracy for SVM is: {accuracy.mean()}')

      accuracy
SMAR  0.733333
SMAL  0.733333
M1R        1.0
M1L        0.8
CBR        0.8
CBL   0.733333
the mean accuracy for SVM is: accuracy    0.8
dtype: float64


In [None]:
# Print the per-ROI cross-validated accuracies, then their mean.
# accuracy_cv.mean() reduces the whole DataFrame, so the f-string renders a
# Series (column name plus a dtype line) rather than a bare number.
print(accuracy_cv)
print(f'the mean accuracy for SVM with CV is: {accuracy_cv.mean()}')

      accuracy
SMAR  0.546667
SMAL  0.546667
M1R   0.826667
M1L   0.666667
CBR   0.586667
CBL   0.573333
the mean accuracy for SVM with CV is: accuracy    0.624444
dtype: float64


**Random Forest Classifier**

In [None]:
# Hold-out accuracy of a random forest for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy = pd.DataFrame(index=roi_list, columns=['accuracy'])
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning.
    y = y.ravel()
    # NOTE(review): the forest is built without a random_state, so these scores
    # are expected to vary between runs -- confirm whether that is intended.
    pipe = make_pipeline(StandardScaler(), RandomForestClassifier())
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    pipe.fit(X_train, y_train)
    accuracy.loc[roi, 'accuracy'] = pipe.score(X_test, y_test)

accuracy.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('Random Forest Classifier')
plt.show()  # the original referenced plt.show without calling it

**Random Forest Classifier - CV**

In [None]:
# Cross-validated accuracy of a random forest for every region of interest.
roi_list = ['SMAR', 'SMAL', 'M1R', 'M1L', 'CBR', 'CBL']
accuracy_cv = pd.DataFrame(index=roi_list, columns=['accuracy'])
# The splitter has a fixed integer random_state, so one instance can be shared
# by all ROIs and still produce the same splits each time.
cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
for roi in roi_list:
    X, y = extract_data(roi)
    # Labels come back as a column; flatten them so scikit-learn does not emit
    # a DataConversionWarning in every fold.
    y = y.ravel()
    # NOTE(review): the forest is built without a random_state, so these scores
    # are expected to vary between runs -- confirm whether that is intended.
    pipe = make_pipeline(StandardScaler(), RandomForestClassifier())
    scores = cross_val_score(pipe, X, y, cv=cv).mean()
    accuracy_cv.loc[roi, 'accuracy'] = scores

# Plot the cross-validated scores (the original plotted the hold-out `accuracy`
# table and titled the figure 'Logistic Regression').
accuracy_cv.plot(kind='bar')
plt.xlabel('Region of interest')  # the bars are indexed by ROI, not by hand shape
plt.ylabel('Accuracy')
plt.title('Random Forest Classifier - CV')
plt.show()  # the original referenced plt.show without calling it

In [None]:
# Print the per-ROI hold-out accuracies, then their mean.
# accuracy.mean() reduces the whole DataFrame, so the f-string renders a Series
# (column name plus a dtype line) rather than a bare number.
print(accuracy)
print(f'the mean accuracy for random forest is: {accuracy.mean()}')

      accuracy
SMAR  0.266667
SMAL  0.266667
M1R   0.733333
M1L        0.4
CBR   0.333333
CBL   0.533333
the mean accuracy for random forest is: accuracy    0.422222
dtype: float64


In [None]:
# Print the per-ROI cross-validated accuracies, then their mean.
# accuracy_cv.mean() reduces the whole DataFrame, so the f-string renders a
# Series (column name plus a dtype line) rather than a bare number.
print(accuracy_cv)
print(f'the mean accuracy for random forest with CV is: {accuracy_cv.mean()}')

      accuracy
SMAR  0.546667
SMAL      0.44
M1R        0.8
M1L   0.453333
CBR   0.466667
CBL   0.346667
the mean accuracy for random forest with CV is: accuracy    0.508889
dtype: float64


**END 😊**