In [1]:
import h5py
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
filepath = 'data/pca_data_N400T8.mat'

variables = {}
with h5py.File(filepath, 'r') as f:
    for k, v in f.items():
        variables[k] = np.array(v)
    
COEFFS = variables['COEFFS']
SCORES = variables['SCORES']
FCmeans = loadmat('data/means.mat')['means']

del variables

In [2]:
num_comps = 3200 # Choose number of components to include

In [3]:
SCORES_temp, COEFFS_temp = SCORES, COEFFS
SCORES_temp[num_comps:,:], COEFFS_temp[:,num_comps:] = 0, 0

In [5]:
orig_mat = np.dot(COEFFS, SCORES) # use all PCA components to reconstruct matrix
orig_mat += FCmeans.transpose() # plug mean back in 
orig_mat.shape #400 subjects, 8 subjects, test, retest. order: 3200/3200 test retest, 400 subject split, 8 tasks within subject

(6400, 69751)

In [6]:
rest = np.arange(0, 6400, 8)
task = np.random.randint(1,8, size=len(rest))

In [7]:
subset_indices = np.zeros(rest.shape[0] + task.shape[0], dtype=rest.dtype)
subset_indices[::2] = rest
subset_indices[1::2] = rest + task

In [11]:
rest_task_mat = orig_mat[subset_indices,:]

In [12]:
task_vec = ['Rest', 'Task']
num_subjects = 400
labels = np.array(2*num_subjects*task_vec)

## Machine Learning Models

In [15]:
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [16]:
shuffle_indices = np.random.permutation(rest_task_mat.shape[0])
train_idx = shuffle_indices[:int(0.6*rest_task_mat.shape[0])]
val_idx = shuffle_indices[int(0.6*rest_task_mat.shape[0]):int(0.8*rest_task_mat.shape[0])]
test_idx = shuffle_indices[int(0.8*rest_task_mat.shape[0]):]

### Standardize, PCA (or use sparse autoencoder) for dimensionality reduction

In [29]:
task_mat = StandardScaler().fit_transform(rest_task_mat)
pca = PCA(n_components = 100)
rest_task_mat_pca = pca.fit_transform(rest_task_mat)

TypeError: __init__() got an unexpected keyword argument 'num_components'

In [23]:
train_data = rest_task_mat_pca[train_idx,:]
val_data = rest_task_mat_pca[val_idx,:]
test_data = rest_task_mat_pca[test_idx,:]

## Support Vector Machine

In [24]:
clf = svm.SVC()
clf.fit(train_data, labels[train_idx])

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [25]:
val_pred = clf.predict(val_data)
val_acc = accuracy_score(val_pred, labels[val_idx])

In [26]:
val_acc

0.565625

In [27]:
test_pred = clf.predict(test_data)
test_acc = accuracy_score(test_pred, labels[test_idx])

In [28]:
test_acc

0.4875