In [1]:
import scipy.io as sio
from scipy import stats
from sklearn import svm
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
import os, fnmatch
import pandas as pd
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis



In [2]:
def get_cross_val_score(X_train, Y_train, fold_val):
    """Cross-validate a shrinkage LDA classifier on one feature matrix.

    Parameters
    ----------
    X_train : array-like
        Feature matrix handed to the classifier.
    Y_train : array-like
        Labels; singleton dimensions are removed with ``np.squeeze`` before use.
    fold_val : int or cross-validation splitter
        Forwarded unchanged as the ``cv`` argument of both scikit-learn helpers.

    Returns
    -------
    tuple
        ``(scores, predictions)``: the result of ``cross_val_score`` followed
        by the result of ``cross_val_predict`` for the same estimator and data.
    """
    # LDA with the eigen solver; shrinkage is requested explicitly ('auto').
    lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
    targets = np.squeeze(Y_train)

    fold_scores = cross_val_score(lda, X_train, targets, cv=fold_val)
    out_of_fold_predictions = cross_val_predict(lda, X_train, targets, cv=fold_val)
    return fold_scores, out_of_fold_predictions

def load_time_sliced_data(file_name):
    """Load one .mat file and cut each task's data matrix into time bins.

    The file must contain the variables ``dataMatM``/``simVecM``/``NtimePointsM``
    (memory task) and ``dataMatP``/``simVecP``/``NtimePointsP`` (perception
    task). Each data matrix is split column-wise (``axis=1``) into as many
    sections as the matching ``NtimePoints*`` entry says. The original author's
    note gives 50 bins for perception and 32 for memory; the actual counts are
    read from the file.

    Returns
    -------
    tuple
        ``(X_data_M, Y_data_M, X_data_P, Y_data_P, N_Times_M, N_Times_P)`` where
        the ``X_*`` entries are lists of arrays (one per time bin), the ``Y_*``
        entries are the label arrays as stored in the file, and the ``N_*``
        entries are the bin counts taken from element ``[0, 0]``.
    """
    mat = sio.loadmat(file_name)

    # Bin counts are stored as 2-D arrays; take the single scalar entry.
    n_bins_memory = mat['NtimePointsM'][0, 0]
    n_bins_perception = mat['NtimePointsP'][0, 0]

    memory_bins = np.array_split(mat['dataMatM'], n_bins_memory, axis=1)
    perception_bins = np.array_split(mat['dataMatP'], n_bins_perception, axis=1)

    return (
        memory_bins,
        mat['simVecM'],
        perception_bins,
        mat['simVecP'],
        n_bins_memory,
        n_bins_perception,
    )
    

In [3]:
# Load every .mat file, run the classifier on each time bin with 8-fold
# cross-validation, and collect the actual and predicted labels.

dir_path = '../Data/'
out_dir = '../CSV_Files(Outputs)/'
n_folds = 8  # cross-validation folds used for every time bin

# os.listdir returns entries in arbitrary order; sorting keeps the order in
# which files are processed (and therefore the CSV row order) reproducible.
files = sorted(fnmatch.filter(os.listdir(dir_path), '*.mat'))

# Not filled in this cell; kept in case a later cell still refers to them.
accuracy_list = []
scores_M_std = list()
scores_P_std = list()

# Per-bin score rows: (file, bin index, mean accuracy, std of accuracy, t, p).
scores_M = list()
scores_P = list()

# Flat lists of predicted / actual labels over all files and time bins.
predicted_m = list()
label_m = list()
predicted_p = list()
label_p = list()


def _evaluate_time_bins(file_name, X_bins, Y_data, fold_val):
    """Cross-validate the classifier on every time bin of one task.

    Parameters
    ----------
    file_name : str
        Name stored in the first field of each score row.
    X_bins : sequence of arrays
        One feature matrix per time bin.
    Y_data : 2-D array
        Label array; its first column is used as the label vector.
    fold_val : int
        Number of folds forwarded to ``get_cross_val_score``.

    Returns
    -------
    tuple
        ``(rows, predicted, actual)`` where ``rows`` holds one
        ``(file, bin, mean score, std score, t statistic, p value)`` tuple per
        bin and ``predicted`` / ``actual`` are flat label lists over all bins.
    """
    labels = Y_data[:, 0]
    rows, predicted, actual = [], [], []
    for bin_index, X_bin in enumerate(X_bins):
        fold_scores, bin_predictions = get_cross_val_score(X_bin, Y_data, fold_val)
        # Compare two 1-D vectors. Passing the (n, 1) label array against 1-D
        # predictions leaves the outcome to SciPy's broadcasting rules and
        # returns an array instead of a single statistic.
        t_score, p_val = stats.ttest_ind(labels, np.ravel(bin_predictions))
        # extend() grows the lists in place instead of copying them every bin.
        predicted.extend(bin_predictions)
        actual.extend(labels)
        rows.append((file_name, bin_index, np.mean(fold_scores),
                     np.std(fold_scores), t_score, p_val))
    return rows, predicted, actual


for file in files:
    X_data_M, Y_data_M, X_data_P, Y_data_P, N_Times_M, N_Times_P = \
        load_time_sliced_data(os.path.join(dir_path, file))

    rows, predicted, actual = _evaluate_time_bins(file, X_data_M, Y_data_M, n_folds)
    scores_M.extend(rows)
    predicted_m.extend(predicted)
    label_m.extend(actual)

    rows, predicted, actual = _evaluate_time_bins(file, X_data_P, Y_data_P, n_folds)
    scores_P.extend(rows)
    predicted_p.extend(predicted)
    label_p.extend(actual)

df_m = pd.DataFrame({'predicted_m': predicted_m, 'label_m': label_m})
df_p = pd.DataFrame({'predicted_p': predicted_p, 'label_p': label_p})

# Create the output folder up front so the run does not fail at the very end.
os.makedirs(out_dir, exist_ok=True)
df_m.to_csv(os.path.join(out_dir, 'predicted_label_m_LDA.csv'))
df_p.to_csv(os.path.join(out_dir, 'predicted_label_p_LDA.csv'))



In [4]:
# Write the score rows held in scores_P / scores_M to CSV for later analysis.
_score_columns = ['file', 'index', 'accuracy', 'std-dev', 't-test', 'p-val']

df_P = pd.DataFrame(data=scores_P, columns=_score_columns)
df_M = pd.DataFrame(data=scores_M, columns=_score_columns)

# Perception table first, then memory.
for _frame, _csv_path in (
    (df_P, '../CSV_Files(Outputs)/perception_LDA_final.csv'),
    (df_M, '../CSV_Files(Outputs)/memory_LDA_final.csv'),
):
    _frame.to_csv(_csv_path)