In [5]:
import scipy.io
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.svm import SVC
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier

In [6]:
def load_data(file = 'data-starplus-04847-v7.mat'):
    """Read one subject's MATLAB ``.mat`` file.

    Parameters
    ----------
    file : str
        Path handed to ``scipy.io.loadmat``; defaults to subject 04847.

    Returns
    -------
    The object produced by ``scipy.io.loadmat`` for ``file``.
    """
    return scipy.io.loadmat(file)

In [7]:
def load_metadata(data):
    """Print a short summary of a subject's metadata and hand it back.

    Parameters
    ----------
    data : mapping
        Loaded subject data; only its ``'meta'`` entry is read.

    Returns
    -------
    The ``data['meta']`` object, unchanged.
    """
    meta = data['meta']
    print("the study subject:", meta['study'])
    print("the number of trials:", meta['ntrials'])
    # The ROI names are stored as one underscore-separated string.
    roi_names = meta['roi'][0][0][0].split('_')
    print("the rois in the data:", roi_names)
    return meta

info: This variable defines the experiment in terms of a sequence of 'trials'.
'info' is a 1x54 struct array, describing the 54 time intervals, or trials.
Most of these time intervals correspond to trials during which the subject views
a single picture and a single sentence, and presses a button to indicate whether
the sentence correctly describes the picture.  Other time intervals correspond
to rest periods.  The relevant fields of info are illustrated in the following
example:

info(18)
 mint: 894
 maxt: 948
 cond: 2       
 firstStimulus: 'P'
 sentence: ''It is true that the star is below the plus.''
 sentenceRel: 'below'
 sentenceSym1: 'star'
 sentenceSym2: 'plus'
 img: sap
 actionAnswer: 0
 actionRT: 3613

info.mint gives the time of the first image in the interval (the minimum time)

info.maxt gives the time of the last image in the interval (the maximum time)

info.cond has possible values 0,1,2,3.  Cond=0 indicates the data in this
segment should be ignored. Cond=1 indicates the segment is a rest, or fixation
interval.  Cond=2 indicates the interval is a sentence/picture trial in which
the sentence is not negated.  Cond=3 indicates the interval is a
sentence/picture trial in which the sentence is negated.

info.firstStimulus: is either 'P' or 'S', indicating whether this trial was
obtained during the session in which Pictures were presented before sentences,
or during the session in which Sentences were presented before pictures.  The
first 27 trials have firstStimulus='P', the remainder have firstStimulus='S'.
Note this value is present even for trials that are rest trials.  You can pick
out the trials for which sentences and pictures were presented by selecting just
the trials with info.cond=2 or info.cond=3.

info.sentence gives the sentence presented during this trial.  If none, the
value is '' (the empty string).  The fields info.sentenceSym1,
info.sentenceSym2, and info.sentenceRel describe the two symbols mentioned in
the sentence, and the relation between them.

info.img describes the image presented during this trial.  For example, 'sap'
means the image contained a 'star above plus'.  Each image has two tokens, where
one is above the other.  The possible tokens are star (s), plus (p), and dollar
(d).

info.actionAnswer: has values -1 or 0.  A value of 0 indicates the subject is
expected to press the answer button during this trial (either the 'yes' or 'no'
button to indicate whether the sentence correctly describes the picture).  A
value of -1 indicates it is inappropriate for the subject to press the answer
button during this trial (i.e., it is a rest, or fixation trial).

info.actionRT: gives the reaction time of the subject, measured as the time at
which they pressed the answer button, minus the time at which the second
stimulus was presented.  Time is in milliseconds.  If the subject did not press
the button at all, the value is 0.

In [8]:
# Keep only the ROIs that are relevant and give us the best performance
def check_ROI_relevancy(data_meta, voxel_index):
    """Tell whether a voxel belongs to one of the selected ROIs.

    Parameters
    ----------
    data_meta : mapping
        Subject metadata; the ROI label of voxel ``v`` is read from
        ``data_meta['colToROI'][0][0][v][0][0]``.
    voxel_index : int
        Index of the voxel to look up.

    Returns
    -------
    bool
        True when the voxel's ROI label is in the selected set.
    """
    selected_rois = ('CALC', 'LIPL', 'LT', 'LTRIA', 'LOPER', 'LIPS', 'LDLPFC')
    voxel_roi = data_meta['colToROI'][0][0][voxel_index][0][0]
    return voxel_roi in selected_rois

In [9]:
# prepare voxel list of ROIs
def prepare_ROI_voxel_list(data):
    """Return the indices of all voxels that lie in a selected ROI.

    The number of voxels is taken from the length of the subject's own
    ``colToROI`` table instead of a fixed count, because subjects have
    different numbers of voxels. A fixed count drops trailing voxels for
    larger subjects and raises ``IndexError`` for smaller ones.

    Parameters
    ----------
    data : mapping
        Loaded subject data; only ``data['meta']`` is read.

    Returns
    -------
    list of int
        Voxel indices, in ascending order, for which
        ``check_ROI_relevancy`` is true.
    """
    data_meta = data['meta']
    n_voxels = len(data_meta['colToROI'][0][0])
    return [i for i in range(n_voxels) if check_ROI_relevancy(data_meta, i)]

In [129]:
# function to obtain the voxel stimulus data restricted to the selected ROIs
def get_trial_data_for_stimulus_with_ROI_selection(data, max_voxels=1678):
    """Collect stimulus-period images (ROI voxels only) and their labels.

    For every sentence/picture trial (``cond`` 2 or 3) that has more than
    25 images, the 16 images listed in ``stimulus_period`` are taken, each
    restricted to the voxels returned by ``prepare_ROI_voxel_list`` and
    truncated to at most ``max_voxels`` columns.

    Parameters
    ----------
    data : mapping
        Loaded subject data with ``'data'``, ``'info'`` and ``'meta'`` entries.
    max_voxels : int or None, optional
        Upper bound on the number of ROI voxels kept per image (default 1678,
        the value previously hard-coded). ``None`` keeps all of them.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked images, one row per image, and the matching 0/1 labels.
    """
    data_voxels = data['data']
    data_info = data['info']
    # The first stimulus (sentence or picture) was presented at the beginning of the trial (image=1).
    # Four seconds later (image=9) the stimulus was removed, replaced by a blank screen.
    # Four seconds later (image=17) the second stimulus was presented. This remained on the screen
    # for four seconds, or until the subject pressed the mouse button, whichever came first.
    # A rest period of 15 seconds (30 images) was added after the second stimulus was removed from
    # the screen. Thus, each trial lasted approximately 27 seconds (approximately 54 images).
    stimulus_period = [1,2,3,4,5,6,7,8,17,18,19,20,21,22,23,24]
    # Labels for the 16 images of a trial, by which stimulus came first.
    condition_s = [1]*8+[0]*8
    condition_p = [0]*8+[1]*8
    print(data_voxels[1][0].shape)
    print(data['meta']['colToROI'][0][0][8][0][0])
    voxel_list = prepare_ROI_voxel_list(data)
    print(len(voxel_list))
    print("loop",len(data_voxels))
    # Truncate once up front; equivalent to slicing every selected row afterwards.
    voxel_idx = np.asarray(voxel_list[:max_voxels], dtype=int)

    out = []
    condition = []
    # NOTE(review): the loop starts at 1, so trial index 0 is never examined — confirm intended.
    for i in range(1, len(data_voxels)):
        cond = data_info['cond'][0][i][0][0]
        trial = data_voxels[i][0]
        # The length check must apply to BOTH conditions. The previous
        # `a or b and c` form skipped it for cond == 2, so a short trial
        # failed when indexed with stimulus_period.
        if cond in (2, 3) and trial.shape[0] > 25:
            out.extend(trial[stimulus_period][:, voxel_idx])
            # Trials below index 27 get the picture-first labels, the rest sentence-first.
            if i < 27:
                condition += condition_p
            else:
                condition += condition_s

    X = np.array(out)
    y = np.array(condition)
    # Report the per-image feature count (0 when no trial qualified) and the label count.
    print(X.shape[1] if X.ndim == 2 else 0)
    print(len(y))
    return X, y


In [130]:
# function to obtain the voxel stimulus data (all voxels, no ROI selection)
def get_trial_data_for_stimulus(data, max_samples=770):
    """Collect stimulus-period images (all voxels) and their labels.

    For every sentence/picture trial (``cond`` 2 or 3) that has more than
    25 images, the 16 images listed in ``stimulus_period`` are taken.

    Parameters
    ----------
    data : mapping
        Loaded subject data with ``'data'``, ``'info'`` and ``'meta'`` entries.
    max_samples : int or None, optional
        Upper bound on the number of images/labels returned (default 770,
        the value previously hard-coded). ``None`` keeps all of them.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked images, one row per image, and the matching 0/1 labels.
    """
    data_voxels = data['data']
    data_info = data['info']
    # Show the condition of the last trial (previously hard-coded as index 53).
    print(data_info['cond'][0][-1][0][0])
    # The first stimulus (sentence or picture) was presented at the beginning of the trial (image=1).
    # Four seconds later (image=9) the stimulus was removed, replaced by a blank screen.
    # Four seconds later (image=17) the second stimulus was presented. This remained on the screen
    # for four seconds, or until the subject pressed the mouse button, whichever came first.
    # A rest period of 15 seconds (30 images) was added after the second stimulus was removed from
    # the screen. Thus, each trial lasted approximately 27 seconds (approximately 54 images).
    stimulus_period = [1,2,3,4,5,6,7,8,17,18,19,20,21,22,23,24]
    # Labels for the 16 images of a trial, by which stimulus came first.
    condition_s = [1]*8+[0]*8
    condition_p = [0]*8+[1]*8
    print(data_voxels[1][0].shape)
    print(data['meta']['colToROI'][0][0][8][0][0])

    out = []
    condition = []
    # NOTE(review): the loop starts at 1, so trial index 0 is never examined — confirm intended.
    for i in range(1, len(data_voxels)):
        cond = data_info['cond'][0][i][0][0]
        trial = data_voxels[i][0]
        # The length check must apply to BOTH conditions. The previous
        # `a or b and c` form skipped it for cond == 2, so a short trial
        # failed when indexed with stimulus_period.
        if cond in (2, 3) and trial.shape[0] > 25:
            out.extend(trial[stimulus_period])
            # Trials below index 27 get the picture-first labels, the rest sentence-first.
            if i < 27:
                condition += condition_p
            else:
                condition += condition_s

    print(len(out))
    print(len(condition))
    out = out[:max_samples]
    condition = condition[:max_samples]
    return np.array(out),np.array(condition)


In [131]:
def SVC_caller(X, conditions):
    """Fit an ANOVA feature selection + polynomial SVC pipeline and score it.

    The samples are split once into train (67%) and test (33%) sets with a
    fixed seed. The pipeline is fitted on the training part and its accuracy
    on the held-out part is printed and returned.

    Parameters
    ----------
    X : array-like
        Sample matrix passed to ``train_test_split``, one row per sample.
    conditions : array-like
        Class label for each row of ``X``.

    Returns
    -------
    float
        Accuracy of the fitted pipeline on the held-out test split.
    """
    svc = SVC(kernel='poly')

    # Dimension reduction: classical univariate feature selection based on
    # the ANOVA F-test, keeping the top 5% of features. A percentile is used
    # so the selection does not depend on the resolution of the data.
    feature_selection = SelectPercentile(f_classif, percentile=5)

    # NOTE(review): this is a random row-wise split. If rows are consecutive
    # images of the same trial, related samples can land on both sides —
    # confirm whether a group-aware split is wanted.
    X_train, X_test, y_train, y_test = train_test_split(
        X, conditions, test_size=0.33, random_state=78)

    # Plug the feature selection and the classifier together in a pipeline
    # that performs the two operations successively. The selection is fitted
    # on the training split only.
    anova_svc = Pipeline([('anova', feature_selection), ('svc', svc)])
    anova_svc.fit(X_train, y_train)
    score = anova_svc.score(X_test, y_test)

    # Print the results
    print("Classification accuracy: %.4f / Chance level: %f" %
          (score, 1. / 2))
    # Return the accuracy so callers can use it, not just read it.
    return score
    

In [132]:
def Ada_boost(X, y, random_state=78):
    """Cross-validate an AdaBoost classifier and report its mean accuracy.

    Parameters
    ----------
    X : array-like
        Sample matrix, one row per sample.
    y : array-like
        Class label for each row of ``X``.
    random_state : int or None, optional
        Seed forwarded to ``AdaBoostClassifier`` so repeated runs give the
        same result (default 78, the seed used for the SVC split). Pass
        ``None`` for an unseeded run.

    Returns
    -------
    float
        Mean accuracy over the 5 cross-validation folds.
    """
    clf = AdaBoostClassifier(n_estimators=100, random_state=random_state)
    scores = cross_val_score(clf, X, y, cv=5)
    mean_score = scores.mean()
    print("Classification accuracy: %.4f / Chance level: %f" %
          (mean_score, 1. / 2))
    return mean_score

In [136]:
def SVC_on_multiple_subjects(subject_file, subjects=None):
    """Pool the ROI stimulus data of several subjects and run the SVC on it.

    Parameters
    ----------
    subject_file : str
        Not used: the set of subjects comes from ``subjects``. Kept so
        existing calls keep working.
    subjects : sequence of str or None, optional
        Paths of the subject ``.mat`` files to pool. ``None`` (default)
        uses the six StarPlus subject files listed below.

    Raises
    ------
    ValueError
        If ``subjects`` is empty.
    """
    if subjects is None:
        subjects = ['data-starplus-04847-v7.mat','data-starplus-05710-v7.mat', 'data-starplus-04799-v7.mat', 'data-starplus-04820-v7.mat', 'data-starplus-05675-v7.mat', 'data-starplus-05680-v7.mat']
    if len(subjects) == 0:
        raise ValueError("subjects must contain at least one file")

    # Gather per-subject arrays and concatenate once at the end, instead of
    # re-copying the growing arrays on every iteration.
    features = []
    labels = []
    for file in subjects:
        data = load_data(file)
        load_metadata(data)  # called for its printed summary only
        X_out, y_out = get_trial_data_for_stimulus_with_ROI_selection(data)
        features.append(X_out)
        labels.append(y_out)
    X = np.concatenate(features, axis=0)
    y = np.concatenate(labels, axis=0)

    print("Extracted stimulus data for the trial: shape - ",(X.shape), type(X))
    print("Extracted stimulus label for the trial: shape - ",(len(y)), type(y))
    print("-----------------------------------------------------------------\n")
    SVC_caller(X, y)

In [137]:
def AdaBoost_on_single_subject(subject_file):
    """Run the AdaBoost evaluation on one subject's ROI stimulus data.

    Loads ``subject_file``, prints its metadata summary, extracts the
    ROI-restricted stimulus images with their labels, prints their sizes
    and passes them to ``Ada_boost``.

    Parameters
    ----------
    subject_file : str
        Path of the subject's ``.mat`` file.
    """
    subject_data = load_data(file = subject_file)
    load_metadata(subject_data)  # called for its printed summary only

    features, labels = get_trial_data_for_stimulus_with_ROI_selection(subject_data)
    separator = "-----------------------------------------------------------------\n"
    print("Extracted stimulus data for the trial: shape - ", features.shape, type(features))
    print("Extracted stimulus label for the trial: shape - ", len(labels), type(labels))
    print(separator)
    Ada_boost(features, labels)
    

In [138]:
# Run the feature-selection + SVC evaluation on the pooled subjects.
# NOTE: the file name passed here is not used by the function; the pooled
# subjects come from the list defined inside SVC_on_multiple_subjects.
SVC_on_multiple_subjects('data-starplus-04847-v7.mat')

the study subject: [[array(['data-starplus'], dtype='<U13')]]
the number of trials: [[array([[54]], dtype=uint8)]]
the rois in the data: ['CALC', 'LFEF', 'LIPL', 'LIT', 'LPPREC', 'LSPL', 'LTRIA', 'RFEF', 'RIPS', 'ROPER', 'RSGA', 'RT', 'SMA', 'LDLPFC', 'LIPS', 'LOPER', 'LSGA', 'LT', 'RDLPFC', 'RIPL', 'RIT', 'RPPREC', 'RSPL', 'RTRIA']
(55, 4698)
LDLPFC
1678
loop 54
1678
640


  after removing the cwd from sys.path.


the study subject: [[array(['data-starplus'], dtype='<U13')]]
the number of trials: [[array([[54]], dtype=uint8)]]
the rois in the data: ['CALC', 'LFEF', 'LIPL', 'LIT', 'LPPREC', 'LSPL', 'LTRIA', 'RFEF', 'RIPS', 'ROPER', 'RSGA', 'RT', 'SMA', 'LDLPFC', 'LIPS', 'LOPER', 'LSGA', 'LT', 'RDLPFC', 'RIPL', 'RIT', 'RPPREC', 'RSPL', 'RTRIA']
(55, 4634)
LDLPFC
1883
loop 54
1678
640
the study subject: [[array(['data-starplus'], dtype='<U13')]]
the number of trials: [[array([[54]], dtype=uint8)]]
the rois in the data: ['CALC', 'LFEF', 'LIPL', 'LIT', 'LPPREC', 'LSPL', 'LTRIA', 'RFEF', 'RIPS', 'ROPER', 'RSGA', 'RT', 'SMA', 'LDLPFC', 'LIPS', 'LOPER', 'LSGA', 'LT', 'RDLPFC', 'RIPL', 'RIT', 'RPPREC', 'RSPL', 'RTRIA']
(55, 4949)
RDLPFC
1812
loop 54
1678
640
the study subject: [[array(['data-starplus'], dtype='<U13')]]
the number of trials: [[array([[54]], dtype=uint8)]]
the rois in the data: ['CALC', 'LFEF', 'LIPL', 'LIT', 'LPPREC', 'LSPL', 'LTRIA', 'RFEF', 'RIPS', 'ROPER', 'RSGA', 'RT', 'SMA', 'LDLPFC'

In [139]:
# Run the cross-validated AdaBoost evaluation on subject 04847 only.
AdaBoost_on_single_subject('data-starplus-04847-v7.mat')

the study subject: [[array(['data-starplus'], dtype='<U13')]]
the number of trials: [[array([[54]], dtype=uint8)]]
the rois in the data: ['CALC', 'LFEF', 'LIPL', 'LIT', 'LPPREC', 'LSPL', 'LTRIA', 'RFEF', 'RIPS', 'ROPER', 'RSGA', 'RT', 'SMA', 'LDLPFC', 'LIPS', 'LOPER', 'LSGA', 'LT', 'RDLPFC', 'RIPL', 'RIT', 'RPPREC', 'RSPL', 'RTRIA']
(55, 4698)
LDLPFC
1678
loop 54
1678
640
Extracted stimulus data for the trial: shape -  (640, 1678) <class 'numpy.ndarray'>
Extracted stimulus label for the trial: shape -  640 <class 'numpy.ndarray'>
-----------------------------------------------------------------



  after removing the cwd from sys.path.


Classification accuracy: 0.5484 / Chance level: 0.500000
