Update February 21st 2019
--
We re-ran all classification experiments after low-pass filtering the evoked response (30 Hz cutoff), and we selected features using K-best with 40 electrodes before training the classifiers. 

We train each classifier on one condition and test it on either the same or the other condition (e.g. Train Regular, Test Regular), which is why we get four graphs. 

It would be interesting to see whether the two groups differ in the extent to which their classifiers are able to generalize across conditions. 

For instance, one interesting hypothesis to test would be : 

- Classifiers trained on controls do generalize from the pattern trained in Regular when tested in Irregular, or the opposite. 
- Classifiers trained on Patients data are less/not able to generalize across conditions.


In [1]:
import numpy as np
from matplotlib import pyplot as plt 
%matplotlib inline 
import os 
from statsmodels.stats import multitest

# Directory of per-subject stats files for the K-best-40 run with filtering
# (directory name: results_fkbest40_filter).
statspath = '/home/nfarrugi/datasets/mpi_pd_cueing/results_fkbest40_filter/'
# Directory of per-subject stats files for the earlier K-best-40 run
# (presumably without the low-pass filter -- confirm against the pipeline).
statspath2 = '/home/nfarrugi/datasets/mpi_pd_cueing/results_fkbest40/'

Step 0 - preparing data 

fields for the saved stats files are : 
- scores
- proba
- pval
- U

For each of them, layout is 4 (Regreg,RegIrreg,IrregReg,IrregIrreg) x CV x TestTime x TrainTime

Group files per subject type (patients or control) 

In [2]:
# List every per-subject stats file and split the file names into controls
# and patients using the subject code embedded in the name
# (e.g. "180802_07k1_formal_conditionwise.npz").
allsubj = os.listdir(statspath)

controls_id = []
patients_id = []

for cursubj in allsubj:
    # Slices (rather than plain indexing) evaluate to '' when the name is
    # too short, so stray entries in the directory are skipped instead of
    # raising IndexError.
    if cursubj[9:10] == 'k':
        # 'k' at position 9 marks a control subject.
        controls_id.append(cursubj)
    elif cursubj[10:11] == '1':
        # Any other group letter followed by '1' is treated as a patient.
        patients_id.append(cursubj)
            

In [5]:
# Sanity check: number of files assigned to the patient group.
len(patients_id)

20

In [6]:
# Sanity check: number of files assigned to the control group.
len(controls_id)

20

In [7]:
def diagonal_group(subjlist,statspath,thresh_unc = 0.01,timevec=None,showall=False,threshold_zeros=40,doplots=True,keepall=True,onlyregreg=False):
    """Aggregate "diagonal" (train time == test time) decoding stats over subjects.

    For every file in ``subjlist`` the ``scores`` and ``pval`` arrays are
    averaged over axis 1 (the CV axis in the layout described in this
    notebook: condition pair x CV x TestTime x TrainTime), the diagonal of
    each condition-pair matrix is extracted, and the diagonal p-values are
    thresholded twice: with Benjamini-Hochberg FDR correction and with the
    uncorrected threshold ``thresh_unc``.

    Parameters
    ----------
    subjlist : iterable of str
        File names (relative to ``statspath``) of the per-subject ``.npz``
        stats files.
    statspath : str
        Directory containing the files.
    thresh_unc : float
        Uncorrected p-value threshold.
    timevec : array-like or None
        x values for the diagnostic plots.  When None, the sample index is
        used instead.
    showall : bool
        If True, print the number of FDR-significant points of every
        subject.  Otherwise only subjects with fewer than
        ``threshold_zeros`` points are reported (and optionally plotted).
    threshold_zeros : int
        Number of FDR-significant points below which a subject is reported
        as having "zero or few" significant points.
    doplots : bool
        Plot the first FDR mask and the first diagonal score curve of every
        reported subject (only when ``showall`` is False).
    keepall : bool
        If True every subject enters the returned aggregates; otherwise
        only subjects with at least ``threshold_zeros`` significant points.
    onlyregreg : bool
        If True, only the first condition pair (index 0, Regular/Regular in
        the documented layout) is tested; the two returned count arrays
        then have a single row.

    Returns
    -------
    tuple of numpy.ndarray
        ``(fdr_counts, unc_counts, diag_roc)``: the FDR and uncorrected
        significance masks summed over the retained subjects, and the
        diagonal scores of the retained subjects stacked along a new first
        axis.

    Raises
    ------
    ValueError
        If no subject is retained (empty ``subjlist``, or every subject
        filtered out with ``keepall=False``).
    """
    if showall:
        print("Showing all subjects")
    else:
        print("Showing subjects with less than %d FDR significant points" % threshold_zeros)

    diag_sig_fdr_all = []
    diag_sig_unc_all = []
    diag_ROC_all = []
    for subj in subjlist:
        # Open the archive once and close it as soon as both arrays have
        # been reduced (the file used to be loaded twice and never closed).
        with np.load(os.path.join(statspath, subj)) as stats:
            curroc = stats['scores'].mean(axis=1)
            curpvals = stats['pval'].mean(axis=1)  # mean p-value across CV splits

        # One diagonal per condition pair.
        diag_pvals = np.stack([np.diag(curmat) for curmat in curpvals])
        diag_ROC = np.stack([np.diag(curmat) for curmat in curroc])

        # Index 0 is the Regular/Regular pair; the previous code used index
        # 2 here, which is Train Irregular / Test Regular.
        tested_pvals = diag_pvals[:1] if onlyregreg else diag_pvals

        # multipletests returns (reject, corrected p-values, ...); only the
        # boolean reject mask is needed.
        diag_sig_fdr = np.stack([
            multitest.multipletests(curdiag, method='fdr_bh')[0]
            for curdiag in tested_pvals
        ])
        diag_sig_unc = tested_pvals < thresh_unc

        nonzeros = np.sum(diag_sig_fdr)
        if showall:
            print("Subj %s number of non-zeros FDR : %d " %(subj,nonzeros))
        elif nonzeros < threshold_zeros:
            print("Subj %s has zero or few significant FDR : %d " %(subj,nonzeros))

            if doplots:
                # Fall back to the sample index when no time vector is given.
                xvals = np.arange(diag_ROC.shape[1]) if timevec is None else timevec
                plt.plot(xvals, diag_sig_fdr[0])
                plt.plot(xvals, diag_ROC[0])
                plt.show()

        # ">=" so that a subject with exactly threshold_zeros points, which
        # is not reported as "few" above, is not silently dropped either.
        if keepall or nonzeros >= threshold_zeros:
            diag_sig_fdr_all.append(diag_sig_fdr)
            diag_sig_unc_all.append(diag_sig_unc)
            diag_ROC_all.append(diag_ROC)

    if not diag_ROC_all:
        raise ValueError(
            "diagonal_group: no subject to aggregate (empty subjlist, or every "
            "subject is below threshold_zeros while keepall=False)"
        )

    return (
        np.sum(np.stack(diag_sig_fdr_all), axis=0),
        np.sum(np.stack(diag_sig_unc_all), axis=0),
        np.stack(diag_ROC_all),
    )

In [8]:
def plot_ROC_allcond(allscores,timepoints,figtitle='Default Title'):
    """Show the four train/test condition score matrices on a 2x2 grid.

    ``allscores`` is averaged over axis 1 and the first four entries along
    axis 0 are each drawn with ``matshow`` on their own panel, using
    ``timepoints`` as the image extent and a fixed 0.2-0.8 colour range.
    ``figtitle`` becomes the figure super-title.  Returns the figure.
    """
    panel_titles = (
        'Train Regular Test Regular',
        'Train Regular Test Irregular',
        'Train Irregular Test Regular',
        'Train Irregular Test Irregular',
    )

    fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(10, 10))

    # Average once; every panel reads one slice of the result.
    mean_scores = allscores.mean(axis=1)

    for idx, panel in enumerate(axes.ravel()):
        image = panel.matshow(
            mean_scores[idx],
            cmap='RdBu_r',
            vmin=0.2,
            vmax=0.8,
            origin='lower',
            extent=timepoints,
        )
        # Mark time zero on both axes.
        panel.axhline(0., color='k')
        panel.axvline(0., color='k')
        # matshow puts the x ticks on top; move them back to the bottom.
        panel.xaxis.set_ticks_position('bottom')
        panel.set_xlabel('Testing Time (s)')
        panel.set_ylabel('Training Time (s)')
        panel.set_title(panel_titles[idx])
        plt.colorbar(image, ax=panel)

    fig.suptitle(figtitle)
    return fig

In [9]:
def plot_ROC_group(subjlist,statspath,timepoints=None):
    """Draw the 2x2 temporal-generalization figure of every subject.

    Parameters
    ----------
    subjlist : iterable of str
        File names (relative to ``statspath``) of the per-subject ``.npz``
        stats files; each name is also used as the figure title.
    statspath : str
        Directory containing the files.
    timepoints : array-like or None
        Image extent forwarded to ``plot_ROC_allcond``.  Defaults to the
        extent previously hard-coded here, (-0.05, 0.52, -0.05, 0.52).
    """
    if timepoints is None:
        timepoints = np.array([-0.05      ,  0.52000002, -0.05      ,  0.52000002])

    for subj in subjlist:
        # Only the scores are plotted, so only they are read; the archive
        # is closed as soon as the array has been extracted.
        with np.load(os.path.join(statspath, subj)) as stats:
            curroc = stats['scores']

        plot_ROC_allcond(curroc,timepoints,subj)
        
        
        

Part1
--

Individual plots of ROC curves for temporal generalization using evoked resp + PCA80perc to train classifiers (averaged over four consecutive trials)
--

In [10]:
#plot_ROC_group(controls_id,statspath)

In [11]:
#plot_ROC_group(patients_id,statspath)

Individual ROC curves for temporal generalization on evoked response to train classifiers without PCA ( just showing again previous results here to compare)
--

In [12]:
#plot_ROC_group(avcontrols_id,avstatspath)

In [13]:
#plot_ROC_group(avpatients_id,avstatspath)

In [14]:
def four_plots(timevec,subtitles,diag_sig,mytitle):
    """Plot the first four rows of ``diag_sig`` against ``timevec`` on a 2x2 grid.

    Panel ``k`` shows ``diag_sig[k]`` and is titled ``subtitles[k]``;
    ``mytitle`` is appended to the figure super-title.  The figure is shown
    and nothing is returned.
    """
    plt.figure(figsize=(10, 10))

    for panel in range(4):
        # Subplot numbers are 1-based.
        plt.subplot(2, 2, panel + 1)
        plt.plot(timevec, diag_sig[panel])
        plt.title(subtitles[panel])

    plt.suptitle('Diagonal decoding - %s' % mytitle)
    plt.show()

In [15]:
def four_errorplots(timevec,subtitles,diag_roc,mytitle):
    """Plot mean +/- standard deviation curves on a 2x2 grid.

    For each of the first four indices ``k`` along axis 1 of ``diag_roc``,
    the mean and standard deviation over axis 0 (presumably subjects --
    confirm against the caller) are drawn against ``timevec`` with an error
    bar every fifth point.  Panel ``k`` is titled ``subtitles[k]``;
    ``mytitle`` is appended to the figure super-title.
    """
    plt.figure(figsize=(10, 10))

    for panel in range(4):
        # Subplot numbers are 1-based.
        plt.subplot(2, 2, panel + 1)
        cond_scores = diag_roc[:, panel, :]
        center = cond_scores.mean(axis=0)
        spread = cond_scores.std(axis=0)
        plt.errorbar(timevec, center, yerr=spread, errorevery=5)
        plt.title(subtitles[panel])
        plt.grid(True)

    plt.suptitle('Diagonal decoding - %s' % mytitle)
    plt.show()

Part2
--

Summarizing statistics using the beta results
--

Step 1 - Analysis of "Diagonal" Classification

We take the p-values of the within-subject Mann-Whitney U test, average them across CV folds, and extract the diagonal (training time = testing time). We correct the resulting p-values using the Benjamini-Hochberg FDR correction, and sum the resulting significance vectors across all subjects. 

We also compute an uncorrected version, thresholded at $p < 0.01$ by default (the calls below pass a threshold of $0.05$ instead).

In [16]:
# Diagonal decoding statistics per group, read from `statspath`, with an
# uncorrected threshold of 0.05 and the per-subject plots disabled.
(diag_sig_fdr_controls,
 diag_sig_unc_controls,
 diag_ROC_controls) = diagonal_group(controls_id, statspath, thresh_unc=0.05, doplots=False)
(diag_sig_fdr_patients,
 diag_sig_unc_patients,
 diag_ROC_patients) = diagonal_group(patients_id, statspath, thresh_unc=0.05, doplots=False)

Showing subjects with less than 40 FDR significant points
Subj 180802_02k1_formal_conditionwise.npz has zero or few significant FDR : 0 
Showing subjects with less than 40 FDR significant points
Subj 180802_12c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_15c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_17c1_formal_conditionwise.npz has zero or few significant FDR : 3 
Subj 180802_11c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_14c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_04c1_formal_conditionwise.npz has zero or few significant FDR : 38 
Subj 180802_04m1_formal_conditionwise.npz has zero or few significant FDR : 0 


1 control and 7 patients don't have sig FDR. This is way better than K-best 40 alone! 

For K-best 40 only, we had 4 controls and 10 patients. 

In [17]:
# Same analysis on the files in `statspath2`, for comparison
# (uncorrected threshold 0.05, per-subject plots disabled).
(diag_sig_fdr_controls,
 diag_sig_unc_controls,
 diag_ROC_controls) = diagonal_group(controls_id, statspath2, thresh_unc=0.05, doplots=False)
(diag_sig_fdr_patients,
 diag_sig_unc_patients,
 diag_ROC_patients) = diagonal_group(patients_id, statspath2, thresh_unc=0.05, doplots=False)

Showing subjects with less than 40 FDR significant points
Subj 180802_07k1_formal_conditionwise.npz has zero or few significant FDR : 19 
Subj 180802_02k1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_11k1_formal_conditionwise.npz has zero or few significant FDR : 25 
Subj 180802_18k1_formal_conditionwise.npz has zero or few significant FDR : 21 
Showing subjects with less than 40 FDR significant points
Subj 180802_12c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_15c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_17c1_formal_conditionwise.npz has zero or few significant FDR : 3 
Subj 180802_11c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_14c1_formal_conditionwise.npz has zero or few significant FDR : 0 
Subj 180802_04c1_formal_conditionwise.npz has zero or few significant FDR : 2 
Subj 180802_01m1_formal_conditionwise.npz has zero or few significant FDR : 20 
Subj 180802

The following cells output, for every subject, the number of significant FDR-corrected within-subject Mann-Whitney tests (to give an idea of how many significant values we get for the subjects that work well)

With Kbest AND PCA  

In [18]:
# Print the number of FDR-significant points of every subject in
# `statspath`; the stacked diagonal scores are not needed here.
(diag_sig_fdr_controls,
 diag_sig_unc_controls,
 _) = diagonal_group(controls_id, statspath, thresh_unc=0.05, showall=True)
(diag_sig_fdr_patients,
 diag_sig_unc_patients,
 _) = diagonal_group(patients_id, statspath, thresh_unc=0.05, showall=True)

Showing all subjects
Subj 180802_07k1_formal_conditionwise.npz number of non-zeros FDR : 89 
Subj 180802_04k1_formal_conditionwise.npz number of non-zeros FDR : 481 
Subj 180802_13k1_formal_conditionwise.npz number of non-zeros FDR : 402 
Subj 180802_05k1_formal_conditionwise.npz number of non-zeros FDR : 241 
Subj 180802_06k1_formal_conditionwise.npz number of non-zeros FDR : 87 
Subj 180802_20k1_formal_conditionwise.npz number of non-zeros FDR : 376 
Subj 180802_09k1_formal_conditionwise.npz number of non-zeros FDR : 192 
Subj 180802_08k1_formal_conditionwise.npz number of non-zeros FDR : 453 
Subj 180802_02k1_formal_conditionwise.npz number of non-zeros FDR : 0 
Subj 180802_12k1_formal_conditionwise.npz number of non-zeros FDR : 317 
Subj 180802_11k1_formal_conditionwise.npz number of non-zeros FDR : 110 
Subj 180802_01k1_formal_conditionwise.npz number of non-zeros FDR : 555 
Subj 180802_14k1_formal_conditionwise.npz number of non-zeros FDR : 223 
Subj 180802_15k1_formal_conditionw

With K-best only 

In [19]:
# Print the number of FDR-significant points of every subject in
# `statspath2`; the stacked diagonal scores are not needed here.
(diag_sig_fdr_controls,
 diag_sig_unc_controls,
 _) = diagonal_group(controls_id, statspath2, thresh_unc=0.05, showall=True)
(diag_sig_fdr_patients,
 diag_sig_unc_patients,
 _) = diagonal_group(patients_id, statspath2, thresh_unc=0.05, showall=True)

Showing all subjects
Subj 180802_07k1_formal_conditionwise.npz number of non-zeros FDR : 19 
Subj 180802_04k1_formal_conditionwise.npz number of non-zeros FDR : 241 
Subj 180802_13k1_formal_conditionwise.npz number of non-zeros FDR : 352 
Subj 180802_05k1_formal_conditionwise.npz number of non-zeros FDR : 129 
Subj 180802_06k1_formal_conditionwise.npz number of non-zeros FDR : 52 
Subj 180802_20k1_formal_conditionwise.npz number of non-zeros FDR : 302 
Subj 180802_09k1_formal_conditionwise.npz number of non-zeros FDR : 104 
Subj 180802_08k1_formal_conditionwise.npz number of non-zeros FDR : 305 
Subj 180802_02k1_formal_conditionwise.npz number of non-zeros FDR : 0 
Subj 180802_12k1_formal_conditionwise.npz number of non-zeros FDR : 170 
Subj 180802_11k1_formal_conditionwise.npz number of non-zeros FDR : 25 
Subj 180802_01k1_formal_conditionwise.npz number of non-zeros FDR : 445 
Subj 180802_14k1_formal_conditionwise.npz number of non-zeros FDR : 80 
Subj 180802_15k1_formal_conditionwis