In this exercise we will decode orientation using data collected for the class.  

In [3]:
import nipype.algorithms.modelgen as model   # model generation
import nipype.interfaces.fsl as fsl          # fsl
from nipype.interfaces.base import Bunch
import os,json,glob
import numpy
import nibabel
import nilearn.plotting
import sklearn.multiclass
from sklearn.svm import SVC
import sklearn.metrics
import sklearn.cross_validation
from nilearn.input_data import NiftiMasker
import scipy.stats
import random
import nilearn.datasets

%matplotlib inline
import matplotlib.pyplot as plt


datadir='orientation_data'

print('using data from %s'%datadir)

def get_orientation_data(datadir):
    groups=['All','V1','V2','V3']
    data={}
    for g in groups:
        yfile=os.path.join(datadir,'%s_groups.txt'%g)
        xfile=os.path.join(datadir,'%s_instances.txt'%g)
        data[g]={'X':numpy.loadtxt(xfile),'Y':numpy.loadtxt(yfile)}
    return data
odata=get_orientation_data(datadir)
        


using data from orientation_data


Fit a simple classifier using balanced 8-fold crossvalidation

In [26]:
def run_classifier(odata, shuffle=False,nfolds=8):
    groups=['All','V1','V2','V3']
    acc={}
    for g in groups:
        features=odata[g]['X']
        labels=odata[g]['Y'].copy()
        if shuffle:
            numpy.random.shuffle(labels)
        skf = sklearn.cross_validation.StratifiedKFold(labels, 8)
        pred=numpy.zeros(labels.shape[0])
        for train, test in skf:
            clf=sklearn.svm.SVC()
            clf.fit(features[train,:],labels[train])
            pred[test]=clf.predict(features[test,:])
        acc[g]=sklearn.metrics.accuracy_score(labels, pred)
    return acc

acc=run_classifier(odata)
print(acc)

{'V3': 0.50993377483443714, 'V2': 0.60264900662251653, 'All': 0.60927152317880795, 'V1': 0.55629139072847678}


Now let's see if this is better than we would predict by chance.  we will do this by randomly shuffling the labels and recording the accuracy score for each random run, and then comparing our actual score to that null distribution.  

NOTE: This will take a few minutes to complete.

In [27]:
groups=['All','V1','V2','V3']
nruns=1000
rand_acc=numpy.zeros((nruns,4))
for r in range(nruns):
    tmp=run_classifier(odata,shuffle=True)
    rand_acc[r,:]=[tmp['All'],tmp['V1'],tmp['V2'],tmp['V3']]

In [32]:
mean_acc=numpy.mean(rand_acc,0)

for i in range(len(groups)):
    print('%s voxels: %0.3f (p=%0.3f)'%(groups[i],acc[groups[i]],
                    1-scipy.stats.percentileofscore(rand_acc[i,:],acc[groups[i]])/100.))


All voxels: 0.609 (p=0.000)
V1 voxels: 0.556 (p=0.000)
V2 voxels: 0.603 (p=0.000)
V3 voxels: 0.510 (p=1.000)
