In [None]:
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import scipy.stats as stats
import scipy.io as sio
import os
import subprocess
import pickle
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC, SVR
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn.grid_search import GridSearchCV
from statsmodels.distributions.empirical_distribution import ECDF

In [None]:
def binaryclassifier(y, X):
    """Grid-search an RBF-kernel SVC and return its best cross-validated score.

    Parameters
    ----------
    y : labels, one per row of X.
    X : feature matrix handed to ``GridSearchCV.fit``.

    Returns
    -------
    ``np.nan`` if every entry of X is NaN; otherwise the ``best_score_`` of a
    10-fold ``GridSearchCV`` over ``gamma`` and ``C``. No ``scoring`` argument
    is given, so the score is whatever ``SVC`` reports by default.
    """
    # Nothing can be fitted when the features are entirely missing.
    if np.all(np.isnan(X)):
        return np.nan

    # gamma and C are each searched over 1e-2 ... 1e2 in decade steps (5 x 5 grid).
    decades = [1e-2, 1e-1, 1e0, 1e1, 1e2]
    searchspace = {'kernel': ['rbf'], 'gamma': list(decades), 'C': list(decades)}

    #reference for 10-fold cross-validation http://web.cs.iastate.edu/~jtian/cs573/Papers/Kohavi-IJCAI-95.pdf
    search = GridSearchCV(SVC(), searchspace, cv=10)
    search.fit(X, y)
    return search.best_score_

def svmregression(y, X):
    """Grid-search an RBF-kernel SVR and return its best cross-validated score.

    Parameters
    ----------
    y : regression targets, one per row of X.
    X : feature matrix handed to ``GridSearchCV.fit``.

    Returns
    -------
    ``np.nan`` if every entry of X is NaN; otherwise the ``best_score_`` of a
    10-fold ``GridSearchCV`` over ``C``, ``epsilon`` and ``gamma``. No
    ``scoring`` argument is given, so the score is whatever ``SVR`` reports by
    default (the callers store it as an R2 value).
    """
    # Nothing can be fitted when the features are entirely missing.
    if np.all(np.isnan(X)):
        return np.nan

    searchspace = {
        'kernel': ['rbf'],
        'C': np.logspace(-3, 3, 5),
        'epsilon': np.logspace(-3, 3, 5),
        'gamma': np.logspace(-5, 5, 10),
    }

    #reference for 10-fold cross-validation http://web.cs.iastate.edu/~jtian/cs573/Papers/Kohavi-IJCAI-95.pdf
    search = GridSearchCV(SVR(), searchspace, cv=10)
    search.fit(X, y)
    # Disabled debugging aid: plot predictions against the targets.
    #y_pred = clf.predict(X)
    #plt.plot(y,y_pred,'ko')
    return search.best_score_

Verify that the above code works on known data

In [None]:
# Sanity check of svmregression on synthetic data where the target equals the
# single feature (y = x); presumably a score close to 1 is expected here.
rng = np.random.RandomState(0)  # fixed seed so the check is repeatable
x = 100 * rng.rand(100,) #np.arange(100)
y = x #np.sin(x).ravel()

plt.plot(x,y,'o')

# svmregression needs a 2-D feature matrix, hence the added trailing axis.
print(svmregression(y, x[:, np.newaxis]))

The data are organized into individual folders for each group ('LateNAc', 'LatePVT', 'EarlyNAc', 'EarlyPVT', 'LateCaMKii', 'EarlyCaMKii') and each folder contains individual sessions of imaging. Within each such session, the files that are important are:

1. extractedsignals.npy contains the extracted fluorescence traces from all recorded ROIs. In this experiment, recordings were made only from 7 seconds before cue delivery to 13 seconds after cue delivery, at a frame rate of 2.5 Hz. Each trial therefore has 50 frames (20 s x 2.5 Hz) and, since there are 100 trials, there are a total of 5000 frames.

2. alignedtotrial.npy contains the above signals organized into an array of shape (numtrials, numframes, numrois). So there are 100 trials, 50 frames per trial, and as many ROIs as were recorded in that session.

3. csplustrials.npy and csminustrials.npy contain aligned arrays as above but split into CS+ and CS-

4. A .mat file ending with _results.mat: this is a MATLAB file containing the behavioral timestamps. There are many more variables in this file than are useful for this particular experiment. The important variables from this file are imported into python in the cell below.

In [None]:
# Decoding analysis. For every group and every imaging session this cell
#   1. loads the behavioral .mat file and the trial-aligned fluorescence array,
#   2. computes baseline-subtracted cue and reward responses per trial and neuron,
#   3. decodes the CS- flag (binaryclassifier) and the CS+ lick count
#      (svmregression) from the cue response, per neuron and per session,
#   4. repeats both with shuffled targets as a control.
basedir = 'full/path/to/directory/where/data/is/stored'

groups = ['LateNAc','LatePVT']#,'EarlyNAc','EarlyPVT','LateCaMKii','EarlyCaMKii']
# Frame windows used as slices [start:stop] along the frame axis (axis 1) of the
# aligned array; the mean over each window is taken per trial and per neuron.
csperiod = [23, 26]
baselineperiod = [14, 16]
rewardperiod = [28, 35]
numshuffles = 1 #number of shuffles
shuffleseed = 0 #seed of the shuffle generator, so the shuffled controls are reproducible
shufflerng = np.random.RandomState(shuffleseed)

csminus_classification_accuracy = {}
lickprediction_R2 = {}


def _emptyresults():
    """Return {'individualneurons'|'session': {'shuffled': {}, 'unshuffled': {}}}."""
    return dict((level, {'shuffled': {}, 'unshuffled': {}})
                for level in ('individualneurons', 'session'))


def _listsorted(path):
    """Return the sorted (subdirectories, files) found directly inside path."""
    try:
        # next(...) works on Python 2 and 3, unlike the generator's .next() method.
        _, subdirs, files = next(os.walk(path))
    except StopIteration:
        # os.walk yields nothing for a missing or unreadable directory.
        raise Exception('%s: directory does not exist or cannot be read'%path)
    # Sorted so that the running neuron index does not depend on the OS listing order.
    return sorted(subdirs), sorted(files)


def _shuffledscores(scorer, y, X):
    """Return scorer(permuted y, X) for numshuffles independent permutations of y."""
    scores = np.nan*np.ones((numshuffles,))
    for shuffleid in range(numshuffles):
        scores[shuffleid] = scorer(shufflerng.permutation(y), X)
    return scores


for group in groups:
    csminus_classification_accuracy[group] = _emptyresults()
    lickprediction_R2[group] = _emptyresults()
    neuronaccuracy = csminus_classification_accuracy[group]['individualneurons']
    sessionaccuracy = csminus_classification_accuracy[group]['session']
    neuronR2 = lickprediction_R2[group]['individualneurons']
    sessionR2 = lickprediction_R2[group]['session']

    datadirs = _listsorted(os.path.join(basedir, group))[0]
    numneuronstillnow = 0 #running neuron count, so neuron keys are unique across sessions of a group
    for datadir in datadirs:
        print('Analyzing %s/%s'%(group, datadir))
        sessiondir = os.path.join(basedir, group, datadir)
        tempmatfiles = _listsorted(sessiondir)[1]
        matfiles = [f for f in tempmatfiles if 'results' in f and os.path.splitext(f)[1]=='.mat']
        if len(matfiles) > 1:
            raise Exception('%s/%s: Multiple .mat files found. Only keep the results file'%(group,datadir))
        elif len(matfiles)==0:
            raise Exception('%s/%s: No .mat files found!'%(group,datadir))
        matfile = matfiles[0]

        behaviordata = sio.loadmat(os.path.join(sessiondir, matfile))
        tempdataplus = np.squeeze(behaviordata['nlicksplus'])[:,-1] #Use anticipatory licks during trace interval
        tempdataminus = np.squeeze(behaviordata['nlicksminus'])[:,-1] #Use anticipatory licks during trace interval
        # The consumption licks and nlicksminus are loaded but not used further in this cell.
        tempdataplusconsumption = np.squeeze(behaviordata['nlicksplusconsumption'])[:,0] #Use consumption licks during 1st second
        tempdataminusconsumption = np.squeeze(behaviordata['nlicksminusconsumption'])[:,0] #Use consumption licks during 1st second
        csminusflagfortrial = np.squeeze(behaviordata['csminusflagfortrial'])
        isplustrial = np.logical_not(csminusflagfortrial)
        nlicksplus = tempdataplus[isplustrial]
        nlicksminus = tempdataminus[csminusflagfortrial==1]

        #csplustrials = np.load(os.path.join(basedir, group, datadir, 'csplustrials.npy'))
        #csminustrials = np.load(os.path.join(basedir, group, datadir, 'csminustrials.npy'))
        # NOTE(review): the notebook text calls this file 'alignedtotrial.npy' (no
        # spaces); confirm which name actually exists on disk.
        alltrials = np.load(os.path.join(sessiondir, 'aligned to trial.npy'))
        numneurons = alltrials.shape[2]
        numtrials = alltrials.shape[0]
        numsamples = alltrials.shape[1]
        neuralactivity_population_cue = np.nan*np.ones((numtrials,numneurons))
        neuralactivity_population_reward = np.nan*np.ones((numtrials,numneurons))

        neuralactivity_population_cue_plus = np.nan*np.ones((nlicksplus.shape[0],numneurons))

        for neuron in range(numneurons):
            cue = np.mean(alltrials[:,csperiod[0]:csperiod[1],neuron], axis=1)
            baseline = np.mean(alltrials[:,baselineperiod[0]:baselineperiod[1],neuron], axis=1)
            reward = np.mean(alltrials[:,rewardperiod[0]:rewardperiod[1],neuron], axis=1)

            neuralactivity_population_cue[:,neuron] = cue-baseline
            neuralactivity_population_reward[:,neuron] = reward-baseline
            neuralactivity_population_cue_plus[:,neuron] = (cue-baseline)[isplustrial]

            neuronid = numneuronstillnow+neuron
            # Both decoders need a 2-D feature matrix, so single neurons become one column.
            cuecolumn = np.expand_dims(neuralactivity_population_cue[:,neuron], axis=1)
            cuepluscolumn = np.expand_dims(neuralactivity_population_cue_plus[:,neuron], axis=1)

            # CS- flag decoded from this neuron's cue response (all trials).
            neuronaccuracy['unshuffled'][neuronid] = binaryclassifier(csminusflagfortrial, cuecolumn)
            neuronaccuracy['shuffled'][neuronid] = _shuffledscores(binaryclassifier, csminusflagfortrial, cuecolumn)

            # Lick count decoded from this neuron's cue response (non-CS- trials only).
            neuronR2['unshuffled'][neuronid] = svmregression(nlicksplus, cuepluscolumn)
            neuronR2['shuffled'][neuronid] = _shuffledscores(svmregression, nlicksplus, cuepluscolumn)

        numneuronstillnow += numneurons

        # Same two analyses using all neurons of the session as features.
        sessionaccuracy['unshuffled'][datadir] = binaryclassifier(csminusflagfortrial,
                                                                  neuralactivity_population_cue)
        sessionaccuracy['shuffled'][datadir] = _shuffledscores(binaryclassifier,
                                                               csminusflagfortrial,
                                                               neuralactivity_population_cue)
        print('%s/%s unshuffled: accuracy = %.2f, number of neurons =%d'%(group,
                                                                          datadir,
                                                                          sessionaccuracy['unshuffled'][datadir],
                                                                          numneurons))

        sessionR2['unshuffled'][datadir] = svmregression(nlicksplus,
                                                         neuralactivity_population_cue_plus)
        sessionR2['shuffled'][datadir] = _shuffledscores(svmregression,
                                                         nlicksplus,
                                                         neuralactivity_population_cue_plus)

# Saving of the results is disabled. Note that '%s'%groups formats the whole
# list, so the file name would contain the list's repr.
#with open(os.path.join(basedir, 'csminus_classification_accuracy_%s.pickle'%groups), 'wb') as handle:
#    pickle.dump(csminus_classification_accuracy, handle)
#with open(os.path.join(basedir, 'lickprediction_R2_%s.pickle'%groups), 'wb') as handle:
#    pickle.dump(lickprediction_R2, handle)

In [None]:
def CDFplot(x, ax, color=None, label='', linetype='-'):
    """Draw the empirical CDF of x on ax and return ax.

    Parameters
    ----------
    x : values whose empirical distribution is plotted (converted with np.array).
    ax : axes object whose ``plot`` method is called.
    color, label : forwarded to ``ax.plot``.
    linetype : format string forwarded to ``ax.plot`` (default solid line).
    """
    values = np.array(x)
    # Plot in ascending order of the values so the line is drawn left to right.
    ascending = values[np.argsort(values)]
    cumulative = ECDF(values)(ascending)
    ax.plot(ascending, cumulative, linetype, color=color, label=label)
    return ax

def plot_accuracy(data, label, neuron_or_session):
    """Plot unshuffled and shuffled score CDFs for every group and save the figure.

    Parameters
    ----------
    data : nested dict indexed as data[group][neuron_or_session]['unshuffled' or
        'shuffled'], each leaf being a dict of scores.
    label : prefix of the saved file names.
    neuron_or_session : second-level key of ``data`` to plot; also part of the
        saved file names.

    Uses the module-level ``groups`` and ``basedir``. Writes a .png (300 dpi) and
    a .pdf into ``basedir``; ``groups`` is formatted with %s, so the list's repr
    is part of both file names.
    """
    fig, ax = plt.subplots()
    for group in groups:
        scores = data[group][neuron_or_session]
        ax = CDFplot(scores['unshuffled'].values(), ax, label=group+' unshuffled')
        # .T[0] selects the first shuffle of every entry; further shuffles are not plotted.
        firstshuffle = np.array(scores['shuffled'].values()).T[0]
        ax = CDFplot(firstshuffle, ax, linetype='--', label=group+' shuffled')
    ax.set_ylabel('CDF')
    ax.set_xlabel('Individual neuronal decoding accuracy')
    ax.legend()
    fig.show()

    nameparts = (label, neuron_or_session, groups)
    pngpath = os.path.join(basedir, '%s_%s_%s.png'%nameparts)
    pdfpath = os.path.join(basedir, '%s_%s_%s.pdf'%nameparts)
    fig.savefig(pngpath, format='png', dpi=300)
    fig.savefig(pdfpath, format='pdf')

def calculate_df_Welch_ttest(x,
                             y):
    """Return the Welch-Satterthwaite degrees of freedom for Welch's t test.

    scipy.stats.ttest_ind(..., equal_var=False) does not return this value
    in the scipy versions this notebook targets, so it is computed here.

    Parameters
    ----------
    x, y : 1-D samples (arrays or plain sequences); they may differ in length.

    Returns
    -------
    df = (vx/nx + vy/ny)**2 / ((vx/nx)**2/(nx-1) + (vy/ny)**2/(ny-1))
    where vx and vy are the unbiased sample variances (ddof=1).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    nx = x.shape[0]
    ny = y.shape[0]
    # The formula is defined on unbiased sample variances. np.var defaults to
    # ddof=0, which gives a wrong df whenever nx != ny.
    sem2x = np.var(x, ddof=1)/nx
    sem2y = np.var(y, ddof=1)/ny
    df = (sem2x+sem2y)**2/(sem2x**2/(nx-1)+sem2y**2/(ny-1))
    return df

In [None]:
# Plot (and save into basedir) the CDFs of the per-neuron CS- classification
# scores. The commented-out calls below produce the same figure for the other
# combinations of result dict and level.
plot_accuracy(csminus_classification_accuracy, 'csminus_classification_accuracy', 'individualneurons')
#plot_accuracy(csminus_classification_accuracy, 'csminus_classification_accuracy', 'session')
#plot_accuracy(lickprediction_R2, 'lickprediction_R2', 'individualneurons')
#plot_accuracy(lickprediction_R2, 'lickprediction_R2', 'session')

In [None]:
# Welch's (unequal variance) t test comparing the unshuffled per-neuron
# lick-prediction scores of LatePVT (x) against LateNAc (y).
x = np.array(lickprediction_R2['LatePVT']['individualneurons']['unshuffled'].values())
y = np.array(lickprediction_R2['LateNAc']['individualneurons']['unshuffled'].values())
welchresult = stats.ttest_ind(x, y, equal_var=False)
print(welchresult)
print('Degrees of freedom for the above test = %f' % calculate_df_Welch_ttest(x, y))
# Group means, LatePVT first.
print('%s %s' % (np.mean(x), np.mean(y)))

In [None]:
# CDFs of the per-neuron lick-prediction scores, unshuffled (solid) and
# shuffled (dashed), for every group on one set of axes.
fig, ax = plt.subplots()
for group in groups:
    neuronscores = lickprediction_R2[group]['individualneurons']
    ax = CDFplot(neuronscores['unshuffled'].values(), ax, label=group+': unshuffled')
    # .T[0] selects the first shuffle of every neuron; further shuffles are not plotted.
    firstshuffle = np.array(neuronscores['shuffled'].values()).T[0]
    ax = CDFplot(firstshuffle, ax, label=group+': shuffled', linetype='--')
    # Legend is refreshed after each group so it always lists everything drawn so far.
    ax.legend(loc='upper left')
ax.set_ylabel('CDF')
ax.set_xlabel('Individual neuronal decoding accuracy')
fig.show()