This is the code for analyzing optogenetic learning experiments in Otis, Namboodiri et al. as shown in Figure 4. 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.io as sio
import os
import subprocess
import pickle
from statsmodels.distributions.empirical_distribution import ECDF
import pandas
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.multicomp as multi
import statsmodels
import scipy.stats as stats
from sklearn.metrics import roc_auc_score as auROC

In [None]:
# Load the per-trial lick counts of every animal and session into one long-format table
# (alldata_plus), one row per trial of the rewarded cue.
# Directory layout read below: indir/<group>/<virus>/<animal>/<session>.mat
indir = 'full/path/to/directory/where/data/is/stored'

groups = ['NAc', 'PVT']
conditions = ['ChR2','eNpHR3','eYFP']

nlicksdata_pop = {}

alldata_plus = pandas.DataFrame(columns=['nlicks','Group','Virus','Animal','Day','Laser'])

# One DataFrame per session is collected here and concatenated once after the loops.
# Growing alldata_plus with DataFrame.append inside the loop re-copies the whole table
# for every session, and DataFrame.append was removed in pandas 2.0.
session_frames = []

for group in groups:
    for condition in conditions:
        # The first item yielded by os.walk is (dirpath, dirnames, filenames) of the top
        # directory. next(...) is used because generator.next() only exists on Python 2.
        tempanimals = next(os.walk(os.path.join(indir,group,condition)))[1]
        animals = [a for a in tempanimals if 'out of study' not in a]
        for animal in animals:
            datadir = os.path.join(indir,group,condition, animal)
            tempmatfiles = next(os.walk(datadir))[2]
            matfiles = [f for f in tempmatfiles if os.path.splitext(f)[1]=='.mat']
            for matfile in matfiles:
                # The session number is the last character of the third '_'-separated
                # field of the file name, so only single-digit days are parsed correctly.
                day = int(matfile.split('_')[2][-1])

                behaviordata = sio.loadmat(os.path.join(datadir, matfile))
                tempnlicksplus = np.squeeze(behaviordata['nlicksplus'])
                tempnlicksminus = np.squeeze(behaviordata['nlicksminus'])
                templaserontrial = np.squeeze(behaviordata['laserontrial'])
                tempbaselineplus = np.squeeze(behaviordata['nlicksbaselineplus'])
                tempbaselineminus = np.squeeze(behaviordata['nlicksbaselineminus'])

                # In the MATLAB results files, the 12kHz tone is labeled as CSplus and the 3kHz
                # tone is labeled as CS-. This does not reflect the reward association as that
                # was set by the variables 'csplusprob' and 'csminusprob'. So if 'csplusprob' was
                # 0 and 'csminusprob' was 100, the 12kHz tone was associated with no reward but
                # the 3kHz tone was associated with 100% reward. Thus, please don't confuse the
                # variable names CSplus and CSminus as reflecting the cue-reward contingency.
                try:
                    csplusprob = np.squeeze(behaviordata['csplusprob'])
                    csminusprob = np.squeeze(behaviordata['csminusprob'])
                except KeyError: #If these are not mentioned, they were the default
                    csplusprob = 100
                    csminusprob = 0

                # Select whichever tone was the rewarded one in this session
                if csplusprob>csminusprob:
                    rewarded_licks = tempnlicksplus
                    rewarded_baseline = tempbaselineplus
                else:
                    rewarded_licks = tempnlicksminus
                    rewarded_baseline = tempbaselineminus

                # Keep only the rows whose first lick entry is finite
                valid_trials = np.isfinite(rewarded_licks[:,0])
                laserontrial = templaserontrial[valid_trials]
                baseline = rewarded_baseline[valid_trials,-1]
                nlicksplus = rewarded_licks[valid_trials,:]

                # Mean across the lick columns minus the last baseline column
                nlicksplus = np.mean(nlicksplus, axis=1) - baseline

                # np.column_stack builds one array from numbers and labels, so every column
                # (including nlicks, Day and Laser) ends up stored as text. The later cells
                # rely on this: they cast nlicks back to float and compare Day/Laser to strings.
                data = np.column_stack([nlicksplus,
                                        [group]*nlicksplus.shape[0],
                                        [condition]*nlicksplus.shape[0],
                                        [animal]*nlicksplus.shape[0],
                                        [day]*nlicksplus.shape[0],
                                        laserontrial])
                session_frames.append(
                    pandas.DataFrame(data=data, columns=['nlicks','Group','Virus','Animal','Day','Laser']))

# If no session was found, alldata_plus stays the empty table created above
if session_frames:
    alldata_plus = pandas.concat(session_frames, ignore_index=True)

In [None]:
# Learning curves: for every group x virus panel, plot the across-animal mean (+/- SEM)
# of the per-animal mean nlicks on each session.
# nlicks is cast to float first because the table is compared/stored as text elsewhere.
alldata_plus['nlicks'] = alldata_plus['nlicks'].astype(float)
numdays = 9
fig, axs = plt.subplots(2, 3, sharex='col', sharey='row', figsize=(10, 10))
for g, group in enumerate(groups):
    for c, condition in enumerate(conditions):
        # Rows belonging to this group/virus cohort, and the animals found in it
        in_cohort = (alldata_plus['Group'] == group) & (alldata_plus['Virus'] == condition)
        animals = list(set(alldata_plus[in_cohort]['Animal']))

        # One row per session; column 0 is the mean and column 1 the SEM across animals
        mean_licks_on_day_laser = np.full((numdays, 2), np.nan)
        mean_licks_on_day_nolaser = np.full((numdays, 2), np.nan)
        mean_licks_on_day = np.full((numdays, 2), np.nan)

        for day in range(numdays):
            # Day is stored as text and is 1-based, hence str(day + 1)
            on_day = in_cohort & (alldata_plus['Day'] == str(day + 1))

            nlicks_laser = np.full((len(animals),), np.nan)
            nlicks_nolaser = np.full((len(animals),), np.nan)
            nlicks = np.full((len(animals),), np.nan)
            for a, animal in enumerate(animals):
                session = alldata_plus[on_day & (alldata_plus['Animal'] == animal)]
                # Per-animal means over rows with Laser == '1', Laser == '0', and all rows
                nlicks_laser[a] = np.mean(np.array(session[session['Laser'] == '1']['nlicks']))
                nlicks_nolaser[a] = np.mean(np.array(session[session['Laser'] == '0']['nlicks']))
                nlicks[a] = np.mean(np.array(session['nlicks']))

            mean_licks_on_day_laser[day, :] = [np.mean(nlicks_laser), stats.sem(nlicks_laser)]
            mean_licks_on_day_nolaser[day, :] = [np.mean(nlicks_nolaser), stats.sem(nlicks_nolaser)]
            mean_licks_on_day[day, :] = [np.mean(nlicks), stats.sem(nlicks)]

        panel = axs[g, c]
        # Only the pooled (laser + no laser) curve is drawn; the split curves are kept for reference
        #axs[g,c].errorbar(range(numdays), mean_licks_on_day_laser[:,0], mean_licks_on_day_laser[:,1], color='b', label='Laser')
        #axs[g,c].errorbar(range(numdays), mean_licks_on_day_nolaser[:,0], mean_licks_on_day_nolaser[:,1], color='k', label='No Laser')
        panel.errorbar(range(numdays), mean_licks_on_day[:, 0], mean_licks_on_day[:, 1], color='k')
        panel.legend(loc='upper right')
        panel.set_title('%s: %s'%(group, condition))

        # x axis decorations go on the bottom row of panels
        bottom_panel = axs[1, c]
        bottom_panel.set_xlabel('Session number')
        bottom_panel.set_xticks(range(numdays))
        bottom_panel.set_xticklabels([str(a+1) for a in range(numdays-1)]+['%s:\ntest day\nno laser'%numdays])
        #axs[g,c].set_xlim([0, numdays+1])
    axs[g, 0].set_ylabel('Mean number of licks on CS+ trials')
    axs[g, 0].set_ylim([0, 5])
fig.tight_layout()
#fig.savefig(os.path.join(indir, 'Learning evolution.pdf'), format='pdf')

In [None]:
def CDFplot(x, ax, color=None, label='', linetype='-'):
    """Draw the empirical CDF of ``x`` on ``ax``.

    Parameters:
    x        -- sequence of values; converted to a numpy array
    ax       -- object whose ``plot`` method is called to draw the curve
    color    -- passed to ``ax.plot`` as ``color``
    label    -- passed to ``ax.plot`` as ``label``
    linetype -- format string passed positionally to ``ax.plot``

    Returns: the same ``ax`` that was passed in.
    """
    values = np.array(x)
    # Plot the points in ascending order of x so the curve runs left to right
    ascending = np.argsort(values)
    cdf_at_values = ECDF(values)(values)
    ax.plot(values[ascending], cdf_at_values[ascending], linetype, color=color, label=label)
    return ax

# Days whose trials are pooled to analyze whether lick rate differs across groups.
# Since days 1-8 potentially confound learning and expression, we decided to test
# performance on day 9 (NoLaser) to test whether the optogenetic manipulation affected
# learning or expression. Any other set of days can be listed here; all trials from the
# listed days are pooled for the test below.
days_to_test = [9]
colors_for_condition = {'ChR2': 'b', 'eNpHR3': 'g', 'eYFP': 'k'}
numdays = 9
numinit = 10000  # width of the NaN-padded per-animal trial buffers allocated below
fig, axs = plt.subplots(1, len(groups), sharex='col', sharey='row', figsize=(10, 5))
maxnlicks = 15#np.amax(np.array(alldata_plus['nlicks']))
minnlicks = 0#np.amin(np.array(alldata_plus['nlicks']))
nlicks_per_animal = {}
for g, group in enumerate(groups):
    for c, condition in enumerate(conditions):
        cohort_key = os.path.join(group, condition)
        in_cohort = (alldata_plus['Group'] == group) & (alldata_plus['Virus'] == condition)
        animals = list(set(alldata_plus[in_cohort]['Animal']))

        # One NaN-padded row of trials per animal
        nlicks_per_animal[cohort_key] = np.full((len(animals), numinit), np.nan)

        for a, animal in enumerate(animals):
            is_animal = in_cohort & (alldata_plus['Animal'] == animal)
            licks_on_testdays = np.full((numinit,), np.nan)
            numtrialstillnow = 0
            for day in days_to_test:
                # Pools Laser and no laser trials (no filter on the Laser column)
                nlicks = np.array(alldata_plus[is_animal & (alldata_plus['Day'] == str(day))]['nlicks'])
                #print group, condition, animal, nlicks.size
                stop = numtrialstillnow + nlicks.shape[0]
                licks_on_testdays[numtrialstillnow:stop] = nlicks
                numtrialstillnow = stop
            # Drop the unused NaN padding (and any non-finite lick values)
            licks_on_testdays = licks_on_testdays[np.isfinite(licks_on_testdays)]
            nlicks_per_animal[cohort_key][a, :licks_on_testdays.size] = licks_on_testdays

            #Plot individual animals' data
            # if a==0:
            #     CDFplot(licks_on_testdays, axs[g], color=colors_for_condition[condition], label=condition)
            # else:
            #     CDFplot(licks_on_testdays, axs[g], color=colors_for_condition[condition])

        # Plot pooled data across animals
        data = nlicks_per_animal[cohort_key].flatten()
        data = data[np.isfinite(data)]
        CDFplot(data, axs[g], color=colors_for_condition[condition], label=condition)
        # Till here
        group_panel = axs[g]
        group_panel.legend(loc='upper right')
        group_panel.set_title(group)
        group_panel.set_xlabel('Lick number')
        group_panel.set_ylabel('CDF')
        group_panel.set_xlim([minnlicks, maxnlicks])
fig.tight_layout()
#fig.savefig(os.path.join(indir, 'Learning d9 CS+ CDF.pdf'), format='pdf')

In [None]:
# Bar graph, one panel per group: for each virus, the mean (+/- SEM across animals) of
# the per-animal mean nlicks stored in nlicks_per_animal (NaN padding is ignored).

fig, axs = plt.subplots(1,len(groups), sharey='row', figsize=(8,4))
width=1
for g, group in enumerate(groups):
    for c, condition in enumerate(conditions):
        data = nlicks_per_animal[os.path.join(group, condition)]
        # One value per animal (row); nanmean skips the NaN padding of each row
        per_animal_means = np.nanmean(data, axis=1)
        means = np.mean(per_animal_means)
        sems = stats.sem(per_animal_means)
        axs[g].bar(c, means, yerr=sems,
               color=colors_for_condition[condition], width=width, ecolor='k')
    # One tick per plotted condition (previously hard-coded to three positions)
    axs[g].set_xticks([position + width/2.0 for position in range(len(conditions))])
    axs[g].set_xticklabels(conditions, rotation='vertical')
    #axs[g].set_ylim([0, 2])
    axs[g].set_title(group)
# Raw string: '\D' is not a valid escape sequence in a normal string literal
axs[0].set_ylabel(r'$\Delta$Licking Rate')

In [None]:
# Write the figure currently bound to `fig` as a PDF into the data directory `indir`.
# NOTE(review): the file name says "D1-8", but days_to_test is set to [9] where the
# per-animal data are pooled -- confirm the name matches the days actually plotted.
fig.savefig(os.path.join(indir, 'Bar graph learning D1-8.pdf'), format='pdf')

In [None]:
def Benjamini_Hochberg_pvalcorrection(vector_of_pvals):
    """Apply the Benjamini-Hochberg FDR correction to a set of p values.

    Parameters:
    vector_of_pvals -- 1-D array-like (numpy array, list, tuple) of p values from the
                       different tests. Non-finite entries (e.g. NaN) are not counted
                       as hypotheses and come back as NaN.

    Returns: numpy array of corrected p values in the same order as the input. An empty
    input, or one without any finite p value, returns an all-NaN array of the same length.
    """
    pvals = np.asarray(vector_of_pvals, dtype=float).ravel()
    correctedpvals = np.nan*np.ones((pvals.size,))

    m = int(np.isfinite(pvals).sum()) #Total number of hypotheses
    if m == 0:
        return correctedpvals

    # argsort places NaNs last, so the first m sorted positions are the real tests
    order_of_tests = np.argsort(pvals)[:m]
    sortedpvals = pvals[order_of_tests]

    # Scale the p value of rank i (1-based) by m/i
    scaled = sortedpvals*m/np.arange(1, m+1)
    scaled[-1] = sortedpvals[-1] # the factor for rank m is exactly 1

    # Each corrected value is the smallest scaled value at its own rank or any higher
    # rank, which keeps the corrected p values non-decreasing with rank
    correctedpvals[order_of_tests] = np.minimum.accumulate(scaled[::-1])[::-1]
    return correctedpvals

In [None]:
# For each group, compare every opsin virus against the eYFP control using the per-animal
# mean nlicks (rows of nlicks_per_animal), then FDR-correct the p values within the group.
conditions_of_interest = ['ChR2', 'eNpHR3']
for group in groups:
    print(group)
    results_conditions = np.nan*np.ones((len(conditions_of_interest), 5)) # effect size, auROC, p, n1, n2
    # This loop previously iterated over the undefined name `conditions_to_test`
    for c, condition_to_test in enumerate(conditions_of_interest):
        # One value per animal; nanmean ignores the NaN padding of each row.
        # NOTE(review): a row that is entirely NaN yields a NaN mean here -- confirm that
        # every animal has trials on the tested days.
        x = np.nanmean(nlicks_per_animal[os.path.join(group, condition_to_test)], axis=1)
        y = np.nanmean(nlicks_per_animal[os.path.join(group, 'eYFP')], axis=1)
        results_conditions[c,0] = np.median(x)-np.median(y)
        # auROC with the tested virus labelled 1 and eYFP labelled 0
        results_conditions[c,1] = auROC(np.concatenate((np.ones(x.size,), np.zeros(y.size,))),
                                        np.concatenate((x, y)))
        # NOTE(review): no `alternative` is passed, so the sidedness of this p value follows
        # the installed SciPy's default -- confirm it is the intended test.
        results_conditions[c,2] = stats.mannwhitneyu(x, y)[1]
        results_conditions[c,3] = x.size
        results_conditions[c,4] = y.size
    results_conditions[:,2] = Benjamini_Hochberg_pvalcorrection(results_conditions[:,2])
    for c, condition in enumerate(conditions_of_interest):
        # Single-argument print(...) behaves the same on Python 2 and Python 3
        print('The effect of laser in %s when compared to eYFP\n\
        on median lick rate was %f; auROC(%d, %d) = %f, p value=%f ' % (condition,
                                                                    results_conditions[c,0],
                                                                    results_conditions[c,3],
                                                                    results_conditions[c,4],
                                                                    results_conditions[c,1],
                                                                    results_conditions[c,2]))
        print('\n')