In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.io as sio
import os
import subprocess
import bisect
import errno
import time
import pandas
import pickle
from sklearn.decomposition import PCA
from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn.grid_search import GridSearchCV
import scipy.stats as stats
from sklearn.metrics import roc_auc_score as auROC
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.distributions.empirical_distribution import ECDF
import PIL
from itertools import product
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.colorbar as colorbar

In [None]:
def calculate_auROC(x,y,offset_to_zero=True):
    """Area under the ROC curve for telling samples `x` apart from samples `y`.

    Parameters:
    x, y: numpy arrays of observations (their `.shape` is read to build the labels).
    offset_to_zero: when True, 0.5 is subtracted from the area before returning it.

    Returns: tuple (area, p) where `area` is the auROC obtained by scoring `x` as
             class 1 and `y` as class 0 (minus 0.5 if `offset_to_zero`), and `p` is
             the p value reported by `stats.mannwhitneyu(x, y)` with its defaults.
    """
    # Only the p value of the Mann-Whitney test is needed; the statistic is discarded.
    pvalue = stats.mannwhitneyu(x,y)[1]

    # Pool both samples and mark which sample each observation came from.
    pooled = np.concatenate((x,y))
    membership = np.concatenate((np.ones(x.shape), np.zeros(y.shape)))
    area = auROC(membership, pooled)

    return (area-0.5, pvalue) if offset_to_zero else (area, pvalue)
    
def Benjamini_Hochberg_correction(vector_of_pvals,
                                  alpha = 0.05):
    """Apply the Benjamini-Hochberg (BH) step-up FDR procedure to a set of p values.

    Parameters:
    vector_of_pvals: 1-D sequence or array of p values from the different tests.
                     NaN entries are not counted as hypotheses and stay NaN in the output.
    alpha: significance level (target false discovery rate).

    Returns: a new float array of the same length. All the p values that are above
             the FDR threshold are set to 1. Remaining p values are unchanged.
             The input is not modified.
    """
    pvals = np.asarray(vector_of_pvals, dtype=float)
    correctedpvals = np.copy(pvals)

    orderofpvals = np.argsort(pvals) # ascending; NaNs are sorted to the end
    m = np.count_nonzero(np.isfinite(pvals)) #Total number of hypotheses
    if m == 0:
        # Empty or all-NaN input: there is nothing to correct.
        return correctedpvals

    sortedpvals = pvals[orderofpvals[:m]]
    bh_thresholds = np.arange(1, m+1)*alpha/m
    passing = np.nonzero(sortedpvals <= bh_thresholds)[0]
    # Step-up rule: the number of rejected hypotheses k is the LARGEST rank whose
    # p value is at or below its threshold; every smaller p value is rejected with
    # it, even if it missed its own threshold. k == m means every test passes and
    # k == 0 means none does.
    k = passing[-1]+1 if passing.size > 0 else 0

    correctedpvals[orderofpvals[k:]] = 1
    # The assignment above also touched the NaN slots (they sort last); restore them.
    correctedpvals[np.isnan(pvals)] = np.nan
    return correctedpvals

The data are organized into individual folders for each group ('LateNAc', 'LatePVT', 'EarlyNAc', 'EarlyPVT', 'LateCaMKii', 'EarlyCaMKii') and each folder contains individual sessions of imaging. Within each such session, the files that are important are:

1. extractedsignals.npy contains the extracted fluorescence traces from all recorded ROIs. In this experiment, the recordings were only made from 7 seconds before cue delivery to 13 seconds after cue delivery, at a frame rate of 2.5 Hz. So each trial has 50 frames (20 s × 2.5 Hz) and, since there are 100 trials, there are a total of 5000 frames.

2. alignedtotrial.npy contains the above signals organized into an array of shape (numtrials, numframes, numrois). So there are 100 trials, 50 frames per trial, and as many ROIs as were recorded in that session.

3. csplustrials.npy and csminustrials.npy contain aligned arrays as above but split into CS+ and CS-

4. A .mat file ending with _results.mat: this is a MATLAB file containing the behavioral timestamps. There are many more variables in this file than are useful for this particular experiment. The important variables from this file are imported into python in the cell below.

In [None]:
# ---- Settings --------------------------------------------------------------
basedir = 'full/path/to/directory/where/data/is/stored'
groups = ['LateNAc','LatePVT']
colors_groups = ['r','b'] #matplotlib color code for each entry of `groups`
# Frame windows within a trial. Each pair is used below as slice bounds, i.e. [start, stop).
csperiod = [23, 26]
baselineperiod = [14, 16]
rewardperiod = [28, 35]
maxnumneurons = 5000 #just used to initialize arrays. Should be larger than the number of neurons present in each group
numframes = 50
framerate = 2.5

# ---- Result arrays (NaN-padded up to maxnumneurons rows) --------------------
# Axis 0: neuron, pooled over all sessions of a group.
# Axis 1: the two values returned by calculate_auROC (auROC-0.5, p value).
# Axis 2: group, in the order of `groups`.
cueplusresponse_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
cueminusresponse_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
cuediscrimination_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
rewardplusresponse_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
rewardminusresponse_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
rewarddiscrimination_pop = np.nan*np.zeros((maxnumneurons,2,len(groups)))
centroids_pop = np.nan*np.zeros((maxnumneurons,2,len(groups))) #allocated here, not filled in this cell

# Trial-averaged trace of every neuron, minus the mean of its first 10 frames.
avgtracecsplus_pop = np.nan*np.zeros((maxnumneurons,numframes,len(groups)))
avgtracecsminus_pop = np.nan*np.zeros((maxnumneurons,numframes,len(groups)))

spatialslopes_pop = {} #allocated per group below, not filled in this cell


def _plot_trace_heatmap(ax, traces, title, numrows):
    """Draw a (neurons x frames) array of average traces as a heatmap on `ax`.

    `numrows` is the number of neurons shown; it sets the y ticks and the height of
    the two vertical dashed marker lines. Reads `numframes` and `framerate` from
    the settings above to label the x axis in seconds from the start of the trial.
    """
    sns.heatmap(traces, ax=ax, cmap=plt.get_cmap('coolwarm'), vmin=-0.3, vmax=0.2, center=-0.1, linewidth=0)
    ax.grid(False) # positional form: works whether the keyword is called `b` or `visible`
    ax.set_title(title)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Neuron number')
    ax.set_xticks(range(0, numframes+1, 10))
    ax.set_xticklabels([str(int(a/framerate)) for a in range(0, numframes+1, 10)])
    ax.set_yticks(range(0, numrows, 10))
    ax.set_yticklabels([str(a) for a in range(0, numrows, 10)])
    # Vertical dashed markers at x = 16.5 and x = 25 (frame units).
    # NOTE(review): presumably cue and reward times - confirm against the task timing.
    ax.plot([16.5, 16.5], [0, numrows], '--k', linewidth=1)
    ax.plot([25, 25], [0, numrows], '--k', linewidth=1)


for g, group in enumerate(groups):
    # Immediate sub-folders of the group folder; each one is loaded as one session.
    # next(...) runs on both Python 2 and 3 (the generator's .next() method is Python 2 only).
    # The session order is whatever os.walk returns.
    data_dirs = next(os.walk(os.path.join(basedir, group)))[1]
    numneuronstillnow = 0
    spatialslopes_pop[group] = np.nan*np.ones((len(data_dirs), 2)) #AP, ML
    for d, data_dir in enumerate(data_dirs):
        print(os.path.join(basedir, group, data_dir))
        csplustrials = np.load(os.path.join(basedir, group, data_dir, 'csplustrials.npy'))
        csminustrials = np.load(os.path.join(basedir, group, data_dir, 'csminustrials.npy'))
        numneurons = csplustrials.shape[2]
        if numneuronstillnow+numneurons > maxnumneurons:
            # Fail with an actionable message instead of an IndexError halfway through a session.
            raise ValueError('maxnumneurons=%d is too small for group %s; increase it'
                             %(maxnumneurons, group))

        for neuron in range(numneurons):
            row = numneuronstillnow+neuron # row of this neuron in the pooled arrays

            # CS+ trials: per-trial mean fluorescence in each analysis window.
            cue = np.mean(csplustrials[:,csperiod[0]:csperiod[1],neuron], axis=1)
            baseline = np.mean(csplustrials[:,baselineperiod[0]:baselineperiod[1],neuron], axis=1)
            reward = np.mean(csplustrials[:,rewardperiod[0]:rewardperiod[1],neuron], axis=1)
            rewardplusresponse_pop[row,:,g] = calculate_auROC(reward,baseline)
            cueplusresponse_pop[row,:,g] = calculate_auROC(cue,baseline)
            cueplusminusbaseline = cue-baseline
            rewardplusminusbaseline = reward-baseline
            temp = np.mean(csplustrials[:,:,neuron], axis=0)
            avgtracecsplus_pop[row,:,g] = temp - np.mean(temp[0:10])

            # CS- trials: same measures.
            cue = np.mean(csminustrials[:,csperiod[0]:csperiod[1],neuron], axis=1)
            baseline = np.mean(csminustrials[:,baselineperiod[0]:baselineperiod[1],neuron], axis=1)
            reward = np.mean(csminustrials[:,rewardperiod[0]:rewardperiod[1],neuron], axis=1)
            rewardminusresponse_pop[row,:,g] = calculate_auROC(reward,baseline)
            cueminusresponse_pop[row,:,g] = calculate_auROC(cue,baseline)
            cueminusminusbaseline = cue-baseline
            rewardminusminusbaseline = reward-baseline
            temp = np.mean(csminustrials[:,:,neuron], axis=0)
            avgtracecsminus_pop[row,:,g] = temp - np.mean(temp[0:10])

            # Discrimination: baseline-subtracted CS+ responses against baseline-subtracted CS- responses.
            cuediscrimination_pop[row,:,g] = calculate_auROC(cueplusminusbaseline,
                                                             cueminusminusbaseline)
            rewarddiscrimination_pop[row,:,g] = calculate_auROC(rewardplusminusbaseline,
                                                                rewardminusminusbaseline)
        numneuronstillnow += numneurons
    print('Number of neurons for %s = %d' %(group, numneuronstillnow))

    # Figure 1: response on CS+ trials against response on CS- trials, one dot per neuron.
    # Every group is drawn into the same numbered figure, so the groups overlay.
    fig1 = plt.figure(1)
    ax1 = plt.subplot(121)
    # `abs(x) >= 0` is False only for NaN, so this selects the rows that were filled in.
    cellsofinterest = np.nonzero(np.abs(cueplusresponse_pop[:,0,g])>=0)
    ax1.plot(cueplusresponse_pop[cellsofinterest,0,g][0],
             cueminusresponse_pop[cellsofinterest,0,g][0], '.' + colors_groups[g], alpha=0.7, label=group)
    ax1.set_xlabel('auROC(CS+ v baseline)')
    ax1.set_ylabel('auROC(CS- v baseline)')

    ax2 = plt.subplot(122)
    cellsofinterest = np.nonzero(np.abs(rewardplusresponse_pop[:,0,g])>=0)
    ax2.plot(rewardplusresponse_pop[cellsofinterest,0,g][0],
             rewardminusresponse_pop[cellsofinterest,0,g][0], '.' + colors_groups[g], alpha=0.7, label=group)
    ax2.set_xlabel('auROC(reward delivery v baseline)')
    ax2.set_ylabel('auROC(reward absence v baseline)')
    if g == len(groups)-1:
        # Add the identity line once, after the last group, so it appears once in the legend.
        ax1.plot([-0.5, 0.5], [-0.5, 0.5], '--k', alpha=0.5, label='No discrimination')
        ax2.plot([-0.5, 0.5], [-0.5, 0.5], '--k', alpha=0.5, label='No discrimination')
    ax1.legend(loc='upper left')
    ax2.legend(loc='upper left')
    fig1.tight_layout()

    # Figure 2: cue discrimination against reward discrimination, one dot per neuron.
    fig2 = plt.figure(2)
    ax1 = plt.subplot()
    cellsofinterest = np.nonzero(np.abs(cuediscrimination_pop[:,0,g])>=0)
    ax1.plot(cuediscrimination_pop[cellsofinterest,0,g][0],
             rewarddiscrimination_pop[cellsofinterest,0,g][0], '.' + colors_groups[g], alpha=0.7, label=group)
    ax1.set_xlabel('auROC(CS+ v CS-)')
    ax1.set_ylabel('auROC(reward delivery v reward absence)')
    fig2.tight_layout()

    # Figure 3: normalized histograms of the two discrimination indices.
    # NOTE(review): newer matplotlib releases replace `normed` with `density` - confirm before upgrading.
    fig3 = plt.figure(3)
    ax1 = plt.subplot(121)
    n, bins, patches = ax1.hist(cuediscrimination_pop[cellsofinterest,0,g][0],
                                30, normed=1, facecolor=colors_groups[g], alpha=0.5)
    ax1.set_xlabel('auROC(CS+ v CS-)')
    ax1.set_ylabel('probability')
    ax2 = plt.subplot(122)
    n, bins, patches = ax2.hist(rewarddiscrimination_pop[cellsofinterest,0,g][0],
                                30, normed=1, facecolor=colors_groups[g], alpha=0.5)
    ax2.set_xlabel('auROC(reward delivery v reward absence)')
    ax2.set_ylabel('probability')
    fig3.tight_layout()

    # Figure 4: CS+ average traces, neurons sorted by their mean CS+ trace in the cue window.
    fig4 = plt.figure(4)
    ax = plt.subplot(1,2,g+1)
    sortneurons = np.argsort(np.mean(avgtracecsplus_pop[:numneuronstillnow,csperiod[0]:csperiod[1],g], axis=1))
    _plot_trace_heatmap(ax, avgtracecsplus_pop[sortneurons,:,g], group, numneuronstillnow)
    fig4.suptitle('CS+', size=15, y=1.05)
    fig4.tight_layout()

    # Figure 5: CS- average traces. Rows reuse the CS+ sort order from figure 4, so a
    # given row is the same neuron in both figures.
    fig5 = plt.figure(5)
    ax = plt.subplot(1,2,g+1)
    _plot_trace_heatmap(ax, avgtracecsminus_pop[sortneurons,:,g], group, numneuronstillnow)
    ax.spines['left'].set_visible(True)
    ax.spines['left'].set_color('k')
    fig5.suptitle('CS-', size=15, y=1.05)
    fig5.tight_layout()

plt.show()

In [None]:
# Correct the cue-response p values for multiple comparisons. The CS+ and CS-
# p values of every group are pooled into ONE family and corrected together.
# Pooling order: CS+ for each group in turn, then CS- for each group in turn.
pval_slots = [(pop, g)
              for pop in (cueplusresponse_pop, cueminusresponse_pop)
              for g in range(len(groups))]
list_of_pvals = np.concatenate([pop[:,1,g] for pop, g in pval_slots])
corrected_list_of_pvals = Benjamini_Hochberg_correction(list_of_pvals)

# Write each corrected chunk back where it came from. This OVERWRITES the raw
# p values: from here on column 1 holds the corrected values, in which tests that
# did not survive the correction are 1 and padding rows are still NaN.
for corrected_chunk, (pop, g) in zip(np.split(corrected_list_of_pvals, len(pval_slots)),
                                     pval_slots):
    pop[:,1,g] = corrected_chunk

In [None]:
# Pie charts of how many neurons show no / positive / negative cue responses.
# Top row: CS+ trials, bottom row: CS- trials; one column per group.
# A neuron counts as responsive when its p value in column 1 is below 1, so this
# cell expects column 1 to hold the FDR-corrected values (non-significant set to 1).
labels = 'No response', 'Positive\nresponse', 'Negative\nresponse'
# NOTE: this rebinds `colors_groups` (per-group plot colors in the analysis cell)
# to the three pie-slice colors, in the same order as `labels`.
colors_groups = [(0.7, 0.7,0.7),(0.84, 0.35, 0.35),(0.35,0.35,0.75)]
explode=(0, 0, 0)
frequency_population = np.zeros((3,2,2)) #3 response types x 2 trial types x 2 groups
for g, group in enumerate(groups):
    # ---- CS+ trials ----
    positiveresponders = np.nonzero(np.logical_and(cueplusresponse_pop[:,1,g]<1, cueplusresponse_pop[:,0,g]>0))[0]
    negativeresponders = np.nonzero(np.logical_and(cueplusresponse_pop[:,1,g]<1, cueplusresponse_pop[:,0,g]<0))[0]
    noresponders = np.nonzero(cueplusresponse_pop[:,1,g]==1)[0]
    frequency_csplusresponse = np.array([noresponders.size, positiveresponders.size, negativeresponders.size])
    #proportion_csplusresponse = frequency_csplusresponse/(0.0+positiveresponders.size+negativeresponders.size+noresponders.size)
    #plt.figure(1)
    frequency_population[:,0,g] = frequency_csplusresponse
    ax1=plt.subplot(2,2,g+1)
    if g==0:
        ax1.set_title('NAc projection neurons', y=1.1)
    else:
        ax1.set_title('PVT projection neurons', y=1.1)
        # Slice labels are drawn on the first pie only; later pies get empty labels.
        labels='','',''
        ax1.text(1.15,0,'CS+', rotation=270, size=12)
    ax1.pie(frequency_csplusresponse, explode=explode, labels=labels,
                autopct='%1.0f%%', shadow=True, startangle=180, colors=colors_groups, labeldistance=1.18)
    plt.tight_layout()
    
    # ---- CS- trials ----
    positiveresponders = np.nonzero(np.logical_and(cueminusresponse_pop[:,1,g]<1, cueminusresponse_pop[:,0,g]>0))[0]
    negativeresponders = np.nonzero(np.logical_and(cueminusresponse_pop[:,1,g]<1, cueminusresponse_pop[:,0,g]<0))[0]
    noresponders = np.nonzero(cueminusresponse_pop[:,1,g]==1)[0]
    frequency_csminusresponse = np.array([noresponders.size, positiveresponders.size, negativeresponders.size])
    #proportion_csminusresponse = frequency_csplusresponse/(0.0+positiveresponders.size+negativeresponders.size+noresponders.size)
    #plt.figure(1)
    frequency_population[:,1,g] = frequency_csminusresponse
    ax=plt.subplot(2,2,g+3)
    labels='','',''
    if g>0:
        ax.text(1.15,0,'CS-', rotation=270, size=12)
    ax.pie(frequency_csminusresponse, explode=explode, labels=labels,
                autopct='%1.0f%%', pctdistance=0.6, shadow=True, startangle=180, colors=colors_groups)
    plt.tight_layout()
# Annotate the CS+ pie of the last group with the chi-squared test comparing the
# CS+ response-type counts between groups. The backslash of \chi is escaped
# explicitly ('\c' is not a valid string escape); the rendered text is unchanged.
ax1.text(-1.8,0.9,'$\\chi^2(NAc,PVT) $ = %1.0f\n   p = %f' % stats.chi2_contingency(frequency_population[:,0,:])[0:2], 
        rotation=0, size=12)

#plt.savefig('/home/stuberlab/Dropbox (Stuber Lab)/Jim Figures/ACNP Figures/population_CSplus_proportions.pdf',
#            format='pdf', bbox_inches='tight')

Quantify the statistics between NAc projectors and PVT projectors

In [None]:
# Chi-squared test on the table of CS+ response-type counts (rows: response types,
# columns: groups). Only the first two values returned by chi2_contingency
# (statistic, p value) are printed.
# print(...) with a single argument runs on both Python 2 and Python 3.
print('Pearsons chisquared test for independence of proportions on CS+ trials: chi2: %f, p: %f' % stats.chi2_contingency(frequency_population[:,0,:])[0:2])

# Disabled analysis, kept for reference. It used to be a bare triple-quoted string,
# which a notebook echoes as the cell's output; as comments it stays silent.
#(t,p) = stats.ttest_ind(cueplusresponse_pop[:numneuronstillnow,0,0],cueplusresponse_pop[:numneuronstillnow,0,1], equal_var=False)
#print 'mean t(CS+ v baseline) for NAc projectors = %f' %(np.mean(cueplusresponse_pop[:numneuronstillnow,0,0]))
#print 'mean t(CS+ v baseline) for PVT projectors = %f' %(np.mean(cueplusresponse_pop[:numneuronstillnow,0,1]))
#print 't test results of CS+ response between NAc and PVT projectors: t=%f, p=%f' %(t,p)
#(t,p) = stats.ttest_ind(rewardplusresponse_pop[:numneuronstillnow,0,0],rewardplusresponse_pop[:numneuronstillnow,0,1], equal_var=False)
#print 'mean t(reward delivery v baseline) for NAc projectors = %f' %(np.mean(rewardplusresponse_pop[:numneuronstillnow,0,0]))
#print 'mean t(reward delivery v baseline) for PVT projectors = %f' %(np.mean(rewardplusresponse_pop[:numneuronstillnow,0,1]))
#print 't test results of reward delivery response between NAc and PVT projectors: t=%f, p=%f' %(t,p)
#print numneuronstillnow