#### The `future` package must be installed (e.g. `pip install future`); importing from the built-in \_\_future\_\_ alone is not enough

In [None]:
# A `from __future__` statement has to be the first statement of the cell;
# any other import placed before it makes the cell a SyntaxError.
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

# Third-party `future` package (distinct from the built-in `__future__`);
# presumably imported here to fail early when it is not installed.
#from future.utils import PY3
import future

import os
import pprint
import warnings

import numpy as np
import pandas as pd
from IPython.display import display
from matplotlib import pyplot as plt
from scipy.stats.mstats import zscore

## Load files

In [None]:
# Lengths of the consecutive protocol periods; their running total gives the
# period boundaries (they are multiplied by FPS when drawn on a frame axis)
durations = np.array([0, 10, 20, 15, 5])
events = durations.cumsum()

# Directory of the session to analyse together with its FPS value
# (presumably the camera frame rate in frames per second)
#mydir = '../_share/Losonczi/msa1215_1/'; FPS = 30
mydir = '../_share/Losonczi/msa0316_1/'
FPS = 8

# Show the files of the selected session
os.listdir(mydir)

In [None]:
# Load files: every table is read from its own HDF5 file under the key 'table'
def _read_table(filename):
    return pd.read_hdf(os.path.join(mydir, filename), key='table')

experiment_traits = _read_table('experiment_traits.h5')
raw_data = _read_table('raw_data.h5')
df_data = _read_table('df_data.h5')
transients_data = _read_table('transients_data.h5')
behavior_data = _read_table('behavior_data.h5')

In [None]:
# Available trials and ROIs: the first two levels of the row MultiIndex
trials, rois = df_data.index.levels[:2]
print (df_data.shape, '\n', trials, '\n', rois)

## Prepare data

In [None]:
# For every camera frame: number of non-missing entries over all rows,
# divided by the number of trials (first index level)
avail_sum = df_data.notnull().sum() / len(df_data.index.levels[0])

plt.plot(avail_sum)
plt.xlabel('Camera frame within experiment')
plt.ylabel('Available ROIs on average')

In [None]:
# See which ROI is available in which trial and for how many frames

# Counting step
## METHOD 1, step-by-step, simple availability
#avail = df_data[[]].copy()
#avail['isFound']=1
#avail.reset_index() #[['time','roi_id','isFound']]
## METHOD 2 (used): count the non-missing frames of every row directly
#
# Table step
## METHOD A, step-by-step
#avail.reset_index().pivot(index='time', columns='roi_id')
## METHOD B (used): unstack the innermost index level into columns
avail = df_data.notnull().sum(axis=1).to_frame('nFrames').unstack()

print(avail.shape)
avail.head()

In [None]:
# Create boolean DataFrame which ROI is spiking in which camera frame
# (rows: every (trial, ROI) combination, columns: camera frames)

# create empty structure for cumsum
# NOTE(review): the number of frames (480) is hard-coded — confirm it matches the data
mir = pd.MultiIndex.from_product((trials.values,rois.values),names=('time','roi_id'))
# NOTE(review): `mic` is not used anywhere below in this cell
mic = pd.MultiIndex.from_product(('Spiking',np.array(range(0,480))),names=('','frame'))
df_spike = pd.DataFrame(data=0,index=mir,columns=pd.Index(np.array(range(0,480)),name='frame'))
df_spike.shape

# select spike data: start/stop frames of the transients whose
# 'in_motion_period' flag is False
spikes = transients_data.loc[transients_data['in_motion_period']==False,['start_frame','stop_frame']]
spikes['count']=1

# fill in spike start and stop points
## METHOD 0, this kind of indexing would work for numpy, not here
#ix0 = zip(spikes.reset_index()['time'],spikes.reset_index()['roi_id'])
#ix1 = spikes['start_frame'].values
#df_spike.loc[zip(ix0,ix1)]+=1
## METHOD 1, slowest in both ways
#for (idx,data) in spikes.iterrows():
#    df_spike.loc[idx,data.values]+=[1,-1]
## METHOD 2, fastest in both
#for (idx,start,stop) in spikes.itertuples():
#    df_spike.loc[idx,start]+=1
#    df_spike.loc[idx,stop]-=1
## ULTIMATE METHOD, without for loop is best:
# pivot the counts so that every start_frame becomes a column, then add them
# to the zero table (+1 at each transient start)
sp = spikes[['start_frame','count']].pivot(columns='start_frame').fillna(0)
#sp.columns = pd.Index(sp.columns.levels[1].values) # would eliminate the need for ['count'] below
df_spike = df_spike.add(sp['count'], fill_value=0)
# same for the stop frames, but subtracted (-1 at each transient stop)
sp = spikes[['stop_frame','count']].pivot(columns='stop_frame').fillna(0)
#sp.columns = pd.Index(sp.columns.levels[1].values) # would eliminate the need for ['count'] below
df_spike = df_spike.add(-sp['count'], fill_value=0)

# cumulate along the frame axis: the running sum goes up by one at every start
# frame and down by one at every stop frame;
# conversion to int is not advised if using NaNs
df_spike = df_spike.cumsum(axis=1).astype(int)

print(df_spike.shape)
display(df_spike.head(25))
display(df_spike.tail())

In [None]:
# Create boolean DataFrame whether licking happens in camera frame
# (rows: trials, columns: camera frames), then smooth it along the frames

# Check for valid data (stop_time later than start_time) and calculate the
# frame of each lick from the midpoint of its start and stop time
print(behavior_data.shape)
df_lick = behavior_data[behavior_data.loc[:,'stop_time']>behavior_data.loc[:,'start_time']].copy()
print(df_lick.shape)
df_lick['mid_frame'] = (FPS*(df_lick['start_time']+df_lick['stop_time'])/2).apply(np.round).astype(int)
display(df_lick.head())
display(df_lick.tail())
# Convert to a DataFrame like df_data or raw_data; dropping duplicates means
# that several licks falling into one camera frame are kept only once
df_lick = df_lick.reset_index().rename(columns={'index':'time'})
df_lick = df_lick.drop_duplicates(['time','mid_frame']).pivot(index='time', columns='mid_frame')
display(df_lick.head())
df_lick = (~df_lick['lick_idx'].isnull()).astype(int)
df_lick.columns.name = ''
display(df_lick.head())
# NOTE(review): the pivot only creates columns for frames in which at least one
# lick occurred in some trial, so the columns are not guaranteed to be a
# contiguous frame range — confirm before relying on the smoothing below or on
# a positional x-axis.
# Number of remaining licks
print(df_lick.sum().sum())
# Smoothen each trial along the frame axis with a Gaussian kernel.
# `scipy.ndimage.filters` is a deprecated namespace; the function is public
# in `scipy.ndimage`.
from scipy.ndimage import gaussian_filter
df_lick = df_lick.apply(lambda x: gaussian_filter(x.astype(float), sigma=2), axis=1, raw=True)
display(df_lick.head())

## z-scoring

In [None]:
def pd_zscore_rows(df):
    """Return a copy of *df* in which every row is z-scored on its own.

    Reference (slow) implementation: each row is centred on its mean and
    divided by its population standard deviation (``ddof=0``).
    """
    scored = df.copy()
    for label, values in df.iterrows():
        centred = values - values.mean()
        scored.loc[label, :] = centred / values.std(ddof=0)
    return scored

def nan_zscore(data):
    """Z-score *data* using its NaN-ignoring mean and standard deviation.

    NaN entries of the input stay NaN in the result.
    """
    centre = np.nanmean(data)
    spread = np.nanstd(data)
    return (data - centre) / spread
    
def pd_zscore(df, axis = 0):
    """Apply ``nan_zscore`` along *axis* of *df* and return the result.

    RuntimeWarnings raised while the scores are computed are suppressed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        return df.apply(nan_zscore, axis=axis, raw=True)

In [None]:
## BENCHMARK (slow reference implementation)
#z_spike = pd_zscore_rows(df_spike)
#z_data = pd_zscore_rows(df_data)
#z_raw = pd_zscore_rows(raw_data)
#z_lick = pd_zscore_rows(df_lick)

## FAST: z-score every table row by row
z_spike, z_data, z_raw, z_lick = (
    pd_zscore(table, axis=1)
    for table in (df_spike, df_data, raw_data, df_lick)
)

In [None]:
# Sort the rows of both z-scored tables by their index
z_data, z_raw = z_data.sort_index(), z_raw.sort_index()

## Experiment protocol configurations

In [None]:
# Add a column of ones so that the group-wise sum counts the rows of every
# combination of protocol settings
et = experiment_traits.assign(sum=1)
et.groupby(['learning_epoch','context','puffed','licking']).sum()

# Plot

In [None]:
%matplotlib inline
# Column-wise mean of the spiking table, with one vertical line per event
plt.plot(df_spike.mean(axis=0), label="Population activity")
for boundary in events:
    plt.axvline(x=boundary*FPS, ymin=0.0, ymax=1.0, linewidth=1, color='k')
plt.legend(loc='lower left')
plt.show()

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

class helpmultipage(object):
    """Small wrapper around ``PdfPages`` that tracks whether the PDF is open.

    The document is opened on construction. ``savefig`` and ``close`` do
    nothing when the document is not open, so both can be called freely.
    """

    def __init__(self, filename):
        """Store *filename* and open the PDF document right away."""
        self.filename = filename
        self.isopen = False
        self.pp = None
        self.open()

    def __del__(self):
        # __del__ also runs for partially initialised objects, where the
        # attributes may not exist yet
        if getattr(self, 'isopen', False):
            self.close()

    def savefig(self):
        """Forward to ``PdfPages.savefig()``; ignored when the PDF is closed."""
        if self.isopen:
            self.pp.savefig()

    def open(self):
        """Open the PDF document unless it is already open."""
        # `not`, not `~`: bitwise inversion of a bool yields -1 or -2, which
        # are both truthy, so `~self.isopen` re-created the document on
        # every call
        if not self.isopen:
            self.pp = PdfPages(self.filename)
        self.isopen = True

    def close(self):
        """Close the PDF document if it is open."""
        if self.isopen:
            self.pp.close()
        self.isopen = False

In [None]:
def plot_activity(data, grp = None, name = 'Population activity (spiking)', ax = None):
    """Plot the mean of *data* over its rows, optionally split by trial traits.

    Reads the module-level ``experiment_traits``, ``events`` and ``FPS``.

    Parameters
    ----------
    data : DataFrame
        One column per camera frame; its index must be joinable with
        ``experiment_traits``.
    grp : list of str, optional
        Columns of ``experiment_traits`` to group by; one line is drawn per
        group. Defaults to ``['context','learning_epoch','licking','puffed']``.
        An empty list draws a single line for the mean over all rows.
    name : str
        Label of the y axis.
    ax : matplotlib axes, optional
        Axes to draw into; a new figure is created when omitted.
    """
    if grp is None:
        # built per call instead of sharing one mutable default list
        grp = ['context','learning_epoch','licking','puffed']
    if ax is None:
        fig = plt.figure()
        ax = fig.gca()
    if len(grp):
        # Average only the columns of `data`: the joined trait columns (e.g.
        # the string-valued session_num, the boolean licking/puffed) must not
        # take part in the mean, and newer pandas raises on non-numeric ones.
        joined = data.join(experiment_traits,how='left')
        res = joined.groupby(grp)[list(data.columns)].mean()
        for label, row in zip(res.index.values, res.values):
            ax.plot(row,label=label)
    else:
        res = data.mean(axis=0)
        ax.plot(res.values,label='whole popuation')
    # y-limits: centred between the 1st and 99th percentile, spanning twice
    # the distance between them
    q = np.nanpercentile(res.values,[1,99])
    ax.set_ylim(np.mean(q)+2*(q-np.mean(q)))
    # mark every event (scaled by FPS) with a vertical line
    for event in events:
        ax.axvline(x=event*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
    ax.set_xlabel('Camera frame')
    ax.set_ylabel(name)
    leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title=', '.join(grp))
    leg.get_title().set_fontsize('large')
    leg.get_title().set_fontweight('bold')
    #ax.show()

In [None]:
def plot_data(df_spike, df_data, df_lick, grps = None, title=''):
    """Draw a 3-row grid of activity plots, one column per grouping.

    The rows show *df_spike* ("Spiking"), *df_data* ("Ca-level") and
    *df_lick* ("Lick/frame"), each drawn with ``plot_activity``.

    Parameters
    ----------
    df_spike, df_data, df_lick : DataFrame
        Tables passed on to ``plot_activity``.
    grps : list of list of str, optional
        One grouping (list of trait names) per subplot column. Defaults to
        ``[[]]``, i.e. a single column without grouping.
    title : str
        Figure title; omitted when empty.
    """
    if grps is None:
        # built per call instead of sharing one mutable default list
        grps = [[]]
    ncol = len(grps)
    fig, ax = plt.subplots(3, ncol, figsize=(6*ncol,13), sharex=True, squeeze=False, dpi=72)
    fig.tight_layout(pad=3, h_pad=3, rect=(0,0,1,0.80))
    if len(title):
        fig.suptitle(title, fontsize=16)
    for col, grp in enumerate(grps):
        plot_activity(df_spike,grp,"Spiking",ax=ax[0,col])
        # only the top row keeps a legend, re-created above its axes
        leg = ax[0,col].legend(loc='lower center', bbox_to_anchor=(0.5, 1.1), title=', '.join(grp))
        leg.get_title().set_fontsize('large')
        leg.get_title().set_fontweight('bold')
        plot_activity(df_data,grp,"Ca-level",ax=ax[1,col])
        ax[1,col].legend_.remove()
        plot_activity(df_lick,grp,"Lick/frame",ax=ax[2,col])
        ax[2,col].legend_.remove()
    # fig.show() may emit a UserWarning depending on the backend; silence it
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        fig.show()

In [None]:
# Open the PDF file that the pp.savefig() calls in the following cells write to
pp = helpmultipage('pop.pdf')

In [None]:
# Plot the tables that are not z-scored, without grouping, and save the page
plot_data(df_spike, df_data, df_lick)
pp.savefig()

## Z-scored spiking
Spiking is "True" in the half-open frame intervals [start_frame, stop_frame) given in transients_data.h5

### Single criterion
* interestingly population activity is high both for puffed and licking during the UC session, to be checked in the cross-correlations

In [None]:
# One subplot column per single trait
plot_data(
    z_spike, z_data, z_lick,
    [['context'], ['learning_epoch'], ['licking'], ['puffed']],
    title='Population activity'
)
pp.savefig()

### Two criteria
* selecting (licking and puffed) makes clear that only the airpuffing correlates with UC (they are the same) and not licking
* selecting (epoch and puffed) shows that activity during UC decreases in the post-learning period
* selecting (context and puffed) shows that CS+ alone does not involve higher population activity

In [None]:
# One subplot column per pair of traits
plot_data(
    z_spike, z_data, z_lick,
    [['context', 'learning_epoch'],
     ['context', 'licking'],
     ['context', 'puffed'],
     ['learning_epoch', 'puffed'],
     ['learning_epoch', 'licking'],
     ['licking', 'puffed']],
    title='Population activity'
)
pp.savefig()

### Activities conditional on epoch

#### Pre-learning

In [None]:
# Restrict every z-scored table to the trials of the 'Pre-Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch'] == 'Pre-Learning']
print (experiment_c.shape)
z_spike_c, z_data_c, z_raw_c = (
    table.reindex(experiment_c.index, level='time')
    for table in (z_spike, z_data, z_raw)
)
z_lick_c = z_lick.reindex(experiment_c.index)
print (z_spike_c.shape)

plot_data(
    z_spike_c, z_data_c, z_lick_c,
    [['context', 'licking'], ['context', 'puffed'], ['licking', 'puffed']],
    title='Pre-Learning'
)
pp.savefig()

#### Learning

In [None]:
# Restrict every z-scored table to the trials of the 'Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch'] == 'Learning']
print (experiment_c.shape)
z_spike_c, z_data_c, z_raw_c = (
    table.reindex(experiment_c.index, level='time')
    for table in (z_spike, z_data, z_raw)
)
z_lick_c = z_lick.reindex(experiment_c.index)
print (z_spike_c.shape)

plot_data(
    z_spike_c, z_data_c, z_lick_c,
    [['context', 'licking'], ['context', 'puffed'], ['licking', 'puffed']],
    title='Learning'
)
pp.savefig()

#### Post-Learning

In [None]:
# Restrict every z-scored table to the trials of the 'Post-Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch'] == 'Post-Learning']
print (experiment_c.shape)
z_spike_c, z_data_c, z_raw_c = (
    table.reindex(experiment_c.index, level='time')
    for table in (z_spike, z_data, z_raw)
)
z_lick_c = z_lick.reindex(experiment_c.index)
print (z_spike_c.shape)

plot_data(
    z_spike_c, z_data_c, z_lick_c,
    [['context', 'licking'], ['context', 'puffed'], ['licking', 'puffed']],
    title='Post-Learning'
)
pp.savefig()

In [None]:
# Close the PDF so that the saved pages are written out
pp.close()

## Individual ROIs
* since there are many of them, save figure to pdf

In [None]:
def plot_roi(df_spike, df_data, grps = None, title=''):
    """Draw a 2-row grid of activity plots and return the figure.

    The rows show *df_spike* ("Spiking") and *df_data* ("Ca-level"), each
    drawn with ``plot_activity``. The figure is not shown, so the caller can
    save and close it.

    Parameters
    ----------
    df_spike, df_data : DataFrame
        Tables passed on to ``plot_activity``.
    grps : list of list of str, optional
        One grouping (list of trait names) per subplot column. Defaults to
        ``[[]]``, i.e. a single column without grouping.
    title : str
        Figure title; omitted when empty.

    Returns
    -------
    The created matplotlib figure.
    """
    if grps is None:
        # built per call instead of sharing one mutable default list
        grps = [[]]
    ncol = len(grps)
    fig, ax = plt.subplots(2, ncol, figsize=(6*ncol,9), sharex=True, squeeze=False, dpi=72)
    fig.tight_layout(pad=3, h_pad=3, rect=(0,0,1,0.80))
    if len(title):
        fig.suptitle(title, fontsize=16)
    for col, grp in enumerate(grps):
        plot_activity(df_spike,grp,"Spiking",ax=ax[0,col])
        # only the top row keeps a legend, re-created above its axes
        leg = ax[0,col].legend(loc='lower center', bbox_to_anchor=(0.5, 1.1), title=', '.join(grp))
        leg.get_title().set_fontsize('large')
        leg.get_title().set_fontweight('bold')
        plot_activity(df_data,grp,"Ca-level",ax=ax[1,col])
        ax[1,col].legend_.remove()
    return fig

In [None]:
# One PDF page per ROI, one subplot column per single trait.
# The with-block closes the PDF even when plotting a ROI fails.
with PdfPages('roi1crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials of this ROI
        z_spike_c = z_spike.loc[(slice(None),roi),:]
        z_data_c = z_data.loc[(slice(None),roi),:]
        z_raw_c = z_raw.loc[(slice(None),roi),:]
        fig = plot_roi(z_spike_c, z_data_c, [['context'],['learning_epoch'],['licking'],['puffed']], title='ROI %d:\n%s'%(i,roi))
        # save exactly this figure, then free it
        pp.savefig(fig)
        plt.close(fig)

In [None]:
# One PDF page per ROI, one subplot column per pair of traits.
# The with-block closes the PDF even when plotting a ROI fails.
with PdfPages('roi2crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials of this ROI
        z_spike_c = z_spike.loc[(slice(None),roi),:]
        z_data_c = z_data.loc[(slice(None),roi),:]
        z_raw_c = z_raw.loc[(slice(None),roi),:]
        fig = plot_roi(z_spike_c, z_data_c, [['learning_epoch','context'],['learning_epoch','licking'],['learning_epoch','puffed'],['context','licking'],['context','puffed'],['licking','puffed']], title='ROI %d:\n%s'%(i,roi))
        # save exactly this figure, then free it
        pp.savefig(fig)
        plt.close(fig)

### An example of spiking
The first second of the recording seems to be missing

In [None]:
# Plot all neural units in this experiment: one row per unit, a horizontal
# line from start_frame to stop_frame of every transient and a '|' marker at
# its max_frame
# NOTE(review): DataFrame.ix was removed in pandas 1.0; the right replacement
# (.loc or .iloc) depends on the index of experiment_traits — confirm.
experiment_id = experiment_traits.ix[0,'time']
# unit identifiers that have transients in the selected experiment
ixt = transients_data.loc[experiment_id].index.unique()
plt.figure(figsize=(16,10))
for i in range(0,len(ixt)):
    unit = ixt[i]
    # transposed so that every transient becomes its own line segment
    firing = np.array(transients_data.loc[(experiment_id,unit),['start_frame', 'stop_frame']])
    plt.plot(firing.T,i*np.ones_like(firing.T),c='k')
    firing = transients_data.loc[(experiment_id,unit),'max_frame']
    plt.plot(firing,i*np.ones_like(firing),'|',ms=5)
# vertical line at every event (scaled by FPS)
for i in range(0,len(events)):
    plt.axvline(x=events[i]*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
plt.title('Transient peaks and durations')
plt.xlabel('Camera frame')
plt.ylabel('Unit ID')
plt.show()

### Some undocumented data

In [None]:
import pickle
import pprint
import sys

# Some undocumented info about the experiments
# NOTE: unpickling can execute arbitrary code; only load files from a trusted
# source.
# The with-block closes the file handle once the data has been read.
with open('../_share/Losonczi/msa0316_1/frame_fluor.pkl', 'rb') as pkl_file:
    if sys.version_info[0] >= 3:
        # Python 3: decode the byte strings of the pickle as latin1
        data1 = pickle.load(pkl_file, encoding='latin1')
    else:
        # Python 2.7
        data1 = pickle.load(pkl_file)

#pprint.pprint(data1)

# list(...) because dict views cannot be indexed on Python 3
pprint.pprint(list(data1.keys()))
pprint.pprint(list(data1.values())[0]) # same as data1['2016-04-01-23h43m20s']