#### The `future` package must be installed separately; importing from \_\_future\_\_ alone is not enough

In [None]:
#from future.utils import PY3
import future
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
import pandas as pd
import numpy as np
import os
import pprint
from IPython.display import display
from matplotlib import pyplot as plt
from scipy.stats.mstats import zscore
import warnings
from datetime import datetime as dt, timedelta as td

%matplotlib inline

## Load files

In [None]:
# Set event lengths; `events` holds their cumulative sums, i.e. the event boundaries
# NOTE(review): durations are presumably in seconds, since plots use events*FPS as frame positions -- confirm
durations=np.array([0,10,20,15,5])
events=np.cumsum(durations)

# Set date format of the timestamp strings
dtformat = '%Y-%m-%d-%Hh%Mm%Ss'

# Display database folders
display(os.listdir('../_share/Losonczi/'))

# Select animal together with its FPS value (one animal per line; exactly one line is active)
#animal = 'msa1215_1'; FPS = 30
animal = 'msa0316_1'; FPS = 8

# List the contents of the selected animal's directory
mydir = os.path.join('../_share/Losonczi',animal)
os.listdir(mydir)

In [None]:
# Load files
# Each table is read from its own HDF5 file in `mydir`, always under the key 'table'
experiment_traits = pd.read_hdf(os.path.join(mydir,'experiment_traits.h5'),key='table')
raw_data = pd.read_hdf(os.path.join(mydir,'raw_data.h5'),key='table')
df_data = pd.read_hdf(os.path.join(mydir,'df_data.h5'),key='table')
transients_data = pd.read_hdf(os.path.join(mydir,'transients_data.h5'),key='table')
behavior_data = pd.read_hdf(os.path.join(mydir,'behavior_data.h5'),key='table')
# Number of columns of df_data (presumably one column per camera frame -- confirm)
max_nframe = df_data.shape[1]

In [None]:
# Trials and ROIs are taken from the first two levels of the df_data row index
trials, rois = df_data.index.levels[0], df_data.index.levels[1]
print(df_data.shape, trials, rois, sep=' \n ')

In [None]:
# Post-Learning may repeat session_num, therefore an additional categorical
# index, day_num, is created.
# Pre-Learning and Learning seem to treat session_num as documented.
# A new day starts wherever two consecutive experiments lie more than 8 hours apart.
numtime = experiment_traits['time'].apply(lambda t: dt.strptime(t, dtformat))
leapaday = np.diff(numtime.values) > np.timedelta64(8,'h')
numday = np.concatenate(([0], leapaday.astype(int))).cumsum()
experiment_traits['day_num'] = numday.astype(str)
# Show the first experiment of every day
display(experiment_traits[np.concatenate(([True], leapaday))])

## Experiment protocol configurations

In [None]:
# Work on a copy so the helper column below is not added to experiment_traits itself
et = experiment_traits.copy()
# A column of ones: its per-group sum is the number of experiments in that group
et['sum'] = 1
display(et.groupby(['learning_epoch','context','puffed','licking']).sum())
# Stored with 'licking' before 'puffed', unlike the table displayed above
et = et.groupby(['learning_epoch','context','licking','puffed']).sum()
### ATTENTION, for later conformity we store a different order than the one displayed here!!!

## Prepare data

In [None]:
# Per camera frame: number of non-NaN entries, normalised by the number of trials

avail_sum = df_data.notnull().sum(axis=0) / len(df_data.index.levels[0])
plt.plot(avail_sum)
plt.xlabel('Camera frame within experiment')
plt.ylabel('Available ROIs on average')

In [None]:
# For every (trial, ROI) row count the frames with valid data, then spread the
# ROIs over the columns so that each row is one trial.

## Alternative 1 (kept for reference): plain availability flag
#avail = df_data[[]].copy()
#avail['isFound']=1
#avail.reset_index() #[['time','roi_id','isFound']]
## Alternative A (kept for reference): explicit pivot instead of unstack
#avail.reset_index().pivot(index='time', columns='roi_id')

# Used here: frame count per row (method 2) + unstack of the inner index level (method B)
avail = df_data.notnull().sum(axis=1).to_frame('nFrames').unstack()

print(avail.shape)
display(avail.head())
display(avail.tail())

In [None]:
# Create boolean DataFrame which ROI is spiking in which camera frame
# Approach: put +1 at every transient start frame and -1 at every stop frame,
# then take the cumulative sum along the frame axis.

# create empty structure for cumsum (one row per trial x ROI, one column per frame)
mir = pd.MultiIndex.from_product((trials.values,rois.values),names=('time','roi_id'))
# NOTE(review): `mic` is built but not used anywhere in the visible code
mic = pd.MultiIndex.from_product(('Spiking',np.array(range(0,max_nframe))),names=('','frame'))
df_spike = pd.DataFrame(data=0,index=mir,columns=pd.Index(np.array(range(0,max_nframe)),name='frame'))
# bare expression in the middle of the cell: has no effect
df_spike.shape

# select spike data: only transients that are not flagged as in_motion_period
spikes = transients_data.loc[transients_data['in_motion_period']==False,['start_frame','stop_frame']]
spikes['count']=1

# fill in spike start and stop points
## METHOD 0, this kind of indexing would work for numpy, not here
#ix0 = zip(spikes.reset_index()['time'],spikes.reset_index()['roi_id'])
#ix1 = spikes['start_frame'].values
#df_spike.loc[zip(ix0,ix1)]+=1
## METHOD 1, slowest in both ways
#for (idx,data) in spikes.iterrows():
#    df_spike.loc[idx,data.values]+=[1,-1]
## METHOD 2, fastest in both
#for (idx,start,stop) in spikes.itertuples():
#    df_spike.loc[idx,start]+=1
#    df_spike.loc[idx,stop]-=1
## ULTIMATE METHOD, without for loop is best:
# pivot the start frames into columns and add the resulting counts to the empty table
sp = spikes[['start_frame','count']].pivot(columns='start_frame').fillna(0)
#sp.columns = pd.Index(sp.columns.levels[1].values) # would eliminate the need for ['count'] below
df_spike = df_spike.add(sp['count'], fill_value=0)
# same for the stop frames, but subtracted
sp = spikes[['stop_frame','count']].pivot(columns='stop_frame').fillna(0)
#sp.columns = pd.Index(sp.columns.levels[1].values) # would eliminate the need for ['count'] below
df_spike = df_spike.add(-sp['count'], fill_value=0)

# cumulate, conversion to int is not advised if using NaNs
df_spike = df_spike.cumsum(axis=1).astype(int)

print('table shape', df_spike.shape, 'active frames*ROIs', df_spike.sum().sum())
display(df_spike.head(25))
display(df_spike.tail())

In [None]:
# Create a per-trial table telling whether licking happens in a camera frame,
# then smooth it along the frame axis.

# Check for valid data (stop_time strictly after start_time) and calculate their frame
print(behavior_data.shape)
df_lick = behavior_data[behavior_data.loc[:,'stop_time']>behavior_data.loc[:,'start_time']].copy()
print(df_lick.shape)
# Frame index of the midpoint of every lick: FPS * mean(start_time, stop_time), rounded
df_lick['mid_frame'] = (FPS*(df_lick['start_time']+df_lick['stop_time'])/2).apply(np.round).astype(int)
display(df_lick.head())
display(df_lick.tail())
# Convert to a DataFrame like df_data or df_raw, this eventually skips multiple licks in one camera frame
df_lick = df_lick.reset_index().rename(columns={'index':'time'})
df_lick = df_lick.drop_duplicates(['time','mid_frame']).pivot(index='time', columns='mid_frame')
display(df_lick.head())
# 1 where a lick was recorded for that (trial, frame), 0 otherwise
df_lick = (~df_lick['lick_idx'].isnull()).astype(int)
df_lick.columns.name = ''
display(df_lick.head())
# Number of remaining licks
print(df_lick.sum().sum())
# Smoothen every row with a Gaussian kernel (sigma = 2 frames).
# Imported from scipy.ndimage directly: the scipy.ndimage.filters namespace is deprecated.
from scipy.ndimage import gaussian_filter
df_lick = df_lick.apply(lambda x: gaussian_filter(x.astype(float), sigma=2), axis=1, raw=True)
display(df_lick.head())

## z-scoring

In [None]:
def pd_zscore_rows(df):
    """Return a float copy of *df* in which every row is z-scored.

    Each row has its mean subtracted and is divided by its population
    standard deviation (``ddof=0``); NaNs are skipped when computing both
    statistics, as pandas does by default.

    The computation is vectorized, so integer input is handled without
    writing floats into an integer frame, and rows sharing the same index
    label are standardized independently. The input is not modified.
    """
    values = df.astype(float)
    centered = values.sub(values.mean(axis=1), axis=0)
    return centered.div(values.std(axis=1, ddof=0), axis=0)

def nan_zscore(data):
    """Standardize *data* using its NaN-ignoring mean and standard deviation."""
    center = np.nanmean(data)
    spread = np.nanstd(data)
    return (data - center) / spread
    
def pd_zscore(df, axis = 0):
    """Apply ``nan_zscore`` to every slice of *df* along *axis*.

    The slices are handed over as raw arrays (``raw=True``). RuntimeWarnings
    raised during the computation are silenced for the duration of the call.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=RuntimeWarning)
        return df.apply(nan_zscore, axis=axis, raw=True)

In [None]:
## BENCHMARK (row-by-row reference implementation, kept for comparison only)
#z_spike = pd_zscore_rows(df_spike)
#z_data = pd_zscore_rows(df_data)
#z_raw = pd_zscore_rows(raw_data)
#z_lick = pd_zscore_rows(df_lick)

## FAST
# axis=1: every row is standardized on its own
z_spike = pd_zscore(df_spike, axis=1)
z_data = pd_zscore(df_data, axis=1)
z_raw = pd_zscore(raw_data, axis=1)
z_lick = pd_zscore(df_lick, axis=1)

In [None]:
# Sort the row index of the z-scored tables
z_data = z_data.sort_index()
z_raw = z_raw.sort_index()

# Plot

In [None]:
# Column-wise mean of df_spike, with a vertical marker at every event boundary
plt.title('Introductory figure')
plt.xlabel('Camera frame')
plt.ylabel('Absolute activity')
plt.plot(df_spike.mean(axis=0), label="Population activity")
for boundary in events:
    # boundaries are scaled by FPS to land on the frame axis
    plt.axvline(x=boundary*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
plt.legend(loc='lower left')
plt.show()

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

class helpmultipage(object):
    """Small wrapper around ``PdfPages`` that remembers whether the PDF is open.

    The PDF is opened on construction. ``savefig`` and ``close`` silently do
    nothing while the PDF is closed, so calling them repeatedly is safe.
    """

    def __init__(self, filename):
        self.filename = filename
        self.isopen = False
        self.open()

    def __del__(self):
        # getattr: __init__ may have failed before `isopen` was set
        if getattr(self, 'isopen', False):
            self.close()

    def savefig(self):
        """Append the current figure as a new page, if the PDF is open."""
        if self.isopen:
            self.pp.savefig()

    def open(self):
        """Open the PDF unless it is already open."""
        # `not`, not `~`: bitwise NOT of a bool is -1 or -2, which are both
        # truthy, so the file would be re-created on every call.
        if not self.isopen:
            self.pp = PdfPages(self.filename)
            self.isopen = True

    def close(self):
        """Close the PDF if it is open."""
        if self.isopen:
            self.pp.close()
        self.isopen = False

In [None]:
def plot_activity(data, grp = ['context','learning_epoch','licking','puffed'],
                  name = 'Population activity (spiking)', ax = None, div=None):
    """Plot the mean of *data* per experiment group, or of the whole table.

    Parameters
    ----------
    data : DataFrame whose index can be joined with ``experiment_traits``.
    grp : list of ``experiment_traits`` column names to group by. With an
        empty list a single curve, the column-wise mean of *data*, is drawn.
    name : label of the y axis.
    ax : matplotlib axes to draw on; a new figure is created when ``None``.
    div : optional x positions of the columns of *data*; when ``None`` the
        curves are plotted against 0, 1, 2, ...

    Every curve is labelled ``'<group key>: <count>'``. The count is the
    number of distinct 'time' values per group, taken from the first column
    of the grouped count table.
    """
    # NOTE: session_num is a string object therefore it is not included in the summation or averaging at the aggregation step of groupby
    # but the traits licking and puffed are boolean and kept if uniform, so we get rid of them by conforming to the original index
    if ax is None:
        fig = plt.figure()
        ax = fig.gca()
    if len(grp):
        joined = data.join(experiment_traits, how='left')
        # Average only the columns of `data`: the joined trait columns include
        # strings, which recent pandas versions refuse to average.
        res = joined.groupby(grp)[list(data.columns)].mean().reindex(columns=data.columns)
        count = data[[]].reset_index().drop_duplicates(['time']).set_index(['time']).join(experiment_traits,how='left').groupby(grp).count()
        if count.ndim > 1:
            # positional selection of the first column (`.ix` no longer exists in pandas)
            count = count.iloc[:, 0]
        for key, trace, n_trials in zip(res.index.values, res.values, count.values):
            label = '%s: %d' % (key, n_trials)
            if div is None:
                ax.plot(trace, label=label)
            else:
                ax.plot(div, trace, label=label)
    else:
        res = data.mean(axis=0)
        if div is None:
            ax.plot(res.values,label='whole popuation')
        else:
            ax.plot(div,res.values,label='whole popuation')
    # y range: the 1st..99th percentile interval, stretched to twice its width around its centre
    q = np.nanpercentile(res.values,[1,99])
    ax.set_ylim(np.mean(q)+2*(q-np.mean(q)))
    # vertical markers at the event boundaries, scaled by FPS
    for boundary in events:
        ax.axvline(x=boundary*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
    ax.set_xlabel('Camera frame')
    ax.set_ylabel(name)
    leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title=', '.join(grp))
    leg.get_title().set_fontsize('large')
    leg.get_title().set_fontweight('bold')

In [None]:
def plot_data(df_spike, df_data, df_lick, grps = [[]], title=''):
    """Draw spiking, Ca-level and licking panels for every grouping in *grps*.

    One column of four stacked axes is created per grouping. The top axes is
    switched off, and the legend of the spiking panel is placed above that
    panel. The legends of the other two panels are removed. *title*, when
    non-empty, becomes the figure title.
    """
    n_groupings = len(grps)
    fig, axes = plt.subplots(4, n_groupings, figsize=(6*n_groupings,13),
                             sharex=True, squeeze=False, dpi=72)
    fig.tight_layout(pad=3, h_pad=3)
    if len(title) > 0:
        fig.suptitle(title, fontsize=16)
    for col, grouping in enumerate(grps):
        spacer_ax, spike_ax, ca_ax, lick_ax = axes[:, col]
        spacer_ax.axis('off')
        plot_activity(df_spike, grouping, "Spiking", ax=spike_ax)
        # replace the legend created by plot_activity with one above the panel
        legend = spike_ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.1),
                                 title=', '.join(grouping))
        legend_title = legend.get_title()
        legend_title.set_fontsize('large')
        legend_title.set_fontweight('bold')
        plot_activity(df_data, grouping, "Ca-level", ax=ca_ax)
        ca_ax.legend_.remove()
        plot_activity(df_lick, grouping, "Licking", ax=lick_ax)
        lick_ax.legend_.remove()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        fig.show()

In [None]:
# Open the multipage PDF that collects the explanatory figure (opened by the constructor)
pp = helpmultipage('explanatory.pdf')

In [None]:
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection
# Geometry of the sections between consecutive event boundaries, scaled by FPS
center = FPS * (events[:-1]+events[1:]) /2
left = FPS * events
width = FPS * (events[1:]-events[:-1])
# Vertical position of the rotated labels and lower edge of the y range
vcenter = 0.0
vstart = -0.5

def label90(x,y,text):
    # Write `text` rotated by 90 degrees, centred at (x, y).
    # Uses the module-level `ax`, which is only created below: call after plt.subplots.
    ax.text(x, y, text, ha="center", va="center", family='sans-serif', size=14, rotation=90)

# Two stacked axes; the upper one is switched off and the plot goes to the lower one
fig, (empty, ax) = plt.subplots(2,1,figsize=(6,8))
fig.tight_layout(pad=3)
empty.axis('off')
#ax = fig.gca()
fig.suptitle('Explanatory figure')
ax.set_xlabel('Camera frame')
ax.set_ylabel('z-scored activity')
ax.set_ylim(vstart,vstart+1)
# Four demo curves (the mean z-scored spiking and its negative, with small offsets)
# that only illustrate how the legend entries of the later figures read
ax.plot(z_spike.mean(axis=0)+0.00, label="(CategoryA, True): #trials", c=(1,1,0))
ax.plot(z_spike.mean(axis=0)+0.02, label="(CategoryB, True): #trials", c=(.5,1,.5))
ax.plot(-z_spike.mean(axis=0)+0.00, label="(CategoryA, False): #trials", c=(1,.8,1))
ax.plot(-z_spike.mean(axis=0)+0.02, label="(CategoryB, False): #trials", c=(.5,1,1))
# Rectangles collected here are drawn together as one PatchCollection below
patches = []
# mark delay
label90(center[0], vcenter, 'excitation by\nshowing water')
# mark CS
rect = mpatches.Rectangle((left[1],vstart), width[1], 1, ec="none")
patches.append(rect)
label90(center[1], vcenter, 'CS± if tone\n"Baseline" otherwise')
# mark delay
label90(center[2], vcenter, 'delay')
# mark UC
rect = mpatches.Rectangle((left[3],vstart), width[3], 1, ec="none")
patches.append(rect)
label90(center[3], vcenter, 'UC if any')
# mark water
#arrow = mpatches.FancyArrowPatch((left[1], 0.1), (left[3], 0.1),
#          arrowstyle=mpatches.ArrowStyle("simple", head_length=2*FPS, head_width=0.2, tail_width=0.1))
#patches.append(arrow)
ax.text((left[0]+left[3])/2, vstart, "water source present\niff allowed to lick",
        ha="center", va="bottom", family='sans-serif', size=14, bbox=dict(boxstyle="DArrow", pad=0.0, fc='c'))

# Vertical markers at the event boundaries
for i in range(0,len(events)):
    ax.axvline(x=events[i]*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
# Give every rectangle its own colour from the hsv colormap, drawn mostly transparent
colors = np.linspace(0, 1, len(patches))
collection = PatchCollection(patches, cmap=plt.cm.hsv, alpha=0.1)
collection.set_array(np.array(colors))
ax.add_collection(collection)

# Legend above the axes
leg = ax.legend(loc='lower center', title="Category name, Condition name",
               bbox_to_anchor=(0.5, 1.1))
leg.get_title().set_fontsize('large')
leg.get_title().set_fontweight('bold')
fig.show()
# Append the figure to the PDF held by `pp`
pp.savefig()

In [None]:
# Close the PDF currently held by `pp`
pp.close()

In [None]:
# Open '<animal>_pop.pdf'; the following cells append one page each via pp.savefig()
pp = helpmultipage(animal+'_pop.pdf')

In [None]:
# Non-z-scored tables with the default grouping ([[]]): one column, whole-table mean
plot_data(df_spike, df_data, df_lick)
pp.savefig()

## Z-scored spiking
Spiking is "True" in the half-open [start, stop) intervals given in transients_data.h5

### Single criterion
* interestingly population activity is high both for puffed and licking during the UC session, to be checked in the cross-correlations

In [None]:
# One column of panels per single grouping criterion
plot_data(z_spike, z_data, z_lick, [['context'],['learning_epoch'],['licking'],['puffed']], title='Population activity')
pp.savefig()

### Two criteria
* selecting (licking and puffed) makes clear that only the airpuffing correlates with UC (they are the same) and not licking
* selecting (epoch and puffed) shows that activity during UC decreases in the post-learning period
* selecting (context and puffed) shows that CS+ alone does not involve higher population activity

In [None]:
# One column of panels for each of the six pairs of grouping criteria
plot_data(z_spike, z_data, z_lick, [['context','learning_epoch'],['context','licking'],['context','puffed'],['learning_epoch','puffed'],['learning_epoch','licking'],['licking','puffed']], title='Population activity')
pp.savefig()

### Activities conditional on epoch

#### Pre-learning

In [None]:
# Restrict all z-scored tables to the experiments of the 'Pre-Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch']=='Pre-Learning']
print(experiment_c.shape)
# the first three tables carry a 'time' index level; z_lick is reindexed on its whole index
spike_c, data_c, raw_c = (table.reindex(experiment_c.index, level='time')
                          for table in (z_spike, z_data, z_raw))
lick_c = z_lick.reindex(experiment_c.index)
print(spike_c.shape)

plot_data(spike_c, data_c, lick_c,
          [['context','licking'],['context','puffed'],['licking','puffed']],
          title='Pre-Learning')
pp.savefig()

#### Learning

In [None]:
# Restrict all z-scored tables to the experiments of the 'Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch']=='Learning']
print(experiment_c.shape)
# the first three tables carry a 'time' index level; z_lick is reindexed on its whole index
spike_c, data_c, raw_c = (table.reindex(experiment_c.index, level='time')
                          for table in (z_spike, z_data, z_raw))
lick_c = z_lick.reindex(experiment_c.index)
print(spike_c.shape)

plot_data(spike_c, data_c, lick_c,
          [['context','licking'],['context','puffed'],['licking','puffed']],
          title='Learning')
pp.savefig()

#### Post-Learning

In [None]:
# Restrict all z-scored tables to the experiments of the 'Post-Learning' epoch
experiment_c = experiment_traits[experiment_traits['learning_epoch']=='Post-Learning']
print(experiment_c.shape)
# the first three tables carry a 'time' index level; z_lick is reindexed on its whole index
spike_c, data_c, raw_c = (table.reindex(experiment_c.index, level='time')
                          for table in (z_spike, z_data, z_raw))
lick_c = z_lick.reindex(experiment_c.index)
print(spike_c.shape)

plot_data(spike_c, data_c, lick_c,
          [['context','licking'],['context','puffed'],['licking','puffed']],
          title='Post-Learning')
pp.savefig()

In [None]:
# Close the PDF currently held by `pp`
pp.close()

## Individual ROIs
* since there are many of them, save figure to pdf
* THIS WILL <font color="red">TAKE A WHILE</font>, consider testing with a small range

In [None]:
def plot_roi(df_spike, df_data, grps = [[]], title='', div=None):
    """Build a Ca-level (top) / spiking (bottom) figure with one column per grouping.

    *div* is forwarded to ``plot_activity`` as the x positions of the data
    columns. The legend of the top panel is placed above it and the legend
    of the bottom panel is removed. The figure is returned without being
    shown, so that the caller can save and close it.
    """
    n_groupings = len(grps)
    fig, axes = plt.subplots(2, n_groupings, figsize=(6*n_groupings,9),
                             sharex=True, squeeze=False, dpi=72)
    # leave the upper 20% of the figure free (rect) for the title and the legends
    fig.tight_layout(pad=3, h_pad=3, rect=(0,0,1,0.80))
    if len(title) > 0:
        fig.suptitle(title, fontsize=16)
    for col, grouping in enumerate(grps):
        ca_ax, spike_ax = axes[:, col]
        plot_activity(df_data, grouping, "Ca-level", ax=ca_ax, div=div)
        legend = ca_ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.1),
                              title=', '.join(grouping))
        legend_title = legend.get_title()
        legend_title.set_fontsize('large')
        legend_title.set_fontweight('bold')
        plot_activity(df_spike, grouping, "Spiking", ax=spike_ax, div=div)
        spike_ax.legend_.remove()
    with warnings.catch_warnings():
        # kept from the original: the guarded fig.show() is disabled
        warnings.simplefilter('ignore', UserWarning)
        #fig.show()
    return fig

# Deliberate stop: keeps an automatic run of the notebook from entering the slow per-ROI PDF export below
raise ValueError("You don't want to run this automaticly")

In [None]:
# One PDF page per ROI, z-scored data grouped by a single criterion.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_roi1crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = z_spike.loc[(slice(None),roi),:]
        data_c = z_data.loc[(slice(None),roi),:]
        raw_c = z_raw.loc[(slice(None),roi),:]
        fig = plot_roi(spike_c, data_c, [['context'],['learning_epoch'],['licking'],['puffed']],
                       title='ROI %d:\n%s'%(i,roi))
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

In [None]:
# One PDF page per ROI, z-scored data grouped by pairs of criteria.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_roi2crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = z_spike.loc[(slice(None),roi),:]
        data_c = z_data.loc[(slice(None),roi),:]
        raw_c = z_raw.loc[(slice(None),roi),:]
        fig = plot_roi(spike_c, data_c,
                       [['learning_epoch','context'],['learning_epoch','licking'],['learning_epoch','puffed'],
                        ['context','licking'],['context','puffed'],['licking','puffed']],
                       title='ROI %d:\n%s'%(i,roi))
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

### Averaging over intervals

In [None]:
def func_over_intervals(func, intervals, data, axis=0):
    """Reduce consecutive index ranges of *data* along *axis* with *func*.

    Parameters
    ----------
    func : callable mapping a 1-D array to a scalar (e.g. ``np.nanmean``).
    intervals : sequence of integer boundaries; range ``i`` covers
        ``intervals[i]:intervals[i+1]`` (upper bound exclusive).
    data : array-like.
    axis : axis of *data* that is cut into the ranges.

    Returns
    -------
    tuple
        ``tuple(result)`` where ``result`` has the shape of *data* with
        *axis* replaced by ``len(intervals) - 1``. For 1-D input this is a
        tuple with one value per range.

    *axis* is honoured for slicing as well as for the output shape;
    previously the slicing always ran along axis 0.
    """
    data = np.asarray(data)
    n_ivs = max(len(intervals) - 1, 0)
    shape = list(data.shape)
    shape[axis] = n_ivs
    ret = np.zeros(shape)
    # Views with the working axis in front: writing to `dst` fills `ret`
    src = np.moveaxis(data, axis, 0)
    dst = np.moveaxis(ret, axis, 0)
    for i in range(n_ivs):
        chunk = src[intervals[i]:intervals[i+1]]
        if chunk.ndim == 1:
            dst[i] = func(chunk)
        else:
            # reduce every 1-D lane along the working axis separately
            dst[i] = np.apply_along_axis(func, 0, chunk)
    return tuple(ret)

#### Intervals aligned to events

In [None]:
# Section boundaries: the event boundaries plus a final boundary at 60, scaled by FPS
sections = np.append(events,[60])*FPS
# Midpoint of every section; used as column labels below
centers = (sections[1:]+sections[:-1])/2
def myfun(x):
    # NaN-ignoring mean of x within every section
    return func_over_intervals(np.nanmean, sections, np.array(x))

# For every table: apply myfun to each row, then rebuild a DataFrame from the
# per-row results, with the section centres (as strings) as column labels.
# NOTE(review): `.tolist()` assumes the row-wise apply returns a Series of tuples -- confirm for the pandas version in use
a_spike = df_spike.apply(myfun, axis=1, raw=True)
a_spike = pd.DataFrame(a_spike.tolist(), columns=centers.astype(str), index=a_spike.index)
a_data = df_data.apply(myfun, axis=1, raw=True)
a_data = pd.DataFrame(a_data.tolist(), columns=centers.astype(str), index=a_data.index)
a_raw = raw_data.apply(myfun, axis=1, raw=True)
a_raw = pd.DataFrame(a_raw.tolist(), columns=centers.astype(str), index=a_raw.index)
a_lick = df_lick.apply(myfun, axis=1, raw=True)
a_lick = pd.DataFrame(a_lick.tolist(), columns=centers.astype(str), index=a_lick.index)

In [None]:
# Sort the row index of the section-averaged tables
a_data = a_data.sort_index()
a_raw = a_raw.sort_index()

In [None]:
# One PDF page per ROI, event-aligned averages grouped by a single criterion.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_avg1crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = a_spike.loc[(slice(None),roi),:]
        data_c = a_data.loc[(slice(None),roi),:]
        raw_c = a_raw.loc[(slice(None),roi),:]
        # div=centers: plot every average at the centre of its section
        fig = plot_roi(spike_c, data_c, [['context'],['learning_epoch'],['licking'],['puffed']],
                       title='ROI %d:\n%s'%(i,roi), div=centers)
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

In [None]:
# One PDF page per ROI, event-aligned averages grouped by pairs of criteria.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_avg2crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = a_spike.loc[(slice(None),roi),:]
        data_c = a_data.loc[(slice(None),roi),:]
        raw_c = a_raw.loc[(slice(None),roi),:]
        # div=centers (as in the single-criterion export): without it the
        # averages are drawn at x = 0, 1, 2, ... while the event markers sit
        # at events*FPS, so the two would not line up.
        fig = plot_roi(spike_c, data_c,
                       [['learning_epoch','context'],['learning_epoch','licking'],['learning_epoch','puffed'],
                        ['context','licking'],['context','puffed'],['licking','puffed']],
                       title='ROI %d:\n%s'%(i,roi), div=centers)
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

#### Averaging over bins

In [None]:
# Bin boundaries every 5 units from 0 up to and including 60, scaled by FPS.
# The stop value must lie beyond 60: np.arange excludes its stop, so
# arange(0,60,5) ended at 55 and silently dropped the last bin (55-60).
# 60 is the same upper bound the event-aligned sections use.
sections = np.arange(0,60+5,5)*FPS
# Midpoint of every bin; used as column labels below
centers = (sections[1:]+sections[:-1])/2
def myfun(x):
    # NaN-ignoring mean of x within every bin
    return func_over_intervals(np.nanmean, sections, np.array(x))

# For every table: apply myfun to each row, then rebuild a DataFrame from the
# per-row results, with the bin centres (as strings) as column labels.
b_spike = df_spike.apply(myfun, axis=1, raw=True)
b_spike = pd.DataFrame(b_spike.tolist(), columns=centers.astype(str), index=b_spike.index)
b_data = df_data.apply(myfun, axis=1, raw=True)
b_data = pd.DataFrame(b_data.tolist(), columns=centers.astype(str), index=b_data.index)
b_raw = raw_data.apply(myfun, axis=1, raw=True)
b_raw = pd.DataFrame(b_raw.tolist(), columns=centers.astype(str), index=b_raw.index)
b_lick = df_lick.apply(myfun, axis=1, raw=True)
b_lick = pd.DataFrame(b_lick.tolist(), columns=centers.astype(str), index=b_lick.index)

In [None]:
# Sort the row index of the bin-averaged tables
b_data = b_data.sort_index()
b_raw = b_raw.sort_index()

In [None]:
# One PDF page per ROI, bin averages grouped by a single criterion.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_bin1crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = b_spike.loc[(slice(None),roi),:]
        data_c = b_data.loc[(slice(None),roi),:]
        raw_c = b_raw.loc[(slice(None),roi),:]
        # div=centers: plot every average at the centre of its bin
        fig = plot_roi(spike_c, data_c, [['context'],['learning_epoch'],['licking'],['puffed']],
                       title='ROI %d:\n%s'%(i,roi), div=centers)
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

In [None]:
# One PDF page per ROI, bin averages grouped by pairs of criteria.
# The `with` block closes the PDF even if a page fails to render.
with PdfPages(animal+'_bin2crit.pdf') as pp:
    for i, roi in enumerate(rois):
        # all trials (slice(None)) of the current ROI
        spike_c = b_spike.loc[(slice(None),roi),:]
        data_c = b_data.loc[(slice(None),roi),:]
        raw_c = b_raw.loc[(slice(None),roi),:]
        # div=centers (as in the single-criterion export): without it the
        # averages are drawn at x = 0, 1, 2, ... while the event markers sit
        # at events*FPS, so the two would not line up.
        fig = plot_roi(spike_c, data_c,
                       [['learning_epoch','context'],['learning_epoch','licking'],['learning_epoch','puffed'],
                        ['context','licking'],['context','puffed'],['licking','puffed']],
                       title='ROI %d:\n%s'%(i,roi), div=centers)
        # save the figure explicitly rather than whatever figure is current
        pp.savefig(fig)
        plt.close(fig)

## Correlations

In [None]:
# Combine information
ord1 = z_spike.join(experiment_traits, how='inner').drop('time', axis=1).reset_index().drop('time', axis=1).set_index(['roi_id','learning_epoch','context','licking','puffed','session_num']).sort_index()
ord1.columns.name='Spike'
print(ord1.shape)
display(ord1.head())

# Search for days that contain experiments with same traits and session_num
# These entries would jeopardize unstacking
et1 = experiment_traits.reset_index(drop=True).set_index(['learning_epoch','context','licking','puffed','session_num']).sort_index()
second_occur = et1.index.duplicated()
set1 = et1.loc[second_occur,'day_num'].unique()
all_occur = et1.index.get_duplicates()
set_all = et1.loc[all_occur,'day_num'].unique()
set2 = np.array(list(set(set_all)-set(set1)))
print(set1,set2)

# Filter out set2
ord1 = ord1[ord1.loc[:,'day_num'].apply(lambda x: x not in set2)]
print(ord1.shape)

# Reshape for correlation analysis
comp = ord1['day_num'].astype(int).unstack().sort_index(axis=1)
ord1 = ord1.drop(['day_num'], axis=1).unstack()
display(comp.head())
display(ord1.head(10))

In [None]:
# Reorder
ord2 = ord1.reset_index().set_index(['learning_epoch','context','licking','puffed','roi_id']).sort_index()
print(ord2.shape)
display(ord2.head())

In [None]:
# Find the pre-learning structure
key_ref = ('Pre-Learning','CS+',True,False)
sel = ord2.loc[key_ref+(slice(None),),:]
print(sel.shape)

# Correlate
corr_df = sel.T.corr()
corr_np = corr_df.fillna(0).values

# Discard invalid series
keep = (np.diag(corr_np) == 1.0)
corr_np = corr_np[keep,:][:,keep]

# Show
fig, ax = plt.subplots(1,2, figsize=(10,5))
ax[0].matshow(corr_df.values)
ax[1].matshow(corr_np)

In [None]:
# Open '<animal>_correl.pdf'; the following cells append one page each via pp.savefig()
pp = helpmultipage(animal+'_correl.pdf')

In [None]:
# Define an ordering
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform, pdist
sq_dist = squareform(1.0-corr_np)
corr_link = linkage(sq_dist, 'average')
fig, ax = plt.subplots(1,2, figsize=(12,6))
fig.suptitle('Reference is presented here: '+(', '.join(np.array(key_ref))))
dendo = dendrogram(corr_link, ax=ax[0], orientation='right')
ax[0].set_title('Distance of firing patterns')
corr_order = dendo['leaves']
# Show reordered
ax[1].matshow(corr_np[corr_order,:][:,corr_order], origin='lower')
ax[1].set_title('Ordered correlation matrix')
pp.savefig()

In [None]:
# One panel per row of `et` (one per trait combination), three panels per figure row
num_plots = len(et.index)
num_rows = int(np.ceil(num_plots/3.0))
fig, ax = plt.subplots(num_rows,3, figsize=(18,6*num_rows))
fig.suptitle('Correlation structure under different conditions: learning_epoch, context, licking, puffed')
# flatten the axes grid so that panels can be addressed by a single index
ax = np.ravel(ax)
    
for idx in range(0,num_plots):
    # Select all rows of the current condition (any roi_id)
    key = et.index[idx]+(slice(None),)
    sel = ord2.loc[key,:]
    print(key,ord2.shape,sel.shape)
    
    # Correlate; undefined entries become 0
    corr_tmp = sel.T.corr()
    corr_tmp = corr_tmp.fillna(0).values

    # Apply the reference's `keep` mask and `corr_order` so all panels share one ordering
    # NOTE(review): this assumes the selection has the same rows as the reference selection -- confirm
    if len(corr_tmp):
        ax[idx].matshow(corr_tmp[keep,:][:,keep][corr_order,:][:,corr_order])
    ax[idx].set_title(et.index[idx])
pp.savefig()

In [None]:
# Close the PDF currently held by `pp`
pp.close()

### An example of spiking
The first second of the recording seems to be missing.

In [None]:
# Plot all neural units in this experiment
experiment_id = experiment_traits.ix[0,'time']
ixt = transients_data.loc[experiment_id].index.unique()
plt.figure(figsize=(16,10))
for i in range(0,len(ixt)):
    unit = ixt[i]
    firing = np.array(transients_data.loc[(experiment_id,unit),['start_frame', 'stop_frame']])
    plt.plot(firing.T,i*np.ones_like(firing.T),c='k')
    firing = transients_data.loc[(experiment_id,unit),'max_frame']
    plt.plot(firing,i*np.ones_like(firing),'|',ms=5)
for i in range(0,len(events)):
    plt.axvline(x=events[i]*FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
plt.title('Transient peaks and durations')
plt.xlabel('Camera frame')
plt.ylabel('Unit ID')
plt.show()

### Some undocumented data

In [None]:
import pprint, pickle

# Some undocumented info about the experiments
# NOTE: the path is hard-coded to animal msa0316_1 and ignores the `animal` variable.
# SECURITY: unpickling can execute arbitrary code; only load this file if its source is trusted.
with open('../_share/Losonczi/msa0316_1/frame_fluor.pkl', 'rb') as pkl_file:
    try:
        # Python 3: 'latin1' is the encoding the original Python 3.5 note used
        data1 = pickle.load(pkl_file, encoding='latin1')
    except TypeError:
        # Python 2: pickle.load() takes no `encoding` argument
        pkl_file.seek(0)
        data1 = pickle.load(pkl_file)

#pprint.pprint(data1)

# list(...) / next(iter(...)) behave the same on Python 2 and 3,
# whereas indexing `data1.values()` only works on Python 2
pprint.pprint(list(data1.keys()))
pprint.pprint(next(iter(data1.values()))) # per the original note: same as data1['2016-04-01-23h43m20s']