### Requirements
* #### You need to install the `future` module; importing from \_\_future\_\_ manually is optional
* #### For HDF data import you also need pytables, which is not installed by default with Anaconda

### Batch execution
* #### ```batch_animal=msaxxyy_z jupyter nbconvert Stat.ipynb --to=html --execute --ExecutePreprocessor.timeout=-1 --output=xxyy_z_report.html```

In [None]:
#from future.utils import PY3
import future
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
import pandas as pd
import numpy as np
import time, os, warnings, imp, itertools
import IPython.display as disp
display = disp.display
import matplotlib as mpl, matplotlib.pyplot as plt
import scipy.stats as stats
zscore, describe = stats.mstats.zscore, stats.describe
import datetime
dt, td = datetime.datetime, datetime.timedelta

%matplotlib inline

In [None]:
import ca_lib as la
imp.reload(la)

In [None]:
from os import environ
batch_animal = environ.get('batch_animal', None)

## Load files

In [None]:
# Root folder holding one sub-folder per animal
basedir = '../_share/Losonczi/'

# Display database folders
display(os.listdir(basedir))

# Select animal
# Interactive use: pick one of the hard-coded animals below together with its
# expected frame rate. Batch use: the animal comes from the `batch_animal`
# environment variable and no expected frame rate is known (FPS = None), so the
# FPS consistency check done after loading is skipped.
if batch_animal is None:
    #animal = 'msa0216_4'; FPS = 8
    #animal = 'msa0316_1'; FPS = 8
    #animal = 'msa0316_3'; FPS = 8
    animal = 'msa0316ag_1'; FPS = 8
    #animal = 'msa1215_1'; FPS = 30
else:
    FPS = None
    animal = batch_animal

print ('selecting',animal)

# List dir
# (last expression of the cell, so the notebook shows the folder content)
mydir = os.path.join(basedir,animal)
os.listdir(mydir)

In [None]:
# Available trials and ROIs
data = la.load_files(mydir)
if (FPS is not None) and (data.FPS != FPS):
    warnings.warn('FPS indication might be wrong.')
print (data.raw.shape, '\n', data.trials, '\n', data.rois)

In [None]:
# Post-Learning may repeat session_num therefore an additional index,
# day_num is created. See msa0316_1.
# It seems though that Pre-Learning and Learning treats session_num as documented.
display(data.experiment_traits.head())
display(data.experiment_traits[data.experiment_traits['day_leap']])

## Experiment protocol configurations

In [None]:
def settings_summary(experiment_traits):
    '''Display and return the number of trials per protocol configuration.

    The counts grouped by `la.display_learning` are displayed in the notebook
    together with a notice about the column order; the counts grouped by
    `la.sort_learning` are returned (single column named 'count').
    Both tables are passed through `la.df_epoch`.
    '''
    def count_by(keys):
        # one row per distinct combination of `keys`, with the trial count
        sizes = experiment_traits.groupby(keys).size()
        return la.df_epoch(sizes.to_frame(name='count'))

    shown = count_by(la.display_learning)
    notice = '<font color="red">ATTENTION, </font>for later conformity we store columns in a <b>different order</b>: %s !!!'%la.sort_learning
    disp.display(disp.HTML(notice))
    # NOTE(review): df_epoch is applied a second time here, as in the original
    display(la.df_epoch(shown))

    return count_by(la.sort_learning)

In [None]:
et = settings_summary(data.experiment_traits)

## Prepare data

In [None]:
df_data = data.filtered
df_raw = data.raw

In [None]:
# See how many ROIs are available for which frames

avail_sum = (~data.filtered.isnull()).sum() / len(data.trials)
plt.plot(avail_sum)
plt.xlabel('Camera frame within experiment')
plt.ylabel('Available ROIs on average')

In [None]:
# See which ROI is available in which trial and for how many frames

avail = ((~data.filtered.isnull()).sum(axis=1)).to_frame('nFrames').unstack(fill_value=0)

print(avail.shape)
display(avail.head())
display(avail.tail())

In [None]:
def spikes_to_timeseries(data):
    '''Create a frame-by-frame table telling when each ROI has an active transient.

    Every transient that is not flagged `in_motion_period` contributes +1 at its
    `start_frame` and -1 at its `stop_frame`; a cumulative sum along the frame
    axis turns these edges into a level that is non-zero inside
    [start_frame, stop_frame).

    Parameters
    ----------
    data : object providing `mirow` (row index), `icol` (frame columns),
        `transients` (DataFrame with 'in_motion_period', 'start_frame' and
        'stop_frame' columns) and `time_roi_mask` (added to the result).

    Returns
    -------
    DataFrame indexed by `data.mirow` with columns `data.icol`.
    '''
    # All-zero table that fixes the row/column layout of the result
    df_spike = pd.DataFrame(data=0,index=data.mirow,columns=data.icol)

    # Select spike data. The explicit copy makes sure that adding the helper
    # column below never writes into (a view of) data.transients.
    spikes = data.transients.loc[data.transients['in_motion_period']==False,['start_frame','stop_frame']].copy()
    spikes['count']=1

    # fill in spike start and stop points (rename column to keep columns.name in df_spike)
    sp = spikes[['start_frame','count']].rename(columns={'start_frame':'frame'}).pivot(columns='frame').fillna(0)
    df_spike = df_spike.add(sp['count'], fill_value=0)
    sp = spikes[['stop_frame','count']].rename(columns={'stop_frame':'frame'}).pivot(columns='frame').fillna(0)
    df_spike = df_spike.add(-sp['count'], fill_value=0)

    # cumulate, conversion to int is not advised if using NaNs
    df_spike = df_spike.cumsum(axis=1).astype(int)
    # presumably the mask holds NaN where a ROI is not available - TODO confirm
    df_spike = df_spike + data.time_roi_mask

    print('Table shape:', df_spike.shape, 'Active frames*ROIs:', df_spike.sum().sum())
    return df_spike

In [None]:
df_spike = spikes_to_timeseries(data)
display(df_spike.head(25))
display(df_spike.tail())

In [None]:
def licks_to_timeseries(data):
    '''Create a DataFrame with the smoothed lick rate in every camera frame.

    Entries of `data.behavior` whose `stop_time` is not later than their
    `start_time` are discarded. Each remaining lick is assigned to the frame
    nearest to the midpoint of its start and stop time (time * data.FPS).
    Licks are counted per (trial, frame), laid out on the rows
    `data.mirow.levels[0]` and the columns `data.icol` (licks falling outside
    these are lost), scaled by `data.FPS` and smoothed along the frame axis
    with a Gaussian of sigma = 0.25 * data.FPS frames.

    Returns
    -------
    DataFrame of floats, one row per trial and one column per frame.
    '''
    # scipy.ndimage.filters is a deprecated namespace; the function lives in scipy.ndimage
    from scipy.ndimage import gaussian_filter

    # Check for valid data and calculate their frame
    print('All entries', data.behavior.shape)
    df_lick = data.behavior[data.behavior.loc[:,'stop_time']>data.behavior.loc[:,'start_time']].copy()
    print('Valid licks', df_lick.shape)
    df_lick['frame'] = (data.FPS*(df_lick['start_time']+df_lick['stop_time'])/2).apply(np.round).astype(int)

    # Convert to a DataFrame like df_data or df_raw
    # (reset_index exposes the index as the 'time' column used for grouping)
    df_lick = df_lick[['lick_idx','frame']].reset_index()
    df_lick = df_lick.groupby(['time','frame']).count().unstack(fill_value=0)
    df_lick = df_lick['lick_idx'].reindex(index=data.mirow.levels[0],columns=data.icol,fill_value=0)

    # Number of remaining licks
    print('Remaining licks',df_lick.sum().sum())
    # Smoothen; multiplying by FPS turns licks per frame into licks per FPS frames
    df_lick = df_lick.apply(lambda x: gaussian_filter(x.astype(float)*data.FPS, sigma=0.25*data.FPS), axis=1, raw=True)
    return df_lick

In [None]:
df_lick = licks_to_timeseries(data)
display(df_lick.head())

## z-scoring

In [None]:
z_spike = la.pd_zscore_by_roi(df_spike, data.FPS, -2*data.FPS, axis=1)
z_data = la.pd_zscore_by_roi(df_data, data.FPS, -2*data.FPS, axis=1)
z_raw = la.pd_zscore_by_roi(df_raw, data.FPS, -2*data.FPS, axis=1)
z_lick = la.pd_zscore_clip(df_lick, data.FPS, -2*data.FPS, axis=1)

z_data = z_data.sort_index()
z_raw = z_raw.sort_index()

### Triggers

In [None]:
def trigger(data, threshold, rising=True, hold_off=None):
    '''Mark threshold crossings along the first axis.

    Returns a boolean array shaped like `data`. With `rising`, element i is True
    when sample i is above `threshold` while sample i-1 is at or below it;
    otherwise element i is True when sample i is below `threshold` while
    sample i-1 is at or above it. The first element is always False.

    Raises ValueError for a truthy `hold_off` (not implemented).
    '''
    samples = np.array(data)
    if hold_off:
        raise ValueError('Hold off period not implemented yet.')

    crossed = np.full(samples.shape,False,dtype=bool)
    before, after = samples[:-1], samples[1:]
    if rising:
        crossed[1:] = (after>threshold) & (before<=threshold)
    else:
        crossed[1:] = (after<threshold) & (before>=threshold)
    return crossed

def trigger_find_pd(df, threshold, axis=1, hold_off=None):
    '''Find rising and falling threshold crossings in a DataFrame.

    `trigger` is applied along `axis`; entries without a crossing are replaced
    by NaN and the tables are stacked into long format. Returns the pair
    (rising, falling), both named 'weight'. `hold_off` is accepted but not
    forwarded to `trigger`. Any `axis` other than 0 or 1 only produces a
    warning and the tables are returned unstacked.
    '''
    def crossings(rising):
        marks = df.apply(lambda x: trigger(x,threshold, rising), axis=axis)
        marks[marks==0]=np.nan
        return marks

    rise = crossings(True)
    fall = crossings(False)

    if axis==1:
        rise, fall = rise.stack(), fall.stack()
    elif axis==0:
        rise, fall = rise.T.stack().T, fall.T.stack().T
    else:
        warnings.warn('Axis reduction not implemented for axis.')

    rise.name='weight'
    fall.name='weight'
    return rise, fall

def trigger_enable_pd(df, start, stop):
    '''Build start, stop and enable-window triggers for every row of `df`.

    Returns three Series named 'weight', filled with 1.0 and indexed by
    ('time', 'frame'): one entry per row label at frame `start`, one at frame
    `stop`, and one for every frame in range(start, stop).
    '''
    def unit_weights(frames):
        # every row label of df combined with each of the given frames
        index = pd.MultiIndex.from_product((df.index.values, frames), names=['time', 'frame'])
        return pd.Series(1.0, index=index, name='weight')

    switch_on = unit_weights([start])
    switch_off = unit_weights([stop])
    enabled = unit_weights(list(range(start,stop)))
    return switch_on, switch_off, enabled


In [None]:
# Threshold for the population-averaged z-scored spiking: 5 divided by the
# square root of the number of ROIs.
# NOTE(review): presumably "5 sigma" of a mean over independent, unit-variance
# ROI z-scores - confirm the intended statistical model.
z_spike_threshold = 5.0/np.sqrt(len(data.rois))

# Histogram of all lick-rate samples in unit-wide bins over [0, max_lick_rate)
max_lick_rate = 20
c,b = np.histogram(df_lick.values.ravel(),range=(0,max_lick_rate),bins=max_lick_rate)
# Ignore the first bin, take the centre of the fullest remaining bin
# (bin index argmax+1, centre argmax+1.5) and put the threshold at half of it.
lick_threshold = (np.argmax(c[1:])+1.5)/2
plt.hist(df_lick.values.ravel(),log=True,range=(0,max_lick_rate),bins=max_lick_rate)
# Mark the chosen threshold on the histogram
plt.plot(lick_threshold,2,'y*',ms=15)
print(lick_threshold)

In [None]:
# The histogram shape justifies putting the threshold at the half maximum
lick_triggers_rise, lick_triggers_fall = trigger_find_pd(df_lick, lick_threshold)
print (lick_triggers_rise.shape,lick_triggers_fall.shape)
print ('Port was present in %d trials.'%data.experiment_traits[data.experiment_traits['port']=='W+'].shape[0])

In [None]:
# Define the boundary of a p<0.005 set
# The z-scored spiking is averaged over all rows sharing the same first index
# level before crossings of z_spike_threshold are searched.
spike_triggers_rise, spike_triggers_fall = trigger_find_pd(z_spike.mean(level=0), z_spike_threshold)
print (spike_triggers_rise.shape,spike_triggers_fall.shape)

In [None]:
csp_triggers_rise, csp_triggers_fall, csp_triggers_allow = trigger_enable_pd(
    data.experiment_traits[data.experiment_traits['context']=='CS+'],
    la.events[1]*data.FPS, la.events[2]*data.FPS)
csm_triggers_rise, csm_triggers_fall, csm_triggers_allow = trigger_enable_pd(
    data.experiment_traits[data.experiment_traits['context']=='CS-'],
    la.events[1]*data.FPS, la.events[2]*data.FPS)

us_triggers_rise, us_triggers_fall, us_triggers_allow = trigger_enable_pd(
    data.experiment_traits[data.experiment_traits['puffed']=='A+'],
    la.events[3]*data.FPS, la.events[4]*data.FPS)

tra_triggers_rise, tra_triggers_fall, tra_triggers_allow = trigger_enable_pd(
    data.experiment_traits[data.experiment_traits['context']!='Baseline'],
    la.events[2]*data.FPS, la.events[3]*data.FPS)

## Z-scored spiking
Spiking is "True" in the [intervals) given in transients_data.hc5

In [None]:
mymean = pd.DataFrame.mean
mystd = pd.DataFrame.std

### Averaging in 5" bins

In [None]:
bsections = np.arange(0,60,5)*data.FPS
bcenters = (bsections[1:]+bsections[:-1])/2

In [None]:
zb_spike = la.pd_aggr_col(z_spike, mymean, bsections, bcenters.astype(str))
zb_data = la.pd_aggr_col(z_data, mymean, bsections, bcenters.astype(str))
zb_raw = la.pd_aggr_col(z_raw, mymean, bsections, bcenters.astype(str))
zb_lick = la.pd_aggr_col(z_lick, mymean, bsections, bcenters.astype(str))

zb_data = zb_data.sort_index()
zb_raw = zb_raw.sort_index()

In [None]:
b_spike = la.pd_aggr_col(df_spike, mymean, bsections, bcenters.astype(str))
b_data = la.pd_aggr_col(df_data, mymean, bsections, bcenters.astype(str))
b_raw = la.pd_aggr_col(df_raw, mymean, bsections, bcenters.astype(str))
b_lick = la.pd_aggr_col(df_lick, mymean, bsections, bcenters.astype(str))

b_data = b_data.sort_index()
b_raw = b_raw.sort_index()

### Averaging within phases

In [None]:
asections = np.append(la.events,[60])*data.FPS
acenters = (asections[1:]+asections[:-1])/2

In [None]:
za_spike = la.pd_aggr_col(z_spike, mymean, asections, acenters.astype(str))
za_data = la.pd_aggr_col(z_data, mymean, asections, acenters.astype(str))
za_raw = la.pd_aggr_col(z_raw, mymean, asections, acenters.astype(str))
za_lick = la.pd_aggr_col(z_lick, mymean, asections, acenters.astype(str))

za_data = za_data.sort_index()
za_raw = za_raw.sort_index()

In [None]:
a_spike = la.pd_aggr_col(df_spike, mymean, asections, acenters.astype(str))
a_data = la.pd_aggr_col(df_data, mymean, asections, acenters.astype(str))
a_raw = la.pd_aggr_col(df_raw, mymean, asections, acenters.astype(str))
a_lick = la.pd_aggr_col(df_lick, mymean, asections, acenters.astype(str))

a_data = a_data.sort_index()
a_raw = a_raw.sort_index()

### Licking statistics

In [None]:
lick_rate_mean = la.pd_aggr_col(df_lick, mymean, asections, acenters.astype(str))
lick_rate_std = la.pd_aggr_col(df_lick, mystd, asections, acenters.astype(str))
lick_time_mean = la.pd_aggr_col((df_lick>lick_threshold).astype(float), mymean,
                                asections, acenters.astype(str))
lick_time_std = la.pd_aggr_col((df_lick>lick_threshold).astype(float), mystd,
                               asections, acenters.astype(str))

# Plot

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

class helpmultipage(object):
    '''Thin wrapper around PdfPages that tolerates an empty file name.

    With an empty `filename` nothing is opened and `savefig`/`close` do
    nothing, so plotting code can call them unconditionally. The object can
    also be used as a context manager, which closes the file on exit.
    '''
    def __init__(self, filename):
        self.filename = filename
        self.isopen = False
        self.open()

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # never swallow an exception raised inside the with-block
        return False

    def savefig(self, dpi=None):
        '''Append the current figure as a new page if a file is open.'''
        if self.isopen:
            self.pp.savefig(dpi=dpi)

    def open(self):
        '''Open the PDF unless it is already open or the file name is empty.'''
        # `not`, not `~`: bitwise negation of a bool is always truthy
        # (~True == -2), which re-opened the file and leaked the old handle.
        if (not self.isopen) and len(self.filename):
            self.pp = PdfPages(self.filename)
            self.isopen = True

    def close(self):
        '''Close the PDF if it is open; safe to call repeatedly.'''
        # getattr guards against __del__ running on a half-constructed object
        if getattr(self, 'isopen', False):
            self.pp.close()
        self.isopen = False

#### Explanatory figure

In [None]:
def explain_figures(data):
    '''Draw a legend-like figure explaining the layout of the later plots.

    The lower panel shows the mean of the module-level `z_spike` (and its
    negative) as placeholder curves, vertical lines at every `la.events`
    boundary (converted to frames with `data.FPS`), rotated labels naming the
    phases and shaded rectangles over the second and fourth phase. The upper
    panel is blank and only leaves room for the legend.

    Returns the created figure.
    '''
    import matplotlib.patches as mpatches
    from matplotlib.collections import PatchCollection
    # Phase geometry in camera frames: midpoints, left edges and widths of the
    # intervals between consecutive la.events entries
    center = data.FPS * (la.events[:-1]+la.events[1:]) /2
    left = data.FPS * la.events
    width = data.FPS * (la.events[1:]-la.events[:-1])
    # vertical position of the labels and lower edge of the y-range
    vcenter = 0.0
    vstart = -0.5

    def label90(x,y,text):
        # vertical text centred on (x, y); `ax` is bound below before the first call
        ax.text(x, y, text, ha="center", va="center", family='sans-serif', size=14, rotation=90)

    fig, (empty, ax) = plt.subplots(2,1,figsize=(6,8))
    fig.suptitle('Explanatory figure',fontsize=16)
    fig.tight_layout(pad=3)
    empty.axis('off')
    
    ax.set_xlabel('Camera frame')
    ax.set_ylabel('z-scored activity')
    ax.set_ylim(vstart,vstart+1)
    # Placeholder curves; only their labels and colours matter for the legend
    ax.plot(z_spike.mean(axis=0)+0.00, label="(CategoryA, True): #trials", c=(1,1,0))
    ax.plot(z_spike.mean(axis=0)+0.02, label="(CategoryB, True): #trials", c=(.5,1,.5))
    ax.plot(-z_spike.mean(axis=0)+0.00, label="(CategoryA, False): #trials", c=(1,.8,1))
    ax.plot(-z_spike.mean(axis=0)+0.02, label="(CategoryB, False): #trials", c=(.5,1,1))
    patches = []
    # mark delay
    label90(center[0], vcenter, 'excitation by\nshowing water')
    # mark CS
    rect = mpatches.Rectangle((left[1],vstart), width[1], 1, ec="none")
    patches.append(rect)
    label90(center[1], vcenter, 'CS± if tone\n"Baseline" otherwise')
    # mark delay
    label90(center[2], vcenter, 'trace = delay')
    # mark UC
    rect = mpatches.Rectangle((left[3],vstart), width[3], 1, ec="none")
    patches.append(rect)
    label90(center[3], vcenter, 'UC if any')
    # mark water
    ax.text((left[0]+left[3])/2, vstart, "water port present\niff allowed to lick",
            ha="center", va="bottom", family='sans-serif', size=14, bbox=dict(boxstyle="DArrow", pad=0.0, fc='c'))

    # show event boundaries
    for i in range(0,len(la.events)):
        ax.axvline(x=la.events[i]*data.FPS, ymin=0.0, ymax = 1.0, linewidth=1, color='k')
    # colour the shaded rectangles with evenly spaced values of the hsv colormap
    colors = np.linspace(0, 1, len(patches))
    collection = PatchCollection(patches, cmap=plt.cm.hsv, alpha=0.1)
    collection.set_array(np.array(colors))
    ax.add_collection(collection)

    # align legend
    # (anchored above the axes, i.e. over the blank upper panel)
    leg = ax.legend(loc='lower center', title="Category name, Condition name",
                   bbox_to_anchor=(0.5, 1.1))
    leg.get_title().set_fontsize('large')
    leg.get_title().set_fontweight('bold')
    # fig.show() may emit a UserWarning; it is silenced here
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        fig.show()
    return fig

In [None]:
pp = helpmultipage(animal+'_explanatory.pdf')
fig = explain_figures(data)
pp.savefig()
pp.close()

### Learning progress

In [None]:
def learning_chart(data):
    '''Draw one row per trial with its conditions and licking statistics.

    For every entry of `data.trials` the row shows the experimental conditions
    (via `la.draw_conditions`), the mean lick rate per phase with its standard
    deviation as error bars (red, divided by twice the lick threshold) and the
    fraction of time spent licking per phase (blue).

    Relies on the module-level `lick_threshold`, `lick_rate_mean`,
    `lick_rate_std` and `lick_time_mean`. Returns the created figure.
    '''
    n_trials = len(data.trials)
    # squeeze=False keeps the axes in a 2-D array even for a single trial;
    # without it plt.subplots returns a bare Axes that cannot be indexed.
    fig, axes = plt.subplots(n_trials,1,figsize=(10,0.6*n_trials), sharex=True, sharey=True, squeeze=False)
    ax = axes[:,0]
    fig.tight_layout(h_pad=0.1)
    ind = np.arange(0,5)
    width, height, spacing = 1, 1.2, 10
    label_df = data.experiment_traits.replace('Baseline','B.L.')
    # lick rates are scaled so that the lick threshold sits at 0.5
    sc = 2.0*lick_threshold
    for i in range(0,n_trials):
        trial = data.trials[i]
        ax[i].bar(ind+2*spacing, lick_rate_mean.loc[trial]/sc, width, color='r', yerr=lick_rate_std.loc[trial]/sc)
        ax[i].bar(ind+3*spacing, lick_time_mean.loc[trial], width, color='b')
        ax[i].set_xlim(xmin=0)
        ax[i].set_ylim(ymin=0, ymax=height)
        ax[i].set_yticks([0,0.5,1])
        la.draw_conditions(ax[i],label_df,trial,data.FPS,loc='lower left',screen_width=0.5, height=height, cw=[0.25, 0.15, 0.15, 0.15, 0.15, 0.15],fontsize=12)
    # the x axis is shared, so labelling the bottom row is enough
    ax[-1].set_xticks([spacing, 2.2*spacing, 3.2*spacing])
    ax[-1].set_xticklabels(['Conditions', 'Licking rate', 'Licking time'])
    return fig

In [None]:
pp = helpmultipage(animal+'_protocol.pdf')
fig = learning_chart(data)
pp.savefig()
pp.close()

## Population averages

In [None]:
pp = helpmultipage(animal+'_pop.pdf')

In [None]:
la.plot_data(df_spike, df_data, df_lick, data.experiment_traits, data.FPS)
pp.savefig()

### Single criterion
* comments

In [None]:
grp = [['context'],['learning_epoch'],['port'],['puffed']]
la.plot_data(z_spike, z_data, df_lick, data.experiment_traits, data.FPS, grp, title='Population activity')
pp.savefig()
la.plot_data(zb_spike, zb_data, b_lick, data.experiment_traits, data.FPS, grp, title='Population activity binned', div=bcenters)
pp.savefig()
la.plot_data(za_spike, za_data, a_lick, data.experiment_traits, data.FPS, grp, title='Population activity averaged over events', div=acenters)
pp.savefig()

### Two criteria
* comments

### Three criteria
* comments

### All criteria
* There is no increased population activity for CS+ without puffing. (For mouse 0216_4 the 1 trial with port displays increase during the trace period - why?)
* During learning, mouse 0216_4 shows increased activity during the UC phase for CS-

In [None]:
grp = [['context','port','puffed']]
la.plot_epochs(z_spike, z_data, df_lick, data.experiment_traits, et.sum(level=(1,2,3)), data.FPS, grp, title='Population activity')
pp.savefig()
la.plot_epochs(zb_spike, zb_data, b_lick, data.experiment_traits, et.sum(level=(1,2,3)), data.FPS, grp, title='Population activity binned', div=bcenters)
pp.savefig()
la.plot_epochs(za_spike, za_data, a_lick, data.experiment_traits, et.sum(level=(1,2,3)), data.FPS, grp, title='Population activity averaged over events', div=acenters)
pp.savefig()

### Activities conditional on epoch

In [None]:
def plot_by_epoch(epoch):
    '''Plot population activity of the trials belonging to one learning epoch.

    Trials whose 'learning_epoch' equals `epoch` are selected, the module-level
    z-scored tables and the lick table are restricted to them, and two pages
    are appended to the open `pp`: the frame-resolved activity and the
    activity averaged within phases (`asections`/`acenters`), each grouped by
    every pair of 'context', 'port' and 'puffed'.
    '''
    traits = data.experiment_traits
    experiment_c = traits[traits.loc[:,'learning_epoch']==epoch]
    trial_ids = experiment_c.index

    def by_phase(frame):
        # mean within each phase, columns labelled by the phase centres
        return la.pd_aggr_col(frame, mymean, asections, acenters.astype(str))

    # restrict every table to the selected trials
    # (the raw tables are computed but not plotted, as in the original)
    spike_c, data_c, raw_c = [z.reindex(trial_ids, level='time') for z in (z_spike, z_data, z_raw)]
    lick_c = df_lick.reindex(trial_ids)
    print (experiment_c.shape, z_spike.shape)

    spike_ca, data_ca, raw_ca, lick_ca = [by_phase(f) for f in (spike_c, data_c, raw_c, lick_c)]
    print (spike_c.shape, spike_ca.shape)

    pair_groups = [['context','port'],['context','puffed'],['port','puffed']]
    la.plot_data(spike_c, data_c, lick_c, data.experiment_traits, data.FPS, pair_groups, title=epoch)
    pp.savefig()
    la.plot_data(spike_ca, data_ca, lick_ca, data.experiment_traits, data.FPS, pair_groups,
                 title=epoch+' averaged over events', div=acenters)
    pp.savefig()

#### Pre-learning

In [None]:
plot_by_epoch('Pre-Learning')

#### Learning

In [None]:
plot_by_epoch('Learning')

#### Post-Learning

In [None]:
plot_by_epoch('Post-Learning')

In [None]:
pp.close()

## Activity vector by phases

In [None]:
pp = helpmultipage(animal+'_phases_sp.pdf')

# trial IDs by condition (non-unique index)
etmp = data.experiment_traits.reset_index(drop=True).set_index(la.sort_learning)

nplot = len(et.index)
ncol = 14
nrow = int(np.ceil(len(et.index)/float(ncol)))

for p,aggr in enumerate(a_spike.columns):
    fig, ax = plt.subplots(nrow,ncol,figsize=(2*ncol,1+10*nrow),squeeze=False,sharey=True)
    fig.tight_layout(pad=3, h_pad=3, rect=[0,0,1,0.8])
    fig.suptitle('Spikes, Phase: %s'%la.phases[p],fontsize=16)
    for i, cond in enumerate(et.index):
        icol = i%ncol
        irow = int((i-icol)/ncol)
        sel = etmp.loc[cond,'timestr']
        tmp = a_spike.loc[sel.tolist(),aggr].unstack('time')
        img = ax[irow,icol].matshow(tmp.values,origin='lower',vmin=0,vmax=1)
        ax[irow,icol].xaxis.set_ticks_position('bottom')
        ax[irow,icol].set_title('\n'.join(cond))
        ax[irow,icol].set_ylabel('Unit ID')
        ax[irow,icol].set_xlabel('Trial')
    #cax,kw = mpl.colorbar.make_axes([axis for axis in ax.flat])
    cax = ax[-1,-1]
    plt.colorbar(img,ax=cax)#ax=cax,**kw)
    pp.savefig()    
pp.close()

In [None]:
pp = helpmultipage(animal+'_phases_ca.pdf')

# trial IDs by condition (non-unique index)
etmp = data.experiment_traits.reset_index(drop=True).set_index(la.sort_learning)

for p,aggr in enumerate(za_data.columns):
    nplot = len(et.index)
    ncol = 14
    nrow = int(np.ceil(len(et.index)/float(ncol)))
    fig, ax = plt.subplots(nrow,ncol,figsize=(2*ncol,1+10*nrow),squeeze=False,sharey=True)
    fig.tight_layout(pad=3, h_pad=3, rect=[0,0,1,0.8])
    fig.suptitle('Ca-Signal, Phase: %s'%la.phases[p],fontsize=16)
    for i, cond in enumerate(et.index):
        icol = i%ncol
        irow = int((i-icol)/ncol)
        sel = etmp.loc[cond,'timestr']
        tmp = za_data.loc[sel.tolist(),aggr].unstack('time')
        img = ax[irow,icol].matshow(tmp.values,origin='lower',vmin=-3,vmax=3)
        ax[irow,icol].xaxis.set_ticks_position('bottom')
        ax[irow,icol].set_title('\n'.join(cond))
        ax[irow,icol].set_ylabel('Unit ID')
        ax[irow,icol].set_xlabel('Trial')
    #cax,kw = mpl.colorbar.make_axes([axis for axis in ax.flat])
    cax = ax[-1,-1]
    plt.colorbar(img,ax=cax)#ax=cax,**kw)
    pp.savefig()    
pp.close()

### An example of spiking
The first second of the recording seems to be missing

In [None]:
# Order experiments by settings (deprecated)
et3 = data.experiment_traits.copy().reset_index(drop=True)
et3 = et3.sort_values(la.sort_learning+[str('session_num')]).set_index(la.sort_learning)

In [None]:
# Triggers
trig_list_data = [lick_triggers_rise, lick_triggers_fall, spike_triggers_rise, spike_triggers_fall]
trig_list_sign = ['o', 's', '^', 'v']
trig_list_color = ['b', 'y', 'r', 'g']

In [None]:
def activity_plot(ax, data, idx, settings, seismic=False, show_nan=False, pos=-20):
    '''Draw the complete activity overview of one trial onto `ax`.

    Parameters
    ----------
    ax : axes to draw on; the title is set on the figure owning it.
    data : loaded experiment (provides FPS, roi_df, transients, rois,
        experiment_traits).
    idx : index label of the trial in `data.experiment_traits`.
    settings : row of `data.experiment_traits` for that trial; 'timestr',
        'session_num' and 'day_num' are read from it.
    seismic : draw z-scored Ca levels instead of the detected transients.
    show_nan : additionally mark what `la.draw_spiking_nan` derives from `df_spike`.
    pos : vertical position of the population traces; licking is drawn 20 below.

    Uses the module-level `z_data`, `z_spike`, `df_spike`, `df_lick`, the
    thresholds and the `trig_list_*` lists. Returns `ax`.
    '''
    experiment_id = settings['timestr']
    # Title the figure that owns `ax` rather than whichever figure the
    # notebook-global `fig` happens to refer to at call time.
    ax.figure.suptitle('%s: session %s, day %s\n'%(idx,settings['session_num'],settings['day_num'])+
                       ', '.join(la.sort_learning)+': #context in epoch, #day',fontsize=16)
    if seismic:
        la.draw_levels(ax, z_data, idx, data.FPS, data.roi_df)
    else:
        la.draw_transients(ax, data.transients, idx, data.FPS, data.roi_df)
    if show_nan:
        la.draw_spiking_nan(ax, df_spike, idx, data.rois.values)
    la.draw_population(ax, z_data, idx, pos=pos, c='y', label='population Ca-signal')
    la.draw_population(ax, z_spike, idx, pos=pos, threshold=z_spike_threshold, label='population z-spike count')
    la.draw_licking(ax, df_lick, idx, pos=pos-20, threshold=lick_threshold, label='licking')
    la.draw_triggers(ax, trig_list_data, idx, -5, trig_list_sign, c=trig_list_color)
    la.draw_conditions(ax, data.experiment_traits, experiment_id, data.FPS, height=20)
    return ax

In [None]:
# Show an example
idx, settings = data.experiment_traits.index[9], data.experiment_traits.iloc[9,:]
fig, ax = plt.subplots(1,1,figsize=(16,10))
ax.set_ylim(ymin=-60,ymax=len(data.rois)+1)
activity_plot(ax, data, idx, settings, True)
ax.legend()

In [None]:
# Show an example
idx, settings = data.experiment_traits.index[9], data.experiment_traits.iloc[9,:]
fig, ax = plt.subplots(1,1,figsize=(16,10))
ax.set_ylim(ymin=-60,ymax=len(data.rois)+1)
activity_plot(ax, data, idx, settings, False, True)
ax.legend()

In [None]:
pp = helpmultipage(animal+'_firing.pdf')

xmax = data.transients.loc[:,['stop_frame']].max().values

for idx, settings in data.experiment_traits.iterrows():
    fig, ax = plt.subplots(1,1,figsize=(16,10))
    ax.set_xlim(xmax=xmax)
    ax.set_ylim(ymin=-60,ymax=len(data.rois)+1)
    activity_plot(ax, data, idx, settings)
    pp.savefig()
    plt.close(fig)
    
pp.close()

### Pattern matching

In [None]:
# Unix timestamp (seconds); a stored pattern database whose file is older than
# this is rebuilt by the cell that opens it.
prog_update = 1467888751
# Print the current timestamp
# (presumably to paste it above after changing the pattern search - confirm)
print ("%.0f"%time.time())

In [None]:
# Per-animal HDF5 cache of the pattern-search results
pattdb_file = 'pattdb_'+animal+'.h5'
# Close a store left open by a previous run of this cell before touching the file
if 'pattdb' in locals():
    pattdb.close()
    del pattdb
# Rebuild the cache when the file fails la.test_hdf or is older than prog_update
if (not la.test_hdf(pattdb_file)) or (os.path.getmtime(pattdb_file)<prog_update):
    with pd.HDFStore(pattdb_file, mode='w') as pattdb:
        for method,sel in itertools.product(['match','correlate'],['sp','ca']):
            print(method,sel)
            # 'sp' searches the spike table, 'ca' the z-scored Ca signal
            df = df_spike if sel == 'sp' else z_data.reindex(data.mirow)
            # NOTE(review): `method` only enters the storage key; the
            # la.search_pattern calls below are identical for 'match' and
            # 'correlate' - confirm whether a method argument is missing.
            key = '/'.join((method,sel,'lick_rise_csp'))
            pattdb[key] = la.search_pattern(df, lick_triggers_rise, data.trials,
                                            data.FPS, trigger_allow=csp_triggers_allow)
            key = '/'.join((method,sel,'lick_fall_csp'))
            pattdb[key] = la.search_pattern(df, lick_triggers_fall, data.trials,
                                            data.FPS, trigger_allow=csp_triggers_allow)
            key = '/'.join((method,sel,'csp_rise'))
            pattdb[key] = la.search_pattern(df, csp_triggers_rise, data.trials, data.FPS)
            key = '/'.join((method,sel,'us_rise'))
            pattdb[key] = la.search_pattern(df, us_triggers_rise, data.trials, data.FPS)
# Keep the store open read-only for the following cells
pattdb = pd.HDFStore(pattdb_file, mode='r')

In [None]:
z_patt = {}
for key in pattdb.keys():
    if key[0] == '/':
        key = key[1:]
    z_patt[key] = la.nan_zscore(pattdb[key])
pattdb

In [None]:
def show_detections(ax, z_patt, method, sel, idx, ids, names, colors):
    '''Draw the z-scored pattern-detection traces of one trial onto `ax`.

    For every entry of `ids` the table stored in `z_patt` under
    'method/sel/id' is drawn with `la.draw_licking` in the matching colour and
    labelled '<sel>: <name>'. The threshold pair [-3, 3] is passed only with
    the first trace. Returns `ax`.
    '''
    zoom = 3
    for i, id1 in enumerate(ids):
        key = '/'.join((method,sel,id1))
        # threshold is handed over once; later traces get None
        limits = [-3, 3] if i == 0 else None
        la.draw_licking(ax, z_patt[key], idx, pos=-20, c=colors[i],
                        threshold=limits, zoom=zoom, label='%s: %s'%(sel, names[i]))
    return ax

In [None]:
# Show an example
method = 'match' # 'match', 'correlate'
sel = 'ca' # 'ca', 'sp'
idx, settings = data.experiment_traits.index[13], data.experiment_traits.iloc[13,:]
fig, ax = plt.subplots(1,1,figsize=(16,10))
ax.set_ylim(ymin=-80,ymax=len(data.rois)+1)
show_detections(ax, z_patt, method, sel, idx, ['lick_rise_csp','lick_fall_csp','csp_rise','us_rise'],
                ['CS+ lick start', 'CS+ lick end', 'CS+ start', 'US start'], ['g', 'c', 'orange', 'r'])
activity_plot(ax, data, idx, settings, False, True, pos=-40)
ax.legend()

In [None]:
# One PDF per (method, signal) combination with one page per trial, showing the
# pattern detections on top of the activity overview.
for method,sel in itertools.product(['match','correlate'], ['sp','ca']):
    print (method,sel)

    pp = helpmultipage(animal+'_triggers_%s_%s.pdf'%(method,sel))
    # not used by show_detections (it has its own zoom); kept as a notebook-level variable
    zoom = 4

    # common right edge of the frame axis for all pages
    xmax = data.transients.loc[:,['stop_frame']].max().values

    # The row must be bound to `settings`: activity_plot reads the trial's
    # 'timestr', 'session_num' and 'day_num' from it. Binding it to another
    # name silently reused the `settings` left over from an earlier cell, so
    # every page showed the title and conditions of that one trial.
    for idx, settings in data.experiment_traits.iterrows():
        fig, ax = plt.subplots(1,1,figsize=(16,10))
        ax.set_xlim(xmax=xmax)
        ax.set_ylim(ymin=-80,ymax=len(data.rois)+1)
        show_detections(ax, z_patt, method, sel, idx, ['lick_rise_csp','lick_fall_csp','csp_rise','us_rise'],
                ['CS+ lick start', 'CS+ lick end', 'CS+ start', 'US start'], ['g', 'c', 'orange', 'r'])
        activity_plot(ax, data, idx, settings, False, True, pos=-40)
        ax.legend()
        pp.savefig()
        plt.close(fig)

    pp.close()

## Peri-event plots

In [None]:
import matlab_tools as mt
imp.reload(la)

In [None]:
def list_peri_3a(df, title=None):
    '''Plot collection: licking, CS+ and US triggers.

    Returns a list of panels, each `[df, data.rois, trigger, allow, disable,
    title]`, built from the module-level trigger Series. `title` is unused.
    '''
    # (trigger, allow window, panel title) in plotting order
    panels = [
        (lick_triggers_rise, None, 'Lick rise'),
        (lick_triggers_fall, None, 'Lick fall'),
        (lick_triggers_rise, csp_triggers_allow, 'Lick rise CS+'),
        (lick_triggers_fall, csp_triggers_allow, 'Lick fall CS+'),
        (csp_triggers_rise, None, 'CS+ start'),
        (csp_triggers_fall, None, 'CS+ end'),
        (lick_triggers_rise, us_triggers_allow, 'Lick rise US'),
        (lick_triggers_fall, us_triggers_allow, 'Lick fall US'),
        (us_triggers_rise, None, 'US start'),
        (us_triggers_fall, None, 'US end'),
    ]
    return [[df, data.rois, trig, allow, None, label] for trig, allow, label in panels]

In [None]:
def list_peri_3b(df, title=None):
    '''Plot collection: licking, CS+ and CS- triggers.

    Returns a list of panels, each `[df, data.rois, trigger, allow, disable,
    title]`, built from the module-level trigger Series. `title` is unused.
    '''
    # (trigger, allow window, panel title) in plotting order
    panels = [
        (lick_triggers_rise, None, 'Lick rise'),
        (lick_triggers_fall, None, 'Lick fall'),
        (lick_triggers_rise, csp_triggers_allow, 'Lick rise CS+'),
        (lick_triggers_fall, csp_triggers_allow, 'Lick fall CS+'),
        (csp_triggers_rise, None, 'CS+ start'),
        (csp_triggers_fall, None, 'CS+ end'),
        (lick_triggers_rise, csm_triggers_allow, 'Lick rise CS-'),
        (lick_triggers_fall, csm_triggers_allow, 'Lick fall CS-'),
        (csm_triggers_rise, None, 'CS- start'),
        (csm_triggers_fall, None, 'CS- end'),
    ]
    return [[df, data.rois, trig, allow, None, label] for trig, allow, label in panels]

In [None]:
fig=la.plot_peri_collection(list_peri_3a(df_spike),'Spiking',combine=False)
fig=la.plot_peri_collection(list_peri_3a(df_spike),'Spiking')

In [None]:
pp = helpmultipage(animal+'_peri1.pdf')
for epoch in la.epochs.values:
    experiment_c = data.experiment_traits[data.experiment_traits.loc[:,'learning_epoch']==epoch]
    spike_c = df_spike.reindex(experiment_c.index, level='time')
    data_c = z_data.reindex(experiment_c.index, level='time')
    fig=la.plot_peri_collection(list_peri_3a(spike_c),'%s Spiking on US'%epoch,combine=False)
    pp.savefig()
    plt.close(fig)
    fig=la.plot_peri_collection(list_peri_3b(spike_c),'%s Spiking on CS+/-'%epoch,combine=False)
    pp.savefig()
    plt.close(fig)
for epoch in la.epochs.values:
    experiment_c = data.experiment_traits[data.experiment_traits.loc[:,'learning_epoch']==epoch]
    spike_c = df_spike.reindex(experiment_c.index, level='time')
    data_c = z_data.reindex(experiment_c.index, level='time')
    fig=la.plot_peri_collection(list_peri_3a(data_c),'%s Ca-level on US'%epoch,combine=False)
    pp.savefig()
    plt.close(fig)
    fig=la.plot_peri_collection(list_peri_3b(data_c),'%s Ca-level on CS+/-'%epoch,combine=False)
    pp.savefig()
    plt.close(fig)
pp.close()

In [None]:
pp = helpmultipage(animal+'_peri2.pdf')
for epoch in la.epochs.values:
    experiment_c = data.experiment_traits[data.experiment_traits.loc[:,'learning_epoch']==epoch]
    spike_c = df_spike.reindex(experiment_c.index, level='time')
    data_c = z_data.reindex(experiment_c.index, level='time')
    fig=la.plot_peri_collection(list_peri_3a(spike_c),'%s Spiking on US'%epoch)
    pp.savefig()
    plt.close(fig)
    fig=la.plot_peri_collection(list_peri_3b(spike_c),'%s Spiking on CS+/-'%epoch)
    pp.savefig()
    plt.close(fig)
for epoch in la.epochs.values:
    experiment_c = data.experiment_traits[data.experiment_traits.loc[:,'learning_epoch']==epoch]
    spike_c = df_spike.reindex(experiment_c.index, level='time')
    data_c = z_data.reindex(experiment_c.index, level='time')
    fig=la.plot_peri_collection(list_peri_3a(data_c),'%s Ca-level on US'%epoch)
    pp.savefig()
    plt.close(fig)
    fig=la.plot_peri_collection(list_peri_3b(data_c),'%s Ca-level on CS+/-'%epoch)
    pp.savefig()
    plt.close(fig)
pp.close()

## Individual ROIs
* since there are many of them, save figure to pdf
* THIS WILL <font color="red">TAKE A WHILE</font>, consider testing with a small range

In [None]:
def plot_roi(df_spike, df_data, filaname, grp, title_template, by_epoch=False, div=None, fill=None):
    """Write a PDF with one page per ROI listed in the global ``data.rois``.

    Parameters
    ----------
    df_spike, df_data : frames whose row index has the ROI as second level;
        the rows of one ROI are selected with ``.loc[(slice(None), roi), :]``.
    filaname : path of the PDF file to create (spelling kept so that keyword
        callers keep working).
    grp : forwarded unchanged to the plotting helper.
    title_template : %-format string with two placeholders; it is filled with
        the position of the ROI in ``data.rois`` and the ROI identifier.
    by_epoch : if true, pages are drawn by ``la.plot_epochs`` (which
        additionally receives ``et`` summed over index levels 1-3), otherwise
        by ``la.plot_data``.
    div, fill : forwarded unchanged to the plotting helper.

    Relies on the notebook globals ``data``, ``et`` (only when ``by_epoch``),
    ``la``, ``plt`` and ``PdfPages``.
    """
    # The level-wise sum of `et` does not depend on the ROI, so compute it once.
    # NOTE(review): assumes la.plot_epochs does not modify this argument.
    epoch_totals = et.sum(level=(1,2,3)) if by_epoch else None
    pp = PdfPages(filaname)
    try:
        for i, roi in enumerate(data.rois):
            roi_rows = (slice(None), roi)
            spike_c = df_spike.loc[roi_rows,:]
            data_c = df_data.loc[roi_rows,:]
            title = title_template%(i,roi)
            if by_epoch:
                fig = la.plot_epochs(spike_c, data_c, None, data.experiment_traits, epoch_totals, data.FPS, grp, title=title, div=div, fill=fill)
            else:
                fig = la.plot_data(spike_c, data_c, None, data.experiment_traits, data.FPS, grp, title=title, div=div, fill=fill)
            # savefig() without arguments stores the current figure
            pp.savefig()
            plt.close(fig)
    finally:
        # Always finalize the PDF, also when one of the plots raises
        pp.close()

In [None]:
# Guard cell: `batch_animal` is None when the notebook was not started with
# the `batch_animal` environment variable set (see the top of the notebook).
# In that case raise here, so that executing all cells of an interactive
# session does not continue past this point unintentionally.
if batch_animal is None:
    raise ValueError("You don't want to run this automatically")

#### Raw data

### Averaging over intervals

#### Intervals aligned to events

#### Averaging over bins

## Correlations

In [None]:
def concat_for_correlation(df, data):
    """Join traces with the experiment traits and pivot sessions into columns.

    Parameters
    ----------
    df : frame of traces; it is reindexed to ``data.mirow`` (rows) and
        ``data.icol`` (columns) before use.
    data : loaded recording; ``mirow``, ``icol`` and ``experiment_traits``
        are read from it.

    Returns
    -------
    ord1 : frame indexed by ``la.sort_learning + ['roi_id']`` with the
        ``session_num`` level unstacked into the columns.
    calendar : the ``day_num`` of every entry, unstacked the same way and
        filled with 0 where a session is missing.

    Progress information is printed. Relies on the notebook global ``la``.
    """
    # Combine information
    ord1 = df.reindex(index=data.mirow, columns=data.icol)
    et1 = data.experiment_traits.copy().loc[:,la.sort_learning+['day_num','session_num']]
    ord1 = (ord1.join(et1, how='inner')
                .reset_index()
                .drop('time', axis=1)
                .set_index(la.sort_learning+['roi_id', 'session_num'])
                .sort_index())
    ord1.columns.name='Spike'

    # Search for days that contain experiments with same traits and session_num
    # These entries would jeopardize unstacking
    et2 = et1.reset_index(drop=True).set_index(la.sort_learning+['session_num']).sort_index()
    # Days holding a repeated (second or later) occurrence of a key
    second_occur = et2.index.duplicated()
    set1 = et2.loc[second_occur,'day_num'].unique()
    # Days holding any occurrence of a key that appears more than once
    any_occur = et2.index.duplicated(keep=False)
    set_all = et2.loc[any_occur,'day_num'].unique()
    # Days that take part in a conflict but only hold first occurrences
    set2 = np.setdiff1d(set_all, set1)
    print('Days repeating settings:',set1,'All days with conflicting settings:',set2)

    # Drop the days collected in set2, i.e. the repeated occurrences are the
    # ones that are kept.
    # NOTE(review): the printed labels suggest the opposite reading of
    # set1/set2; confirm which days are meant to be discarded.
    if len(set2):
        ord1 = ord1[~ord1.loc[:,'day_num'].isin(set2)]
    print('Filtered data:',ord1.shape)

    # Reshape for correlation analysis
    # integer values get converted to float if needed to hold NaN-s
    calendar = ord1['day_num'].unstack(fill_value=0)
    ord1 = ord1.drop(['day_num'], axis=1).unstack()
    print('Concatenated data:',ord1.shape)
    return ord1, calendar

In [None]:
# Reshape the `z_data` traces for the correlation analysis; `calendar` holds
# the day_num belonging to every unstacked session.
ord1, calendar = concat_for_correlation(z_data, data)
# Preview both tables
display(calendar.head())
display(ord1.head(10))

In [None]:
# Reference condition and time window (in seconds) whose correlation
# structure is used below to order the ROIs
key_ref = ('Post-Learning','CS+','W+','A+')
time_ref = np.array([15, 40])
# Convert the window from seconds to frame numbers
col_ref = slice(*(int(t*data.FPS) for t in time_ref))
sel = ord1.loc[key_ref+(slice(None),), col_ref]
print(key_ref, time_ref, col_ref, sel.shape)

# Pairwise correlation between the rows of the selection;
# undefined coefficients are replaced by 0
corr_df = sel.T.corr()
corr_np = corr_df.fillna(0).values

# A row is valid only if its self-correlation is exactly 1; rows whose
# diagonal became 0 in the fill above are removed together with their columns
keep = corr_np.diagonal() == 1.0
corr_np = corr_np[np.ix_(keep, keep)]

# Left: full matrix including undefined entries, right: valid rows only
fig, ax = plt.subplots(1, 2, figsize=(12,4))
ax[0].matshow(corr_df.values)
img = ax[1].matshow(corr_np)
fig.colorbar(img, ax=ax[1])

In [None]:
# Open the document that collects the correlation figures; the following
# cells add pages with pp.savefig() and the last cell of the notebook closes it.
pp = helpmultipage(animal+'_correl.pdf')

In [None]:
# Derive an ordering of the ROIs from average-linkage clustering of the
# reference correlation matrix
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform, pdist

# 1 - correlation is used as distance; squareform turns the square matrix
# into the condensed form expected by linkage
sq_dist = squareform(1.0 - corr_np)
corr_link = linkage(sq_dist, method='average')

fig, ax = plt.subplots(1, 2, figsize=(18,8))
ref_title = ('Reference is presented here: ' + ', '.join(np.array(key_ref))
             + ' and time ' + '..'.join(time_ref.astype(str)) + 's')
fig.suptitle(ref_title, fontsize=16)

# Right panel: dendrogram, labelled with index level 4 of the reference
# selection `sel` restricted to the valid rows
labels = sel.index.get_level_values(4).to_series().reset_index(drop=True)[keep]
dendo = dendrogram(corr_link, ax=ax[1], labels=labels.values, leaf_font_size=2.5, orientation='left')
ax[1].set_title('Distance of firing patterns')
# Leaf order of the dendrogram, reused by the following cells
corr_order = dendo['leaves']

# Left panel: the correlation matrix with rows and columns in leaf order
reordered = corr_np[np.ix_(corr_order, corr_order)]
img = ax[0].matshow(reordered, origin='lower', vmin=-0.8, vmax=1)
ax[0].xaxis.set_ticks_position('bottom')
ax[0].set_title('Ordered correlation matrix', y=1.0)
fig.colorbar(img)
pp.savefig(dpi=600)

In [None]:
# Frame window of every phase: it starts data.FPS frames after an event and
# ends data.FPS frames before the next one
phase_start = data.event_frames+data.FPS
phase_end = data.event_frames[1:]-data.FPS

num_phases = 3
num_rows = len(et.index)
num_cols = num_phases
# squeeze=False keeps `ax` two-dimensional even if `et` has a single row
fig, ax = plt.subplots(num_rows,num_cols, figsize=(5*num_cols,5*num_rows), squeeze=False)
fig.suptitle('Correlation structure under different conditions: learning_epoch, context, port, puffed\n'+
             '(small number of trials might lead to larger percieved correlation)\n'+
             '(in phases Ready, CS, Trace the conditions A+ and A- should be very similar)',fontsize=16)
# mx: correlation matrix per (condition..., phase) key
# ds: the same matrices flattened into columns, with the diagonal set to NaN
mx = {}
mi = pd.DataFrame([], index=pd.Index(la.phases[0:num_phases],name='phase'), columns = et.index).unstack().index
ds = pd.DataFrame(columns=mi)

for row,col in itertools.product(range(0,num_rows),range(0,num_cols)):
    # Select the traces of this condition within the frames of this phase
    key = et.index[row]
    phase = la.phases[col]
    # Positional row lookup (replaces the deprecated `.ix` indexer)
    count = et.iloc[row]
    col_sel = slice(int(phase_start[col]),int(phase_end[col]))
    sel = ord1.loc[key+(slice(None),),col_sel]
    print(key,phase,ord1.shape,sel.shape)

    # Correlate; undefined coefficients become 0
    corr_tmp = sel.T.corr()
    corr_tmp = corr_tmp.fillna(0).values

    # Keep only the rows that were valid in the reference condition and
    # arrange them in the dendrogram order of the reference
    corr_tmp = corr_tmp[keep,:][:,keep][corr_order,:][:,corr_order]
    img = ax[row,col].matshow(corr_tmp, origin='lower', vmin=-0.8, vmax=1)
    ax[row,col].xaxis.set_ticks_position('bottom')

    mx[key+(phase,)] = corr_tmp
    # Adding a diagonal matrix of NaNs blanks the self-correlations
    ds[key+(phase,)] = np.ravel(corr_tmp+np.diag(np.nan*np.diag(corr_tmp)))
    ax[row,col].set_title('%s, %s: %d'%(key,phase,count))
pp.savefig(dpi=600)

## Statistics of the correlation coefficients

In [None]:
num_rows = len(et.index)
num_cols = num_phases

# squeeze=False keeps `ax` two-dimensional even if `et` has a single row
fig, ax = plt.subplots(num_rows,num_cols, figsize=(5*num_cols,5*num_rows), squeeze=False)
fig.suptitle('Distribution of the above correlation coefficients\n(diagonals excluded)',fontsize=16)

for row,col in itertools.product(range(0,num_rows),range(0,num_cols)):
    key = et.index[row]
    phase = la.phases[col]
    # Positional row lookup (replaces the deprecated `.ix` indexer)
    count = et.iloc[row]
    # The selection is only needed for the shape printed below
    col_sel = slice(int(phase_start[col]),int(phase_end[col]))
    sel = ord1.loc[key+(slice(None),),col_sel]
    print(key,phase,ord1.shape,sel.shape)

    # Correlation matrix stored by the previous cell, diagonal set to NaN
    corr_tmp = mx[key+(phase,)]
    corr_tmp = corr_tmp+np.diag(np.nan*np.diag(corr_tmp))
    # Drop the NaNs explicitly so that the histogram does not depend on how
    # the plotting library treats them
    values = np.ravel(corr_tmp)
    ax[row,col].hist(values[~np.isnan(values)],range=(-1,1),bins=20)
    ax[row,col].set_yscale('log')
    ax[row,col].set_title('%s, %s: %d'%(key,phase,count))

pp.savefig()

In [None]:
# Switch passed as `has_oraculum` to the helpers below: when true they work
# with la.legal_conditions / la.legal_colors, otherwise with
# la.short_conditions / la.short_colors.
oraculum = False

### Compare correlation coefficient distributions

In [None]:
def describe_correlation(data, title, has_oraculum):
    """Tabulate descriptive statistics of correlation coefficients.

    Parameters
    ----------
    data : frame with one column of coefficients per condition and phase.
    title : title of the figure.
    has_oraculum : if true, every column is described on its own; otherwise
        column level 3 is first stacked into the rows, so the columns that
        differ only in that level are described together.

    Returns
    -------
    stat : the rounded statistics, one row per described column, sorted by
        index.
    fig : figure showing the same statistics as a table, with rows arranged
        by ``la.df_epoch``.
    """
    fig, ax = plt.subplots(1,1,figsize=(12,16))
    fig.suptitle(title,fontsize=16)
    ax.axis('off')
    # Use the parameter, not the notebook-level `oraculum` variable
    described = data if has_oraculum else data.stack(level=3)
    stat = np.round(described.describe(),4).T
    ordered = la.df_epoch(stat)
    stat = stat.sort_index()
    # Equal relative widths for all statistic columns
    cw = np.ones((len(ordered.columns),))
    mpl.table.table(ax, cellText=ordered.values,
             rowLabels=[', '.join(x) for x in ordered.index.values],
             colLabels=ordered.columns.values.astype(str),
             loc='upper right', fontsize=20, colWidths=0.6*cw/np.sum(cw),
             bbox=[0.3,0,0.7,1], cellLoc='center')
    return stat, fig

In [None]:
def compare_correlation(stat, title, has_oraculum):
    """Tabulate the differences of the mean correlation between learning epochs.

    For each of the pairs Learning/Pre-Learning, Post-Learning/Pre-Learning
    and Post-Learning/Learning the difference of the means is divided by the
    average of the two standard deviations.

    Parameters
    ----------
    stat : statistics as returned by ``describe_correlation``; the first
        index level is the learning epoch, and the columns ``mean`` and
        ``std`` are read.
    title : title of the figure.
    has_oraculum : selects ``la.legal_conditions`` / ``la.legal_colors``
        (true) or ``la.short_conditions`` / ``la.short_colors`` (false) for
        the rows of the table.

    Returns
    -------
    diff : the rounded differences, one column per pair of epochs.
    fig : figure showing ``diff`` as a table; cells above 0.5 are coloured
        lightcoral and cells below -0.4 lightblue.

    Relies on the notebook globals ``num_phases`` and ``la``.
    """
    # Use the parameter, not the notebook-level `oraculum` variable
    conditions = la.legal_conditions if has_oraculum else la.short_conditions
    row_colors = la.legal_colors if has_oraculum else la.short_colors
    # Row order of the table: every condition combined with every phase
    lmi = pd.DataFrame([], index=pd.Index(la.phases[0:num_phases],name='phase'),
            columns = conditions).unstack().index

    fig, ax = plt.subplots(1,1,figsize=(12,16))
    fig.suptitle(title,fontsize=16)
    ax.axis('off')
    cellcolor = np.vectorize(lambda x: 'lightcoral' if x>0.5 else (
                            'lightblue' if x<-0.4 else 'white'))
    diff = []
    for epoch1, epoch2 in [('Learning','Pre-Learning'),
                           ('Post-Learning','Pre-Learning'),('Post-Learning','Learning')]:
        # Difference of the means in units of the mean standard deviation
        d = (stat.loc[epoch1,'mean']-stat.loc[epoch2,'mean'])/(
             stat.loc[epoch1,'std']+stat.loc[epoch2,'std'])*2
        d = d.to_frame(name='  -  '.join((epoch1,epoch2)).replace('-Learning','-L'))
        diff.append(d)
    diff = np.round(pd.concat(diff,axis=1),4).reindex(lmi)
    # Equal relative widths, one per compared pair of epochs
    cw = np.ones((len(diff.columns),))
    tab = mpl.table.table(ax, cellText=diff.values,
             cellColours=cellcolor(diff.values),
             rowLabels=[', '.join(x) for x in diff.index.values],
             rowColours=np.repeat(row_colors,num_phases),
             colLabels=diff.columns.values.astype(str),
             loc='upper right', fontsize=32, colWidths=0.6*cw/np.sum(cw),
             bbox=[0.3,0,0.7,1], cellLoc='center')
    tab.set_fontsize(32)
    return diff, fig

In [None]:
def plot_correlation_bars(stat, title, has_oraculum):
    """Draw mean +/- std bars of the correlation statistics per phase and epoch.

    The grid has one row per phase (the first ``num_phases`` entries of
    ``la.phases``) and one column per learning epoch. Each panel shows one bar
    per condition and a grey band between the 25% and 75% quantiles. Panels
    without statistics are left empty.

    Parameters
    ----------
    stat : statistics as returned by ``describe_correlation``; the columns
        ``mean``, ``std``, ``25%`` and ``75%`` are read.
    title : title of the figure.
    has_oraculum : selects ``la.legal_conditions`` / ``la.legal_colors``
        (true) or ``la.short_conditions`` / ``la.short_colors`` (false).

    Returns
    -------
    The created figure.

    Relies on the notebook globals ``num_phases``, ``et`` and ``la``.
    """
    num_rows = num_phases
    num_cols = len(la.epochs)
    # We don't use sharex on purpose: we want to set different tick labels in the subplot columns
    # squeeze=False keeps `ax` two-dimensional for single-row or single-column grids
    fig, ax = plt.subplots(num_rows,num_cols, figsize=(5*num_cols,5*num_rows), sharey=True, squeeze=False)
    fig.suptitle(title,fontsize=16)
    conditions = la.legal_conditions if has_oraculum else la.short_conditions
    colors = la.legal_colors if has_oraculum else la.short_colors
    cat = len(conditions)
    # NOTE(review): with has_oraculum only two trait levels are moved into the
    # index although the legal conditions seem to need more; confirm the slice.
    bars = stat.reset_index().set_index(['phase']+la.sort_learning[0:(2 if has_oraculum else 3)])
    # Only the phases that have a subplot row are visited
    for (irow,row), (icol,col) in itertools.product(enumerate(la.phases[0:num_phases]),enumerate(la.epochs)):
        try:
            bar = bars.loc[(row, col),:].reindex(conditions)
            counts = et.loc[col,:]
        except KeyError:
            # No statistics or no trial counts for this phase/epoch
            continue
        if has_oraculum:
            lab = counts.reindex(la.legal_conditions, fill_value=0)
        else:
            # Sum over the first two index levels; the reindex below fixes the order
            lab = counts.groupby(level=[0, 1]).sum().reindex(la.short_conditions, fill_value=0)
        panel = ax[irow,icol]
        panel.set_title(col)
        panel.set_ylabel(row)
        low, high = [0]+bar['25%'].fillna(0).tolist(),[0]+bar['75%'].fillna(0).tolist()
        panel.fill_between(np.arange(0,cat+1), low, high, alpha=0.1, interpolate=False, color='grey', edgecolor=None, step='pre')
        # align='edge' makes bar i cover [i, i+1], matching the quantile band
        # and the tick positions at i+0.5 regardless of the library default
        panel.bar(range(0,cat),bar['mean'],1,yerr=bar['std'],color=colors,align='edge')
        panel.set_xticks(np.arange(0,cat)+0.5)
        if irow+1==num_rows:
            # The bottom row carries the full condition names
            labels = [('%s: %d'%(', '.join(idx),count['count'])) for idx,count in lab.iterrows()]
        else:
            labels = [count['count'] for idx,count in lab.iterrows()]
        panel.set_xticklabels(labels, rotation='vertical')
    return fig

### Real value

In [None]:
# Describe the coefficients collected in `ds` and add the table to the PDF
stat, fig = describe_correlation(ds, 'Statistics on the correlation coefficients', oraculum)
pp.savefig()
plt.close(fig)
# count corresponds to the number of elements in the correlation matrix
la.df_epoch(stat)

In [None]:
# Compare the learning epochs based on `stat` and add the table to the PDF
diff, fig = compare_correlation(stat, 'Difference of the above correlation coefficients\n'+
                 'in stdev. units', oraculum)
pp.savefig()
#plt.close(fig)
# Show the table of differences as cell output
diff

In [None]:
# Bar charts of `stat` per phase and epoch; the returned figure is not kept,
# pp.savefig() stores the current figure.
plot_correlation_bars(stat, 'Distribution of the above correlation coefficients\n(diagonals excluded)', oraculum)
pp.savefig()

### Absolute value

In [None]:
# Describe the magnitudes of the coefficients: the absolute value has to be
# taken here, otherwise this section repeats the statistics of the signed
# coefficients. The cells below reuse `stat`.
stat, fig = describe_correlation(ds.abs(), 'Statistics on the absolut value\nof the correlation coefficients', oraculum)
pp.savefig()
plt.close(fig)
# count corresponds to the number of elements in the correlation matrix
la.df_epoch(stat)

In [None]:
# Compare the learning epochs based on the `stat` computed in the cell above
# and add the table to the PDF
diff, fig = compare_correlation(stat, 'Difference of the absolute value of the correlation coefficients\n'+
                 'in stdev. units', oraculum)
pp.savefig()
#plt.close(fig)
# Show the table of differences as cell output
diff

In [None]:
# Bar charts of the current `stat` per phase and epoch; the returned figure is
# not kept, pp.savefig() stores the current figure.
plot_correlation_bars(stat, 'Distribution of the absolute value of the correlation coefficients\n(diagonals excluded)', oraculum)
pp.savefig()

## Similarity of correlation matrices

In [None]:
num_rows = len(et.index)
num_cols = num_phases

# change[p, i, j]: root-mean-square of the element-wise differences between
# the correlation matrices of conditions i and j in phase p
change = np.zeros((num_cols,num_rows,num_rows))
for col in range(0,num_cols):
    phase = la.phases[col]
    for row1 in range(0,num_rows):
        key1 = et.index[row1]
        for row2 in range(0,num_rows):
            key2 = et.index[row2]
            delta = mx[key1+(phase,)]-mx[key2+(phase,)]
            # RMS = sqrt(mean(delta^2)); dividing the norm by the number of
            # elements instead would shrink the value by an extra sqrt(N)
            change[col,row1,row2] = np.sqrt(np.mean(np.square(delta)))

# One page per phase
for col in range(0,num_cols):
    fig, ax = plt.subplots(1,1, figsize=(8,6))
    fig.tight_layout(rect=[0.4,0,0.95,0.55])
    # The diagonal (distance of a condition to itself) is set to NaN
    img = ax.matshow(change[col]+np.diag(np.nan*np.diag(change[col])), cmap=plt.get_cmap('rainbow'))

    fig.suptitle('Difference between test cases (RMS distance)\nPhase: '
                 +la.phases[col],fontsize=16)
    # Label both axes with the condition keys
    ax.set_xticks(np.array(range(0,len(et.index))))
    ax.set_xticklabels(et.index.values.tolist(),rotation=90)
    ax.set_yticks(np.array(range(0,len(et.index))))
    ax.set_yticklabels(et.index.values.tolist())

    fig.colorbar(img)
    pp.savefig()

In [None]:
# Close the document that collected the correlation figures
pp.close()