In [5]:
# standard
import numpy as np
import glob
import os
import pickle
import tqdm
from itertools import product
from joblib import Parallel, delayed
import datetime

# My code
import neuraltda.simpComp as sc
import neuraltda.spectralAnalysis as sa
import neuraltda.topology2 as tp2

# Plotting
import matplotlib.pyplot as plt
%matplotlib inline

# Output location: one directory per calendar day (YYYYMMDD) under the daily log
_now = datetime.datetime.now()
daystr = _now.strftime('%Y%m%d')
figsavepth = ''.join(('/home/brad/DailyLog/', daystr, '/'))
print(figsavepth)

/home/brad/DailyLog/20171020/


In [6]:
# The goal is to compute the pairwise distances, using the KL divergence,
# between all trials of each stimulus.
# First we set up the parameters for the analysis.

# Bird parameters: bird ID -> path of the recording block to analyse.
# (The original literal listed 'B1056' twice with the same path; one entry is enough.)
bps = {
    'B1083': '/home/brad/krista/B1083/P03S03/',
    'B1075': '/home/brad/krista/B1075/P01S03/',
    'B1235': '/home/brad/krista/B1235/P02S01/',
    'B1056': '/home/brad/krista/B1056/klusta/phy020516/Pen01_Lft_AP100_ML1300__Site03_Z2500__B1056_cat_P01_S03_1/',
}

# Birds actually processed by the cells below
birds = ['B1083']

# Binning parameters
windt = 10.0                      # bin width, milliseconds
dtovr = 0.5*windt                 # overlap between bins, milliseconds (passed as dt_overlap)
segment_info = [0, 0]             # use full trial
cluster_group = ['Good']          # use just good clusters
comment = 'ForSLSE'               # tag for the SLSE computations
bdfs = {}                         # bird ID -> binned data file
scgfs = {}                        # bird ID -> simplicial complex (chain group) file

# Simplicial complex parameters
thresh = 6.0                      # threshold handed to sa.computeChainGroups
dim = 1                           # dimension handed to sc.compute_laplacian
beta = -0.15                      # beta handed to sc.KLdivergence_lap


In [7]:
# Now, bin the data: one binned file per bird, remembered in `bdfs`
for bird in birds:
    block_path = bps[bird]
    bfdict = tp2.dag_bin(block_path, windt, segment_info,
                         cluster_group=cluster_group, dt_overlap=dtovr, comment=comment)

    # bfdict['raw'] is the directory searched for the binned output
    binned_files = glob.glob(os.path.join(bfdict['raw'], '*.binned'))
    if not binned_files:
        # Fail with a readable message instead of a bare IndexError
        raise FileNotFoundError(
            "No '*.binned' file found in {!r} for bird {}".format(bfdict['raw'], bird))

    # NOTE(review): if several .binned files exist, glob order decides which
    # one is used -- confirm that the first match is the intended file.
    bdf = binned_files[0]
    print(bdf)
    bdfs[bird] = bdf

/home/brad/krista/B1083/P03S03/binned_data/win-10.0_dtovr-5.0_seg-0-0-ForSLSE/20171019T171241Z-10.0-5.0.binned


In [8]:
# Ok, data binned, now we compute chain groups
# For every bird, build the chain groups from its binned file at threshold
# `thresh` and remember the returned value (the path of the .scg file, as the
# printed `scgfs` shows) under the bird ID.
# Note: `bird` and `scg_f` stay defined after the loop and are read by later cells.
for bird in birds:
    block_path = bps[bird]
    scg_f = sa.computeChainGroups(block_path, bdfs[bird], thresh, comment=comment)
    scgfs[bird] = scg_f

Computing Chain Groups...
KeysView(<HDF5 file "20171019T171241Z-10.0-5.0.binned" (mode r)>)
Stim: I_40k, Clusters:None
Starting jobs...
Stim: J_40k, Clusters:None
Starting jobs...
Stim: K_40k, Clusters:None
Starting jobs...
Stim: L_40k, Clusters:None
Starting jobs...
Stim: M_40k, Clusters:None
Starting jobs...
Stim: N_40k, Clusters:None
Starting jobs...
Stim: O_40k, Clusters:None
Starting jobs...
Stim: P_40k, Clusters:None
Starting jobs...


In [9]:
# Show the chain-group file recorded for each bird. `scgfs` is already filled
# in by the chain-group loop, so no re-assignment from leftover loop variables
# is needed here.
print(scgfs)

{'B1083': '/home/brad/krista/B1083/P03S03/scg/20171019T171241Z-10.0-5.0-6.0-ForSLSE.scg'}


In [None]:
# Parallel version. NOTE: par_KL is defined in the next cell -- run that cell first.
# Organization of scg file is [stim]
# `bird` is the value left over from the loops above (the last bird processed).
# pickle.load can execute arbitrary code: only open .scg files produced by this pipeline.
with open(scgfs[bird], 'rb') as scgf:
    scg_data = pickle.load(scgf)
    print(scg_data.keys())

# Logic of computation.  For each pair of stimuli,
# we compute the KL divergence between all pairs of trials.
# so we need an Nstim x Nstim x (Ntrials*Ntrials) matrix to store all the values
stims = list(scg_data.keys())   # Get list of stimuli
Nstim = len(stims)              # Get number of stimuli
Ntrials = 5                     # Hard coded for now...

# Create result array
KL_divs = np.zeros((Nstim, Nstim, Ntrials*Ntrials))

# KL divergence is not symmetric, so we have to do i.e. I/L and L/I
for i, stim1 in enumerate(stims):
    for j, stim2 in enumerate(stims):
        print("Beginning Stimulus pair: ({}, {})".format(stim1, stim2))
        # Extract scgs for each stimulus
        scg1_dat = scg_data[stim1]
        scg2_dat = scg_data[stim2]
        # par_KL reports its result by writing into KL_divs. With a process-based
        # backend every worker would write into its own copy and this array would
        # stay all zeros, so use threads, which share the parent's memory.
        Parallel(n_jobs=7, backend='threading')(
            delayed(par_KL)(scg1_dat, scg2_dat, trial1, trial2, KL_divs, i, j)
            for trial1, trial2 in product(range(Ntrials), range(Ntrials)))


In [None]:
def par_KL(scg1_dat, scg2_dat, trial1, trial2, KL_divs, i, j):
    """Compute the KL divergence for one (trial1, trial2) pair of two stimuli.

    The value is stored in ``KL_divs[i, j, trial1*Ntrials + trial2]`` and is
    also returned, so callers that cannot share ``KL_divs`` with the worker
    (e.g. process-based parallelism) can collect results from the return value.

    Reads the notebook-level globals ``dim``, ``beta`` and ``Ntrials``.

    Parameters
    ----------
    scg1_dat, scg2_dat
        Per-stimulus containers, indexed by trial number.
    trial1, trial2 : int
        Trial indices into ``scg1_dat`` and ``scg2_dat`` respectively.
    KL_divs : numpy.ndarray
        Result array, written in place at ``[i, j, trial1*Ntrials + trial2]``.
    i, j : int
        Indices of the first and second stimulus in ``KL_divs``.

    Returns
    -------
    The value produced by ``sc.KLdivergence_lap`` for this trial pair.
    """
    scg1 = scg1_dat[trial1]
    scg2 = scg2_dat[trial2]

    # Compute Laplacians
    L1 = sc.compute_laplacian(scg1, dim)
    L2 = sc.compute_laplacian(scg2, dim)

    # Reconcile Laplacians
    # Lsamp : scg1 Ldata : scg2
    # The Laplacian with fewer elements is always passed first.
    # NOTE(review): presumably reconcile_laplacians brings both to a common
    # size -- confirm against neuraltda.simpComp.
    if (np.size(L1) > np.size(L2)):
        (L2, L1) = sc.reconcile_laplacians(L2, L1)
    else:
        (L1, L2) = sc.reconcile_laplacians(L1, L2)

    # Compute Divergence
    KL = sc.KLdivergence_lap(L1, L2, beta)
    # Flat index: trial1 selects the block, trial2 the offset within it
    KL_divs[i, j, trial1*Ntrials + trial2] = KL
    return KL

In [None]:
# Non parallel version
# Organization of scg file is [stim]
# `bird` is the value left over from the loops above (the last bird processed).
with open(scgfs[bird], 'rb') as scgf:
    scg_data = pickle.load(scgf)
    print(scg_data.keys())

# Logic of computation.  For each pair of stimuli,
# we compute the KL divergence between all pairs of trials.
# so we need an Nstim x Nstim x (Ntrials*Ntrials) matrix to store all the values
stims = list(scg_data.keys())   # Get list of stimuli
Nstim = len(stims)              # Get number of stimuli
Ntrials = 5                     # Hard coded for now...

# Create result array
KL_divs = np.zeros((Nstim, Nstim, Ntrials*Ntrials))

# KL divergence is not symmetric, so we have to do i.e. I/L and L/I
for i, stim1 in enumerate(stims):
    for j, stim2 in enumerate(stims):
        print("Beginning Stimulus pair: ({}, {})".format(stim1, stim2))
        # Extract scgs for each stimulus
        scg1_dat = scg_data[stim1]
        scg2_dat = scg_data[stim2]
        # product() has no len(), so give tqdm the total explicitly; otherwise
        # it can only show a counter, not a progress bar or ETA.
        trial_pairs = product(range(Ntrials), range(Ntrials))
        for (trial1, trial2) in tqdm.tqdm(trial_pairs, total=Ntrials*Ntrials):
            scg1 = scg1_dat[trial1]
            scg2 = scg2_dat[trial2]

            # Compute Laplacians
            L1 = sc.compute_laplacian(scg1, dim)
            L2 = sc.compute_laplacian(scg2, dim)

            # Reconcile Laplacians
            # Lsamp : scg1 Ldata : scg2
            # The Laplacian with fewer elements is always passed first.
            if (np.size(L1) > np.size(L2)):
                (L2, L1) = sc.reconcile_laplacians(L2, L1)
            else:
                (L1, L2) = sc.reconcile_laplacians(L1, L2)

            # Compute Divergence
            KL = sc.KLdivergence_lap(L1, L2, beta)
            # Flat index: trial1 selects the block, trial2 the offset within it
            KL_divs[i, j, trial1*Ntrials + trial2] = KL
            
            

In [13]:
# Persist the raw divergences for the current bird (`bird` is the value left
# over from the loops above). The dated output directory is new each day, so
# create it if it is not there yet.
os.makedirs(figsavepth, exist_ok=True)
with open(os.path.join(figsavepth, 'KL_divs_{}.pkl'.format(bird)), 'wb') as f:
    pickle.dump(KL_divs, f)

In [None]:
# Sanity check: the array was allocated as (Nstim, Nstim, Ntrials*Ntrials)
KL_divs.shape

In [None]:
# Average over the trial-pair axis: one mean divergence per ordered
# (stim1, stim2) pair, giving an Nstim x Nstim matrix
KL_div_mean = KL_divs.mean(axis=2)

In [None]:
# Heat map of the trial-averaged KL divergence for every ordered stimulus pair.
# Rows follow the first index of KL_div_mean (stim1), columns the second (stim2).
fig, ax = plt.subplots()
im = ax.imshow(KL_div_mean, interpolation='nearest')

# Label both axes with the stimulus names, in the same order used to fill KL_divs
ax.set_xticks(np.arange(Nstim))
ax.set_xticklabels(stims, rotation=90)
ax.set_yticks(np.arange(Nstim))
ax.set_yticklabels(stims)
ax.set_xlabel('stim2')
ax.set_ylabel('stim1')
ax.set_title('Mean KL divergence over trial pairs')

fig.colorbar(im, ax=ax)

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Build the condensed distance vector expected by linkage(): the strictly
# upper-triangular entries of the mean-divergence matrix, in row-major order.
# NOTE(review): KL divergence is not symmetric, so the lower triangle
# (the reverse direction of each pair) is ignored here -- consider whether a
# symmetrised matrix is what the clustering should use.
upper_rows, upper_cols = np.triu_indices(Nstim, k=1)
y = KL_div_mean[upper_rows, upper_cols]

# Single linkage is scipy's default method; spelled out for the reader
Z = linkage(y, method='single')

In [None]:
# Draw the clustering tree; label each leaf with its stimulus name instead of
# a bare index (leaf k corresponds to stims[k], the order used for KL_div_mean)
dendrogram(Z, labels=stims)

In [None]:
# Split the mean-divergence matrix into its four quadrants. Stimulus indices
# 0-3 are labelled novel and indices 4 onward familiar.
# NOTE(review): this relies on the order of `stims` -- confirm the first four
# entries really are the novel stimuli.
novel, familiar = slice(0, 4), slice(4, None)

d_fam_fam = KL_div_mean[familiar, familiar]   # familiar vs familiar
d_fam_nov = KL_div_mean[familiar, novel]      # familiar vs novel
d_nov_fam = KL_div_mean[novel, familiar]      # novel vs familiar
d_nov_nov = KL_div_mean[novel, novel]         # novel vs novel