### Augment the PythonPath so python can find necessary code.

In [None]:
import os, sys, datetime
# Locations of the project code this notebook imports from.
# (expanduser leaves these strings unchanged because none of them starts with '~'.)
LF_CODE_PATH = os.path.expanduser('/home/andalman/projects/LFAnalyze/code')
FT_CODE_PATH = os.path.expanduser('/home/andalman/projects/fishTrax/code/analysis/')
FD_CODE_PATH = os.path.expanduser('/home/andalman/projects/fish_despair_notebooks/src/')

# Prepend all three in one step; the resulting search order is FD, FT, LF,
# followed by whatever was already on sys.path.
sys.path[:0] = [FD_CODE_PATH, FT_CODE_PATH, LF_CODE_PATH]

### Import useful python packages

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import tqdm
import numpy as np
import pandas as pd
import scipy
import skimage.io
import visualization_utils as vizutil
import seaborn as sns
from skimage.filters import gaussian_filter

In [None]:
import pandas as pd
from pandas import DataFrame

### Load the data

hbstim_imaging_utils provides a list of all the datasets and provides a helper class to make loading the data easy.

The data_sets are split into two conditions:  
'c': chr2 negative fish (n=4)  
'e': chr2 positive fish (n=6)  
  
all_data is a dictionary keyed by the condition.  
all_data[condition] contains a list of HbStim_Fish objects.

In [None]:
# The hbstim_imaging_utils loader is disabled below; passivity_2p_imaging_utils is used instead.
# import hbstim_imaging_utils as hbutils
import passivity_2p_imaging_utils as p2putils
# reload(hbutils)
# Re-import the module so edits made to it on disk are picked up without restarting the kernel.
reload(p2putils)
# Directory handed to the loader as `tmp_dir`.
tmp_dir = '/tmp/'
# all_data = hbutils.get_all_datasets(tmp_dir=tmp_dir)
all_data = p2putils.get_all_datasets(tmp_dir=tmp_dir)

# Report how many datasets were loaded for each condition key.
print 'c, n =', len(all_data['c'])
print 'e, n =', len(all_data['e'])

In [None]:
#Grab the first experimental fish
print('p2_fish')
f = all_data.e[1] 
print 'Num z-planes imaged:', f.num_zplanes
print 'Volume-Rate:', 1/np.diff(f.frame_st[:,0]).mean() #frame_st is #frames x #slices, we examine interval between imaging first slice
print 'Movement Times', f.forward_swim_times
print 'Shock times', f.shock_st

In [None]:
f = all_data.e[1] 
# print 'Num z-planes imaged:', f.num_zplanes
# print 'Number of stimulation trials:', f.numtrials
print 'Num z-planes imaged:', f.num_zplanes
print 'Volume-Rate:', 1/np.diff(f.frame_st[:,0]).mean() #frame_st is #frames x #slices, we examine interval between imaging first slice
print 'Movement Times', f.forward_swim_times
print 'Shock times', f.shock_st

The `get_roi_table` method returns a dataframe of all the ROIs for the fish.  Each row of this table represents an ROI and specifies the plane the ROI is in, the pixels that are included in the ROI, the centroid of the ROI, and which brain regions the ROI is in.  

Note, this data is older and was processed by simply segmenting the anatomical images.  Thus the data does not look as clean as data that is cleaned up and processed using CNMF, for example.

In [None]:
df = f.get_roi_table() #this can be slow to run the first time as data is loaded from files
df.head()

#### 'get_signals_raw' returns a matrix containing the raw fluorescent signal associated with each ROI.  Each row of this matrix is associated with the corresponding row of the ROI table.  

Note, I only use the second half of the signal matrix, because the agarose had not fully hardened during the first half of imaging, which caused the fish to drift in z slightly.

In [None]:
# Raw fluorescence signals for this fish, requested with z=None.
# NOTE(review): the accompanying text says only the second half of the signal
# matrix is used, but no slicing is applied here — confirm which is intended.
M = f.get_signals_raw(z=None)
#M = hbutils.df_over_f(M)
# df holds one row per ROI, so its row count is the number of ROIs.
print 'Num ROIs:', df.shape[0]
print 'Shape of signal matrix', M.shape

There are also various methods for grabbing the raw imaging data:  
get_tif_as_vol  
get_tif_rasl

We can use this to visualize a few ROIs in a particular plane/slice and brain region:

In [None]:
M.shape

In [None]:
import statsmodels.tsa.stattools as stat
from scipy.stats import norm

In [None]:
def gcause_pval(X,lag=1):
    """Return the Granger-causality p-value (parameter F-test) at the given lag.

    Parameters
    ----------
    X : 2-column array-like
        Passed unchanged to statsmodels' ``grangercausalitytests``, which tests
        whether the series in the SECOND column Granger-causes the series in
        the FIRST column.
    lag : int
        Maximum lag handed to statsmodels; the p-value reported is the one
        computed for exactly this lag.

    Returns
    -------
    float
        Element 1 (the p-value) of the ``"params_ftest"`` result.
    """
    gc = stat.grangercausalitytests(X,lag, verbose=False)
    # The result dict is keyed by lag (1..lag). Look up the requested lag
    # explicitly rather than taking whichever value happens to come first.
    test_results = gc[lag][0]
    return test_results["params_ftest"][1]

In [None]:
def moving_average(a, n=3) :
    """Trailing moving average over the first axis, same length as the input.

    Parameters
    ----------
    a : array-like
        Samples along axis 0 (any number of trailing dimensions, including none).
    n : int
        Window length in samples; must satisfy ``1 <= n <= len(a)``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``a``. Entry ``k`` (for ``k >= n-1``) is
        the mean of ``a[k-n+1 : k+1]``; the first ``n-1`` entries are filled
        with the first full-window mean.

    Raises
    ------
    ValueError
        If ``n < 1`` or the input has fewer than ``n`` samples.
    """
    a = np.asarray(a)
    if n < 1:
        raise ValueError("window length n must be >= 1, got %r" % (n,))
    if a.ndim == 0 or a.shape[0] < n:
        raise ValueError("need at least n=%d samples along axis 0" % n)

    # Accumulate integer/bool input in floating point so the later division is
    # a true division on every Python version; float input keeps its own dtype.
    acc_dtype = None if np.issubdtype(a.dtype, np.inexact) else float
    csum = np.cumsum(a, axis=0, dtype=acc_dtype)
    # Differences of the cumulative sum n samples apart give the window sums.
    csum[n:] = csum[n:] - csum[:-n]
    rm = csum[n - 1:] / n
    # Left-pad with copies of the first full-window mean; np.repeat works for
    # any number of trailing dimensions (the 2-D result is unchanged).
    pad_start = np.repeat(rm[:1], n - 1, axis=0)
    return np.concatenate([pad_start, rm], axis=0)

def ewma(data,span):
    """Exponentially weighted moving average down the rows of ``data``.

    Parameters
    ----------
    data : array-like
        Anything ``DataFrame`` accepts; each column is smoothed independently.
    span : float
        pandas ``span`` of the exponential window (decay ``alpha = 2/(span+1)``).

    Returns
    -------
    numpy.ndarray
        Smoothed values, same shape as the DataFrame built from ``data``.
    """
    df = DataFrame(data)
    # The first positional argument of DataFrame.ewm is `com` (centre of mass),
    # not `span`, so the value must be passed by keyword to get a span window.
    return df.ewm(span=span).mean().values

def df_f(x,ma_window=60,span=60):
    """Smoothed relative deviation of ``x`` from its moving-average baseline.

    The baseline is ``moving_average(x, ma_window)``; the relative change
    ``(x - baseline) / (baseline + 1e-10)`` is then passed through
    ``ewma(..., span)``. The small constant keeps the division finite when the
    baseline is exactly zero.
    """
    baseline = moving_average(x, ma_window)
    relative_change = (x - baseline) / (baseline + 1e-10)
    return ewma(relative_change, span)

In [None]:
plt.plot(M[0,0:400])

In [None]:
plt.plot(df_f(M[[0],0:400].T,6,6))

In [None]:
# df_f filters along axis 0, so M is transposed first; each column of Mdf then
# corresponds to one row of M (one ROI, per the description of the signal matrix).
Mdf = df_f(M.T)

In [None]:
from scipy.stats import kurtosis, skew

In [None]:
kurtosis(M.reshape(-1))

In [None]:
skew(M.reshape(-1))

In [None]:
kurtosis(Mdf.reshape(-1))

In [None]:
skew(Mdf.reshape(-1))

In [None]:
# Pool every value of Mdf and fit a normal distribution to the pooled sample.
data = Mdf.reshape(-1)
mu, std = norm.fit(data)

# Plot the histogram, normalised to a density so it is comparable with the PDF.
# (`density` replaces the removed `normed` keyword and matches the other
# histogram in this notebook.)
plt.hist(data, bins=1000, density=True, alpha=0.6)

# Plot the fitted PDF over the x-range the histogram currently spans.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 1000)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
plt.title(title)
# Zoom in on the central part of the distribution.
plt.xlim(-.5,.5)

plt.show()

In [None]:
bins = np.arange(-.5,.5,0.01)
plt.hist(Mdf[0:1000,0:1000].reshape(-1),bins);

In [None]:
plt.hist(M[0:1000,0:1000].reshape(-1),50);

In [None]:
# Reference case: 100000 draws from a standard normal distribution.
# x holds the percentile levels 0, 5, ..., 95 and y the matching sample
# percentiles; neither is plotted in this cell.
x = np.arange(0,100,5)
g = np.random.normal(0,1,100000)
y = [np.percentile(g,i) for i in x]
# Histogram of the draws with 50 bins (the trailing ';' suppresses the text repr).
plt.hist(g,50);

In [None]:
np.min(g)

In [None]:
# Single call on the first two columns; its return value is not used or displayed.
gcause_pval(Mdf[:,0:2])

# Distribution of Granger p-values over randomly chosen ROI pairs.
# NOTE: no random seed is set, so the sampled pairs differ between runs.
niter = 10000
pvals = np.zeros(niter)
# Number of rows of M; used as the number of columns of Mdf to sample from.
nroi = M.shape[0]
for n in range(niter):
    # Two distinct column indices, drawn without replacement.
    i, j = np.random.choice(nroi,2,False)
    pvals[n] = gcause_pval(Mdf[:,[i,j]])
plt.hist(pvals,100, density=True)
plt.title("Granger causality of random neuron pairs")
plt.xlabel("p val")
plt.ylabel("density")

# chord

In [None]:
# Names of the ROI-table columns that flag membership of a brain region.
ortho_regions = [
    u'in_r_cerebellum', u'in_l_cerebellum', u'in_l_vthal',
    u'in_l_tectum', u'in_l_raphe', u'in_r_hind', u'in_l_hind',
    u'in_l_dthal', u'in_r_tectum', u'in_r_LHb', u'in_r_dthal',
    u'in_r_raphe', u'in_r_tel',
    u'in_l_MHb', u'in_l_tel', u'in_r_MHb', u'in_l_LHb', u'in_r_vthal',
]

# Keep the columns of df that appear in the list above. The result follows the
# column order of df, not the order of ortho_regions.
regions = df.columns[df.columns.isin(ortho_regions)]

In [None]:
df[regions].sum(axis=0)

In [None]:
# Row positions (0-based, from np.where) of ROIs flagged in exactly one of `regions`.
neurons_in_one_region = np.where(df[regions].sum(axis=1)==1)[0]
# Index labels of ROIs flagged in the right or left LHb column.
# NOTE(review): these are df.index labels, whereas neurons_in_one_region holds
# positions; the two only agree if df has a default 0..N-1 index — confirm.
neurons_in_Hb = np.array(df.query("in_r_LHb==1 | in_l_LHb==1").index)
# Index labels of ROIs flagged in the right or left raphe column.
neurons_in_Ra = np.array(df.query("in_r_raphe==1 | in_l_raphe==1").index)

In [None]:
nhab = len(neurons_in_Hb)
nra = len(neurons_in_Ra)

In [None]:
def get_region(row,regions):
    """Return the position within ``regions`` of the first non-zero entry of ``row[regions]``.

    Raises IndexError when none of the selected entries is non-zero.
    """
    membership = np.asarray(row[regions])
    hits = np.nonzero(membership)[0]
    return hits[0]

In [None]:
from joblib import Parallel, delayed

In [None]:
# Number of random ROI pairs to test.
niter = 100000
# Size of the pool to sample from: ROIs that lie in exactly one region.
# NOTE: this rebinds the global `nroi` that the random-pair cell set to M.shape[0].
nroi = neurons_in_one_region.shape[0]
def g_draw(n):
    """Test Granger causality for one randomly drawn ordered pair of ROIs.

    Two distinct ROIs are drawn from ``neurons_in_one_region``; the first is
    treated as the candidate source and the second as the target.

    Parameters
    ----------
    n : int
        Iteration counter supplied by the caller; not used in the computation.

    Returns
    -------
    list
        ``[source_region, target_region, pval]`` where the regions are
        positions within ``regions`` and ``pval`` is the p-value for
        "source Granger-causes target".
    """
    # Reseed from fresh entropy on every call; presumably so that parallel
    # workers do not replay the same random stream.
    np.random.seed(None)
    ii, jj = np.random.choice(nroi,2,False)
    # Alternative sampling, kept for reference (draw the source from the raphe only):
    #     ii = np.random.choice(nra,1,False)[0]
    i, j = neurons_in_one_region[[ii,jj]]
    #     i = neurons_in_Ra[ii]
    i_region = get_region(df.iloc[i],regions)
    j_region = get_region(df.iloc[j],regions)
    # statsmodels tests whether the SECOND column Granger-causes the FIRST, so
    # the target (j) goes first and the source (i) second. With the columns in
    # [i, j] order the returned p-value would describe j -> i while being
    # reported as i -> j.
    pval = gcause_pval(Mdf[:,[j,i]])
    return [i_region, j_region,pval]
    
# Run niter independent draws across 24 joblib workers; each call returns one
# [region, region, p-value] triple.
granger_list = Parallel(n_jobs=24)(delayed(g_draw)(n) for n in range(niter))
# One row per sampled pair.
granger = DataFrame(granger_list,columns=["from","to","pval"])

In [None]:
# Row positions of pairs with p < 0.05.
# NOTE(review): the threshold is applied per test with no multiple-comparison
# correction, although one test is run for every sampled pair.
sig_idx = np.where(granger.pval < 0.05)[0]

In [None]:
len(sig_idx)

In [None]:
import numpy as np
import pandas as pd
import holoviews as hv
from bokeh.sampledata.airport_routes import routes, airports

hv.extension('matplotlib')
%output fig='svg' size=300


In [None]:
# Count the significant pairs for every (from, to) combination; after count()
# the remaining 'pval' column holds that count rather than a p-value.
links = granger.iloc[sig_idx].groupby(['from', 'to']).count().reset_index()

In [None]:
# The 'from'/'to' values in `links` are positions within `regions` (that is what
# get_region returns), so node ids and labels must be derived from `regions`.
# Using ortho_regions instead would mislabel nodes whenever its order or
# contents differ from the columns actually present in df.
# Strip the leading 'in_' from each column name to get a display label; a list
# (rather than a lazy map object) is needed to build the DataFrame.
region_names = [name[3:] for name in regions]
nodes = hv.Dataset(DataFrame(data={"index": np.arange(len(regions)), "name": region_names}),'index')
chord = hv.Chord((links,nodes))

In [None]:
%%opts Chord [edge_color_index='from' label_index='name' color_index='name']
# %%opts Chord (cmap='default_colors' edge_cmap='default_colors')
chord

# Archive

# Sparsity records
alpha 0.5: 100% sparse
alpha 1e-3: 99.9999% sparse

In [None]:
np.max(Mdf)

In [None]:
np.min(Mdf)

In [None]:
from datetime import datetime, timedelta
from __future__ import division

might need to use: http://foges.github.io/pogs/
currently ignoring intercept

In [None]:
# NOTE(review): Lasso, X and Y are not defined in the visible part of this
# notebook; they must already exist in the kernel for this cell to run.
# The 2h24m offset is a hard-coded guess at the fit duration.
print("Estimated end: " + str(datetime.now() + timedelta(hours=2,minutes=24)))
dynamics_lasso = Lasso(alpha=1e-5,fit_intercept=False)
dynamics_lasso.fit(X,Y)
# np.savez appends '.npz' and stores the positional array under the key 'arr_0'.
np.savez("dynamics_lasso_coef_alpha=1e-5_nointercept_uwindow=6_expwindow=6",dynamics_lasso.coef_)
# Sparsity is the share of coefficients that are exactly ZERO, expressed here as
# a percentage. The previous expression reported the non-zero fraction instead.
# float() keeps the division exact regardless of the active division mode.
percent_sparse = 100.0 * (1.0 - np.count_nonzero(dynamics_lasso.coef_) / float(dynamics_lasso.coef_.size))
print("Finished at: " + str(datetime.now()))
print("percent sparse: " + str(percent_sparse))

In [None]:
# Load previously saved coefficients ('arr_0' is the key np.savez gives a
# positional array). Note this is the alpha=5e-4 file, not the alpha=1e-5 file
# written by the fitting cell.
coef = np.load("dynamics_lasso_coef_alpha=5e-4_nointercept_uwindow=6_expwindow=6.npz")['arr_0']

In [None]:
coef.shape

In [None]:
np.nonzero(coef)[0].shape[0]

In [None]:
# Fraction of coefficients that are exactly zero. This needs true division,
# which the `from __future__ import division` cell enables on Python 2.
1 - np.nonzero(coef)[0].shape[0]/np.prod(coef.shape)

In [None]:
# Plane (z index) to display.
z=4

# Create a background image by averaging frames 1000-1199 (200 frames) of this
# plane and adjusting the gamma (raising to the power 0.4).
back_img = np.power(f.get_tif_as_vol(z,range(1000,1200)).mean(axis=2),.4)

# Select the ROIs flagged in the in_l_LHb column (left LHb, going by the column
# name) that lie in this plane, and get their coordinates.
coords = df[(df.in_l_LHb) & (df.z==z)].coords

# Overlay the ROIs on the background image and display.
# [0,0,1] is presumably an RGB colour (blue) — confirm against vizutil.overlay_coords.
img = vizutil.overlay_coords(back_img, coords, [0,0,1], alpha=.5)
plt.figure(figsize=[20,20])
plt.imshow(img,interpolation='nearest')

During the imaging, 1P Scanning stimulation light is rotated between three locations:

In [None]:
print f.stim_locations

In Chr2- fish none of these locations contained ChR2.  
  
In Chr2+ fish, the left LHb and the right LHb contained ChR2 positive cells.   
  
The forebrain never has ChR2 positive cells. It serves as control stimulation location.  It controls for the fact that the stimulation light is visible to the fish, and thus acts as a visual stimulus.

'num_frames_per_stim' indicates the number of volumes that are collected between stimulation locations:

In [None]:
print f.num_frames_per_stim

Let's call this number N.  Thus an experiment looks like:
    
Image N frames - Stim lHb - Image N frames - Stim forebrain - Image N frames - Stim rHb ...

This is repeated `numtrials` times.

In [None]:
print f.numtrials

The frame numbers of all frames immediately following stimulation at a particular location are therefore:

In [None]:
loc = 'lHb'
pos = (np.where(f.stim_locations == 'lHb')[0]+1) * f.num_frames_per_stim
ndx = np.arange(pos, M.shape[1], f.trialsize)
print ndx

### Response Statistics

For each ROI, I have computed whether the response to lHb (and rHb) stim is significantly different from the response to stimulation at the control location (forebrain).  

The response on each trial is taken as the change in fluorescence between a baseline window and a response window.  In the example below, the baseline window is the 10 volumes prior to stimulation, and the response window is the first 3 volumes following stimulation.

pvalues is a list of length # planes.  Each element is a list of the p-values of all the ROIs in that plane.

Note, because `get_norm_response_stats` can be slow to run, it caches results for particular baseline and response windows.  If you specify windows that I haven't run before, it can be slow to run.

In [None]:
# Baseline window: the 10 volumes before stimulation (offsets -10 .. -1).
# Response window: the first 3 volumes after stimulation (offsets 0 .. 2).
base_offsets = list(range(-10, 0))
resp_offsets = list(range(3))
pvalues, rel_resps, abs_resps = f.get_norm_response_stats(
    stim_location='lHb',
    base_window_ndx=base_offsets,
    resp_window_ndx=resp_offsets,
)

In [None]:
# Compare the ROI table for plane z=0 with the p-values for the first plane;
# there should be one p-value per ROI row.
df_z = f.get_roi_table(z=0)
print df_z.shape
print pvalues[0].shape

In [None]:
np.arange(5)[:-1]