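The purpose of this notebook is to test, for a set of candidate behavioral transitions, whether the behavior before the manipulation, the behavior after it, and their interaction significantly predict $\Delta F/F$, using linear regression with observations grouped by subject.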

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pathlib

import numpy as np
import pandas as pd
import scipy.io

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.data_processing import generate_transition_dff_table
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh
from keller_zlatic_vnc.linear_modeling import one_hot_from_table

from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats



## Options for analysis

In [3]:
# Cell type whose activity we fit models to: 'a00c', 'basin' or 'handle'
cell_type = 'a00c'

# Manipulation target(s) whose events are analyzed: 'A4', 'A9' or 'both'
manip_type = 'both'

# Cutoff time used to define quiet behaviors following stimulation
cut_off_time = 9.0034

# Whether we predict dff 'before' or 'after' the manipulation
period = 'after'

# Candidate transitions to test for significance, one list per manipulation type.
# Every entry is indexed as entry[0] = behavior before, entry[1] = behavior after,
# so both 2-tuples and 2-character strings are accepted downstream.
a4_cand_interactions = [
    ('B', 'B'), ('B', 'F'), ('B', 'H'), ('B', 'T'),
    ('F', 'F'), ('F', 'P'),
    ('O', 'F'),
    ('Q', 'F'), ('Q', 'H'), ('Q', 'O'),
    ('T', 'B'), ('T', 'F'),
]

a9_cand_interactions = [
    ('Q', 'F'),
    ('B', 'F'), ('B', 'P'), ('B', 'T'),
    ('F', 'F'), ('F', 'P'),
    ('T', 'P'),
]

both_cand_interactions = [
    'TB', 'TP', 'TF',
    'BB', 'BP', 'BO', 'BF',
    'FB', 'FP', 'FO', 'FF',
]

## Location of data

In [4]:
# Folder holding the transition spreadsheet and the activity files.
# NOTE: this is a machine-specific absolute path.  Alternative location (kept for reference):
#   /Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2
data_folder = r'/Users/williambishop/Desktop/extracted_dff_v2'

# Excel file with the raw transitions, located inside data_folder
transition_file = 'transition_list.xlsx'

# Activity files for each cell type, as (A4 manipulation, A9 manipulation) pairs
a00c_a4_act_data_file, a00c_a9_act_data_file = 'A00c_activity_A4.mat', 'A00c_activity_A9.mat'
basin_a4_act_data_file, basin_a9_act_data_file = 'Basin_activity_A4.mat', 'Basin_activity_A9.mat'
handle_a4_act_data_file, handle_a9_act_data_file = 'Handle_activity_A4.mat', 'Handle_activity_A9.mat'

## Specify some parameters we use in the code below

In [5]:
# Map each supported cell type to its (A4, A9) activity files
act_files_by_cell_type = {
    'a00c': (a00c_a4_act_data_file, a00c_a9_act_data_file),
    'basin': (basin_a4_act_data_file, basin_a9_act_data_file),
    'handle': (handle_a4_act_data_file, handle_a9_act_data_file),
}

# Fail early with a clear message if the requested cell type is not supported
if cell_type not in act_files_by_cell_type:
    raise ValueError('The cell type ' + str(cell_type) + ' is not recognized.')

a4_act_file, a9_act_file = act_files_by_cell_type[cell_type]

## Load data

In [6]:
data_dir = pathlib.Path(data_folder)

# Read in raw transitions
raw_trans = read_raw_transitions_from_excel(data_dir / transition_file)

# Read in activity for each manipulation target
a4_act = scipy.io.loadmat(data_dir / a4_act_file, squeeze_me=True)
a9_act = scipy.io.loadmat(data_dir / a9_act_file, squeeze_me=True)

# Correct mistake in labeling if we need to.  The [1][0] indexing selects the
# second entry equal to '0824L2CL' and relabels it.
# NOTE(review): this assumes at least two entries carry that label - confirm.
if cell_type == 'basin' or cell_type == 'handle':
    ind = np.argwhere(a4_act['newTransitions'] == '0824L2CL')[1][0]
    a4_act['newTransitions'][ind] = '0824L2-2CL'

# Recode behavioral annotations
raw_trans = recode_beh(raw_trans, 'Beh Before')
raw_trans = recode_beh(raw_trans, 'Beh After')

# Extract transitions
trans = extract_transitions(raw_trans, cut_off_time)

# Generate a table of data for each manipulation target
a4table = generate_transition_dff_table(act_data=a4_act, trans=trans)
a9table = generate_transition_dff_table(act_data=a9_act, trans=trans)

# Tag each table with its manipulation target and stack them into one table.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
a4table['man_tgt'] = 'A4'
a9table['man_tgt'] = 'A9'
data = pd.concat([a4table, a9table], ignore_index=True)

In [7]:
# Display the combined A4 + A9 table (pandas truncates long frames when rendering)
data

Unnamed: 0,subject_id,cell_id,event_id,beh_before,beh_after,dff_before,dff_during,dff_after,man_tgt
0,CW_17-08-23-L1,1.0,0,F,P,0.002743,0.075342,0.125693,A4
1,CW_17-08-23-L1,1.0,1,F,P,0.005595,0.117746,0.221240,A4
2,CW_17-08-23-L1,2.0,0,F,P,0.003111,0.110709,0.175382,A4
3,CW_17-08-23-L1,2.0,1,F,P,0.004554,0.182768,0.311344,A4
4,CW_17-08-23-L1,3.0,0,F,P,0.046016,0.084467,0.234622,A4
...,...,...,...,...,...,...,...,...,...
1466,CW_17-11-08-L3,6.0,2,Q,F,0.016744,0.791211,0.939961,A9
1467,CW_17-11-08-L3,6.0,3,Q,F,0.011291,0.725406,0.856928,A9
1468,CW_17-11-08-L3,6.0,4,Q,F,0.016901,0.705781,0.782500,A9
1469,CW_17-11-08-L3,6.0,5,Q,P,0.009434,0.648214,0.664910,A9


## Down select for manipulation target

In [8]:
# Keep only the events for the requested manipulation target and pick the matching
# list of candidate transitions.  Unrecognized values raise instead of silently
# falling through to the 'both' case.
if manip_type == 'A4':
    print('Analyzing only A4 manipulation events.')
    data = data[data['man_tgt'] == 'A4']
    cand_interactions = a4_cand_interactions
elif manip_type == 'A9':
    print('Analyzing only A9 manipulation events.')
    data = data[data['man_tgt'] == 'A9']
    cand_interactions = a9_cand_interactions
elif manip_type == 'both':
    print('Analyzing all manipulation events.')
    cand_interactions = both_cand_interactions
else:
    raise ValueError('The manipulation type ' + str(manip_type) + ' is not recognized.')

Analyzing all manipulation events.


## Look at number of subjects we have for each type of transition

In [9]:
# Table of the number of unique subjects observed for each transition.
# NOTE(review): presumably rows are the behavior before and columns the behavior
# after - confirm against count_unique_subjs_per_transition.
count_unique_subjs_per_transition(data)

Unnamed: 0,B,F,H,O,P,Q,T
B,4.0,16.0,0.0,1.0,3.0,2.0,5.0
F,1.0,14.0,0.0,2.0,8.0,1.0,1.0
H,0.0,1.0,0.0,0.0,0.0,0.0,0.0
O,1.0,4.0,0.0,0.0,0.0,0.0,0.0
P,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Q,1.0,10.0,1.0,4.0,1.0,10.0,0.0
T,4.0,7.0,0.0,0.0,4.0,1.0,2.0


## Pull out $\Delta F/F$

In [10]:
# Validate the requested period first so the error names the offending value
if period not in ('before', 'after'):
    raise ValueError('The period ' + str(period) + ' is not recognized.')

# Select the 'dff_before' or 'dff_after' column as the values we predict
dff = data['dff_' + period].to_numpy()

## Find grouping of data by subject

In [11]:
# Assign every row an integer group label (stored as float) identifying its subject;
# labels follow the order in which subjects first appear in the table.
unique_ids = data['subject_id'].unique()
g = np.zeros(len(data))
for grp_label, subj_id in enumerate(unique_ids):
    subj_rows = data['subject_id'] == subj_id
    g[subj_rows] = grp_label

## Calculate significance of each candidate interaction

In [12]:
# One row per fitted coefficient: the three one-hot columns returned by
# one_hot_from_table plus the constant column, whose coefficient is reported as 'Reference'.
coef_labels = ['Before', 'After', 'Interaction', 'Reference']

n_interactions = len(cand_interactions)
p_vls = np.zeros([len(coef_labels), n_interactions])
beta_vls = np.zeros([len(coef_labels), n_interactions])

# Fit one regression per candidate transition, with rows grouped by the labels in g
for i, interaction in enumerate(cand_interactions):
    # interaction[0] is the behavior before, interaction[1] the behavior after
    encoded, _ = one_hot_from_table(data,
                                    beh_before=[interaction[0]],
                                    beh_after=[interaction[1]],
                                    enc_subjects=False,
                                    enc_beh_interactions=True)

    # Append a column of ones as the last regressor
    ones_col = np.ones([encoded.shape[0], 1])
    one_hot_data = np.concatenate([encoded, ones_col], axis=1)

    beta, acm, n_gprs = grouped_linear_regression_ols_estimator(x=one_hot_data, y=dff, g=g)
    beta_vls[:, i] = beta

    stats = grouped_linear_regression_acm_stats(beta=beta, acm=acm, n_grps=n_gprs, alpha=.05)
    p_vls[:, i] = stats['non_zero_p']

# Wrap the results in labeled tables: coefficients as rows, candidate transitions as columns
p_vls = pd.DataFrame(data=p_vls, columns=cand_interactions, index=coef_labels)
beta_vls = pd.DataFrame(data=beta_vls, columns=cand_interactions, index=coef_labels)


In [13]:
# Inspect the design matrix left over from the last loop iteration above,
# i.e. the one built for the final candidate transition.
one_hot_data

array([[1., 0., 0., 1.],
       [1., 0., 0., 1.],
       [1., 0., 0., 1.],
       ...,
       [0., 1., 0., 1.],
       [0., 0., 0., 1.],
       [0., 1., 0., 1.]])

## Look at p-values and beta

In [14]:
def apply_color(val, alpha=.05):
    """Return the CSS text color used to display a p-value.

    Args:
        val: The p-value to style.
        alpha: Significance threshold; values strictly below it are shown in red.
            Defaults to .05.

    Returns:
        'color: red' if val < alpha, otherwise 'color: black'.  NaN compares
        as not less than alpha and is therefore shown in black.
    """
    color = 'red' if val < alpha else 'black'
    return 'color: %s' % color

In [15]:
# Render the p-value table with each cell colored by apply_color.
# NOTE(review): Styler.applymap is deprecated in pandas >= 2.1 in favor of Styler.map;
# switch once older pandas versions no longer need to be supported.
p_vls.style.applymap(apply_color)

Unnamed: 0,TB,TP,TF,BB,BP,BO,BF,FB,FP,FO,FF
Before,0.309758,0.184741,0.119812,0.969096,0.496927,0.562415,0.0493309,0.566076,0.0827537,0.262261,0.657538
After,0.11355,0.00332366,1.59653e-07,0.289472,0.0031383,0.54573,2.34944e-06,3.43007e-05,0.000261344,0.372305,1.55646e-06
Interaction,0.373327,0.806334,0.794421,0.144535,0.743325,0.77682,0.192247,3.95211e-05,0.680157,0.239893,0.584324
Reference,1.11417e-21,9.687709999999999e-21,2.82112e-13,3.5015599999999997e-20,3.18537e-19,3.58426e-20,6.22976e-13,1.39767e-17,7.59554e-17,5.73461e-17,7.75575e-11
