In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pathlib

import numpy as np
import pandas as pd
import scipy.io

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.data_processing import generate_transition_dff_table
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh
from keller_zlatic_vnc.linear_modeling import one_hot_from_table
from keller_zlatic_vnc.linear_modeling import reference_one_hot_to_beh

from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats



In [3]:
# Lift pandas' display limits on both rows and columns so tables are shown in full
for display_opt in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_opt, None)

## Options for analysis

In [4]:
# Cell type the models are fit to; the file-selection cell accepts
# 'a00c', 'basin' or 'handle'
cell_type = 'a00c'

# Which perturbation targets to include: 'A4', 'A9' or 'both'
manip_type = 'both'

# Cut-off time used to define quiet behaviors following stimulation
# (3.656 was an alternative value tried previously)
cut_off_time = 9.0034

# Whether closure is enforced, i.e. only events whose before AND after
# behaviors are in the lists below are kept
enforce_closure = True

# Whether we predict dff 'before' or 'after' the manipulation
period = 'after'

# Model setup: behaviors modeled before and after the manipulation
beh_before = list('BFOQT')
beh_after = list('BFOPQT')

# Interaction terms to use when no cutoff is applied; each two-letter code is
# expanded to a pair of behaviors (presumably (before, after) - confirm)
beh_interactions = [tuple(code) for code in
                    ('BB', 'BF', 'FF', 'FP', 'QF', 'QO', 'TF', 'QQ')]

# Behavior the one-hot encoding is referenced to
beh_ref = 'Q'

## Location of the data

In [5]:
# Folder holding the transition list and all activity files.
# NOTE(review): this is an absolute, machine-specific path - adjust per machine.
data_folder = r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2'

# Excel file listing the annotated transitions
transition_file = 'transition_list.xlsx'

# Activity files, one (A4, A9) pair per cell type
a00c_a4_act_data_file, a00c_a9_act_data_file = 'A00c_activity_A4.mat', 'A00c_activity_A9.mat'
basin_a4_act_data_file, basin_a9_act_data_file = 'Basin_activity_A4.mat', 'Basin_activity_A9.mat'
handle_a4_act_data_file, handle_a9_act_data_file = 'Handle_activity_A4.mat', 'Handle_activity_A9.mat'

## Select the activity data files for the chosen cell type

In [6]:
# Map each supported cell type to its (A4, A9) activity files
act_files_by_cell_type = {
    'a00c': (a00c_a4_act_data_file, a00c_a9_act_data_file),
    'basin': (basin_a4_act_data_file, basin_a9_act_data_file),
    'handle': (handle_a4_act_data_file, handle_a9_act_data_file),
}

# Fail early on an unsupported cell type.  str() keeps the message buildable
# when cell_type is not a string (e.g. None), so a ValueError is still raised.
if cell_type not in act_files_by_cell_type:
    raise ValueError('The cell type ' + str(cell_type) + ' is not recognized.')

a4_act_file, a9_act_file = act_files_by_cell_type[cell_type]

## Load data

In [7]:
data_dir = pathlib.Path(data_folder)

# Read in raw transitions
raw_trans = read_raw_transitions_from_excel(data_dir / transition_file)

# Read in activity for each manipulation target
a4_act = scipy.io.loadmat(data_dir / a4_act_file, squeeze_me=True)
a9_act = scipy.io.loadmat(data_dir / a9_act_file, squeeze_me=True)

# Correct mistake in labeling if we need to.  [1][0] selects the index of the
# SECOND entry equal to '0824L2CL' (presumably the label occurs twice and the
# second occurrence is the mislabeled one - confirm against the raw data).
if cell_type == 'basin' or cell_type == 'handle':
    ind = np.argwhere(a4_act['newTransitions'] == '0824L2CL')[1][0]
    a4_act['newTransitions'][ind] = '0824L2-2CL'

# Recode behavioral annotations in both behavior columns
for beh_col in ('Beh Before', 'Beh After'):
    raw_trans = recode_beh(raw_trans, beh_col)

# Extract transitions
trans = extract_transitions(raw_trans, cut_off_time)

# Generate table of data for each manipulation target
a4table = generate_transition_dff_table(act_data=a4_act, trans=trans)
a9table = generate_transition_dff_table(act_data=a9_act, trans=trans)

# Tag each table with its manipulation target and put the tables together.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
a4table['man_tgt'] = 'A4'
a9table['man_tgt'] = 'A9'
data = pd.concat([a4table, a9table], ignore_index=True)

## Down select for manipulation target

In [8]:
# Decide which manipulation target (if any) to restrict the data to; any value
# other than the A4/A9 spellings below leaves the data untouched.
man_tgt_to_keep, man_tgt_msg = None, 'Analyzing all manipulation events.'
if manip_type in ('A4', 'a4'):
    man_tgt_to_keep, man_tgt_msg = 'A4', 'Analyzing only A4 manipulation events.'
elif manip_type in ('A9', 'a9'):
    man_tgt_to_keep, man_tgt_msg = 'A9', 'Analyzing only A9 manipulation events.'

print(man_tgt_msg)
if man_tgt_to_keep is not None:
    data = data[data['man_tgt'] == man_tgt_to_keep]

Analyzing all manipulation events.


## Enforce closure if needed

In [9]:
if enforce_closure:
    print('Enforcing closure.')
    # Keep only rows whose before AND after behaviors are both among the
    # behaviors listed for the model
    before_closure = data['beh_before'].isin(beh_before).to_numpy()
    after_closure = data['beh_after'].isin(beh_after).to_numpy()
    closure = before_closure & after_closure

    data = data[closure]

Enforcing closure.


## Get rid of rows of data where neither the before nor the after behavior is of interest

In [10]:
# Flag rows whose before / after behavior is outside the modeled behaviors
before_ignore = ~data['beh_before'].isin(beh_before).to_numpy()
after_ignore = ~data['beh_after'].isin(beh_after).to_numpy()

# A row is dropped only when BOTH its before and after behaviors are outside
# the lists; rows with at least one behavior of interest are kept
ignore_rows = before_ignore & after_ignore

data = data[~ignore_rows]

## See how many subjects we have for each transition

In [11]:
# Display the per-transition count of unique subjects for the filtered data
# (table is indexed by behavior codes; which axis is "before" vs "after" is
# determined by count_unique_subjs_per_transition - TODO confirm)
count_unique_subjs_per_transition(data)

Unnamed: 0,B,F,O,P,Q,T
B,4.0,16.0,1.0,3.0,2.0,5.0
F,1.0,14.0,2.0,8.0,1.0,1.0
O,1.0,4.0,0.0,0.0,0.0,0.0
P,0.0,0.0,0.0,0.0,0.0,0.0
Q,1.0,10.0,4.0,1.0,10.0,0.0
T,4.0,7.0,0.0,4.0,1.0,2.0


## Pull out $\Delta F/F$

In [12]:
# Pull out the dff values for the period we are predicting
if period == 'before':
    dff = data['dff_before'].to_numpy()
elif period == 'after':
    dff = data['dff_after'].to_numpy()
else:
    # Include the offending value in the message (the original concatenated
    # the literal text ' period' instead of the variable)
    raise ValueError('The period ' + str(period) + ' is not recognized.')

## Find grouping of data by subject

In [13]:
# Assign every row an integer-valued group label (stored as float) given by
# the position of its subject id in the order of first appearance
subject_ids = data['subject_id']
unique_ids = subject_ids.unique()
g = np.zeros(len(data))
for grp_ind, subj_id in enumerate(unique_ids):
    rows_for_subj = (subject_ids == subj_id).to_numpy()
    g[rows_for_subj] = grp_ind

## Calculate stats

In [14]:
# One-hot encode the before/after behaviors of each event.
# NOTE(review): enc_beh_interactions is False here, yet the results shown at
# the end of the notebook list a beh_interact_BB term - confirm whether the
# saved outputs were produced with this setting.
one_hot_data, one_hot_vars = one_hot_from_table(
    data,
    beh_before=beh_before,
    beh_after=beh_after,
    enc_subjects=False,
    enc_beh_interactions=False,
    beh_interactions=beh_interactions,
)

# Re-express the encoding relative to the reference behavior
one_hot_data_ref, one_hot_vars_ref = reference_one_hot_to_beh(
    one_hot_data=one_hot_data,
    one_hot_vars=one_hot_vars,
    beh=beh_ref,
    remove_interaction_term=False,
)

# Append a constant column of ones (named 'ref') as the last regressor
n_events = one_hot_data_ref.shape[0]
ref_column = np.ones([n_events, 1])
one_hot_data_ref = np.concatenate((one_hot_data_ref, ref_column), axis=1)
one_hot_vars_ref.append('ref')

del_inds: [3, 9]


In [15]:
# Singular values of the design matrix.  compute_uv=False skips building the
# U and Vh factors, which were computed and thrown away before, and avoids
# assigning to `_`, which IPython uses for the last cell output.
v = np.linalg.svd(one_hot_data_ref, compute_uv=False)
print(v)

# A very small singular value means the regressors are close to linearly
# dependent, so refuse to continue with the fit
if np.min(v) < .001:
    raise RuntimeError('regressors are nearly co-linear')

[47.80452654 23.29198131 21.74269026 17.84916954 15.85475207 14.4669744
 13.64636363 11.78368595  8.25425918  6.51914049  6.02172691  5.92958718
  3.97078767  3.1856338   2.90412716  1.94949347  1.09602185  0.85091877]


In [16]:
# Fit dff against the referenced one-hot regressors, passing the per-row
# subject grouping g to the grouped OLS estimator
beta, acm, n_gprs = grouped_linear_regression_ols_estimator(
    x=one_hot_data_ref,
    y=dff,
    g=g,
)

# Compute statistics for the fitted coefficients at alpha = .05
stats = grouped_linear_regression_acm_stats(
    beta=beta,
    acm=acm,
    n_grps=n_gprs,
    alpha=.05,
)

## View results

In [17]:
# One row per regressor: fitted coefficient and its 'non_zero_p' p-value
beta_and_p = np.column_stack([beta, stats['non_zero_p']])
rs = pd.DataFrame(data=beta_and_p, index=one_hot_vars_ref, columns=['beta', 'p'])

In [18]:
def color_small_p(val):
    """Return the CSS color rule for one p-value cell.

    Values strictly below .05 are colored red; everything else is black.
    """
    if val < .05:
        return 'color: red'
    return 'color: black'

In [19]:
# Color the p-value column element-wise.  Newer pandas renames
# Styler.applymap to Styler.map (applymap is deprecated there), so use map
# when it exists and fall back to applymap on older versions.
rs_styler = rs.style
color_elementwise = rs_styler.map if hasattr(rs_styler, 'map') else rs_styler.applymap
color_elementwise(color_small_p, subset=['p'])

Unnamed: 0,beta,p
beh_before_B,-0.396345,0.000208065
beh_before_F,-0.277198,0.0283546
beh_before_O,-0.47429,6.14945e-07
beh_before_T,-0.418422,2.49377e-05
beh_after_B,-0.0678887,0.677525
beh_after_F,0.233922,0.179664
beh_after_O,-0.0151647,0.927077
beh_after_P,-0.154132,0.29068
beh_after_T,-0.176178,0.248953
beh_interact_BB,-0.119532,0.208516
