Here we fit linear models that contain only behavior-transition interaction terms.

In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Select the Qt backend for matplotlib
# (presumably figures open in separate interactive windows rather than inline — confirm locally).
%matplotlib qt

In [3]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.data_processing import generate_transition_dff_table
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh
from keller_zlatic_vnc.linear_modeling import one_hot_from_table
from keller_zlatic_vnc.linear_modeling import order_and_color_interaction_terms
from keller_zlatic_vnc.linear_modeling import reference_one_hot_to_beh


from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats
from janelia_core.stats.regression import visualize_coefficient_stats



In [4]:
# Remove pandas' display limits so tables are shown in full (no truncated rows or columns)
for display_opt in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_opt, None)

In [5]:
# Font settings applied to all text in matplotlib figures
font = dict(family='arial', weight='normal', size=15)

# Register the font settings through matplotlib's rc configuration
plt.rc('font', **font)

## Options for analysis

In [6]:
# Type of cells we fit models to (the file-selection cell accepts 'a00c', 'basin' or 'handle')
cell_type = 'handle'

# Which perturbations we fit data to: 'A4' or 'A9'; any other value (e.g. 'both') keeps all events
manip_type = 'A4'

# Cutoff time we use to define quiet behaviors following stimulation
# (3.656 was left in the original notebook as an alternative value)
cut_off_time = 9.0034

# Whether we predict dff 'before' or 'after' the manipulation
period = 'before'

# How many subjects we need to observe a transition from to include it in the model
min_n_trans_subjs = 2

# Colors to associate with behaviors: written as 0-255 RGB triplets, stored scaled to [0, 1]
_beh_rgb = {'F': [255, 128, 0],
            'B': [0, 0, 153],
            'Q': [255, 51, 153],
            'O': [204, 153, 255],
            'T': [0, 204, 0],
            'P': [0, 153, 153],
            'H': [52, 225, 235]}
clrs = {beh: np.asarray(rgb)/255 for beh, rgb in _beh_rgb.items()}

## Location of the data

In [7]:
# Folder holding the transition list and the activity files.
# Alternative location kept from the original notebook:
#   r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2'
data_folder = r'/Users/williambishop/Desktop/extracted_dff_v2'

# Excel file with the annotated transitions
transition_file = 'transition_list.xlsx'

# Activity files for each cell type, assigned as (A4 file, A9 file) pairs
a00c_a4_act_data_file, a00c_a9_act_data_file = 'A00c_activity_A4.mat', 'A00c_activity_A9.mat'
basin_a4_act_data_file, basin_a9_act_data_file = 'Basin_activity_A4.mat', 'Basin_activity_A9.mat'
handle_a4_act_data_file, handle_a9_act_data_file = 'Handle_activity_A4.mat', 'Handle_activity_A9.mat'

## Specify some parameters we use in the code below

In [8]:
# Look up the (A4, A9) activity files for the requested cell type
act_files_by_cell_type = {
    'a00c': (a00c_a4_act_data_file, a00c_a9_act_data_file),
    'basin': (basin_a4_act_data_file, basin_a9_act_data_file),
    'handle': (handle_a4_act_data_file, handle_a9_act_data_file),
}

# Fail early on an unknown cell type instead of continuing without file names
if cell_type not in act_files_by_cell_type:
    raise ValueError('The cell type ' + cell_type + ' is not recogonized.')

a4_act_file, a9_act_file = act_files_by_cell_type[cell_type]

## Load data

In [9]:
# Read in raw transitions
raw_trans = read_raw_transitions_from_excel(pathlib.Path(data_folder) / transition_file)

# Read in activity for the A4 and A9 manipulations
a4_act = scipy.io.loadmat(pathlib.Path(data_folder) / a4_act_file, squeeze_me=True)
a9_act = scipy.io.loadmat(pathlib.Path(data_folder) / a9_act_file, squeeze_me=True)

# Correct mistake in labeling if we need to
if cell_type in ('basin', 'handle'):
    # NOTE(review): [1][0] picks the SECOND entry labeled '0824L2CL' and will raise an
    # IndexError if there are fewer than two matches — confirm this is the intended entry.
    ind = np.argwhere(a4_act['newTransitions'] == '0824L2CL')[1][0]
    a4_act['newTransitions'][ind] = '0824L2-2CL'

# Recode behavioral annotations for the behaviors before and after the manipulation
raw_trans = recode_beh(raw_trans, 'Beh Before')
raw_trans = recode_beh(raw_trans, 'Beh After')

# Extract transitions
trans = extract_transitions(raw_trans, cut_off_time)

# Generate table of data
a4table = generate_transition_dff_table(act_data=a4_act, trans=trans)
a9table = generate_transition_dff_table(act_data=a9_act, trans=trans)

# Tag each table with its manipulation target, then put the tables together.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
a4table['man_tgt'] = 'A4'
a9table['man_tgt'] = 'A9'
data = pd.concat([a4table, a9table], ignore_index=True)

## Down select for manipulation target

In [10]:
# Message printed for each manipulation target we can restrict the analysis to
target_msgs = {'A4': 'Analyzing only A4 manipulation events.',
               'A9': 'Analyzing only A9 manipulation events.'}

# Accept upper- or lower-case spellings; anything else means "keep every event"
man_tgt = manip_type.upper() if manip_type in ('A4', 'a4', 'A9', 'a9') else None

if man_tgt is None:
    print('Analyzing all manipulation events.')
else:
    print(target_msgs[man_tgt])
    data = data[data['man_tgt'] == man_tgt]

Analyzing only A4 manipulation events.


## See how many subjects we have for each type of behavioral transition

In [11]:
# Count the subjects observed for each type of behavioral transition
# (semantics taken from the helper's name — confirm against count_unique_subjs_per_transition)
trans_subj_cnts = count_unique_subjs_per_transition(data)

In [12]:
# Display the subject-count table; later code reads it with the preceding behavior
# as the row label and the succeeding behavior as the column label
trans_subj_cnts

Unnamed: 0,B,F,H,O,P,Q,T
B,4.0,13.0,0.0,1.0,1.0,2.0,3.0
F,1.0,10.0,0.0,0.0,1.0,0.0,1.0
H,0.0,1.0,0.0,0.0,0.0,0.0,0.0
O,1.0,4.0,0.0,0.0,0.0,0.0,0.0
P,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Q,1.0,7.0,1.0,4.0,0.0,5.0,0.0
T,4.0,6.0,0.0,0.0,1.0,1.0,1.0


## Get list of transitions with the minimum number of subjects

In [13]:
# Keep every (from, to) behavior pair observed in at least min_n_trans_subjs subjects;
# rows of the count table are the preceding behavior, columns the succeeding one
mdl_trans = [(from_beh, to_beh)
             for from_beh in trans_subj_cnts.index
             for to_beh in trans_subj_cnts.columns
             if trans_subj_cnts.loc[from_beh, to_beh] >= min_n_trans_subjs]

## Remove any events that do not display one of the transitions we include in the model

In [14]:
l_data = len(data)

# Set membership is cheaper than scanning the list of transitions for every row
trans_to_keep = set(mdl_trans)

# Boolean mask over rows (in positional order) marking events whose transition is in the model.
# The builtin bool is used because the np.bool alias was removed in NumPy 1.24.
keep_rows = np.zeros(l_data, dtype=bool)
for r_i, row_trans in enumerate(zip(data['beh_before'], data['beh_after'])):
    keep_rows[r_i] = row_trans in trans_to_keep

data = data[keep_rows]

## Pull out $\Delta F/F$

In [15]:
# Select the dff values we predict, according to the requested period
if period == 'before':
    dff = data['dff_before'].to_numpy()
elif period == 'after':
    dff = data['dff_after'].to_numpy()
else:
    # Include the offending value in the message (the original omitted it)
    raise ValueError('The period ' + str(period) + ' is not recognized.')

## Find grouping of data by subject

In [16]:
# Give every event the index of its subject, numbering subjects in order of first appearance
subj_ids = data['subject_id']
unique_ids = subj_ids.unique()

g = np.zeros(len(data))
for grp_ind, subj in enumerate(unique_ids):
    in_grp = (subj_ids == subj).to_numpy()
    g[in_grp] = grp_ind

## Calculate stats

In [17]:
# Build the regressor matrix and the matching variable names. No main-effect behaviors are
# requested (empty beh_before / beh_after), subject encoding is off, and the interaction terms
# are listed explicitly as the transitions in mdl_trans.
# NOTE(review): presumably each column is a 0/1 indicator for one transition — confirm
# against one_hot_from_table.
one_hot_data, one_hot_vars = one_hot_from_table(data, beh_before=[], beh_after=[], 
                                         enc_subjects=False, enc_beh_interactions=False, 
                                         beh_interactions=mdl_trans)

In [18]:
# Only the singular values are needed for this check, so skip computing the singular-vector
# matrices (this also avoids rebinding `_`, which IPython uses for the last output)
v = np.linalg.svd(one_hot_data, compute_uv=False)
print(v)

# A very small singular value means some regressors are (nearly) linear combinations of others
if np.min(v) < .001:
    raise RuntimeError('regressors are nearly co-linear')

[22.11334439 19.57038579 17.34935157 17.17556404 14.17744688 12.40967365
 10.29563014  8.77496439  7.81024968  7.14142843  5.74456265]


In [19]:
# Fit dff against the one-hot regressors, passing the per-event subject index g as the grouping.
# See janelia_core.stats.regression for the exact definitions of beta, acm and the group count.
beta, acm, n_gprs = grouped_linear_regression_ols_estimator(x=one_hot_data, y=dff, g=g)
# Compute coefficient statistics at alpha=.05; later cells read stats['non_zero_p'],
# stats['c_ints'] and stats['non_zero'] from the result.
stats = grouped_linear_regression_acm_stats(beta=beta, acm=acm, n_grps=n_gprs, alpha=.05)

## View results in table form

In [20]:
# One row per model term: the fitted coefficient next to its p-value
beta_and_p = np.stack([beta, stats['non_zero_p']], axis=1)
rs = pd.DataFrame(data=beta_and_p, index=one_hot_vars, columns=['beta', 'p'])

In [21]:
def color_small_p(val):
    """Return the CSS color rule for a p-value cell.

    Values below .05 are shown in red; everything else (including NaN) in black.
    """
    is_small = val < .05
    return 'color: %s' % ('red' if is_small else 'black')

In [22]:
# Show the results table, coloring each entry of the 'p' column with color_small_p.
# NOTE(review): newer pandas releases rename Styler.applymap to Styler.map — confirm
# against the installed pandas version.
rs.style.applymap(color_small_p, subset=['p'])

Unnamed: 0,beta,p
beh_interact_BB,0.0764013,5.10916e-07
beh_interact_BF,0.0763019,7.82785e-06
beh_interact_BQ,0.0942642,0.00345968
beh_interact_BT,0.101603,0.0134278
beh_interact_FF,0.05494,1.4631e-05
beh_interact_OF,0.113146,0.0216872
beh_interact_QF,0.0229929,0.000135727
beh_interact_QO,0.0151849,8.411989999999999e-22
beh_interact_QQ,0.0157179,5.84271e-13
beh_interact_TB,0.0677486,1.5092e-05


## View results in plot form

#### Group and color coefficients by behavior before manipulation

In [23]:
# The last two characters of each variable name are the transition code (e.g. 'BF').
# Get a plotting order and colors for the terms, sorting by the behavior before the manipulation.
before_order, before_clrs = order_and_color_interaction_terms(terms=[t[-2:] for t in one_hot_vars], 
                                                              colors=clrs, sort_by_before=True)

In [24]:
# Plot the coefficients, their confidence intervals and significance, in the order and
# colors computed for grouping by the behavior before the manipulation
visualize_coefficient_stats(var_strs=[one_hot_vars[i] for i in before_order],
                            theta=beta[before_order], c_ints=stats['c_ints'][:, before_order],
                            sig=stats['non_zero'][before_order],
                            var_clrs=before_clrs)

# Raw string: '\D' is not a valid Python escape sequence, so the non-raw form triggers a warning
plt.ylabel(r'$\Delta F / F$')
plt.title('Grouped by Preceeding Behavior')
fig = plt.gcf()
fig.set_size_inches(8, 6)

# Lay out last, so the spacing accounts for the labels, the title and the final figure size
plt.tight_layout()

#### Group and color coefficients by behavior after manipulation

In [25]:
# The last two characters of each variable name are the transition code (e.g. 'BF').
# Get a plotting order and colors for the terms, this time with sort_by_before=False
# (used below for the plot grouped by succeeding behavior).
after_order, after_clrs = order_and_color_interaction_terms(terms=[t[-2:] for t in one_hot_vars], 
                                                            colors=clrs, sort_by_before=False)

In [26]:
# Plot the coefficients, their confidence intervals and significance, in the order and
# colors computed for grouping by the behavior after the manipulation
visualize_coefficient_stats(var_strs=[one_hot_vars[i] for i in after_order],
                            theta=beta[after_order], c_ints=stats['c_ints'][:, after_order],
                            sig=stats['non_zero'][after_order],
                            var_clrs=after_clrs)

# Raw string: '\D' is not a valid Python escape sequence, so the non-raw form triggers a warning
plt.ylabel(r'$\Delta F / F$')
plt.title('Grouped by Succeeding Behavior')
fig = plt.gcf()
fig.set_size_inches(8, 6)

# Lay out last, so the spacing is computed for the final figure size rather than the default one
plt.tight_layout()

In [27]:
# Display the (before, after) behavior pairs included in the model
mdl_trans

[('B', 'B'),
 ('B', 'F'),
 ('B', 'Q'),
 ('B', 'T'),
 ('F', 'F'),
 ('O', 'F'),
 ('Q', 'F'),
 ('Q', 'O'),
 ('Q', 'Q'),
 ('T', 'B'),
 ('T', 'F')]

In [28]:
# Join each (before, after) behavior pair into a two-letter transition code, e.g. ('B', 'F') -> 'BF'
interactions = [from_beh + to_beh for from_beh, to_beh in mdl_trans]

In [29]:
# Display the two-letter transition codes built from mdl_trans
interactions

['BB', 'BF', 'BQ', 'BT', 'FF', 'OF', 'QF', 'QO', 'QQ', 'TB', 'TF']