Here we ask whether there is a significant interaction term in a linear model that predicts neural activity (ΔF/F) from the behavior before and the behavior after a manipulation, where the reference (base) condition is the Q-to-Q (QQ) transition.

In [1]:
# Reload imported modules automatically before each cell runs, so edits to the project packages are picked up
%load_ext autoreload
%autoreload 2

In [2]:
import pathlib

import numpy as np
import pandas as pd
import scipy.io

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.data_processing import generate_transition_dff_table
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh
from keller_zlatic_vnc.linear_modeling import one_hot_from_table

from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats



## Options for analysis

In [3]:
# Type of cells we fit models to; must be one of 'a00c', 'basin' or 'handle' (anything else raises a
# ValueError when the activity files are selected below)
cell_type = 'basin' 

# If we fit data to perturbations targeted at 'A4', 'A9' or 'both'; note that any value other than
# 'A4' or 'A9' results in all manipulation events being analyzed
manip_type = 'A4'

# Define the cutoff time we use to define quiet behaviors following stimulation; this value is passed
# to extract_transitions
cut_off_time = 3.656  # 9.0034 was left here as an alternative value

# Specify if we predict dff 'before' or 'after' the manipulation; selects the 'dff_before' or
# 'dff_after' column as the regression target
period = 'before'

# Give min number of subjects that need to be present for each transition type; applied to the
# transition itself and to the matching transitions into and out of 'Q'
min_n_subjs = 2

## Location of data

In [4]:
# Folder holding the transition spreadsheet and the activity .mat files.
# NOTE(review): this is an absolute path on one machine; edit it (or switch to the network location
# kept in the commented line below) before running elsewhere.
#data_folder = r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2'
data_folder = r'/Users/williambishop/Desktop/extracted_dff_v2'

# Excel file listing the raw transitions, relative to data_folder
transition_file = 'transition_list.xlsx'

# Activity files, one per cell type and manipulation target (A4 / A9), relative to data_folder
a00c_a4_act_data_file = 'A00c_activity_A4.mat'
a00c_a9_act_data_file = 'A00c_activity_A9.mat'

basin_a4_act_data_file = 'Basin_activity_A4.mat'
basin_a9_act_data_file = 'Basin_activity_A9.mat'

handle_a4_act_data_file = 'Handle_activity_A4.mat'
handle_a9_act_data_file = 'Handle_activity_A9.mat'

## Specify some parameters we use in the code below

In [5]:
# Map each supported cell type to its (A4, A9) activity files
act_files_by_cell_type = {
    'a00c': (a00c_a4_act_data_file, a00c_a9_act_data_file),
    'basin': (basin_a4_act_data_file, basin_a9_act_data_file),
    'handle': (handle_a4_act_data_file, handle_a9_act_data_file),
}

# Fail early, with a readable message, if the option cell holds an unsupported cell type
if cell_type not in act_files_by_cell_type:
    raise ValueError('The cell type ' + str(cell_type) + ' is not recognized.')

a4_act_file, a9_act_file = act_files_by_cell_type[cell_type]

## Load data

In [6]:
# Read in raw transitions
raw_trans = read_raw_transitions_from_excel(pathlib.Path(data_folder) / transition_file)

# Read in activity
a4_act = scipy.io.loadmat(pathlib.Path(data_folder) / a4_act_file, squeeze_me=True)
a9_act = scipy.io.loadmat(pathlib.Path(data_folder) / a9_act_file, squeeze_me=True)

# Correct mistake in labeling if we need to
if cell_type == 'basin' or cell_type == 'handle':
    # NOTE(review): [1][0] selects the *second* entry labeled '0824L2CL' and relabels only that one; this
    # raises an IndexError if the file holds fewer than two such entries - confirm this is intended
    ind = np.argwhere(a4_act['newTransitions'] == '0824L2CL')[1][0]
    a4_act['newTransitions'][ind] = '0824L2-2CL'

# Recode behavioral annotations
raw_trans = recode_beh(raw_trans, 'Beh Before')
raw_trans = recode_beh(raw_trans, 'Beh After')

# Extract transitions
trans = extract_transitions(raw_trans, cut_off_time)

# Generate table of data
a4table = generate_transition_dff_table(act_data=a4_act, trans=trans)
a9table = generate_transition_dff_table(act_data=a9_act, trans=trans)

# Put the tables together, tagging each row with its manipulation target first.
# pd.concat is used because DataFrame.append was removed in pandas 2.0; it gives the same stacked table
# with a fresh 0..n-1 index.
a4table['man_tgt'] = 'A4'
a9table['man_tgt'] = 'A9'
data = pd.concat([a4table, a9table], ignore_index=True)

## Down select for manipulation target

In [7]:
# Keep only the rows for the requested manipulation target; any value other than 'A4' or 'A9'
# leaves the table untouched so that all events are analyzed
if manip_type in ('A4', 'A9'):
    print('Analyzing only ' + manip_type + ' manipulation events.')
    data = data[data['man_tgt'] == manip_type]
else:
    print('Analyzing all manipulation events.')

Analyzing only A4 manipulation events.


## Look at number of subjects we have for each type of transition

In [8]:
# Count the unique subjects showing each transition; the code below indexes the result with the behavior
# before the manipulation as the row label and the behavior after as the column label
trans_counts = count_unique_subjs_per_transition(data)

## Determine which interactions we can test for

In [9]:
# Display the subject counts used below to decide which interactions can be tested
trans_counts

Unnamed: 0,B,F,H,O,P,Q,T
B,3.0,14.0,0.0,1.0,1.0,6.0,2.0
F,1.0,11.0,0.0,1.0,2.0,6.0,1.0
H,0.0,0.0,0.0,0.0,0.0,1.0,0.0
O,1.0,3.0,0.0,0.0,0.0,1.0,0.0
P,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Q,0.0,7.0,0.0,4.0,0.0,8.0,0.0
T,2.0,5.0,0.0,0.0,1.0,5.0,0.0


In [10]:
# Candidate behaviors are every behavior except the reference behavior 'Q'. They are sorted so that the
# order of `interactions` (and of the columns of the results tables built from it) is the same on every
# run; iterating a set of strings directly does not give a stable order across kernel restarts.
all_behs = sorted(set(trans_counts.index).difference(['Q']))

interactions = []
for before_beh in all_behs:
    for after_beh in all_behs:
        # trans_counts is indexed as [row = behavior before, column = behavior after]
        n_trans = trans_counts.loc[before_beh, after_beh]   # before_beh -> after_beh
        n_before = trans_counts.loc[before_beh, 'Q']        # before_beh -> Q
        n_after = trans_counts.loc['Q', after_beh]          # Q -> after_beh

        # An interaction can be tested only if the transition itself and both matching transitions
        # involving Q were each seen in enough subjects
        if n_trans >= min_n_subjs and n_before >= min_n_subjs and n_after >= min_n_subjs:
            interactions.append(before_beh + after_beh)

## Get stats for each interaction

In [11]:
# Labels for the fitted coefficients, in the column order of the design matrix built below.
# NOTE(review): assumes one_hot_from_table returns its columns in the order before, after, interaction;
# the 'Reference' entry is the column of ones appended in the loop - confirm against one_hot_from_table
coef_labels = ['Before', 'After', 'Interaction', 'Reference']
n_coefs = len(coef_labels)

# Pick the Delta F/F column once, up front, so an invalid `period` fails before any model is fit
if period == 'before':
    dff_col = 'dff_before'
elif period == 'after':
    dff_col = 'dff_after'
else:
    raise ValueError('The period ' + str(period) + ' is not recognized.')

# One column of p-values and of coefficients per tested interaction
n_interactions = len(interactions)
p_vls = np.zeros([n_coefs, n_interactions])
beta_vls = np.zeros([n_coefs, n_interactions])

for i, int_behs in enumerate(interactions):

    # Each interaction label is the before behavior followed by the after behavior (one character each)
    before_beh, after_beh = int_behs[0], int_behs[1]

    # Get rid of rows that don't have the correct behavior: keep only events whose before behavior is
    # the one of interest or Q, and whose after behavior is the one of interest or Q
    before_rows = data['beh_before'].isin([before_beh, 'Q'])
    after_rows = data['beh_after'].isin([after_beh, 'Q'])
    selected_data = data[before_rows & after_rows]

    # Pull out Delta F/F
    dff = selected_data[dff_col].to_numpy()

    # Find grouping of data by subject: every event gets the index of its subject, numbered in order
    # of first appearance
    unique_ids = selected_data['subject_id'].unique()
    g = np.zeros(len(selected_data))
    for u_i, u_id in enumerate(unique_ids):
        g[selected_data['subject_id'] == u_id] = u_i

    # One hot encode data and append a column of ones, which serves as the reference (QQ) term
    one_hot_data, _ = one_hot_from_table(selected_data, beh_before=[before_beh], beh_after=[after_beh],
                                         enc_subjects=False, enc_beh_interactions=True)
    one_hot_data = np.concatenate([one_hot_data, np.ones([one_hot_data.shape[0], 1])], axis=1)

    # Calculate stats
    beta, acm, n_grps = grouped_linear_regression_ols_estimator(x=one_hot_data, y=dff, g=g)
    beta_vls[:, i] = beta

    stats = grouped_linear_regression_acm_stats(beta=beta, acm=acm, n_grps=n_grps, alpha=.05)
    p_vls[:, i] = stats['non_zero_p']

# Label the results: one row per coefficient, one column per interaction
p_vls = pd.DataFrame(data=p_vls, columns=interactions, index=coef_labels)
beta_vls = pd.DataFrame(data=beta_vls, columns=interactions, index=coef_labels)


## Look at p-values and beta

In [12]:
def apply_color(val):
    """Return the CSS text-color rule for one table cell.

    Args:
        val: The numeric value shown in the cell.

    Returns:
        'color: red' when val is below .05, 'color: black' otherwise.
    """
    if val < .05:
        return 'color: red'
    return 'color: black'

In [13]:
# Show the p-values, coloring each cell with apply_color.
# NOTE(review): Styler.applymap was renamed to Styler.map in pandas 2.1; switch if upgrading pandas
p_vls.style.applymap(apply_color)

Unnamed: 0,BF,FF,TF
Before,0.0106739,0.00133589,0.0452167
After,0.862515,0.86283,0.863671
Interaction,0.404975,0.374431,0.190056
Reference,1.08718e-10,2.70666e-10,2.0289e-09


In [14]:
# Show the fitted coefficients: one row per model term, one column per tested interaction
beta_vls

Unnamed: 0,BF,FF,TF
Before,0.043563,0.026093,0.019518
After,-0.00029,-0.00029,-0.00029
Interaction,-0.008204,-0.006265,0.022063
Reference,0.01665,0.01665,0.01665


In [15]:
# List the interactions that were tested (before behavior followed by after behavior)
interactions

['BF', 'FF', 'TF']