Tests for state, prediction, decision or reporting dependence in $\Delta F/F$ (DFF) across all voxels in the brain, comparing each behavior against a single reference behavior (such as quiet)

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import pickle
from time import time

import numpy as np
import pandas as pd

from janelia_core.utils.data_saving import append_ts
from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.linear_modeling import one_hot_from_table

## Parameters go here

In [None]:
ps = {
    # Specify where the processed data (the product of dff_extraction.ipynb) is stored
    'data_folder': r'A:\projects\keller_vnc\results\whole_brain_stats',
    'data_file': r'dff_1_5_5_with_ep_2020_08_27_12_32_48_526097.pkl',

    # Specify a cut-off time (passed to extract_transitions below)
    'cut_off_time': 3.656,

    # Specify manipulation target: 'both', 'A4' or 'A9'
    'manip_type': 'both',

    # Specify thresholds for number of subjects we need for each behavior
    'min_n_subjects_per_beh': 3,

    # Specify the test type.  Options are:
    #
    #   state_dependence - tests if dff after manipulation is sensitive to behavior before
    #   prediction_dependence - tests if dff before manipulation is sensitive to behavior after
    #   decision_dependence - tests if dff during manipulation is sensitive to behavior after
    #   before_reporting - tests if dff before manipulation is sensitive to behavior before
    #   after_reporting - tests if dff after manipulation is sensitive to behavior after
    #
    'test_type': 'decision_dependence',

    # Specify reference behavior
    'beh_ref': 'Q',

    # Alpha value for thresholding significance
    'alpha': .05,

    # Specify where we save results
    'save_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    'save_str': 'decision_dep_quiet_ref',
}

## Load the data

In [None]:
# Build the full path to the processed data file and unpickle it.
# NOTE(review): unpickling can run arbitrary code, so only load files from a trusted source.
data_path = Path(ps['data_folder'], ps['data_file'])
with data_path.open('rb') as f:
    file_data = pickle.load(f)

# The event annotations table is what the rest of the notebook works on
data = file_data['event_annots']

## Rename a few columns

In [None]:
# Rename in place, so the table held in file_data['event_annots'] is renamed as well
data.rename(
    columns={
        'Smp ID': 'subject_id',
        'Beh Before': 'beh_before',
        'Beh After': 'beh_after',
    },
    inplace=True,
)

## Apply cut-off time to define succeeding quiet behaviors

In [None]:
# extract_transitions returns two values; only the second is kept, and it replaces data
_, data = extract_transitions(data, ps['cut_off_time'])

## Down select for manipulation target if needed

In [None]:
# Keep only events for the requested manipulation target; 'both' keeps every event
if ps['manip_type'] in ('A4', 'A9'):
    data = data[data['man_tgt'] == ps['manip_type']]
elif ps['manip_type'] != 'both':
    # Fail loudly on an unknown value (e.g. a typo) instead of silently analyzing all events
    raise ValueError('The manip_type ' + str(ps['manip_type']) + ' is not recognized.')

## Remove behaviors which are not present in enough subjects

After removing these behaviors, we keep only events which start and stop with retained behaviors

In [None]:
# Subject counts per transition.  Presumably rows are before behaviors and columns are
# after behaviors, judging by how the result is summed in the cells below - TODO confirm.
trans_subj_cnts = count_unique_subjs_per_transition(data)

In [None]:
# The subject-count threshold is applied only to the behavior the chosen test conditions
# on; the other side gets a threshold of 0, so no behavior is dropped on that side.
if ps['test_type'] in ('state_dependence', 'before_reporting'):
    # These tests look at sensitivity to the behavior before the manipulation
    before_beh_th, after_beh_th = ps['min_n_subjects_per_beh'], 0
elif ps['test_type'] in ('prediction_dependence', 'after_reporting', 'decision_dependence'):
    # These tests look at sensitivity to the behavior after the manipulation
    before_beh_th, after_beh_th = 0, ps['min_n_subjects_per_beh']
else:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

In [None]:
# Total the per-transition subject counts for each after behavior (sum over rows) and for
# each before behavior (sum over columns), then keep the behaviors meeting the thresholds.
# NOTE(review): these totals add counts across transitions, so a subject appearing in
# several transitions may be counted more than once for a behavior - confirm this is intended.
after_beh_sum = trans_subj_cnts.sum()
after_behs = list(after_beh_sum[after_beh_sum >= after_beh_th].index)

before_beh_sum = trans_subj_cnts.sum(1)
before_behs = list(before_beh_sum[before_beh_sum >= before_beh_th].index)

# Keep only events which both start and end with a retained behavior.  isin performs the
# membership test in one vectorized call instead of rebuilding a set for every row.
before_keep_rows = data['beh_before'].isin(before_behs)
after_keep_rows = data['beh_after'].isin(after_behs)
data = data[before_keep_rows & after_keep_rows]

## Update our list of before and after behaviors

We do this since by removing rows, some of our control behaviors may no longer be present

In [None]:
# Recount on the filtered data and keep only behaviors that still have a non-zero count
new_trans_sub_cnts = count_unique_subjs_per_transition(data)

new_after_beh_sum = new_trans_sub_cnts.sum()
after_behs = list(new_after_beh_sum[new_after_beh_sum > 0].index)

new_before_beh_sum = new_trans_sub_cnts.sum(1)
before_behs = list(new_before_beh_sum[new_before_beh_sum > 0].index)

print('Using the following before behaviors: ' + str(before_behs))
print('Using the following after behaviors: ' + str(after_behs))
# Print the message itself rather than a one-element list wrapping it
print('Number of rows remaining in data: ' + str(len(data)))

## Pull out $\Delta F/F$

In [None]:
# For each test type: the column of data holding the dff to analyze and the message to print
dff_sources = {
    'state_dependence': ('dff_after', 'Extracting dff after the manipulation.'),
    'after_reporting': ('dff_after', 'Extracting dff after the manipulation.'),
    'prediction_dependence': ('dff_before', 'Extracting dff before the manipulation.'),
    'before_reporting': ('dff_before', 'Extracting dff before the manipulation.'),
    'decision_dependence': ('dff_during', 'Extracting dff during the manipulation.'),
}

if ps['test_type'] not in dff_sources:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

dff_col, dff_msg = dff_sources[ps['test_type']]

# Stack the per-event entries of the chosen column into a single array
dff = np.stack(data[dff_col].to_numpy())
print(dff_msg)

## Find grouping of data by subject

In [None]:
# Give every event the integer index of its subject (in order of first appearance),
# stored in a float array with one entry per row of data
unique_ids = data['subject_id'].unique()
g = np.zeros(len(data))
for grp_ind, subj_id in enumerate(unique_ids):
    in_grp = (data['subject_id'] == subj_id).to_numpy()
    g[in_grp] = grp_ind

## Define a function for calculating stats

In [None]:
def stats_f(x_i, y_i, g_i, alpha_i):
    """Fit one grouped linear regression and return its statistics.

    Args:
        x_i: Passed to grouped_linear_regression_ols_estimator as x.
        y_i: Passed to grouped_linear_regression_ols_estimator as y.
        g_i: Passed to grouped_linear_regression_ols_estimator as g.
        alpha_i: Passed to grouped_linear_regression_acm_stats as alpha.

    Returns:
        The object returned by grouped_linear_regression_acm_stats, with the estimated
        coefficients additionally stored under the key 'beta'.
    """
    coefs, coef_acm, n_groups = grouped_linear_regression_ols_estimator(x=x_i, y=y_i, g=g_i)
    results = grouped_linear_regression_acm_stats(beta=coefs, acm=coef_acm, n_grps=n_groups,
                                                  alpha=alpha_i)
    # Keep the coefficients alongside the statistics so callers get everything in one place
    results['beta'] = coefs
    return results

## Fit models and calculate stats

In [None]:
# Every behavior other than the reference gets its own one-hot variable.  The reference is
# wrapped in a set before subtracting: set.difference treats a bare string as an iterable
# of characters, which only removes the right thing for single-character behavior names.
before_behs_ref = sorted(set(before_behs) - {ps['beh_ref']})
after_behs_ref = sorted(set(after_behs) - {ps['beh_ref']})

n_before_behs = len(before_behs_ref)
n_after_behs = len(after_behs_ref)

one_hot_data_ref, one_hot_vars_ref = one_hot_from_table(data, beh_before=before_behs_ref, beh_after=after_behs_ref)

# Append a column of ones as the last regressor, labeled 'ref'
one_hot_data_ref = np.concatenate([one_hot_data_ref, np.ones([one_hot_data_ref.shape[0], 1])], axis=1)
one_hot_vars_ref = one_hot_vars_ref + ['ref']

In [None]:
# Fit the same design matrix to each column of dff in turn, one model per column
n_rois = dff.shape[1]
full_stats = [
    stats_f(x_i=one_hot_data_ref, y_i=dff[:, roi_ind], g_i=g, alpha_i=ps['alpha'])
    for roi_ind in range(n_rois)
]

## Package results

In [None]:
# Pick the behaviors under test and the positions of their coefficients.
# NOTE(review): this assumes one_hot_from_table orders its variables as all before
# behaviors followed by all after behaviors - confirm against that function.
if ps['test_type'] in ('state_dependence', 'before_reporting'):
    test_behs = before_behs_ref
    pull_inds = range(n_before_behs)
elif ps['test_type'] in ('prediction_dependence', 'after_reporting', 'decision_dependence'):
    test_behs = after_behs_ref
    pull_inds = range(n_before_behs, n_before_behs + n_after_behs)
else:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

# For each tested behavior, collect its p-value and coefficient from every fitted model
beh_stats = {
    beh: {
        'p_values': [roi_stats['non_zero_p'][coef_ind] for roi_stats in full_stats],
        'beta': [roi_stats['beta'][coef_ind] for roi_stats in full_stats],
    }
    for beh, coef_ind in zip(test_behs, pull_inds)
}

## Save results

In [None]:
# File name is the test type and save string joined by an underscore, passed through
# append_ts, with a .pkl extension
save_name = append_ts('_'.join((ps['test_type'], ps['save_str']))) + '.pkl'
save_path = Path(ps['save_folder'], save_name)

In [None]:
# Bundle the per-behavior stats, the full per-model stats and the parameters used
rs = {
    'beh_stats': beh_stats,
    'full_stats': full_stats,
    'ps': ps,
}

In [None]:
# Write the results dictionary to disk as a pickle
with save_path.open('wb') as f:
    pickle.dump(rs, f)

In [None]:
# Report where the results were written
print(f'Saved results to: {save_path}')

In [None]:
# Bare expression: in a notebook this displays the loaded file contents as the cell output
file_data