Tests for state, prediction, decision or reporting dependence in $\Delta F/F$ (DFF), referencing each behavior to the "other" condition, across all voxels
in the brain

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pickle
from time import time

import numpy as np
import pandas as pd

from janelia_core.utils.data_saving import append_ts
from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.linear_modeling import one_hot_from_table

## Parameters go here

In [3]:
# All user-adjustable settings live in this one dictionary, which is also stored with the saved results
ps = {
    # Where the processed data (the product of dff_extraction.ipynb) is stored
    'data_folder': r'A:\projects\keller_vnc\results\whole_brain_stats',
    'data_file': r'dff_1_5_5_with_ep_2020_08_27_12_32_48_526097.pkl',

    # Cut-off time handed to extract_transitions (used to define succeeding quiet behaviors)
    'cut_off_time': 3.656,

    # Manipulation target: 'both', 'A4' or 'A9'
    'manip_type': 'A4',

    # Threshold on the number of subjects we need for each behavior
    'min_n_subjects_per_beh': 3,

    # The test type.  Options are:
    #
    #   state_dependence - tests if dff after manipulation is sensitive to behavior before
    #   prediction_dependence - tests if dff before manipulation is sensitive to behavior after
    #   decision_dependence - tests if dff during manipulation is sensitive to behavior after
    #   before_reporting - tests if dff before manipulation is sensitive to behavior before
    #   after_reporting - tests if dff after manipulation is sensitive to behavior after
    #
    'test_type': 'prediction_dependence',

    # Reference behavior for the control behaviors - this will not affect the results for test behaviors
    'beh_ref': 'Q',

    # Alpha value for thresholding significance
    'alpha': .05,

    # Where results are saved, and a string included in the name of the saved file
    'save_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    'save_str': 'other_ref_A4',
}

## Load the data

In [4]:
# Read the pickled file named in the parameters and keep its 'event_annots' entry.
# NOTE: unpickling can execute arbitrary code, so only point this at files you trust.
data_path = Path(ps['data_folder'], ps['data_file'])
with data_path.open('rb') as f:
    file_data = pickle.load(f)
data = file_data['event_annots']

## Rename a few columns

In [5]:
# Switch to the column names used in the rest of the notebook.  The renamed frame is assigned to `data`
# rather than renaming in place, so the object still held in file_data['event_annots'] is left untouched.
data = data.rename(columns={'Smp ID': 'subject_id', 'Beh Before': 'beh_before', 'Beh After': 'beh_after'})

## Apply cut-off time to define succeeding quiet behaviors

In [6]:
# extract_transitions returns two values; the first is discarded and the second replaces the event table.
# NOTE(review): presumably a succeeding behavior that does not occur within the cut-off time is relabeled
# as quiet - confirm against keller_zlatic_vnc.data_processing.extract_transitions
_, data = extract_transitions(data, ps['cut_off_time'])

## Down select for manipulation target if needed

In [7]:
# Keep only the events for the requested manipulation target; 'both' keeps every event.  Any other value
# is rejected so that a mistyped target cannot silently fall through to analyzing all events.
if ps['manip_type'] in ('A4', 'A9'):
    data = data[data['Tgt Site'] == ps['manip_type']]
elif ps['manip_type'] != 'both':
    raise ValueError('The manip_type ' + str(ps['manip_type']) + ' is not recognized.')

## Keep only events with behaviors that are present in enough subjects

In [8]:
# Table of subject counts per transition.
# NOTE(review): later cells treat axis-0 sums as per-after-behavior totals and axis-1 sums as
# per-before-behavior totals, so rows are presumably before behaviors and columns after behaviors -
# confirm against keller_zlatic_vnc.data_processing.count_unique_subjs_per_transition
trans_subj_cnts = count_unique_subjs_per_transition(data)

In [9]:
# The subject-count threshold is applied only to the side of the transition that is being tested
# (before behaviors for state_dependence and before_reporting, after behaviors otherwise); the other
# side gets a threshold of 0.
if ps['test_type'] in ('state_dependence', 'before_reporting'):
    before_beh_th = ps['min_n_subjects_per_beh']
    after_beh_th = 0
elif ps['test_type'] in ('prediction_dependence', 'after_reporting', 'decision_dependence'):
    before_beh_th = 0
    after_beh_th = ps['min_n_subjects_per_beh']
else:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

In [10]:
# Sum the transition counts down each column (used as the total for each after behavior) and across each
# row (used as the total for each before behavior), then keep the behaviors whose total is strictly
# greater than the matching threshold.
after_beh_sum = trans_subj_cnts.sum(axis=0)
after_behs = list(after_beh_sum.loc[after_beh_sum > after_beh_th].index)

before_beh_sum = trans_subj_cnts.sum(axis=1)
before_behs = list(before_beh_sum.loc[before_beh_sum > before_beh_th].index)

In [11]:
# An event is retained only if both its preceding and its succeeding behavior were selected above
keep_events = data['beh_before'].isin(before_behs) & data['beh_after'].isin(after_behs)
data = data.loc[keep_events]

## Specify the test and control behaviors

In [12]:
# Test behaviors are those whose effect on dff is being tested; the behaviors on the other side of the
# manipulation serve as controls.
if ps['test_type'] in ('state_dependence', 'before_reporting'):
    test_behs, control_behs = before_behs, after_behs
    print('Setting test behaviors to those before the manipulation.')
elif ps['test_type'] in ('prediction_dependence', 'after_reporting', 'decision_dependence'):
    test_behs, control_behs = after_behs, before_behs
    print('Setting test behaviors to those after the manipulation.')
else:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

Setting test behaviors to those after the manipulation.


## Get groups of subjects

In [13]:
# Build g, which holds one group label per event: events from the same subject share a label, and labels
# are the positions of the subjects in unique_ids (stored as floats, since g starts as np.zeros).
unique_ids = data['subject_id'].unique()
g = np.zeros(len(data))
for group_label, subject in enumerate(unique_ids):
    subject_events = (data['subject_id'] == subject).to_numpy()
    g[subject_events] = group_label

## Pull out $\Delta F/F$

In [14]:
# Stack the per-event dff entries of the column matching the test type into a single array
# (events along the first axis).
if ps['test_type'] in ('state_dependence', 'after_reporting'):
    dff = np.stack(data['dff_after'].to_numpy())
    print('Extracting dff after the manipulation.')
elif ps['test_type'] in ('prediction_dependence', 'before_reporting'):
    dff = np.stack(data['dff_before'].to_numpy())
    print('Extracting dff before the manipulation.')
elif ps['test_type'] == 'decision_dependence':
    dff = np.stack(data['dff_during'].to_numpy())
    print('Extracting dff during the manipulation.')
else:
    raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

Extracting dff before the manipulation.


## Define a function for calculating stats

In [15]:
def stats_f(x_i, y_i, g_i, alpha_i):
    """Fit a grouped linear regression and return its statistics together with the coefficients.

    Args:
        x_i: Regressors, forwarded as `x` to grouped_linear_regression_ols_estimator.
        y_i: Response values, forwarded as `y`.
        g_i: Group labels, forwarded as `g`.
        alpha_i: Forwarded as `alpha` to grouped_linear_regression_acm_stats.

    Returns:
        The object returned by grouped_linear_regression_acm_stats, with the estimated coefficients
        added under the key 'beta'.
    """
    coefs, coef_acm, n_groups = grouped_linear_regression_ols_estimator(x=x_i, y=y_i, g=g_i)
    fit_stats = grouped_linear_regression_acm_stats(beta=coefs, acm=coef_acm, n_grps=n_groups, alpha=alpha_i)
    fit_stats['beta'] = coefs
    return fit_stats

## Calculate stats

In [16]:
n_rois = dff.shape[1]
n_test_behs = len(test_behs)
full_stats = dict()

# Control behaviors that get their own regressor: every control behavior except the reference one.
# The reference is removed by exact match.  (set.difference(ps['beh_ref']) would iterate over the
# *characters* of the string, which is only correct for single-character behavior names.)  Order is
# preserved so the regressor order is the same from run to run.  This does not depend on the test
# behavior, so it is computed once, outside the loop.
unique_control_behs = list(dict.fromkeys(control_behs))
control_behs_ref = [c_b for c_b in unique_control_behs if c_b != ps['beh_ref']]
if len(control_behs_ref) == len(unique_control_behs):
    print('Warning: the reference behavior ' + str(ps['beh_ref']) + ' is not among the control behaviors, ' +
          'so no control behavior is dropped from the regressors.')

t0 = time()
for b_i, b in enumerate(test_behs):
    print('Running tests for behavior ' + str(b_i + 1) + ' of ' + str(n_test_behs) + ': ' + b)

    # pull_ind is the index of the coefficient for the test behavior b.
    # NOTE(review): this assumes one_hot_from_table places the beh_before variables ahead of the
    # beh_after variables - confirm against keller_zlatic_vnc.linear_modeling.one_hot_from_table
    if (ps['test_type'] == 'state_dependence') or (ps['test_type'] == 'before_reporting'):
        one_hot_data_ref, one_hot_vars_ref = one_hot_from_table(data, beh_before=[b], beh_after=control_behs_ref)
        pull_ind = 0
    elif ((ps['test_type'] == 'prediction_dependence') or (ps['test_type'] == 'after_reporting') or
          (ps['test_type'] == 'decision_dependence')):
        one_hot_data_ref, one_hot_vars_ref = one_hot_from_table(data, beh_before=control_behs_ref, beh_after=[b])
        pull_ind = len(one_hot_vars_ref) - 1
    else:
        raise ValueError('The test_type ' + ps['test_type'] + ' is not recognized.')

    # Append a column of ones (labeled 'ref') as the last regressor
    one_hot_data_ref = np.concatenate([one_hot_data_ref, np.ones([one_hot_data_ref.shape[0], 1])], axis=1)
    one_hot_vars_ref = one_hot_vars_ref + ['ref']

    # Fit one regression per column of dff, keeping pull_ind alongside each result so the coefficient
    # of interest can be pulled out later
    full_stats[b] = [(stats_f(x_i=one_hot_data_ref, y_i=dff[:, r_i], g_i=g, alpha_i=ps['alpha']), pull_ind)
                     for r_i in range(n_rois)]

    # Elapsed time is cumulative, measured from the start of the first behavior
    print('Done.  Elapsed time: ' + str(time() - t0))
    
    

Running tests for behavior 1 of 5: B
Done.  Elapsed time: 1489.9820528030396
Running tests for behavior 2 of 5: F
Done.  Elapsed time: 2267.4333856105804
Running tests for behavior 3 of 5: O
Done.  Elapsed time: 3024.3037209510803
Running tests for behavior 4 of 5: P
Done.  Elapsed time: 3779.1211915016174
Running tests for behavior 5 of 5: Q
Done.  Elapsed time: 4548.0077204704285


## Package results

In [17]:
# For every test behavior, collect across all regressions the p-value ('non_zero_p') and the coefficient
# ('beta') found at the index that was stored with each result
beh_stats = {}
for beh in test_behs:
    p_values = []
    betas = []
    for roi_stats, coef_ind in full_stats[beh]:
        p_values.append(roi_stats['non_zero_p'][coef_ind])
        betas.append(roi_stats['beta'][coef_ind])
    beh_stats[beh] = {'p_values': p_values, 'beta': betas}

## Save results

In [18]:
# The file name is the test type and the save string joined by an underscore, passed through append_ts,
# with a .pkl extension added
save_name = append_ts('_'.join([ps['test_type'], ps['save_str']])) + '.pkl'
save_path = Path(ps['save_folder'], save_name)

In [19]:
# Everything that gets saved: the per-behavior summaries, the full regression output and the parameters
rs = {'beh_stats': beh_stats, 'full_stats': full_stats, 'ps': ps}

In [20]:
# Pickle the results dictionary to the save path
with save_path.open('wb') as f:
    pickle.dump(rs, f)

In [21]:
# Report the full path of the saved file so it can be located later
print('Saved results to: ' + str(save_path))

Saved results to: \\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats\prediction_dependence_other_ref_A4_2020_09_26_22_54_03_390269.pkl
