Code for fitting linear models 

In [1]:
%load_ext autoreload
%autoreload 2 

In [2]:
from pathlib import Path
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from janelia_core.stats.regression import grouped_linear_regression_boot_strap
from janelia_core.stats.regression import grouped_linear_regression_boot_strap_stats
from janelia_core.utils.data_saving import append_ts

from keller_zlatic_vnc.linear_modeling import format_whole_brain_annots_table
from keller_zlatic_vnc.linear_modeling import one_hot_from_table

## Parameters go here

In [3]:
# All analysis parameters live in one dictionary so they can be saved alongside the results.
ps = {
    # Folder and file holding the pickled data we analyze
    'data_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    # Alternative data file: r'dff_5_25_25_2019_10_22_10_58_11_107249.pkl'
    'data_file': r'dff_1_5_5_with_ep_2020_01_20_15_52_29_105309.pkl',

    # Variables that we predict from
    'beh_before': ['Q', 'F', 'B'],
    'beh_after': ['Q', 'F', 'B'],
    'enc_beh_interactions': True,
    'enc_subjects': False,
    # True if we only consider events that start with a behavior listed in
    # beh_before and end with a behavior listed in beh_after
    'closure': True,

    # Number of bootstrap samples used in each analysis
    'n_bs_smps': 1000,

    # Where results are written, and the stem of the saved file name
    'save_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    'save_str': 'whole_brain_boot_strap',
}

## Load data

In [4]:
# Full path to the pickled data file named in the parameters
data_path = Path(ps['data_folder']) / ps['data_file']

# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source
with data_path.open('rb') as f:
    data = pickle.load(f)

# The table of annotated events is the only entry of the loaded data used here
event_annots = data['event_annots']

In [5]:
# Display the event annotation table exactly as it was loaded
event_annots

Unnamed: 0,Date and sample,Manipulation Start,Manipulation End,Precede Behavior,Time differ between end of PB from Stimulus ONSET,Time differ between start of PB from Stimulus ONSET,Succeed Behavior,Time difference between start of SB from Stimus ONSET,Interval Time,Transtion Time,dff_before,dff_after
0,CW_17-08-23-L1,216,220,forward,1.0,34.0,back hunch,12.0,0.4010,4.8120,"[0.054598767, 0.06078657, 0.03721669, 0.038682...","[0.048961718, 0.069584034, 0.03722217, 0.01916..."
1,CW_17-08-23-L1,1795,1799,forward,2.0,18.0,back hunch,10.0,0.4010,4.0100,"[0.03240184, 0.044733804, 0.024001548, 0.02218...","[0.036694378, -0.010718676, 0.025001233, 0.004..."
2,CW_17-08-23-L2,114,119,forward,1.0,4.0,backward,6.0,0.3634,2.1804,"[0.04760842, 0.080916405, 0.06997735, 0.038695...","[0.07838976, 0.093723916, 0.11073011, 0.063612..."
3,CW_17-08-23-L2,2471,2476,forward,4.0,19.0,backward,6.0,0.3634,2.1804,"[0.014568833, 0.018858666, 0.015992953, 0.0200...","[0.044470083, 0.04860921, 0.0775243, 0.0778435..."
4,CW_17-08-23-L2,4644,4649,forward,5.0,10.0,backward,5.0,0.3634,1.8170,"[0.05529377, 0.004222768, 0.028083421, 0.04188...","[0.14517678, 0.12293305, 0.1137105, 0.08082393..."
...,...,...,...,...,...,...,...,...,...,...,...,...
273,CW_17-12-11-L3,2724,2729,quiet,,,forward,6.0,0.3756,2.2536,"[0.044009406, 0.035009526, 0.06686162, 0.08253...","[0.06621941, 0.07362655, 0.051485617, 0.078565..."
274,CW_17-12-11-L3,3445,3450,quiet,,,forward,9.0,0.3756,3.3804,"[0.028829338, 0.019474708, 0.049133588, 0.0383...","[0.064021826, 0.076009095, 0.066019125, 0.0738..."
275,CW_17-12-11-L3,4144,4149,quiet,,,forward,9.0,0.3756,3.3804,"[0.014759668, 0.010661985, 0.03287143, 0.00906...","[0.030296793, 0.08557811, 0.091722935, 0.08743..."
276,CW_17-12-11-L3,4845,4850,quiet,,,forward,10.0,0.3756,3.7560,"[0.0056119356, -0.0070766318, 0.028791232, 0.0...","[0.052880257, 0.048214655, 0.0755431, 0.049634..."


## Drop columns we don't need in the event annotation table

In [5]:
# Columns of the loaded table that we drop before formatting it
unused_columns = [
    'Manipulation Start',
    'Manipulation End',
    'Time differ between end of PB from Stimulus ONSET',
    'Time differ between start of PB from Stimulus ONSET',
    'Time difference between start of SB from Stimus ONSET',
    'Interval Time',
    'Transtion Time',
]
event_annots = event_annots.drop(columns=unused_columns)

## Format the event annotation table so it is ready to be passed to one_hot_from_table

In [6]:
# Reformat the annotation table with the project helper. The cells that follow read
# 'beh_before', 'beh_after' and 'subject_id' columns, which are presumably produced
# by this step -- verify against format_whole_brain_annots_table.
event_annots = format_whole_brain_annots_table(event_annots)

## Enforce closure if needed

In [7]:
if ps['closure']:
    print('Enforcing closure.')

    # Boolean mask per event: True when the behavior is one of those we model.
    # Series.isin does the membership test in one vectorized call, instead of
    # rebuilding a set for every row.
    before_closure = event_annots['beh_before'].isin(ps['beh_before']).to_numpy()
    after_closure = event_annots['beh_after'].isin(ps['beh_after']).to_numpy()

    # An event is kept only if BOTH its preceding and succeeding behaviors qualify
    closure = np.logical_and(before_closure, after_closure)

    event_annots = event_annots[closure]

Enforcing closure.


## Get rid of events that have no behaviors of interest

In [8]:
# Boolean mask per event: True when the behavior is NOT one of those we model.
# Series.isin does the membership test in one vectorized call, instead of
# rebuilding a set for every row.
before_ignore = ~event_annots['beh_before'].isin(ps['beh_before']).to_numpy()
after_ignore = ~event_annots['beh_after'].isin(ps['beh_after']).to_numpy()

# Only events where neither the preceding nor the succeeding behavior is of
# interest are discarded; events with at least one behavior of interest are kept
ignore_rows = np.logical_and(before_ignore, after_ignore)

event_annots = event_annots[np.logical_not(ignore_rows)]

## Get groups of data (a group corresponds to each subject)

In [9]:
# Label every event with an index identifying the subject it came from; g is a
# float array with one entry per row of event_annots
subject_ids = event_annots['subject_id']
unique_ids = subject_ids.unique()

g = np.zeros(len(event_annots))
for group_idx, subject in enumerate(unique_ids):
    g[subject_ids == subject] = group_idx

## Now get a one-hot encoding of variables we will fit to

In [10]:
# Encode the behaviors (and optionally subjects and interactions) of each event
one_hot_data, one_hot_vars = one_hot_from_table(
    table=event_annots,
    beh_before=ps['beh_before'],
    beh_after=ps['beh_after'],
    enc_subjects=ps['enc_subjects'],
    enc_beh_interactions=ps['enc_beh_interactions'],
)

# When subjects are not encoded, the regression is fit with a mean term, so we
# add a matching entry to the list of variable names
if not ps['enc_subjects']:
    one_hot_vars.append('mean')

## Now perform regression, with a bootstrap, for all supervoxels

In [11]:
# Stack the per-event dff vectors into arrays with one row per event
dff_before = np.stack(event_annots['dff_before'].to_numpy())
dff_after = np.stack(event_annots['dff_after'].to_numpy())

# Number of columns of the stacked data; one regression is fit per column
n_supervoxels = dff_before.shape[1]

# Arguments that are identical for every regression: predictors, group labels,
# number of bootstrap samples, and whether a mean term is included
shared_args = (one_hot_data, g, ps['n_bs_smps'], not ps['enc_subjects'])

# One argument tuple per supervoxel, for the data before and after each event
par_data_before = [(dff_before[:, n_i],) + shared_args for n_i in range(n_supervoxels)]
par_data_after = [(dff_after[:, n_i],) + shared_args for n_i in range(n_supervoxels)]

In [12]:
# Fit the bootstrapped regressions for the "before" data, one supervoxel at a
# time, and report how long the whole pass took
t0 = time.time()

before_bs_rs = []
for args in par_data_before:
    before_bs_rs.append(grouped_linear_regression_boot_strap(*args))

t1 = time.time()
elapsed = t1 - t0
print('Computaton time for ' + str(n_supervoxels) + ' super voxels: ' + str(elapsed))

Computaton time for 384628 super voxels: 143647.04658269882


In [13]:
# Fit the bootstrapped regressions for the "after" data, one supervoxel at a
# time, and report how long the whole pass took
t0 = time.time()

after_bs_rs = []
for args in par_data_after:
    after_bs_rs.append(grouped_linear_regression_boot_strap(*args))

t1 = time.time()
elapsed = t1 - t0
print('Computaton time for ' + str(n_supervoxels) + ' super voxels: ' + str(elapsed))

Computaton time for 384628 super voxels: 187455.83397960663


## Save results

In [14]:
# Bundle the parameters, variable names and both sets of bootstrap results
rs = dict(ps=ps,
          one_hot_vars=one_hot_vars,
          before_bs_rs=before_bs_rs,
          after_bs_rs=after_bs_rs)

# File name is the save string passed through append_ts, plus a .pkl extension
save_name = append_ts(ps['save_str']) + '.pkl'
save_path = Path(ps['save_folder'], save_name)

with save_path.open('wb') as f:
    pickle.dump(rs, f)

In [15]:
# Display the full path the results were written to
save_path

WindowsPath('//dm11/bishoplab/projects/keller_vnc/results/whole_brain_stats/whole_brain_boot_strap_2020_01_25_15_30_35_692930.pkl')