Here we compare statistics for significant coefficients of linear models fit to Keller/Zlatic VNC data (A00c, basin or handle cells) computed with three different methods. 

In [1]:
%load_ext autoreload
%autoreload 2

In [16]:
import copy
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io

from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import extract_transitions
from keller_zlatic_vnc.data_processing import generate_transition_dff_table
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh
from keller_zlatic_vnc.linear_modeling import one_hot_from_table
from keller_zlatic_vnc.linear_modeling import reference_one_hot_to_beh

from janelia_core.stats.regression import grouped_linear_regression_boot_strap
from janelia_core.stats.regression import grouped_linear_regression_boot_strap_stats
from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats

In [3]:
# Lift the cap on displayed rows so long tables are shown in full
pd.options.display.max_rows = None

## Parameters go here

#### Location of data

In [4]:
# Folder that the transition spreadsheet and activity files are read from
data_folder = r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2'

# Spreadsheet of raw transitions, resolved relative to data_folder
transition_file = 'transition_list.xlsx'

# Activity files for each cell type, given as (A4 file, A9 file)
a00c_a4_act_data_file, a00c_a9_act_data_file = ('A00c_activity_A4.mat',
                                                'A00c_activity_A9.mat')

basin_a4_act_data_file, basin_a9_act_data_file = ('Basin_activity_A4.mat',
                                                  'Basin_activity_A9.mat')

handle_a4_act_data_file, handle_a9_act_data_file = ('Handle_activity_A4.mat',
                                                    'Handle_activity_A9.mat')

#### Options for analysis

In [5]:
# Cell type the models are fit to; the file-selection cell accepts
# 'a00c', 'basin' or 'handle'
cell_type = 'a00c'

# Which manipulation events to keep: 'A4', 'A9', or anything else
# (e.g. 'both') to keep all of them
manip_type = 'both'

# Cutoff time passed to extract_transitions, used to define quiet
# behaviors following stimulation
cut_off_time = np.inf

# Whether dff 'before' or 'after' the manipulation is the predicted quantity
period = 'before'

# Behaviors under test: first entry is the behavior before the
# manipulation, second entry is the behavior after it
test_beh = ['Q', 'F']

# Number of bootstrap samples drawn by the bootstrap methods
n_bs_smps = 1000

## Specify some parameters we use in the code below

In [6]:
# Map each supported cell type to its (A4, A9) activity file names
act_files_by_cell_type = {
    'a00c': (a00c_a4_act_data_file, a00c_a9_act_data_file),
    'basin': (basin_a4_act_data_file, basin_a9_act_data_file),
    'handle': (handle_a4_act_data_file, handle_a9_act_data_file),
}

# Fail early on an unsupported cell type rather than at file-load time
if cell_type not in act_files_by_cell_type:
    raise ValueError('The cell type ' + cell_type + ' is not recogonized.')

a4_act_file, a9_act_file = act_files_by_cell_type[cell_type]

## Load data

In [7]:
# Read in raw transitions from the spreadsheet in the data folder
raw_trans = read_raw_transitions_from_excel(pathlib.Path(data_folder) / transition_file)

# Read in activity for the A4 and A9 manipulations of the selected cell type
a4_act = scipy.io.loadmat(pathlib.Path(data_folder) / a4_act_file, squeeze_me=True)
a9_act = scipy.io.loadmat(pathlib.Path(data_folder) / a9_act_file, squeeze_me=True)

# Recode behavioral annotations in both the before and after columns
raw_trans = recode_beh(raw_trans, 'Beh Before')
raw_trans = recode_beh(raw_trans, 'Beh After')

# Extract transitions, using the configured cutoff time
trans = extract_transitions(raw_trans, cut_off_time)

# Generate one table of data per manipulation target
a4table = generate_transition_dff_table(act_data=a4_act, trans=trans)
a9table = generate_transition_dff_table(act_data=a9_act, trans=trans)

# Label each table with its manipulation target so rows remain
# distinguishable once the tables are stacked
a4table['man_tgt'] = 'A4'
a9table['man_tgt'] = 'A9'

# Put the tables together. DataFrame.append was removed in pandas 2.0;
# pd.concat with ignore_index=True stacks the rows the same way and
# renumbers the index from 0.
data = pd.concat([a4table, a9table], ignore_index=True)

## Down-select to only the manipulation events we want to consider

In [8]:
# Keep only rows for the requested manipulation target; any value of
# manip_type other than 'A4' or 'A9' leaves the table unfiltered
if manip_type in ('A4', 'A9'):
    print('Analyzing only ' + manip_type + ' manipulation events.')
    keep_rows = data['man_tgt'] == manip_type
    data = data[keep_rows]
else:
    print('Analyzing all manipulation events.')

Analyzing all manipulation events.


## See how many subjects we have for each transition type

In [9]:
# Display the table returned by count_unique_subjs_per_transition for the
# selected events (in the saved output, rows and columns are behavior codes)
count_unique_subjs_per_transition(data)

Unnamed: 0,B,F,H,O,P,Q,T
B,4.0,16.0,2.0,1.0,3.0,0.0,6.0
F,1.0,14.0,0.0,2.0,8.0,0.0,1.0
H,0.0,1.0,0.0,0.0,0.0,0.0,0.0
O,1.0,4.0,0.0,0.0,0.0,0.0,0.0
P,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Q,1.0,16.0,4.0,8.0,2.0,10.0,1.0
T,4.0,7.0,1.0,0.0,4.0,0.0,2.0


## Get groups of data (a group corresponds to each subject)

In [10]:
# Give every row the index of its subject, numbering subjects in the
# order they first appear in the table
unique_ids = data['subject_id'].unique()
g = np.zeros(len(data))
for grp_ind, subj_id in enumerate(unique_ids):
    subj_rows = (data['subject_id'] == subj_id).to_numpy()
    g[subj_rows] = grp_ind

## Pull out $\Delta F / F$

In [11]:
# Pick the dff column matching the requested period as the regression target
if period == 'before':
    dff = data['dff_before'].to_numpy()
elif period == 'after':
    dff = data['dff_after'].to_numpy()
else:
    # Include the offending value in the message; previously the literal
    # word "period" was concatenated instead of the variable.
    raise ValueError('The period ' + str(period) + ' is not recogonized.')

## Get dummy variable representation of the data

In [12]:
# First entry of test_beh is the behavior before the manipulation,
# second entry is the behavior after it
before_beh = test_beh[0]
after_beh = test_beh[1]

# Encode the two behaviors and their interaction; subjects are not encoded
one_hot_data, one_hot_vars = one_hot_from_table(
    data,
    beh_before=[before_beh],
    beh_after=[after_beh],
    enc_subjects=False,
    enc_beh_interactions=True,
)

n_subjects = len(unique_ids)

## Add a constant (mean) column to the one-hot encoding

In [13]:
# Append a column of ones, labelled 'ref', so the model includes a constant
# term. The guard keeps the cell idempotent: without it, re-running the cell
# would add a second identical column and a second 'ref' label.
if 'ref' not in one_hot_vars:
    one_hot_data = np.concatenate([one_hot_data, np.ones([one_hot_data.shape[0], 1])], axis=1)
    one_hot_vars.append('ref')

## Get stats with different methods

#### Pairs bootstrap

In [14]:
# Arguments for the grouped bootstrap. include_mean is False because a
# column of ones has already been appended to one_hot_data
# (presumably include_mean=True would add another -- confirm in janelia_core).
pairs_bs_kwargs = dict(x=one_hot_data,
                       y=dff,
                       g=g,
                       n_bs_smps=n_bs_smps,
                       include_mean=False)

pairs_bs_results = grouped_linear_regression_boot_strap(**pairs_bs_kwargs)

# Statistics are computed from the first element of the bootstrap results
# NOTE(review): no random seed is set in this notebook, so the bootstrap
# statistics may differ between runs -- confirm how the library draws samples
pairs_bs_stats = grouped_linear_regression_boot_strap_stats(pairs_bs_results[0])

In [15]:
# Display the bootstrap statistics (the saved output is a dict with keys
# 'alpha', 'c_ints', 'non_zero_p' and 'non_zero')
pairs_bs_stats

{'alpha': 0.05,
 'c_ints': array([[-0.05090495, -0.03135364, -0.01313172,  0.01950583],
        [-0.00937761,  0.01277496,  0.0310109 ,  0.06086796]]),
 'non_zero_p': array([0.002, 0.864, 0.886, 0.002]),
 'non_zero': array([ True, False, False,  True])}

#### OLS estimator

In [None]:
# Grouped OLS fit on the same design matrix, target and subject groups
# as the pairs bootstrap
ols_beta, ols_acm, ols_n_gprs = grouped_linear_regression_ols_estimator(
    x=one_hot_data,
    y=dff,
    g=g,
)

# Statistics for the OLS estimate at alpha = 0.05
ols_stats = grouped_linear_regression_acm_stats(
    beta=ols_beta,
    acm=ols_acm,
    n_grps=ols_n_gprs,
    alpha=0.05,
)

In [None]:
# Display the statistics computed for the OLS estimate
ols_stats

In [None]:
# Display the first value returned by the OLS estimator
# (presumably one coefficient per column of one_hot_data -- confirm)
ols_beta