This is a notebook to look at the cross-validated performance of linear models of different forms applied to different cell types. 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib qt

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import pickle

from janelia_core.stats.regression import grouped_linear_regression_boot_strap

from keller_zlatic_vnc.linear_modeling import one_hot_from_table

## Parameters go here

In [4]:
# Directory holding the extracted activity data and the name of the pickle
# file inside it; the two are joined and loaded with pickle in the load cell.
data_dir = r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff/A00c'

data_file = 'A00c_activity.pkl'

# Specify variables that we predict from.  beh_before / beh_after list the
# behavior labels (matched against the 'beh_before' / 'beh_after' columns)
# that are of interest; the enc_* flags are forwarded to one_hot_from_table.
beh_before = ['Q', 'F', 'B']
beh_after = ['Q', 'F', 'B']
enc_beh_interactions = True
# When enc_subjects is False the regression is instead fit with
# include_mean=True and a 'mean' label is appended to the variable names.
enc_subjects = True
closure = True # True if the only events we consider must start with a behavior
               # listed in beh_before and end with a behavior listed in beh_after

# Determine what type of manipulation events we look at.  'A4' or 'A9' keeps
# only rows whose 'man_tgt' column equals that value; any other value
# (e.g. 'both') keeps all events.
manip_type = 'A4' # 'both', 'A4' or 'A9'

## Load the data

In [5]:
# Read the pickled activity records and wrap them in a DataFrame.
# NOTE: pickle.load can execute arbitrary code - only point this at files
# from a trusted source.
data_path = Path(data_dir) / data_file
with data_path.open('rb') as pkl_file:
    raw_records = pickle.load(pkl_file)
data = pd.DataFrame(raw_records)

## Down select to only the manipulation events we want to consider

In [6]:
# Restrict to a single manipulation target when one is requested; any other
# value of manip_type leaves the table untouched.
if manip_type in ('A4', 'A9'):
    print(f'Analyzing only {manip_type} manipulation events.')
    data = data[data['man_tgt'] == manip_type]
else:
    print('Analyzing all manipulation events.')

Analyzing only A4 manipulation events.


## Enforce closure if needed

In [7]:
# Keep only events whose preceding behavior is listed in beh_before AND whose
# succeeding behavior is listed in beh_after.
if closure:
    print('Enforcing closure.')
    before_closure = data['beh_before'].isin(beh_before).to_numpy()
    after_closure = data['beh_after'].isin(beh_after).to_numpy()

    # The row mask gets its own name.  Rebinding `closure` (the boolean
    # option) to the mask array would make `if closure:` raise "truth value of
    # an array is ambiguous" when this cell is re-run, and would lose the
    # user's setting.
    closure_mask = np.logical_and(before_closure, after_closure)

    data = data[closure_mask]

Enforcing closure.


## Drop rows where neither the preceding nor the succeeding behavior is of interest

In [8]:
# A row is kept when at least one of its two behaviors is of interest, i.e.
# it is dropped only when both the preceding and the succeeding behavior fall
# outside the requested lists.
before_set = set(beh_before)
after_set = set(beh_after)

before_of_interest = np.asarray([b in before_set for b in data['beh_before']],
                                dtype=bool)
after_of_interest = np.asarray([b in after_set for b in data['beh_after']],
                               dtype=bool)

keep_rows = np.logical_or(before_of_interest, after_of_interest)

data = data[keep_rows]

## Pull out $\Delta F / F$

In [9]:
# Response variables: the 'dff_before' and 'dff_after' columns as NumPy
# arrays, row-aligned with `data`.
dff_before, dff_after = (data[col].to_numpy()
                         for col in ('dff_before', 'dff_after'))

## Now form one hot encoding of variables with user specified options

In [10]:
# Build the design matrix and the matching list of variable names from the
# user-specified encoding options.
encoding_opts = dict(beh_before=beh_before,
                     beh_after=beh_after,
                     enc_subjects=enc_subjects,
                     enc_beh_interactions=enc_beh_interactions)
one_hot_data, one_hot_vars = one_hot_from_table(data, **encoding_opts)

# Without subject encoding the regressions below are fit with
# include_mean=True, so add a label for that extra term
# (presumably the last coefficient - TODO confirm against the regression helper).
if not enc_subjects:
    one_hot_vars.append('mean')

## Now we perform leave-one-out cross-validation

In [11]:
# Leave-one-out cross-validation: for every row, fit the "before" and "after"
# models on all other rows and predict the held-out row.
n_rows = one_hot_data.shape[0]

true_dff_before = np.zeros(n_rows)
true_dff_after = np.zeros(n_rows)
pred_dff_before = np.zeros(n_rows)
pred_dff_after = np.zeros(n_rows)

for r_i in range(n_rows):
    # Builtin `bool` instead of `np.bool`: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    test_inds = np.zeros(n_rows, dtype=bool)
    test_inds[r_i] = True
    train_inds = np.logical_not(test_inds)

    train_dff_before = dff_before[train_inds]
    train_dff_after = dff_after[train_inds]
    train_one_hot_data = one_hot_data[train_inds, :]

    # Index with the scalar r_i so a scalar (not a 1-element array) is stored;
    # implicit array-to-scalar conversion is deprecated in recent NumPy.
    true_dff_before[r_i] = dff_before[r_i]
    true_dff_after[r_i] = dff_after[r_i]
    # Boolean indexing keeps the held-out row 2-D (1 x n_vars), as before.
    test_one_hot_data = one_hot_data[test_inds, :]

    # n_bs_smps=0 requests no bootstrap samples; only the second return value
    # is used as the model coefficients.  All rows are placed in one group.
    _, before_mdl = grouped_linear_regression_boot_strap(y=train_dff_before,
                                                         x=train_one_hot_data,
                                                         g=np.ones(len(train_dff_before)),
                                                         n_bs_smps=0,
                                                         include_mean=not enc_subjects)

    _, after_mdl = grouped_linear_regression_boot_strap(y=train_dff_after,
                                                        x=train_one_hot_data,
                                                        g=np.ones(len(train_dff_after)),
                                                        n_bs_smps=0,
                                                        include_mean=not enc_subjects)

    # Prediction is the dot product of the held-out row with the coefficients.
    # NOTE(review): when include_mean is True, confirm how the returned
    # coefficients line up with the columns of test_one_hot_data.
    pred_dff_before[r_i] = np.sum(test_one_hot_data*before_mdl)
    pred_dff_after[r_i] = np.sum(test_one_hot_data*after_mdl)

## Plot test results

In [16]:
# Held-out true vs. predicted values for the "before" model.
# Start a fresh figure so that, with a windowed backend such as qt, this plot
# is not drawn on top of whichever figure happens to be current.
plt.figure()
plt.plot(true_dff_before, 'ro')
plt.plot(pred_dff_before, 'b.')
plt.legend(['True', 'Predicted'])
plt.xlabel('Held-out event (row index)')
plt.ylabel(r'$\Delta F / F$')
# Raw string: '\D' is an invalid escape sequence in a normal string literal.
plt.title(r'Encoding of $\Delta F / F$ before the manipulation')

Text(0.5, 1.0, 'Encoding of $\\Delta F / F$ before the manipulation')

In [14]:
# Held-out true vs. predicted values for the "after" model.
# Start a fresh figure so that, with a windowed backend such as qt, this plot
# is not drawn on top of whichever figure happens to be current.
plt.figure()
plt.plot(true_dff_after, 'ro')
plt.plot(pred_dff_after, 'b.')
plt.legend(['True', 'Predicted'])
plt.xlabel('Held-out event (row index)')
plt.ylabel(r'$\Delta F / F$')
# Raw string: '\D' is an invalid escape sequence in a normal string literal.
plt.title(r'Encoding of $\Delta F / F$ after the manipulation')

Text(0.5, 1, 'Encoding of $\\Delta F / F$ after the manipulation')