Here we calculate means from raw data (e.g., the mean for all transitions from quite to forward) but we also fit our linear models, predict activity from these models and then compute the same means for the predicted activity.  We want to compare and make sure these are not too different. 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import pickle

from janelia_core.stats.regression import grouped_linear_regression_boot_strap
from janelia_core.stats.regression import visualize_boot_strap_results

from keller_zlatic_vnc.linear_modeling import one_hot_from_table


In [3]:
%matplotlib qt

## Parameters go here

In [4]:
data_dir = r'/Users/williambishop/Desktop/extracted_dff/A00c'

data_file = 'A00c_activity.pkl'

# Specify variables that we predict from
beh_before = ['Q', 'F', 'B']
beh_after = ['Q', 'F', 'B']
enc_beh_interactions = True
enc_subjects = False
closure = True # True if the only events we consider must start with a before_beh 
               # behavior and end with an beh_after behavior

# Determine how we will display results
font_size = 15

# Determine what type of manipulation events we look at
manip_type = 'A4' # 'both', 'A4' or 'A9'

# Determine if we use training predictions or cross-valided predictions
pred_type = 'cv' # 'train' or 'cv'


if False:
    # Transitions we want to compute averages for: This section is to reproduce Nadine's figures
    transitions = [('F', 'F'), 
                   ('B', 'F'), 
                   ('Q', 'F'), 
                   ('B', 'B'), 
                   ('Q', 'Q')]

    # Colors for plotting each transition for A00c
    clrs = np.asarray([[28, 76, 124], 
                      [206, 222, 245],
                      [0, 0, 0], 
                      [107, 227, 207], 
                      [213, 213, 213]])/256

    # Colors for plotting each transition for handle
    #clrs = np.asarray([[48, 111, 29], 
    #                  [83, 173, 50],
    #                  [0, 0, 0], 
    #                  [191, 190, 59], 
    #                  [146, 146, 146]])/256
else:
    # Transitions we want to compute averages for: This is for squaring results with linear model results
    transitions = [('F', 'F'), 
                   ('B', 'F'), 
                   ('Q', 'F'),
                   ('F', 'B'),
                   ('B', 'B'),
                   ('Q', 'B'),
                   ('F', 'Q'),
                   ('B', 'Q'),
                   ('Q', 'Q')]
    
    cmap = plt.get_cmap('Dark2')
    clrs = cmap(np.arange(0, 10))[:,0:3]


## Load the data

In [5]:
data_path = Path(data_dir) / data_file
with open(data_path, 'rb') as f:
    data = pickle.load(f)
data = pd.DataFrame(data)

## Down select to only the manipulation events we want to consider

In [6]:
if manip_type == 'A4':
    print('Analyzing only A4 manipulation events.')
    data = data[data['man_tgt'] == 'A4']
elif manip_type == 'A9':
    print('Analyzing only A9 manipulation events.')
    data = data[data['man_tgt'] == 'A9']
else:
    print('Analyzing all manipulation events.')

Analyzing only A4 manipulation events.


## Enforce closure if needed

In [7]:
if closure:
    print('Enforcing closure.')
    before_closure = np.asarray([b in set(beh_before) for b in data['beh_before']], 
                                dtype=bool)
    after_closure = np.asarray([b in set(beh_after) for b in data['beh_after']], 
                                dtype=bool)
    closure = np.logical_and(before_closure, after_closure)
    
    data = data[closure]

Enforcing closure.


## Get rid of rows of data that have no behavior of interest

In [8]:
before_ignore = np.asarray([b not in set(beh_before) for b in data['beh_before']], 
                                dtype=bool)
after_ignore = np.asarray([b not in set(beh_after) for b in data['beh_after']], 
                                dtype=bool)

ignore_rows = np.logical_and(before_ignore, after_ignore)

data = data[np.logical_not(ignore_rows)]

## Get groups of data (a group corresponds to each subject)

In [9]:
unique_ids = data['subject_id'].unique()
g = np.zeros(len(data))
for u_i, u_id in enumerate(unique_ids):
    g[data['subject_id'] == u_id] = u_i

## Pull out $\Delta F / F$

In [10]:
dff_before = data['dff_before'].to_numpy()
dff_after = data['dff_after'].to_numpy()

## Now we fit linear models with user specified options

In [11]:
one_hot_data, one_hot_vars = one_hot_from_table(data, 
                                                beh_before=beh_before, 
                                                beh_after=beh_after,
                                                enc_subjects=enc_subjects, 
                                                enc_beh_interactions=enc_beh_interactions)
if not enc_subjects:
    one_hot_vars.append('mean')

In [12]:
n_rows = one_hot_data.shape[0]

if not enc_subjects:
    one_hot_data_supp = np.concatenate([one_hot_data, np.ones([n_rows, 1])], axis=1)
else:
    one_hot_data_supp = one_hot_data

if pred_type == 'train':
    before_reg_rs, before_mdl = grouped_linear_regression_boot_strap(x=one_hot_data, 
                                                  y=dff_before, 
                                                  g=g, 
                                                  n_bs_smps=0, 
                                                  include_mean=(not enc_subjects))
    after_reg_rs, after_mdl = grouped_linear_regression_boot_strap(x=one_hot_data, 
                                                   y=dff_after, 
                                                   g=g, 
                                                   n_bs_smps=0, 
                                                   include_mean=(not enc_subjects))
    
    dff_pred_before = np.sum(one_hot_data_supp*before_mdl, axis=1)
    dff_pred_after = np.sum(one_hot_data_supp*after_mdl, axis=1)
else:
    dff_pred_before = np.zeros(n_rows)
    dff_pred_after = np.zeros(n_rows)
    dff_true_before = np.zeros(n_rows)
    dff_true_after = np.zeros(n_rows)
    
    for r_i in range(n_rows):
        test_inds = np.zeros(n_rows, dtype=np.bool)
        test_inds[r_i] = True
        train_inds = np.logical_not(test_inds)
    
        train_dff_before = dff_before[train_inds]
        train_dff_after = dff_after[train_inds]
        train_one_hot_data = one_hot_data[train_inds, :]
    

        test_one_hot_data_supp = one_hot_data_supp[test_inds, :]
    
        _, before_mdl = grouped_linear_regression_boot_strap(y=train_dff_before, 
                                                             x=train_one_hot_data,
                                                             g=np.ones(len(train_dff_before)),
                                                             n_bs_smps=0,
                                                             include_mean=not enc_subjects)
    
        _, after_mdl = grouped_linear_regression_boot_strap(y=train_dff_after, 
                                                            x=train_one_hot_data,
                                                            g=np.ones(len(train_dff_after)),
                                                            n_bs_smps=0,
                                                            include_mean=not enc_subjects)
        
        dff_pred_before[r_i] = np.sum(test_one_hot_data_supp*before_mdl)
        dff_pred_after[r_i] = np.sum(test_one_hot_data_supp*after_mdl)
        dff_true_before[r_i] = dff_before[test_inds]
        dff_true_after[r_i] = dff_after[test_inds]
    

## Calculate transition means

In [13]:
def calc_transition_means(activity, before_beh, after_beh, transitions):
    n_transitions = len(transitions)
    means = np.zeros(n_transitions)
    for t_i, tran in enumerate(transitions):
        before_inds = before_beh == tran[0]
        after_inds = after_beh == tran[1]
        keep_inds = np.logical_and(before_inds, after_inds)
        means[t_i] = np.mean(activity[keep_inds])
    
    return means
        

In [14]:
emp_before_means= calc_transition_means(dff_before, data['beh_before'].to_numpy(), 
                                        data['beh_after'].to_numpy(), transitions)

emp_after_means= calc_transition_means(dff_after, data['beh_before'].to_numpy(), 
                                       data['beh_after'].to_numpy(), transitions)



  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [15]:
pred_before_means= calc_transition_means(dff_pred_before, data['beh_before'].to_numpy(), 
                                        data['beh_after'].to_numpy(), transitions)

pred_after_means= calc_transition_means(dff_pred_after, data['beh_before'].to_numpy(), 
                                       data['beh_after'].to_numpy(), transitions)

## Plot results

In [16]:
def plot_means(emp_means, pred_means, clrs, transitions, title_str, ax=None, font_size=10):
    
    if ax is None:
        plt.figure()
        ax = plt.axes()
     
    n_means = len(emp_means)
    for mn_i in range(n_means):
        e_mn = emp_means[mn_i]
        p_mn = pred_means[mn_i]
        
        e_clr = np.ones(4)
        e_clr[0:3] = clrs[mn_i, :]
        plt.bar(2*mn_i, e_mn, color=e_clr)
        
        p_clr = .6*np.ones(4)
        p_clr[0:3] = clrs[mn_i, :]
        plt.bar(2*mn_i + 1, p_mn, color=p_clr)
        
    # Label x-axis
    trans_strs = [t_1 + t_2 for t_1, t_2 in transitions]
    first_str = trans_strs[0]
    trans_strs[0] = first_str + ' emp'
    trans_strs.insert(1, first_str + ' mdl')
    x_ticks = [0, 1.0]
    tick_clrs = [clrs[0,:], clrs[0,:]]
    for t_i in range(1, n_means):
        x_ticks.append(2*t_i)
        tick_clrs.append(clrs[t_i,:])
    plt.xticks(x_ticks, trans_strs, rotation=-90)

    #Set colors of x-axix labels
    for var_i, x_lbl in enumerate(ax.get_xticklabels()):
        x_lbl.set_color(tick_clrs[var_i])
        
    ph = plt.xlabel('Transition Type', fontsize=font_size)
    ph = plt.ylabel('Average $\Delta F$ / F', fontsize=font_size)
    ax.tick_params(axis="x", labelsize=font_size)
    ax.tick_params(axis="y", labelsize=font_size)
    plt.ylim(0, 1.0)
    plt.title(title_str, fontsize=font_size)
    fig = plt.gcf()
    fig.set_size_inches(6, 5)
    plt.tight_layout()

In [17]:
plot_means(emp_means=emp_before_means, 
           pred_means=pred_before_means, 
           clrs=clrs, 
           transitions=transitions, 
           title_str = '$\Delta F / F$ Before Perturbation',
           font_size=font_size)

In [18]:
plot_means(emp_means=emp_after_means, 
           pred_means=pred_after_means, 
           clrs=clrs, 
           transitions=transitions, 
           title_str = '$\Delta F / F$ After Perturbation',
           font_size=font_size)

## Look at predictions for individual events and neurons

In [19]:
plt.figure()
plt.plot(dff_true_before, 'ro')
plt.plot(dff_pred_before, 'b.')
plt.title('$\Delta F /F$ Before Perturbation')

plt.figure()
plt.plot(dff_true_after, 'ro')
plt.plot(dff_pred_after, 'b.')
plt.title('$\Delta F /F$ After Perturbation')

Text(0.5, 1.0, '$\\Delta F /F$ After Perturbation')