Fit simple models predicting female behavior from surrogate neural activity generated using perturbed versions of the Baker et al. population fits.

In [None]:
%matplotlib inline
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import linear_model
import sys

from disp import set_plot
from my_torch import skl_fit_lin, torch_fit_lin

# shorthand used below to stack per-trial arrays
cc = np.concatenate

# number of random train/test splits fit per model
NSPLIT = 30

# number of per-trial files loaded (indexed 0..NTR-1), and how many of
# them go into the training set of each split (80%, rounded to nearest)
NTR = 276
NTRAIN = round(0.8 * NTR)

# fitting routine applied to every split, and the extra keyword
# arguments forwarded to it (none by default)
FIT_KWARGS = dict()
FIT_LIN = skl_fit_lin

In [None]:
# variations on true parameters:
# numbers of leading PCs used as predictors (1-20 densely, then a few
# larger values)
NPCS = [*range(1, 21), 50, 100, 150, 224]

# behavioral variable to predict (alternative target: 'FLS')
TARG = 'FFV'

# common path prefix of the per-trial data files
PFX_BEHAV = 'data/simple/mlv/p_baker_dyn_pca/mlv_p_baker_dyn_pca'

In [None]:
# Indices into the saved example series that get plotted; loop-invariant,
# so defined once instead of on every iteration.
# presumably these index trials within the first split -- TODO confirm
TRAIN_IDX_PLOT = 0
TEST_IDX_PLOT = 1

# For each number of PCs: fit NSPLIT random train/test splits, save the
# results to disk, then reload them and plot a summary figure.
for npc in NPCS:
    # predictors are the columns PC_0 ... PC_{npc-1}
    pc_cols_use = [f'PC_{ipc}' for ipc in range(npc)]
    save_file = f'data/simple/mlv/p_baker_dyn_pca_zscored_npc_{len(pc_cols_use)}_{TARG.lower()}_{NTR}_tr.npy'

    # get means and stds of each PC to zscore results
    # NOTE(review): these statistics are pooled over all NTR trials, i.e.
    # including the trials held out for testing in each split -- confirm
    # this is intended.
    xs_all = cc([
        np.load(f'{PFX_BEHAV}_tr_{ctr}.npy', allow_pickle=True)[0]['df'][pc_cols_use]
        for ctr in range(NTR)
    ])
    offset = xs_all.mean(axis=0)
    scale = xs_all.std(axis=0)

    # fit regression models, one per random split of the trial indices
    # NOTE(review): the permutation is unseeded, so splits differ between runs.
    rgrs = []
    for csplit in range(NSPLIT):
        print(f'Split {csplit}')
        rnd_tr_idxs = np.random.permutation(NTR)
        idxs_train = rnd_tr_idxs[:NTRAIN]
        idxs_test = rnd_tr_idxs[NTRAIN:]
        rgr = FIT_LIN(
            PFX_BEHAV, pc_cols_use, TARG, idxs_train, idxs_test,
            offset=offset,
            scale=scale,
            **FIT_KWARGS)
        rgrs.append(rgr)

    # save r2, weights, and example predictions
    # (true/predicted series are kept for the first two splits only)
    save_data = {
        'r2_train': np.array([rgr.r2_train for rgr in rgrs]),
        'r2_test': np.array([rgr.r2_test for rgr in rgrs]),
        'w': np.array([rgr.w for rgr in rgrs]),
        'ys_train': [rgr.ys_train for rgr in rgrs[:2]],
        'y_hats_train': [rgr.y_hats_train for rgr in rgrs[:2]],
        'ys_test': [rgr.ys_test for rgr in rgrs[:2]],
        'y_hats_test': [rgr.y_hats_test for rgr in rgrs[:2]],
        'fit_fn': FIT_LIN.__name__,
        'fit_kwargs': FIT_KWARGS,
        'ntr': NTR,
        'nsplit': NSPLIT,
        'npc': npc,
    }

    # the dict is wrapped in a 1-element object array so it can be
    # stored with np.save and recovered with [0] after loading
    np.save(save_file, np.array([save_data]))

    # plot from the file just written rather than from memory
    data = np.load(save_file, allow_pickle=True)[0]
    gs = gridspec.GridSpec(3, 3)
    fig = plt.figure(figsize=(12, 8), tight_layout=True)
    axs = [fig.add_subplot(gs[0, 0]), fig.add_subplot(gs[0, 1:]), fig.add_subplot(gs[1, :]), fig.add_subplot(gs[2, :])]

    # distribution of train/test R2 across splits
    axs[0].hist(np.transpose([data['r2_train'], data['r2_test']]), bins=30)
    axs[0].legend(['Train', 'Test'])
    set_plot(axs[0], x_label='R2', y_label='# splits', title=f'{npc} PCs', font_size=14)

    # weight of each PC, averaged over splits
    axs[1].bar(np.arange(data['w'].shape[1]), np.mean(data['w'], axis=0))
    set_plot(axs[1], x_label='PC', y_label='Weight', font_size=14)

    # example true vs predicted series from the first split (training data)
    y_train = data['ys_train'][0][TRAIN_IDX_PLOT]
    y_hat_train = data['y_hats_train'][0][TRAIN_IDX_PLOT]
    axs[2].plot(np.arange(len(y_train)), y_train, c='k', lw=2)
    axs[2].plot(np.arange(len(y_hat_train)), y_hat_train, c='r', lw=2)
    axs[2].legend(['True', 'Predicted'])
    # label with the actual target so the axis stays correct if TARG changes
    set_plot(axs[2], y_lim=(-.5, 1), x_label='Timestep', y_label=TARG, title='Training data', font_size=14)

    # example true vs predicted series from the first split (held-out data)
    # presumably 30.03 is the sampling rate in Hz, converting timesteps to
    # seconds -- TODO confirm
    y_test = data['ys_test'][0][TEST_IDX_PLOT]
    y_hat_test = data['y_hats_test'][0][TEST_IDX_PLOT]
    axs[3].plot(np.arange(len(y_test))/30.03, y_test, c='k', lw=2)
    axs[3].plot(np.arange(len(y_hat_test))/30.03, y_hat_test, c='r', lw=2)
    axs[3].legend(['True', 'Predicted'])
    set_plot(axs[3], y_lim=(-.5, 1), x_lim=(200, 500), x_label='Time (s)', y_label=TARG, title='Held-out data', font_size=14)

    # render this figure now: otherwise all len(NPCS) figures stay open
    # until the cell finishes, exceeding matplotlib's default
    # figure.max_open_warning threshold of 20
    plt.show()