In [1]:
import os
import json
import glob
import copy
import copy
import itertools
import re
import pprint 
pp = pprint.PrettyPrinter(indent=4)

import numpy as np
import pylab as pl
import seaborn as sns
import pandas as pd
import statsmodels as sm
import cPickle as pkl

from scipy import stats as spstats

from pipeline.python.classifications import experiment_classes as util
from pipeline.python.classifications import aggregate_data_stats as aggr
from pipeline.python.classifications import rf_utils as rfutils
from pipeline.python import utils as putils

from matplotlib.lines import Line2D
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable


In [2]:
%matplotlib notebook

In [3]:
# Set colors
visual_area, area_colors = putils.set_threecolor_palette()
dpi = putils.set_plot_params()


In [4]:
pl.rcParams['axes.labelsize']

16.0

# Dataset info

In [5]:
#### Set trace ID and FOV/state type
traceid = 'traces001'
fov_type = 'zoom2p0x'
state = 'awake'
aggregate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas'

#### Responsive params
responsive_test = 'nstds' # 'nstds' #'ROC' #None
nstd_thr=10
responsive_thr = nstd_thr if responsive_test=='nstds' else 0.05 #0.05 #None
n_stds = None if responsive_test=='ROC' else 2.5 #None

response_type = 'dff'

response_str = '%s_%s-thr-%.2f' % (response_type, responsive_test, responsive_thr) 
print(response_str)

dff_nstds-thr-10.00


In [6]:
#### Create data ID for labeling figures with data-types
experiment = 'blobs'


# Output dir

In [7]:
#### Output dir
stats_dir = os.path.join(aggregate_dir, 'data-stats')

In [8]:
pupildir = os.path.join(aggregate_dir, 'behavior-state', 'pupil')
print(pupildir)

if not os.path.exists(pupildir):
    os.makedirs(pupildir)

/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/pupil


#### Get metadata for stimulus

In [9]:
experiment = 'blobs'

In [10]:
# Get all data sets
sdata = aggr.get_aggregate_info(traceid=traceid, fov_type=fov_type, state=state)
sdata['datakey'] = ['%s_%s_fov%i' % (session, animalid, fovnum) \
                        for session, animalid, fovnum in sdata[['session', 'animalid', 'fovnum']].values]

sdata[['visual_area', 'animalid', 'session', 'fov']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,animalid,session,fov
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Li,26,26,26
Lm,22,22,22
V1,18,18,18


In [11]:
# Get blob metadata only - and only if have RFs
sdata_exp = pd.concat([g for k, g in sdata.groupby(['animalid', 'session', 'fov']) if 
                        (experiment in g['experiment'].values 
                         and ('rfs' in g['experiment'].values or 'rfs10' in g['experiment'].values)) ])

sdata_exp[['visual_area', 'animalid', 'session', 'fov']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,animalid,session,fov
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Li,13,13,13
Lm,12,12,12
V1,11,11,11


#### Drop duplicates and whatnot fovs

In [12]:
reload(aggr)

<module 'pipeline.python.classifications.aggregate_data_stats' from '/home/julianarhee/Repositories/2p-pipeline/pipeline/python/classifications/aggregate_data_stats.pyc'>

In [13]:
stim_filterby = None #'first'

if experiment=='blobs':
    has_gratings=False
    g_str = 'hasgratings' if has_gratings else 'blobsonly'
else:
    g_str = 'gratingsonly'
resp_filter_str = 'stim-filter-%s_%s_%s' % (stim_filterby, g_str, response_str)

In [14]:
# Drop duplicates and whatnot fovs
if experiment=='blobs':
    exp_dkeys = aggr.get_blob_datasets(filter_by=stim_filterby, has_gratings=has_gratings, as_dict=True)
elif experiment == 'gratings':
    exp_dkeys = aggr.get_gratings_datasets(filter_by=stim_filterby, as_dict=True)

dictkeys = [d for d in list(itertools.chain(*exp_dkeys.values()))]
stim_datakeys = ['%s_%s_fov%i' % (s.split('_')[0], s.split('_')[1], 
                   sdata[(sdata['animalid']==s.split('_')[1]) 
                         & (sdata['session']==s.split('_')[0])]['fovnum'].unique()[0]) for s in dictkeys]
expmeta = dict((k, [dv for dv in stim_datakeys for vv in v if vv in dv]) for k, v in exp_dkeys.items())

curr_sdata = sdata_exp[sdata_exp['datakey'].isin(stim_datakeys)].copy().reset_index(drop=True)
print('[%s] %i unique datakeys' % (experiment, len(stim_datakeys)))
curr_sdata[['visual_area', 'animalid', 'session', 'fov']].drop_duplicates().groupby(['visual_area']).count()

[blobs] 37 unique datakeys


Unnamed: 0_level_0,animalid,session,fov
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Li,13,13,13
Lm,12,12,12
V1,11,11,11


In [15]:
#### Create data ID for labeling figures with data-types
data_id = '|'.join([traceid, resp_filter_str])
print(data_id)


traces001|stim-filter-None_blobsonly_dff_nstds-thr-10.00


In [16]:
curr_sdata.shape, sdata_exp.shape, len(stim_datakeys)

((141, 7), (141, 7), 37)

# Load gratings or blobs

In [17]:
#### Output dir
stats_dir = os.path.join(aggregate_dir, 'data-stats')

glob.glob(os.path.join(stats_dir, 'aggr_%s_trialmeans_*%s*_%s_stimulus.pkl' 
                                               % (experiment, responsive_test, response_type)))

['/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-10.00_dff_stimulus.pkl',
 '/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-15.00_dff_stimulus.pkl']

In [18]:
aggr_trialmeans_dfile = glob.glob(os.path.join(stats_dir, 
                                'aggr_%s_trialmeans_*%s*_%s_stimulus.pkl' 
                                               % (experiment, responsive_test, response_type)))[0]

print(aggr_trialmeans_dfile)
with open(aggr_trialmeans_dfile, 'rb') as f:
    MEANS = pkl.load(f)
    

/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-10.00_dff_stimulus.pkl


In [19]:
[s for s in stim_datakeys if s not in MEANS.keys()]

[]

In [20]:
# glob.glob(os.path.join(stats_dir, 'aggr_%s_trialmeans_*%s*_%s_baseline.pkl' 
#                                                % (experiment, responsive_test, response_type)))

In [21]:
# aggr_baselines_dfile = glob.glob(os.path.join(stats_dir, 
#                                 'aggr_%s_trialmeans_*%s*_%s_baseline.pkl'
#                                               % (experiment, responsive_test, response_type)))[0]

# print(aggr_baselines_dfile)
# with open(aggr_baselines_dfile, 'rb') as f:
#     BAS = pkl.load(f)

# Face tracker data

In [22]:
#### Set source/dst paths
dlc_home_dir = '/n/coxfs01/julianarhee/face-tracking'
dlc_project = 'facetracking-jyr-2020-01-25' #'sideface-jyr-2020-01-09'
dlc_project_dir = os.path.join(dlc_home_dir, dlc_project)

dlc_video_dir = os.path.join(dlc_home_dir, dlc_project, 'videos')
dlc_results_dir = os.path.join(dlc_project_dir, 'pose-analysis') # DLC analysis output dir

#### Training iteration info
dlc_projectid = 'facetrackingJan25'
scorer='DLC_resnet50'
iteration = 1
shuffle = 1
trainingsetindex=0
videotype='.mp4'
snapshot = 391800 #430200 #20900
DLCscorer = '%s_%sshuffle%i_%i' % (scorer, dlc_projectid, shuffle, snapshot)
print("Extracting results from scorer: %s" % DLCscorer)

Extracting results from scorer: DLC_resnet50_facetrackingJan25shuffle1_391800


#### Check which datakeys are missing DLC -- ALL data

In [23]:
print("Checking for existing results: %s" % dlc_results_dir)
dlc_runkeys = list(set([ os.path.split(f)[-1].split('DLC')[0] \
                       for f in glob.glob(os.path.join(dlc_results_dir, '*.h5'))]))
dlc_analyzed_experiments = ['_'.join(s.split('_')[0:4]) for s in dlc_runkeys]

# Get sdata indices that have experiments analyzed
ixs_wth_dlc = [i for i in sdata.index.tolist() 
                if '%s_%s' % (sdata.loc[i]['datakey'], sdata.loc[i]['experiment']) in dlc_analyzed_experiments]
dlc_dsets = sdata.iloc[ixs_wth_dlc]

dlc_datakeys = dlc_dsets['datakey'].unique()
sdata_datakeys = sdata['datakey'].unique()
print("%i of %i datasets with DLC." % (len(dlc_datakeys),len(sdata_datakeys)))


Checking for existing results: /n/coxfs01/julianarhee/face-tracking/facetracking-jyr-2020-01-25/pose-analysis
35 of 66 datasets with DLC.


In [24]:
glob.glob(os.path.join(dlc_project_dir, 'dlc-models', 'iteration-%i' % iteration,\
                      '%s-trainset95shuffle%i' % (dlc_projectid, shuffle), 'train', 'snapshot-%i.meta' % snapshot))


['/n/coxfs01/julianarhee/face-tracking/facetracking-jyr-2020-01-25/dlc-models/iteration-1/facetrackingJan25-trainset95shuffle1/train/snapshot-391800.meta']

#### Only include dsets with face-tracking

In [25]:
print("Checking for existing results: %s" % dlc_results_dir)
dlc_runkeys = list(set([ os.path.split(f)[-1].split('DLC')[0] \
                       for f in glob.glob(os.path.join(dlc_results_dir, '*.h5'))]))
dlc_analyzed_experiments = ['_'.join(s.split('_')[0:4]) for s in dlc_runkeys]

# Get sdata indices that have experiments analyzed
ixs_wth_dlc = [i for i in curr_sdata.index.tolist() 
                if '%s_%s' % (curr_sdata.loc[i]['datakey'], curr_sdata.loc[i]['experiment']) in dlc_analyzed_experiments]
dsets = curr_sdata.iloc[ixs_wth_dlc]

dlc_datakeys = dsets['datakey'].unique()
exp_datakeys = curr_sdata['datakey'].unique()
print("%i of %i datasets with DLC." % (len(dlc_datakeys),len(exp_datakeys)))
missing_dlc = [i for i in exp_datakeys if i not in dlc_datakeys]
print("Need %i datasets analyzed:" % len(missing_dlc), missing_dlc)

Checking for existing results: /n/coxfs01/julianarhee/face-tracking/facetracking-jyr-2020-01-25/pose-analysis
35 of 36 datasets with DLC.
('Need 1 datasets analyzed:', ['20191008_JC091_fov1'])


#### Loading existing extracted pupil data

In [26]:
from pipeline.python.classifications import dlc_utils as dlcutils

In [27]:
face_feature = 'pupil_area'

bstate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state'

if face_feature == 'pupil_area':
    pupil_fpath = os.path.join(bstate_dir, '%s_pupil_area_traces_snapshot-%i.pkl' % (experiment, snapshot))
else:
    pupil_fpath = glob.glob(os.path.join(bstate_dir, '%s_pupil-traces_snapshot-%i.pkl' % (experiment, snapshot)))[0]
print(pupil_fpath)

# This is a dict, keys are datakeys
with open(pupil_fpath, 'rb') as f:
    PUPIL = pkl.load(f)

/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/blobs_pupil_area_traces_snapshot-391800.pkl


In [28]:
missing_dlc = [i for i in exp_datakeys if '%s_%s' % (i, experiment) not in PUPIL.keys()]
print("Missing %i datasets for DCL extraction:" % len(missing_dlc))
for i in missing_dlc: print('... %s' % i);

Missing 1 datasets for DCL extraction:
... 20191008_JC091_fov1


In [29]:
# Get trial alignment info
rootdir = '/n/coxfs01/2p-data'
animalid = 'JC097'
session = '20190616'
fovnum = 1
alignment_info = aggr.get_trial_alignment(animalid, session, fovnum, experiment, traceid=traceid)
# alignment_info
iti_pre_ms = float(alignment_info['iti_pre'])*1000
iti_post_ms = float(alignment_info['iti_post'])*1000
print("ITI pre/post: %.1f ms, %.1f ms" % (iti_pre_ms, iti_post_ms))

ITI pre/post: 1000.0 ms, 1000.0 ms


In [225]:
reload(dlcutils)

<module 'pipeline.python.classifications.dlc_utils' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/dlc_utils.pyc'>

In [28]:
if len(missing_dlc) > 0:
    for dkey in missing_dlc:
        print("[%s] getting extracted feature: %s" % (dkey, face_feature))
        session, animalid, fov = dkey.split('_')
        fovnum = int(fov[3:])
        fov = 'FOV%i_zoom2p0x' % fovnum

        #### Get labels
        labels_dfile = glob.glob(os.path.join(rootdir, animalid, session, fov, 'combined_*%s*' % experiment, 'traces',
                              '%s*' % traceid, 'data_arrays', 'labels.npz'))[0]
        l = np.load(labels_dfile)
        labels = pd.DataFrame(data=l['labels_data'], columns=l['labels_columns'])

        #### Load pupil data
        facemeta, pupildata = dlcutils.load_pose_data(animalid, session, fovnum, experiment, dlc_results_dir, 
                                                      feature_list=[face_feature], epoch='trial_alignment', 
                                                      pre_ITI_ms=iti_pre_ms, post_ITI_ms=iti_post_ms)

        #### Parse pupil data into traces
        pupiltraces = dlcutils.get_pose_traces(facemeta, pupildata, labels, feature='pupil_area')

        #### Add to dict
        datakey ='%s_%s_fov%i_%s' % (session, animalid, fovnum, experiment)  
        PUPIL[datakey] = pupiltraces


    # This is a dict, keys are datakeys
    with open(pupil_fpath, 'wb') as f:
        pkl.dump(PUPIL, f, protocol=pkl.HIGHEST_PROTOCOL)


In [30]:
for dkey in sdata_exp[sdata_exp['visual_area']=='V1']['datakey'].unique():
    if '%s_%s' % (dkey, experiment) not in PUPIL.keys():
        print("missing: %s" % dkey)
        continue
    print(dkey, PUPIL['%s_%s' % (dkey, experiment)]['pupil_area'].min(), PUPIL['%s_%s' % (dkey, experiment)]['pupil_area'].max())
    


('20190420_JC076_fov1', 29.754252854134201, 2698.8502848042281)
('20190501_JC076_fov1', 43.802763519651201, 1512.6268916174708)
('20190507_JC083_fov1', 60.296532778983831, 1684.0583695540736)
('20190510_JC083_fov1', 41.702709040068179, 805.37328439412738)
('20190511_JC083_fov1', 64.239848636480687, 1310.2595483268522)
('20190522_JC084_fov1', 55.073919034780424, 1512.6217072997263)
('20190622_JC085_fov1', 243.54176262181758, 5292.6311140967618)
('20190613_JC097_fov1', 59.926359266536359, 3151.942550565534)
('20190616_JC097_fov1', 91.620575517736299, 4433.1290256389839)
('20190617_JC097_fov1', 79.527193924657766, 4994.4659414952012)
('20191006_JC110_fov1', 56.279644477701609, 3600.41671559622)


In [31]:
for dkey in sdata_exp[sdata_exp['visual_area']=='Li']['datakey'].unique():
    if '%s_%s' % (dkey, experiment) not in PUPIL.keys():
        print("missing: %s" % dkey)
        continue
    print(dkey, PUPIL['%s_%s' % (dkey, experiment)]['pupil_area'].min(), PUPIL['%s_%s' % (dkey, experiment)]['pupil_area'].max())
    

('20190502_JC076_fov1', 55.230435334770661, 2637.3497643439082)
('20190605_JC090_fov1', 58.630917108753373, 2086.4911951410859)
('20190602_JC091_fov1', 159.53777946166389, 1871.1595789472303)
('20190606_JC091_fov1', 100.98744026722346, 1878.3210035296406)
('20190607_JC091_fov1', 62.627762024621106, 2021.6855883098526)
('20190614_JC091_fov1', 127.08591608199461, 3708.7772958144105)
missing: 20191008_JC091_fov1
('20190609_JC099_fov1', 146.1181111311171, 2136.766734499528)
('20190612_JC099_fov1', 137.4254936825414, 2556.5583604282542)
('20190617_JC099_fov1', 298.16098744684382, 5869.2821712316527)
('20191018_JC113_fov1', 98.05980171749512, 7029.6955515423524)
('20191105_JC117_fov1', 127.72859824773455, 4569.3869098187079)
('20191111_JC120_fov1', 169.94531884926445, 7883.8137183013887)


# ROI assignments

In [41]:
from pipeline.python.retinotopy import segment_retinotopy as seg

In [42]:
def get_neuraldf_for_cells_in_area(cells, MEANS, datakey=None):
    assert datakey in MEANS.keys(), "%s--not found in RESPONSES" % datakey
    assert datakey in cells['datakey'].values, "%s--not found in SEGMENTED CELLS" % datakey

    #neuraldf = MEANS[datakey].copy() 
    curr_rois = cells[cells['datakey']==datakey]['cell'].values
    curr_cols = list(curr_rois.copy())
    neuraldf = MEANS[datakey][curr_cols].copy()
    neuraldf['config'] = MEANS[datakey]['config'].copy()
    
    return neuraldf

In [40]:
rois = seg.get_cells_by_area(sdata_exp)

d_=[]
for (visual_area, datakey), g in rois.groupby(['visual_area', 'datakey']):
    if datakey not in MEANS.keys():
        print("missing: %s" % datakey)
        continue
    included_cells = [i for i in MEANS[datakey].columns if i in g['cell'].values]
    tmpd = g[g['cell'].isin(included_cells)].copy()
    print('[%s] %s: %i of %i responsive' % (visual_area, datakey, len(included_cells), len(g)))
    d_.append(tmpd)
    
cells = pd.concat(d_, axis=0).reset_index(drop=True)


Need to segment 20 datasets
[Li] 20190602_JC091_fov1: 136 of 197 responsive
[Li] 20190606_JC091_fov1: 82 of 175 responsive
[Li] 20190607_JC091_fov1: 129 of 280 responsive
[Li] 20190609_JC099_fov1: 71 of 256 responsive
[Li] 20190612_JC099_fov1: 49 of 118 responsive
[Li] 20190614_JC091_fov1: 129 of 299 responsive
[Li] 20190617_JC099_fov1: 20 of 83 responsive
[Li] 20191105_JC117_fov1: 60 of 141 responsive
[Li] 20191111_JC120_fov1: 19 of 151 responsive
[Lm] 20190603_JC080_fov1: 190 of 270 responsive
[Lm] 20191105_JC117_fov1: 10 of 17 responsive
[Lm] 20191111_JC120_fov1: 1 of 6 responsive
[V1] 20190511_JC083_fov1: 125 of 176 responsive
[V1] 20190522_JC084_fov1: 182 of 260 responsive
[V1] 20190613_JC097_fov1: 166 of 225 responsive
[V1] 20190622_JC085_fov1: 181 of 312 responsive
[V1] 20191006_JC110_fov1: 217 of 391 responsive


# Load traces

In [87]:
from scipy import interpolate
def resample_traces(samples, in_rate=44.65, out_rate=20.0):

    n_in_samples= len(samples)
    in_samples = samples.copy() #[rid, :] #np.array(tracef['File%03d' % curr_file][trace_type][:])
    in_tpoints = np.arange(0, n_in_samples) #len(in_samples))

    n_out_samples = round(n_in_samples * out_rate/in_rate)
    #print("N out samples: %i" % n_out_samples)

    flinear = interpolate.interp1d(in_tpoints, in_samples, axis=0)

    out_tpoints = np.linspace(in_tpoints[0], in_tpoints[-1], n_out_samples)
    out_samples = flinear(out_tpoints)
    #print("Out samples:", out_samples.shape)
    
    return out_tpoints, out_samples

def resample_pupil_traces(pupiltraces, feature_name='pupil',in_rate=20.0, out_rate=22.325, 
                          min_nframes=None, iti_pre_ms=1000):
    pupildfs = []
    if min_nframes is None:
        min_nframes = int(round(np.mean([len(g) for p, g in pupiltraces.groupby(['trial'])])))
    #print(min_nframes)
    for trial, g in pupiltraces.groupby(['trial']):
        if len(g[feature_name]) < min_nframes:
            npad = min_nframes - len(g[feature_name])
            vals = np.pad(g[feature_name].values, pad_width=((0, npad)), mode='edge')
        else:
            vals = g[feature_name].values[0:min_nframes]
        #print(len(vals))
        out_ixs, out_s = resample_traces(vals, in_rate=in_rate, out_rate=out_rate)
        currconfig = g['config'].unique()[0]
        new_stim_on = (iti_pre_ms/1E3)*out_rate #int(np.where(abs(out_ixs-stim_on) == min(abs(out_ixs-stim_on)))[0])
        pupildfs.append(pd.DataFrame({feature_name: out_s, 
                                       'stim_on': [new_stim_on for _ in np.arange(0, len(out_s))],
                                       'config': [currconfig for _ in np.arange(0, len(out_s))],
                                       'trial': [trial for _ in np.arange(0, len(out_s))]} ))
    pupildfs = pd.concat(pupildfs, axis=0).reset_index(drop=True)
    return pupildfs


def zscore_array(v):
    return (v-v.mean())/v.std()

In [140]:
(iti_pre_ms/1E3)*out_rate


NameError: name 'out_rate' is not defined

# Example FOV

In [90]:
include_stimuli='all'

animalid = 'JC097'
session = '20190616'
fovnum = 1

# Get experiment info
datakey = '%s_%s_fov%i' % (session, animalid, fovnum)
if experiment == 'blobs':
    obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
else:
    obj = util.Gratings(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
sdf = obj.get_stimuli()
print(sdf.shape)

# Alignment info
alignment_info = aggr.get_trial_alignment(animalid, session, fovnum, experiment, traceid=traceid)
iti_pre_ms = float(alignment_info['iti_pre'])*1000
iti_post_ms = float(alignment_info['iti_post'])*1000
print("ITI pre/post: %.1f ms, %.1f ms" % (iti_pre_ms, iti_post_ms))

# Stimulus info
all_configs = ['config%03d' % i for i in np.arange(1, sdf.shape[0]+1)]
if experiment=='blobs':
    control_configs = ['config001', 'config002', 'config003', 'config004', 'config005']
elif experiment=='gratings':
    control_configs = sdf[sdf['size']>100].index.tolist()
    
if include_stimuli=='fullscreen':
    included_configs = [c for c in all_configs if c in control_configs]
elif include_stimuli=='image':
    included_configs = [c for c in all_configs if c not in control_configs]
elif include_stimuli=='all':
    included_configs = all_configs
else:
    print("UNKNOWN: %s" % include_stimuli)
print("Restricting stimuli to: %s (%i conditions)" % (include_stimuli, len(included_configs)))

(50, 8)
ITI pre/post: 1000.0 ms, 1000.0 ms


## Get neural data and pupil data

In [43]:
# Get neural means
print(datakey)
neuraldf = get_neuraldf_for_cells_in_area(cells, MEANS, datakey=datakey)

20191006_JC110_fov1


In [187]:
# Get pupil traces
pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()
print("Pupil traces: %s" % str(pupiltraces.shape))
pupiltraces.head()

Pupil traces: (47441, 3)


Unnamed: 0,config,pupil_area,trial
0,config009,,1
0,config027,,2
0,config014,,3
0,config040,,4
0,config049,,5


## Downsample and align traces

In [46]:
#### Load traces of responsive cells
print("Loading <%s> traces: test=%s, thr=%.3f" % (response_type, responsive_test, responsive_thr))
traces, labels_all, sdf = aggr.load_traces(animalid, session, fovnum, experiment, traceid=traceid, 
                                         response_type=response_type,
                                         responsive_test=responsive_test, 
                                         responsive_thr=responsive_thr)
labels_all['trial_num'] = [int(t[5:]) for t in labels_all['trial']]
trial_key = dict((int(t[5:]), g['config'].unique()[0]) for t, g in labels.groupby(['trial']))
roi_list = traces.columns.tolist()


Loading <dff> traces: test=nstds, thr=10.000
... exp.load()
... loading saved data array (dff).
... updating self
... getting responsive cells (test: nstds, thr: 10.00')
... loading stats
296 of 432 cells responsive


In [65]:
#### Filter out trials with no pupil info or no neural
trials_with_pupil = list(pupiltraces['trial'].unique())
trials_with_neural = list(labels_all['trial_num'].unique())
n_pupil_trials = len(trials_with_pupil)
n_neural_trials = len(trials_with_neural)

labels = labels_all[labels_all['trial_num'].isin(trials_with_pupil)].copy()
if n_pupil_trials > n_neural_trials:
    pupiltraces = pupiltraces[pupiltraces['trial'].isin(trials_with_neural)]
elif n_pupil_trials < n_neural_trials:    
    print(labels.shape, labels_all.shape)
    neuraldf = neuraldf.loc[trials_with_pupil]


In [66]:
# Temporal binning params
new_framerate = 20.0
framerate = 44.65

#n_frames_bin = np.ceil(framerate/new_framerate) #11.
#new_framerate = (framerate/n_frames_bin)
print("Downsampled fr: %.2f Hz" % new_framerate)

#### Get trial epochs
stim_on = float(labels['stim_on_frame'].unique())
n_on = float(labels['nframes_on'].unique())
iti_pre = float(alignment_info['iti_pre'])
iti_post = float(alignment_info['iti_post'])
stim_dur = stim_on/framerate

#### Get indices of trial epochs to model
pre_flank = round((iti_pre)*44.65) #n_on/2.
post_flank = round((iti_post)*44.65)  #n_on

# start_ix = int(stim_on-pre_flank)
# end_ix = int(stim_on + n_on + post_flank + 1)
# print(start_ix, end_ix, end_ix-start_ix)


Downsampled fr: 20.00 Hz


In [67]:
len(pupiltraces['trial'].unique())

1500

In [68]:
rid=90
# Create trial mat, downsampled: shape = (ntrials, nframes_per_trial)
trialmat = pd.DataFrame(np.vstack([traces[rid][tg.index] for trial, tg in labels.groupby(['trial'])]),\
                        index=[int(trial[5:]) for trial, tg in labels.groupby(['trial'])])
configs_on_included_trials = [tg['config'].unique()[0] for trial, tg in labels.groupby(['trial'])]
included_trials = [trial for trial, tg in labels.groupby(['trial'])]

#### Bin traces - Each tbin is a column, each row is a sample 
sample_data = trialmat.fillna(method='pad').copy()
sample_data.shape

(1500, 135)

In [69]:
#### Get resampled indices of trial epochs
nframes_per_trial = sample_data.shape[1]
print("%i frames/trial" % nframes_per_trial)
out_tpoints, out_ixs = resample_traces(np.arange(0, nframes_per_trial), in_rate=framerate, out_rate=new_framerate)
new_stim_on = int(np.where(abs(out_ixs-stim_on) == min(abs(out_ixs-stim_on)))[0])
print(new_stim_on)
new_n_on = int(round( stim_dur*new_framerate))
print(new_n_on)


135 frames/trial
20
20


In [70]:
#### Bin traces - Each tbin is a column, each row is a sample 
df = trialmat.fillna(method='pad').copy().T
xdf = df.reindex(df.index.union(out_ixs)).interpolate('values').loc[out_ixs]
binned_trialmat = xdf.T
n_tbins = binned_trialmat.shape[1]
# binned_trialmat = sample_data.T.groupby(np.arange(len(sample_data.columns))//n_frames_bin, axis=0).mean().T
# n_tbins = np.ceil(trialmat.shape[1] / n_frames_bin)

#### Zscore traces 
zscored_neural = binned_trialmat / binned_trialmat.values.ravel().std()

# Reshape roi traces
cfg_list = np.hstack([[c for _ in np.arange(0, n_tbins)] for c in configs_on_included_trials])
r_ = zscored_neural.T.unstack().reset_index() # level_0=trial number, level_1=frame number
r_.rename(columns={'level_0': 'trial', 'level_1': 'frame_ix', 0: rid}, inplace=True)
r_['config'] = cfg_list
#r_list.append(r_)

print(trialmat.shape, binned_trialmat.shape)


((1500, 135), (1500, 60))


### (Example) Test traces

In [71]:
included_trial_nums = sorted(pupiltraces['trial'].unique())

# Check with known config, known RID
ml=40 #-1
sz=20
#print(sdf[(sdf['morphlevel']==ml) & (sdf['size']==sz)])
cfg = sdf[(sdf['morphlevel']==ml) & (sdf['size']==sz)].index[0]
trial_ixs_currcfg = np.array([int(t[5:]) for t in labels[labels['config']==cfg]['trial'].unique()])
trial_ixs = np.array([t for t in trial_ixs_currcfg if t in included_trial_nums])
print("[%s] plotting %i of %i trials" % (cfg, len(trial_ixs), len(trial_ixs_currcfg)))

[config022] plotting 30 of 30 trials


In [72]:
print(trialmat.shape, binned_trialmat.shape)
#raw_ = trialmat.loc[included_trial_nums].copy()
#binned_ = binned_trialmat.loc[included_trial_nums].copy()
#zscored_ = zscored_neural.loc[included_trial_nums].copy()

fig, axn = pl.subplots(1, 3, figsize=(10,4))
ax=axn[0]; ax.set_title('original (%s, %s)' % (response_type, cfg));
ax.plot(trialmat.loc[trial_ixs].values.T, 'k', lw=0.5)
ax.plot(np.nanmean(trialmat.loc[trial_ixs].values.T, axis=1), 'm', lw=2)
ax.axvline(x=stim_on, ls=':')

ax=axn[1]; ax.set_title('binned (fr=%.2fHz)' % (new_framerate));
ax.plot(binned_trialmat.loc[trial_ixs].values.T, 'k', lw=0.5)
ax.plot(np.nanmean(binned_trialmat.loc[trial_ixs].values.T, axis=1), 'm', lw=2)
ax.axvline(x=new_stim_on, ls=':')


ax=axn[2]; ax.set_title('zscored');
ax.plot(zscored_neural.loc[trial_ixs].values.T, 'k', lw=0.5)
ax.plot(np.nanmean(zscored_neural.loc[trial_ixs].values.T, axis=1), 'm', lw=2)
ax.axvline(x=new_stim_on, ls=':')
pl.subplots_adjust(top=0.8)
pl.suptitle('rid %i' % rid)


((1500, 135), (1500, 60))


<IPython.core.display.Javascript object>

Text(0.5,0.98,u'rid 90')

### (Example) Match pupil framerate

In [73]:
len(out_ixs)

60

In [186]:
face_feature = 'pupil_area'
print(new_framerate)
#### Resample pupil
desired_nframes = len(out_ixs) # 60
binned_pupil = resample_pupil_traces(pupiltraces, feature_name=face_feature, in_rate=20.0, out_rate=new_framerate, 
                                     min_nframes=desired_nframes, iti_pre_ms=iti_pre_ms)

pupil_trialmat = pd.DataFrame(np.vstack([p[face_feature].values for trial, p in binned_pupil.groupby(['trial'])]),
                              index=binned_trialmat.index, columns=binned_trialmat.columns)
print(pupil_trialmat.shape)


20.0


ValueError: Shape of passed values is (60, 1499), indices imply (60, 1500)

In [93]:
pupil_trialmat.head()

Unnamed: 0,0.0,2.27118644068,4.54237288136,6.81355932203,9.08474576271,11.3559322034,13.6271186441,15.8983050847,18.1694915254,20.4406779661,...,113.559322034,115.830508475,118.101694915,120.372881356,122.644067797,124.915254237,127.186440678,129.457627119,131.728813559,134.0
1,1740.544175,1723.9001,1738.455597,1747.958927,1759.873518,1774.830054,1801.127997,1812.895952,1858.343912,1860.784421,...,1463.445057,1459.014898,1417.727989,1460.690109,1445.87074,1436.07474,1414.288715,1416.099113,1416.694068,1403.116893
2,1426.626133,1435.579947,1435.002566,1429.337087,1441.88986,1448.027048,1442.732395,1422.150523,1435.618788,1424.470621,...,1164.417596,1152.197725,1127.823104,1157.579458,1190.851455,1159.655056,1167.245751,1218.817992,1216.266407,1214.703955
3,1244.781402,1263.889083,1280.621934,1290.19949,1300.958806,1316.91339,1333.093039,1343.662476,1348.038818,1347.950377,...,666.091397,665.467166,641.267209,639.444657,638.818713,654.488171,635.160886,635.474718,660.392662,641.045582
4,636.122915,645.553973,649.929713,663.355181,696.912773,697.581955,682.506649,728.445529,735.285995,724.472635,...,622.403278,620.162524,615.49124,626.756323,640.424289,633.782339,645.189763,641.614666,633.268045,643.852315
5,645.233866,638.081254,639.870794,669.723013,666.868951,668.834912,669.367609,660.235481,679.886902,662.863287,...,648.697585,603.838951,639.517465,636.216781,653.079787,657.27661,636.240063,657.433917,695.172259,701.686955


In [94]:
fig, axn = pl.subplots(1, 2)
ax=axn[0]
for t, pg in pupiltraces[pupiltraces['config']==cfg].groupby(['trial']):
    ixs = np.arange(0, len(pg[face_feature].values))
    ax.plot(ixs, pg[face_feature].values)
    ax.axvline(x=new_stim_on, ls=':', color='k')
    
ax=axn[1]
for t, pg in binned_pupil[binned_pupil['config']==cfg].groupby(['trial']):
    ixs = np.arange(0, len(pg[face_feature].values))
    ax.plot(ixs, pg[face_feature].values)
    ax.axvline(x=new_stim_on, ls=':', color='k')

<IPython.core.display.Javascript object>

In [95]:
fig, axn = pl.subplots(2, 1)
ax=axn[0]
ax.imshow(binned_trialmat.loc[trial_ixs], cmap='bone')
ax=axn[1]
ax.imshow(pupil_trialmat.loc[trial_ixs], cmap='bone')


<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f0a90105250>

In [96]:
binned_pupil.head()

Unnamed: 0,config,pupil_area,stim_on,trial
0,config012,1740.544175,20.0,1
1,config012,1723.9001,20.0,1
2,config012,1738.455597,20.0,1
3,config012,1747.958927,20.0,1
4,config012,1759.873518,20.0,1


### (Example) Get all rois and traces

In [115]:
pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
                                                                   'level_1': 'frame',
                                                                   0: face_feature})
pupil_r['frame_int'] = [int(round(f)) for f in pupil_r['frame']]
interp_frame_ixs = list(sorted(pupil_r['frame'].unique()))
pupil_r['frame_ix'] = [interp_frame_ixs.index(f) for f in pupil_r['frame']]

print(pupil_r.shape)

# Max of frame_int div. by 2p fps (44.65) should be equal to max. of frame_ix divded by new fps (20Hz)
new = pupil_r['frame_int'].max()/framerate
old = pupil_r['frame_ix'].max()/new_framerate

print("Trial durs (orig/resampled): %.2fs/%.2fs" % (new, old))


(90000, 5)
Trial durs (orig/resampled): 3.00s/2.95s


In [116]:
r_list=[]
#rid=90
for rid in roi_list:
    # Create trial mat, downsampled: shape = (ntrials, nframes_per_trial)
    trialmat = pd.DataFrame(np.vstack([traces[rid][tg.index] for trial, tg in labels.groupby(['trial'])]),\
                            index=[int(trial[5:]) for trial, tg in labels.groupby(['trial'])])
    configs_on_included_trials = [tg['config'].unique()[0] for trial, tg in labels.groupby(['trial'])]
    included_trials = [trial for trial, tg in labels.groupby(['trial'])]


    #### Bin traces - Each tbin is a column, each row is a sample 
    df = trialmat.fillna(method='pad').copy().T
    xdf = df.reindex(df.index.union(out_ixs)).interpolate('values').loc[out_ixs]
    binned_trialmat = xdf.T # should be Ntrials # Nframes
    n_tbins = binned_trialmat.shape[1]

    #### Zscore traces 
    zscored_neural = binned_trialmat / binned_trialmat.values.ravel().std()

    # Reshape roi traces
    cfg_list = np.hstack([[c for _ in np.arange(0, n_tbins)] for c in configs_on_included_trials])
    r_ = zscored_neural.T.unstack().reset_index() # level_0=trial number, level_1=frame number
    r_.rename(columns={'level_0': 'trial', 'level_1': 'frame_ix', 0: rid}, inplace=True)
    r_list.append(r_)

# Create trial mat, downsampled: shape = (ntrials, nframes_per_trial)
trialmat = pd.DataFrame(np.vstack([traces[rid][tg.index] for trial, tg in labels.groupby(['trial'])]),\
                        index=[int(trial[5:]) for trial, tg in labels.groupby(['trial'])])
configs_on_included_trials = [tg['config'].unique()[0] for trial, tg in labels.groupby(['trial'])]
included_trials = [trial for trial, tg in labels.groupby(['trial'])]

# Combine all traces into 1 dataframe (all frames x nrois)
traces_r = pd.concat(r_list, axis=1)
print(r_.shape, traces_r.shape)
traces_r['config'] = cfg_list

_, dii = np.unique(traces_r.columns, return_index=True)
traces_r = traces_r.iloc[:, dii]
print(traces_r.shape)


((90000, 3), (90000, 888))
(90000, 299)


# Create data frames for pupil/neural data - test low/high pupil

In [49]:
# neuraldf = pd.concat([g[roi_list].mean(axis=0) for t, g in traces_r.groupby(['trial'])], axis=1).T
# print(neuraldf.shape)
# config_list = [g['config'].unique()[0] for t, g in traces_r.groupby(['trial'])]

print(datakey)
neuraldf = get_neuraldf_for_cells_in_area(cells, MEANS, datakey=datakey)

20190616_JC097_fov1


In [164]:
use_pre_stimulus=True

# Get mean of pupil during trial - select PRE or PERI stimulus, for ex.
print("Stim on (new fr): %i" % new_stim_on)

if use_pre_stimulus:
    pupildf = pd.concat([g[g['frame_ix'].isin(np.arange(0, new_stim_on))].mean(axis=0) \
                        for t, g in pupil_r.groupby(['trial'])], axis=1).T
else:
    pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
print(pupildf.shape)

pupil_quantiles = pupildf[face_feature].quantile([0.25,0.5,0.75])

# Can also bin into low, mid, high
pupildf['quantile'] = pd.qcut(pupildf[face_feature], 4, labels=False)

Stim on (new fr): 20
(1500, 5)


## Subdivide pupil levels into high/low

In [166]:

fig, ax = pl.subplots()
ax.hist(pupildf[face_feature], edgecolor='k', facecolor='none', lw=2)
for v in pupil_quantiles:
    ax.axvline(x=v, ls=':', color='k', lw=2)
    
qcolors = ['m', 'gray', 'c', 'b']
for col, (i, pg) in zip(qcolors, pupildf.groupby(['quantile'])):
    ax.hist(pg[face_feature].values, color=col, alpha=0.5)
    
    
sns.despine(trim=True, offset=4)
ax.set_title('RF splits', loc='left')
putils.label_figure(fig, '%s | %s' % (data_id, datakey))

<IPython.core.display.Javascript object>

# Decode classes, split arousal state

In [128]:
import random
import scipy.stats as spstats
import sklearn.metrics as skmetrics

from sklearn.model_selection import train_test_split, cross_validate, KFold, learning_curve, ShuffleSplit
from sklearn.svm import LinearSVC
#from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler


In [129]:
def computeMI(x, y):
    sum_mi = 0.0
    x_value_list = np.unique(x)
    y_value_list = np.unique(y)
    Px = np.array([ len(x[x==xval])/float(len(x)) for xval in x_value_list ]) #P(x)
    Py = np.array([ len(y[y==yval])/float(len(y)) for yval in y_value_list ]) #P(y)
    for i in xrange(len(x_value_list)):
        if Px[i] ==0.:
            continue
        sy = y[x == x_value_list[i]]
        if len(sy)== 0:
            continue
        pxy = np.array([len(sy[sy==yval])/float(len(y))  for yval in y_value_list]) #p(x,y)
        t = pxy[Py>0.]/Py[Py>0.] /Px[i] # log(P(x,y)/( P(x)*P(y))
        sum_mi += sum(pxy[t>0]*np.log2( t[t>0]) ) # sum ( P(x,y)* log(P(x,y)/( P(x)*P(y)) )
    return sum_mi

In [130]:
# def get_trials_for_N_cells(curr_ncells, gdf, MEANS):
#     '''
#     Randomly select N cells from global roi list (gdf), get cell's responses to all trials.
    
#     gdf = dataframe (subset of global_rois dataframe), contains 
#     - all rois for a given visual area
#     - corresponding within-datakey roi IDs
#     '''
#     # Get current global RIDs
#     ncells_t = gdf.shape[0]                      
#     curr_rids = np.array(gdf['roi'].values.copy()) 
    
#     # Random sample w/ replacement
#     rand_ixs = np.array([random.randint(0, ncells_t-1) for x in range(curr_ncells)]) #np.random.shuffle(curr_rids)
#     curr_roi_list = curr_rids[rand_ixs] #curr_rids[0:curr_ncells]
#     curr_roidf = gdf[gdf['roi'].isin(curr_roi_list)].copy()

#     # Make sure equal num trials per condition for all dsets
#     # --- Get fewest total n trials among selected dsets
#     fewest_ntrials_total = np.min([MEANS[k].shape[0] for k in curr_roidf['datakey'].unique()])
#     # Get fewest N trials in a given cond for the datasets with fewest trials
#     keys_with_few = [k for k in curr_roidf['datakey'].unique() if MEANS[k].shape[0]==fewest_ntrials_total]
#     min_ntrials_by_config = min([MEANS[k]['config'].value_counts().min() for k in keys_with_few])

#     # Get data samples for these cells
#     d_list=[]; c_list=[];
#     for datakey, dgroup in curr_roidf.groupby(['datakey']):
#         # Get subset of trials per cond to match min N trials
#         tmp_d=[]
#         for cfg, dmat in MEANS[datakey].groupby(['config']):
#     #             if dmat.shape[0] == min_ntrials:
#     #                 tmp_d.append(dmat)
#     #             else:
#             tixs = np.arange(0, dmat.shape[0]) # Get indices of trials in current dataset
#             np.random.shuffle(tixs)            # Shuffle them to get random order
#             currd_subset = dmat.iloc[tixs[0:min_ntrials_by_config]].copy() # Select min_ntrials randomly
#             tmp_d.append(currd_subset)        # Add current trials of current config to list
#         d_ = pd.concat(tmp_d, axis=0) 

#         # For each RID sample belonging to current dataset, get RID order
#         curr_order = pd.concat([dgroup[dgroup['roi']==globalid][['roi', 'dset_roi']] \
#                                          for globalid in curr_roi_list])
#         curr_order_dset_rois = curr_order['dset_roi'].values
#         curr_order_global_rois = curr_order['roi'].values

#         # Get trial responses (some columns are repeats)
#         currd = d_[curr_order_dset_rois]
#         # currd = d_[dgroup['dset_roi']].copy() # Data should only include ROI columns
#         currd = currd.reset_index(drop=True)  # Reset trial indices 
#         currd.columns = curr_order_global_rois #dgroup['roi'].values  # Rename ROI columns to global-rois
#         config_list = d_['config'].reset_index(drop=True) #()     # Get configs on selected trials
#         d_list.append(currd)
#         c_list.append(config_list)
#     curr_data = pd.concat(d_list, axis=1)
#     cfg_df = pd.concat(c_list, axis=1)
#     cfg_df = cfg_df.T.drop_duplicates().T
#     assert cfg_df.shape[0]==curr_data.shape[0], "Bad trials"
#     assert cfg_df.shape[1]==1, "Bad configs"
#     curr_data['config'] = config_list.values

#     return curr_data


In [131]:
def fit_svm(zdata, targets, C_value=1.0, test_split=0.2):

    sizes = sorted(targets['group'].unique())

    #### For each transformation, split trials into 80% and 20%
    train_data=[]; test_data=[]; 
    train_labels=[]; test_labels=[]; 
    train_data_chance=[]; test_data_chance=[]; 
    train_labels_chance=[]; test_labels_chance=[]; 
    train_data, test_data, train_labels, test_labels = train_test_split(zdata, targets['label'].values, 
                                                        test_size=test_split, stratify=targets['group'])

    #### DATA - Fit classifier
    scaler = StandardScaler()
    scaler.fit(train_data)
    train_data = scaler.transform(train_data)
    # --- cross validate (5-fold) ---
    svc = LinearSVC(multi_class='ovr', C=C_value, penalty='l2')
    scores = cross_validate(svc, train_data, train_labels, cv=5,
                            scoring=('r2', 'neg_mean_squared_error', 'accuracy'),
                            return_train_score=True)
    iterdict = dict((s, values.mean()) for s, values in scores.items())

    #### DATA - Test with held-out data
    test_data = scaler.transform(test_data)
    trained_svc = LinearSVC(multi_class='ovr', C=C_value, penalty='l2').fit(train_data, train_labels)
    test_score = trained_svc.score(test_data, test_labels)

    #### DATA - Calculate MI
    predicted_labels = trained_svc.predict(test_data)
    mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
    ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
    log2_mi = computeMI(test_labels, predicted_labels)

    iterdict.update({'heldout_test_score': test_score, 
                     'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi})
    
    # ------------------------------------------------------------------
    # Shuffle LABELS to calculate chance level
    train_labels_chance = train_labels.copy()
    np.random.shuffle(train_labels_chance)
    test_labels_chance = test_labels.copy()
    np.random.shuffle(test_labels_chance)

    #### CHANCE - Fit classifier
    svc_chance = LinearSVC(multi_class='ovr', C=C_value, penalty='l2')
    scores_chance = cross_validate(svc, train_data, train_labels_chance, cv=5,
                            scoring=('r2', 'neg_mean_squared_error', 'accuracy'),
                            return_train_score=True)
    iterdict_chance = dict((s, values.mean()) for s, values in scores_chance.items())

    # CHANCE - Test with held-out data
    trained_svc_chance = LinearSVC(multi_class='ovr', C=C_value, penalty='l2').fit(train_data, train_labels_chance)
    test_score_chance = trained_svc_chance.score(test_data, test_labels_chance)  

    # Chance - Calculate MI
    predicted_labels = trained_svc_chance.predict(test_data)
    mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
    ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
    log2_mi = computeMI(test_labels, predicted_labels)

    iterdict_chance.update({'heldout_test_score': test_score_chance, 
                            'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi})

    return iterdict, iterdict_chance


In [132]:
# def resample_data_fit_classifier(globalcells_df, MEANS, sdf, sample_ncells=50,
#                                  class_a=0, class_b=106, C_value=1.0, test_size=0.2, n_iterations=100):
#     '''
#     Resample w/ replacement from pooled cells (across datasets). Assumes 'sdf' is same for all datasets.
#     Do n_iterations, return mean/sem/std over iterations as dict of results.
#     Classes (class_a, class_b) should be the actual labels of the target (i.e., value of morph level)
#     '''
    
#     iter_list=[]
#     chance_list=[]
#     for iteration in np.arange(0, n_iterations): #n_iterations):
        
#         # Get new sample set
#         curr_data = get_trials_for_N_cells(sample_ncells, globalcells_df, MEANS)

#         #### Select train/test configs for clf A vs B
#         object_configs = sdf[sdf['morphlevel'].isin([class_a, class_b])].index.tolist() 
#         curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
#         sample_data = curr_data[curr_data['config'].isin(object_configs)]

#         #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
#         data = sample_data[curr_roi_list]
#         zdata = (data - data.mean()) / data.std()

#         #### Get labels
#         targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
#         targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
#         targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]

#         #### Fit
#         curr_iter, curr_iter_chance = fit_svm(zdata, targets, C_value=C_value, test_split=test_size)
        
#         #### Add
#         iter_list.append(pd.DataFrame(curr_iter, index=[iteration]))
#         chance_list.append(pd.DataFrame(curr_iter_chance, index=[iteration]))

#     # DATA - get mean across iters
#     iter_results = pd.concat(iter_list, axis=0)
#     iterdict = dict(iter_results.mean())
#     iterdict.update( dict(('%s_std' % k, v) for k, v in \
#                           zip(iter_results.std().index, iter_results.std().values)) )
#     iterdict.update( dict(('%s_sem' % k, v) for k, v in \
#                           zip(iter_results.sem().index, iter_results.sem().values)) )

#     # CHANCE - get mean across iters
#     iter_results_chance = pd.concat(chance_list, axis=0)
#     iterdict_chance = dict(iter_results_chance.mean())
#     iterdict_chance.update( dict(('%s_std' % k, v) for k, v in \
#                           zip(iter_results_chance.std().index, iter_results_chance.std().values)) )
#     iterdict_chance.update( dict(('%s_sem' % k, v) for k, v in \
#                           zip(iter_results_chance.sem().index, iter_results_chance.sem().values)) )
    
#     return iterdict, iterdict_chance


In [133]:
def fit_classifier_bootstrap(curr_data, sdf, m0=0, m100=106, cv_nfolds=5, test_size=0.2, C_value=1.0, n_iterations=100):
    iter_list=[]; chance_list=[];
    for iteration in np.arange(0, n_iterations):

        #### Select train/test configs for clf A vs B
        object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
        curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
        sample_data = curr_data[curr_data['config'].isin(object_configs)]

        #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
        data = sample_data[curr_roi_list]
        zdata = (data - data.mean()) / data.std()

        #### Get labels
        targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
        targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
        targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]
        
        #### Fit
        curr_iter, curr_iter_chance = fit_svm(zdata, targets, C_value=C_value, test_split=test_size)
        
        #### Add
        iter_list.append(pd.DataFrame(curr_iter, index=[iteration]))
        chance_list.append(pd.DataFrame(curr_iter_chance, index=[iteration]))
        
    # DATA - get mean across iters
    iter_results = pd.concat(iter_list, axis=0)
    iterdict = dict(iter_results.mean())
    iterdict.update( dict(('%s_std' % k, v) for k, v in \
                          zip(iter_results.std().index, iter_results.std().values)) )
    iterdict.update( dict(('%s_sem' % k, v) for k, v in \
                          zip(iter_results.sem().index, iter_results.sem().values)) )


    # CHANCE - get mean across iters
    iter_results_chance = pd.concat(chance_list, axis=0)
    iterdict_chance = dict(iter_results_chance.mean())
    iterdict_chance.update( dict(('%s_std' % k, v) for k, v in \
                          zip(iter_results_chance.std().index, iter_results_chance.std().values)) )
    iterdict_chance.update( dict(('%s_sem' % k, v) for k, v in \
                          zip(iter_results_chance.sem().index, iter_results_chance.sem().values)) )
    
    iterdict.update({'class_a': m0, 'class_b': m100})
    iterdict_chance.update({'class_a': m0, 'class_b': m100})
      
#     df = pd.concat(iter_results, axis=0)
#     df['class_a'] = [m0 for _ in np.arange(0, len(iter_results))]
#     df['class_b'] = [m100 for _ in np.arange(0, len(iter_results))]
#     df_chance = pd.concat(iter_results_chance, axis=0)
#     df_chance['class_a'] = [m0 for _ in np.arange(0, len(iter_results))]
#     df_chance['class_b'] = [m100 for _ in np.arange(0, len(iter_results))]
    
    return iterdict, iterdict_chance # df, df_chance


In [134]:
iter_list=[]; chance_list=[];
for iteration in np.arange(0, n_iterations):

    #### Select train/test configs for clf A vs B
    object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
    curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
    sample_data = curr_data[curr_data['config'].isin(object_configs)]

    #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
    data = sample_data[curr_roi_list]
    zdata = (data - data.mean()) / data.std()

    #### Get labels
    targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
    targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
    targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]

    #### Fit
    curr_iter, curr_iter_chance = fit_svm(zdata, targets, C_value=C_value, test_split=test_size)

    #### Add
    iter_list.append(pd.DataFrame(curr_iter, index=[iteration]))
    chance_list.append(pd.DataFrame(curr_iter_chance, index=[iteration]))

# DATA - get mean across iters
iter_results = pd.concat(iter_list, axis=0)
iterdict = dict(iter_results.mean())
iterdict.update( dict(('%s_std' % k, v) for k, v in \
                      zip(iter_results.std().index, iter_results.std().values)) )
iterdict.update( dict(('%s_sem' % k, v) for k, v in \
                      zip(iter_results.sem().index, iter_results.sem().values)) )



NameError: name 'n_iterations' is not defined

In [81]:
curr_iter

{'fit_time': 0.001229238510131836,
 'heldout_MI': 0.1423965384507726,
 'heldout_aMI': 0.16299082132532045,
 'heldout_log2MI': 0.2054347798626842,
 'heldout_test_score': 0.75,
 'score_time': 0.0005549430847167969,
 'test_accuracy': 0.6531468531468532,
 'test_neg_mean_squared_error': -3897.241958041958,
 'test_r2': -0.3961904761904764,
 'train_accuracy': 1.0,
 'train_neg_mean_squared_error': 0.0,
 'train_r2': 1.0}

# Set output dir for decoding w.r.t. behavior state

In [135]:
decoding_dir = os.path.join(aggregate_dir, 'behavior-state', 'decoding')
if not os.path.exists(decoding_dir):
    os.makedirs(decoding_dir)
print(decoding_dir)

/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding


In [136]:
low_pupil_thr = pupil_quantiles[0.25]
high_pupil_thr = pupil_quantiles[0.75]

#pupil_low = pupildf[pupildf['pupil']<low_pupil_thr].copy()
#pupil_high = pupildf[pupildf['pupil']>high_pupil_thr].copy()
pupil_low = pupildf[pupildf['quantile']==0].copy()
pupil_high = pupildf[pupildf['quantile']==2].copy()

# Get trial indices of low/high pupil 
low_trial_ixs = sorted(pupil_low['trial'].unique())
high_trial_ixs = sorted(pupil_high['trial'].unique())

print(pupil_low.shape, pupil_high.shape)


((500, 6), (500, 6))


In [137]:
m0=0
m100=106
C_value=1

n_iterations = 50
test_size=0.2
cv_nfolds=5

In [138]:
# Fit ALL
#curr_data = neuraldf.copy()
all_trial_ixs = neuraldf.index.tolist()

i=0
d_list=[]
shuf_list = []
for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
    curr_data = neuraldf.loc[curr_trial_ixs].copy()
    iterdict, iterdict_chance = fit_classifier_bootstrap(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
                                  cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)

    iterdict['arousal'] = arousal
    iterdict_chance['arousal'] = arousal
    #df['arousal'] = [arousal for _ in np.arange(0, len(df))]
    #df_chance['arousal'] = [arousal for _ in np.arange(0, len(df))]

    d_list.append(pd.DataFrame(iterdict, index=[i])) #df)
    shuf_list.append(pd.DataFrame(iterdict_chance, index=[i])) #df_chance)
                     
    i+=1

df = pd.concat(d_list, axis=0)
df_chance = pd.concat(shuf_list, axis=0)

print(df.shape, d_list[0].shape)

((3, 39), (1, 39))


In [139]:
df

Unnamed: 0,arousal,class_a,class_b,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,...,test_r2_std,train_accuracy,train_accuracy_sem,train_accuracy_std,train_neg_mean_squared_error,train_neg_mean_squared_error_sem,train_neg_mean_squared_error_std,train_r2,train_r2_sem,train_r2_std
0,all,0,106,0.015611,0.000355,0.002513,0.077698,0.006102,0.043147,0.102764,...,0.130619,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,low,0,106,0.002908,8.5e-05,0.000599,0.059616,0.007947,0.056193,0.048648,...,0.188152,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,high,0,106,0.002192,7.5e-05,0.000528,0.05055,0.009739,0.068869,0.034758,...,0.188899,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


## Fit all morph pairs

In [82]:
morphs= [m for m in sdf['morphlevel'].unique() if m!=-1]
morph_pairs = list(itertools.combinations(morphs, 2))

# score_metrics = ['train_accuracy', 'test_accuracy', 'heldout_test_score', 'heldout_log2MI', 'heldout_aMI', 'heldout_MI']


In [159]:
# Fit ALL
all_trial_ixs = neuraldf.index.tolist()

d_list=[]
shuf_list = []
for (m0, m100) in morph_pairs:
    for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
        curr_data = neuraldf.loc[curr_trial_ixs].copy()
        iterdict, iterdict_chance = fit_classifier_bootstrap(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
                                      cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)

        iterdict['arousal'] = arousal
        iterdict_chance['arousal'] = arousal
        #df['arousal'] = [arousal for _ in np.arange(0, len(df))]
        #df_chance['arousal'] = [arousal for _ in np.arange(0, len(df))]

        d_list.append(pd.DataFrame(iterdict, index=[i])) #df)
        shuf_list.append(pd.DataFrame(iterdict_chance, index=[i])) #df_chance)

        i+=1

df = pd.concat(d_list, axis=0)
df_chance = pd.concat(shuf_list, axis=0)

df['class_diff'] = abs(df['class_a'] - df['class_b'])
df_chance['class_diff'] = abs(df['class_a'] - df['class_b'])

NameError: name 'morph_pairs' is not defined

In [86]:
report_metric = 'heldout_test_score'
for arousal, ag in df[(df['class_a']==0) & (df['class_b']==106)].groupby(['arousal']):
    ch = df_chance[(df_chance['class_a']==0) & (df_chance['class_b']==106) & (df_chance['arousal']==arousal)].copy()
    print('%s: %.2f (chance=%.2f)' % (arousal, ag.mean()[report_metric], ch.mean()[report_metric]))

all: 0.77 (chance=0.50)
high: 0.68 (chance=0.51)
low: 0.65 (chance=0.52)


In [88]:
report_metric='heldout_test_score'

fig, ax = pl.subplots(sharex=True, sharey=True, figsize=(5,4), dpi=dpi)

arousal_colors=['gray', 'orange', 'dodgerblue']

for col, (arousal, ag) in zip(arousal_colors, df.groupby(['arousal'])):
    xx = ag.groupby(['class_diff']).mean()[report_metric].index.tolist()
    yy = ag.groupby(['class_diff']).mean()[report_metric].values
    sem_yy = ag.groupby(['class_diff']).sem()[report_metric].values
    ax.errorbar(xx, yy, yerr=sem_yy, color=col, label=arousal)
    
    ch = df_chance[df_chance['arousal']==arousal]
    yy = ch.groupby(['class_diff']).mean()[report_metric].values
    xx = ch.groupby(['class_diff']).mean()[report_metric].index.tolist()
    sem_yy = ch.groupby(['class_diff']).sem()[report_metric].values
    ax.errorbar(xx, yy, yerr=sem_yy, color=col, ls=':', label=None)
    
ax.set_xlabel('Class difference')
ax.set_ylabel(report_metric)

ax.set_ylim([0.4, 1])
ax.legend(bbox_to_anchor=(1.2, 1))
putils.label_figure(fig, '%s | %s' % (data_id, datakey))
pl.subplots_adjust(left=0.2, bottom=0.2, top=0.8, right=0.8)
sns.despine()

figname = '%s_by_class_diff_ex-%s_split-colors' % (report_metric, datakey)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
print(decoding_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding', 'heldout_test_score_by_class_diff_ex-20190616_JC097_fov1_split-colors')


In [209]:
decoding_dir

'/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding'

In [260]:
# # Calculate face feature
# # ----------------------------------------------------------
# face_feature = 'pupil_area'
# #------------------------------------------------------------

# for dkey in curr_dsets['datakey'].unique():

#     print("[%s] getting extracted feature: %s" % (dkey, face_feature))
#     session, animalid, fov = dkey.split('_')
#     fovnum = int(fov[3:])
#     fov = 'FOV%i_zoom2p0x' % fovnum

#     #### Get labels
#     labels_dfile = glob.glob(os.path.join(rootdir, animalid, session, fov, 'combined_*%s*' % experiment, 'traces',
#                           '%s*' % traceid, 'data_arrays', 'labels.npz'))[0]
#     l = np.load(labels_dfile)
#     labels = pd.DataFrame(data=l['labels_data'], columns=l['labels_columns'])

#     #### Load pupil data
#     facemeta, pupildata = dlcutils.load_pose_data(animalid, session, fovnum, experiment, dlc_results_dir, 
#                                                   feature_list=[face_feature], epoch='trial_alignment', 
#                                                   pre_ITI_ms=iti_pre_ms, post_ITI_ms=iti_post_ms)

#     #### Parse pupil data into traces
#     pupiltraces = dlcutils.get_pose_traces(facemeta, pupildata, labels, feature=face_feature)

#     #### Add to dict
#     datakey ='%s_%s_fov%i_%s' % (session, animalid, fovnum, experiment)  
#     PUPIL[datakey] = pupiltraces


# # This is a dict, keys are datakeys
# with open(pupil_fpath, 'wb') as f:
#     pkl.dump(PUPIL, f, protocol=pkl.HIGHEST_PROTOCOL)


[20190502_JC076_fov1] getting extracted feature: pupil_area
Loading pose data (dlc)
('[blobs] Found runs:', ['1', '2', '3', '4', '5', '6'])
... finding movies for dset: 20190502_JC076_fov1_blobs
(0, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f1_20190502161157200591')
(1, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f2_20190502165806929905')
(2, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f3_20190502173703971093')
(3, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f4_20190502182003125103')
(4, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f4b_20190502183313065375')
(5, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f4b_20190502183351360255')
(6, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f4c_20190502183919522127')
(7, '/n/coxfs01/2p-data/eyetracker_tmp/20190502_JC076_fov1_blobs_f6_20190502195321083042')
----- File 1.-----
... Eyetracker dir: 20190502_JC076_fov1_blobs_f1_201905

Traceback (most recent call last):
  File "/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/dlc_utils.py", line 391, in align_trials_to_facedata
    curr_face_srcdir = [s for s in facetracker_srcdirs if '_f%s_' % run_num in s][0]
IndexError: list index out of range


Getting pose metrics by trial
... found 3 DLC outfiles, expecting 8 based on found eyetracker dirs.
...curr run: 1 [20190502_JC076_fov1_blobs_f1DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 2 [20190502_JC076_fov1_blobs_f2DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 3 [20190502_JC076_fov1_blobs_f3DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
... done parsing!
Parsing pose data with MW
Missing 625 trials total
[20190605_JC090_fov1] getting extracted feature: pupil_area
Loading pose data (dlc)
('[blobs] Found runs:', ['1', '2', '3', '4', '5', '6', '7'])
... finding movies for dset: 20190605_JC090_fov1_blobs
(0, '/n/coxfs01/2p-data/eyetracker_tmp/20190605_JC090_fov1_blobs_f1_20190605141933120131')
(1, '/n/coxfs01/2p-data/eyetracker_tmp/20190605_JC090_fov1_blobs_f2_20190605151304470267')
(2, '/n/coxfs01/2p-data/eyetracker_tmp/20190605_JC090_fov1_blobs_f3_20190605160757630689')
(3, '/n/coxfs01/2p-data/eyetracker_tmp/20190605_JC090_fov1_blobs_f4_20190605

Loading pose data (dlc)
('[blobs] Found runs:', ['1', '2', '3', '4', '5', '6'])
... finding movies for dset: 20190609_JC099_fov1_blobs
(0, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f1_20190609111435073145')
(1, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f2_20190609120626917118')
(2, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f3_20190609131907762511')
(3, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f4_20190609142912202443')
(4, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f5_20190609154127271261')
(5, '/n/coxfs01/2p-data/eyetracker_tmp/20190609_JC099_fov1_blobs_f6_20190609155730800664')
----- File 1.-----
... Eyetracker dir: 20190609_JC099_fov1_blobs_f1_20190609111435073145
... found 2 funky frame chunks: [8879, 12579]
... Full run duration: 14.10 min
----- File 2.-----
... Eyetracker dir: 20190609_JC099_fov1_blobs_f2_20190609120626917118
... found 5 funky frame chunks: [3959, 10439, 13475, 15059, 15

...curr run: 10 [20191018_JC113_fov1_blobs_f10DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 11 [20191018_JC113_fov1_blobs_f11DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 12 [20191018_JC113_fov1_blobs_f12DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
... done parsing!
Parsing pose data with MW
Missing 0 trials total
[20191105_JC117_fov1] getting extracted feature: pupil_area
Loading pose data (dlc)
('[blobs] Found runs:', ['1', '2', '3', '4', '5', '6'])
... finding movies for dset: 20191105_JC117_fov1_blobs
(0, '/n/coxfs01/2p-data/eyetracker_tmp/20191105_JC117_fov1_blobs_f1_20191105114840149566')
(1, '/n/coxfs01/2p-data/eyetracker_tmp/20191105_JC117_fov1_blobs_f2_20191105120405410749')
(2, '/n/coxfs01/2p-data/eyetracker_tmp/20191105_JC117_fov1_blobs_f3_20191105132317902204')
(3, '/n/coxfs01/2p-data/eyetracker_tmp/20191105_JC117_fov1_blobs_f4_20191105134239875931')
(4, '/n/coxfs01/2p-data/eyetracker_tmp/20191105_JC117_fov1_blobs_f5_201911051430137572

In [70]:
segmented_datasets = cells['datakey'].unique()

# Decode

In [74]:
# Temporal binning params
new_framerate = 20.0
framerate = 44.65

#### Get trial epochs
stim_on = float(labels['stim_on_frame'].unique())
n_on = float(labels['nframes_on'].unique())
new_stim_on = int(np.where(abs(out_ixs-stim_on) == min(abs(out_ixs-stim_on)))[0])
print(stim_on, new_stim_on)
stim_dur = stim_on/framerate
new_n_on = int(round( stim_dur*new_framerate))
print(new_n_on)

In [151]:
pupil_r.head()

Unnamed: 0,trial,frame,pupil_area,frame_int,frame_ix
0,1,0.0,1740.544175,0,0
1,1,2.271186,1723.9001,2,1
2,1,4.542373,1738.455597,5,2
3,1,6.813559,1747.958927,7,3
4,1,9.084746,1759.873518,9,4


In [181]:
pupil_r.shape

(89940, 3)

In [192]:
def get_pupil_df(pupil_r, trial_epoch='pre', new_stim_on=20., nframes_on=20.):
    '''
    Turn resampled pupil traces into reponse vectors
    
    trial_epoch : (str)
        'pre': Use PRE-stimulus period for response metric.
        'stim': Use stimulus period
        'all': Use full trial period
    
    new_stim_on: (int)
        Frame index for stimulus start (only needed if trial_epoch is 'pre' or 'stim')
        
    pupil_r : resampled pupil traces (columns are trial, frame, pupil_area, frame_int, frame_ix)
    '''
    if trial_epoch=='pre':
        pupildf = pd.concat([g[g['frame_ix'].isin(np.arange(0, new_stim_on))].mean(axis=0) \
                            for t, g in pupil_r.groupby(['trial'])], axis=1).T
    elif trial_epoch=='stim':
        pupildf = pd.concat([g[g['frame_ix'].isin(np.arange(new_stim_on, new_stim_on+nframes_on))].mean(axis=0) \
                            for t, g in pupil_r.groupby(['trial'])], axis=1).T
    else:
        pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
    #print(pupildf.shape)

    return pupildf

def get_resampled_traces(pupiltraces, in_rate=20., out_rate=20., iti_pre_ms=1000, desired_nframes=60, 
                         feature_name='pupil_area'):
    '''
    resample pupil traces to make sure we have exactly the right # of frames to match neural data
    '''
    binned_pupil = resample_pupil_traces(pupiltraces, feature_name=face_feature,
                                         in_rate=in_rate, out_rate=out_rate, 
                                         min_nframes=desired_nframes, iti_pre_ms=iti_pre_ms)
    trials_ = sorted(pupiltraces['trial'].unique())
    frames_ = np.arange(0, desired_nframes)
    pupil_trialmat = pd.DataFrame(np.vstack([p[feature_name].values for trial, p in binned_pupil.groupby(['trial'])]),
                                  index=trials_, columns=frames_)
    pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
                                                                       'level_1': 'frame',
                                                                       0: feature_name})
    pupil_r['frame_int'] = [int(round(f)) for f in pupil_r['frame']]
    interp_frame_ixs = list(sorted(pupil_r['frame'].unique()))
    pupil_r['frame_ix'] = [interp_frame_ixs.index(f) for f in pupil_r['frame']]

    return pupil_r
    
def match_trials(neuraldf, pupiltraces, labels_all):
    '''
    make sure neural data trials = pupil data trials
    '''
    trials_with_pupil = list(pupiltraces['trial'].unique())
    trials_with_neural = list(labels_all['trial_num'].unique())
    n_pupil_trials = len(trials_with_pupil)
    n_neural_trials = len(trials_with_neural)

    labels = labels_all[labels_all['trial_num'].isin(trials_with_pupil)].copy()
    if n_pupil_trials > n_neural_trials:
        pupiltraces = pupiltraces[pupiltraces['trial'].isin(trials_with_neural)]
    elif n_pupil_trials < n_neural_trials:    
        print(labels.shape, labels_all.shape)
        neuraldf = neuraldf.loc[trials_with_pupil]
    
    return neuraldf, pupiltraces

def split_pupil_range(pupildf, feature_name='pupil_area', n_cuts=3):
    '''
    n_cuts (int)
        4: use quartiles (0.25,  0.5 ,  0.75)
        3: use H/M/L (0.33, 0.66)
    '''

    bins = np.linspace(0, 1, n_cuts+1)[1:-1]
    low_bin = bins[0]
    high_bin = bins[-1]
    pupil_quantiles = pupildf[feature_name].quantile(bins)
    low_pupil_thr = pupil_quantiles[low_bin]
    high_pupil_thr = pupil_quantiles[high_bin]
    pupil_low = pupildf[pupildf[feature_name]<=low_pupil_thr].copy()
    pupil_high = pupildf[pupildf[feature_name]>=high_pupil_thr].copy()
    # Can also bin into low, mid, high
    #pupildf['quantile'] = pd.qcut(pupildf[face_feature], n_cuts, labels=False)
    
    return pupil_low, pupil_high

In [None]:

use_pre_stimulus=True
# Get mean of pupil during trial - select PRE or PERI stimulus, for ex.
print("Stim on (new fr): %i" % new_stim_on)


# 1 visual area only

In [176]:
visual_area = 'Li'
curr_dsets = dsets[dsets['visual_area']==visual_area].copy()

segmented_datasets = cells['datakey'].unique()

In [177]:
face_feature='pupil_area'
trial_epoch='pre'
face_framerate=20.
desired_nframes = 60 #len(out_ixs) # 60


In [185]:
pupil_r.head()


Unnamed: 0,trial,frame,pupil_area
0,1,0,
1,1,1,
2,1,2,
3,1,3,
4,1,4,


In [None]:
use_quartiles = False
n_cuts = 4 if use_quartiles else 3

# Classifier params
m0=0
m100=106
C_value=1
cv_nfolds=5
test_size=0.2
n_iterations = 100
report_metric = 'heldout_test_score'
skip = [] # ['20190502_JC076_fov1'] # ['20190420_JC076_fov1']

need_to_segment=[]

d_list=[]; d_list_chance=[]; 
n_datakeys = len(dsets['datakey'].unique())
for di, ((visual_area, datakey), ds) in enumerate(dsets.groupby(['visual_area', 'datakey'])):
    if datakey not in segmented_datasets:
        need_to_segment.append(datakey)
        continue
    if di % 5 == 0:
        print("%i of %i datakeys" % (int(di+1), n_datakeys))
        
    print("----- %s | %s -----" % (visual_area, datakey))
    # ------ STIMULUS INFO -----------------------------------------
    session, animalid, fov_ = datakey.split('_')
    fovnum = int(fov_[3:])
    obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
    sdf = obj.get_stimuli()
    
    #### Get neural means
    neuraldf = get_neuraldf_for_cells_in_area(cells, MEANS, datakey=datakey) # MEANS[datakey].copy() 

    #### Resample pupil
    pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()
    pupil_r = get_resampled_traces(pupiltraces, in_rate=face_framerate, out_rate=new_framerate, 
                                   desired_nframes=desired_nframes, feature_name=face_feature, 
                                   iti_pre_ms=iti_pre_ms)
    pupildf = get_pupil_df(pupil_r, trial_epoch=trial_epoch, new_stim_on=new_stim_on)
    
    #### Make sure neural df matches
    trials_with_pupil = list(pupildf['trial'].unique())
    neuraldf=neuraldf.loc[trials_with_pupil]

    #### Filter out trials with no pupil info or no neural
    neuraldf, pupiltraces = match_trials(neuraldf, pupiltraces, labels_all)
    
    # ------ Split trials by quantiles ---------------------------------
    pupil_low, pupil_high = split_pupil_range(pupildf, feature_name=face_feature, n_cuts=n_cuts)

    # Get trial indices of low/high pupil 
    low_trial_ixs = pupil_low['trial'].unique()
    high_trial_ixs = pupil_high['trial'].unique()
    all_trial_ixs = pupildf['trial'].unique()
    
    # Fit ALL
    tmp_d_list=[]; tmp_shuf_list=[];
    i=0
    for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
        curr_data = neuraldf.loc[curr_trial_ixs].copy()
        iterdict, iterdict_chance = fit_classifier_bootstrap(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
                                      cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)

        iterdict['arousal'] = arousal
        iterdict_chance['arousal'] = arousal
        tmp_d_list.append(pd.DataFrame(iterdict, index=[i])) #df)
        tmp_shuf_list.append(pd.DataFrame(iterdict_chance, index=[i])) #df_chance)
        print("...%s: %.2f (chance=%.2f)" % (arousal, iterdict[report_metric], iterdict_chance[report_metric]))
        
        i+=1
        
    tmp_df = pd.concat(tmp_d_list, axis=0)
    metadict = {'visual_area': visual_area, 'datakey': datakey}
    tmp_df = putils.add_meta_to_df(tmp_df, metadict)
    print(tmp_df.shape)
    tmp_chance = pd.concat(tmp_shuf_list, axis=0)
    tmp_chance = putils.add_meta_to_df(tmp_chance, metadict)
    
    d_list.append(tmp_df)
    d_list_chance.append(tmp_chance)

df = pd.concat(d_list, axis=0)
df_chance = pd.concat(d_list_chance, axis=0)

----- Li | 20190602_JC091_fov1 -----
((202365, 9), (202500, 9))
...all: 0.72 (chance=0.49)
...low: 0.74 (chance=0.50)
...high: 0.74 (chance=0.49)
(3, 41)
----- Li | 20190606_JC091_fov1 -----
...all: 0.58 (chance=0.50)
...low: 0.59 (chance=0.50)
...high: 0.53 (chance=0.48)
(3, 41)
----- Li | 20190607_JC091_fov1 -----
...all: 0.52 (chance=0.51)
...low: 0.50 (chance=0.50)
...high: 0.48 (chance=0.51)
(3, 41)
6 of 35 datakeys
----- Li | 20190609_JC099_fov1 -----
...all: 0.64 (chance=0.50)
...low: 0.56 (chance=0.48)
...high: 0.55 (chance=0.47)
(3, 41)
----- Li | 20190612_JC099_fov1 -----
...all: 0.52 (chance=0.50)
...low: 0.46 (chance=0.51)
...high: 0.51 (chance=0.51)
(3, 41)
----- Li | 20190614_JC091_fov1 -----
...all: 0.64 (chance=0.50)
...low: 0.58 (chance=0.49)
...high: 0.53 (chance=0.49)
(3, 41)
----- Li | 20190617_JC099_fov1 -----
((131355, 9), (202500, 9))
...all: 0.58 (chance=0.49)
...low: 0.58 (chance=0.52)
...high: 0.48 (chance=0.49)
(3, 41)
11 of 35 datakeys
----- Li | 20191105_JC

In [147]:
df.head()

Unnamed: 0,arousal,class_a,class_b,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,...,test_r2_std,train_accuracy,train_accuracy_sem,train_accuracy_std,train_neg_mean_squared_error,train_neg_mean_squared_error_sem,train_neg_mean_squared_error_std,train_r2,train_r2_sem,train_r2_std
0,all,0,106,0.015611,0.000355,0.002513,0.077698,0.006102,0.043147,0.102764,...,0.130619,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,low,0,106,0.002908,8.5e-05,0.000599,0.059616,0.007947,0.056193,0.048648,...,0.188152,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,high,0,106,0.002192,7.5e-05,0.000528,0.05055,0.009739,0.068869,0.034758,...,0.188899,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [196]:
visual_area='V1'

skip = ['20190602_JC091_fov1', '20191105_JC117_fov1', '20191111_JC120_fov1']#'20190602_JC091_fov1', '20190502_JC076_fov1'

for visual_area, vdf in df.groupby(['visual_area']):

    plotdf = vdf[(~vdf['datakey'].isin(skip)) & (vdf['arousal'].isin(['low','high']))]
    metrics = ['heldout_log2MI', 'heldout_test_score', 'heldout_aMI']

    fig, axn = pl.subplots(1, 3, figsize=(10,4), dpi=dpi)
    for ai, (ax, metric) in enumerate(zip(axn.flat, metrics)):

        # Plot each fov
        sns.pointplot(x='arousal', y=metric, hue='datakey', data=plotdf, ax=ax, 
                      palette='colorblind', s=10, alpha=0.5)
        sns.barplot(x='arousal', y=metric, data=plotdf, ax=ax, 
                    edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
        plotdf.apply(aggr.annotateBars, ax=ax,fontsize=12, fontcolor='k', xytext=(0, -30))
        if ai<2:
            ax.legend_.remove()
        ax.set_xlabel('')

    ax.legend(bbox_to_anchor=(1.1, 1))
    pl.subplots_adjust(right=0.7, wspace=0.8, hspace=0.2, top=0.8, bottom=0.2)
    fig.suptitle(visual_area)

    putils.label_figure(fig, data_id)

    figname = '%s_%s-test_%iv%i_C-%.2f, %i-iters' % (face_feature, visual_area, m0, m100, C_value, n_iterations)
    #pl.savefig(os.path.join(decoding_dir, '%s.svg' %figname))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [113]:

metric = 'heldout_test_score'
for metric in metrics:
    print('%s--------------------' % metric)
    a = plotdf[(plotdf['arousal']=='low')][metric]
    b = plotdf[(plotdf['arousal']=='high')][metric]
    tstat, pval = spstats.ttest_rel(a, b)
    print("Paired t-test, t=%.2f (p=%.2f)" % (tstat, pval))

heldout_log2MI--------------------
Paired t-test, t=1.42 (p=0.21)
heldout_test_score--------------------
Paired t-test, t=3.57 (p=0.02)
heldout_aMI--------------------
Paired t-test, t=1.98 (p=0.10)


In [97]:
tstats, pval = spstats.ttest_rel(a, b)

In [108]:
cell_counts = pd.concat([pd.DataFrame({'visual_area':visual_area, 
                                       'datakey': datakey, 
                                       'n_cells': len(g['cell'])}, index=[i]) \
                    for i, ((visual_area, datakey), g) in enumerate(cells.groupby(['visual_area', 'datakey']))], axis=0)
cell_counts

Unnamed: 0,datakey,n_cells,visual_area
0,20190602_JC091_fov1,136,Li
1,20190606_JC091_fov1,82,Li
2,20190607_JC091_fov1,129,Li
3,20190609_JC099_fov1,71,Li
4,20190612_JC099_fov1,49,Li
5,20190614_JC091_fov1,129,Li
6,20190617_JC099_fov1,20,Li
7,20191105_JC117_fov1,60,Li
8,20191111_JC120_fov1,19,Li
9,20190603_JC080_fov1,190,Lm


In [105]:
cell_counts = pd.concat([pd.DataFrame({'visual_area':visual_area, 
                                       'datakey': datakey, 
                                       'n_cells': MEANS[datakey].shape[1]-1}, index=[i]) \
                    for i, ((visual_area, datakey), g) in enumerate(dsets.groupby(['visual_area', 'datakey']))], axis=0)
cell_counts

Unnamed: 0,datakey,n_cells,visual_area
0,20190502_JC076_fov1,150,Li
1,20190602_JC091_fov1,241,Li
2,20190605_JC090_fov1,129,Li
3,20190606_JC091_fov1,136,Li
4,20190607_JC091_fov1,260,Li
5,20190609_JC099_fov1,78,Li
6,20190612_JC099_fov1,95,Li
7,20190614_JC091_fov1,193,Li
8,20190617_JC099_fov1,25,Li
9,20191018_JC113_fov1,64,Li


In [108]:
decoding_dir

'/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding'

In [277]:
visual_area='V1'
if visual_area == 'V1':
    skip = ['20190507_JC083_fov1', '20190617_JC097_fov1', '20190622_JC085_fov1'] # fix eyetracker
elif visual_area == 'Li':
    skip = ['20190602_JC091_fov1', '20191018_JC113_fov1']
else:
    skip = []

plotdf = df[~df['datakey'].isin(skip)]

fig, axn = pl.subplots(1, 2, figsize=(6,4), dpi=dpi)
for ai, (ax, metric) in enumerate(zip(axn.flat, ['heldout_log2MI', 'heldout_test_score'])):


    # Plot each fov
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=plotdf, ax=ax, 
                  palette='colorblind', s=10, alpha=0.5)
    sns.barplot(x='arousal', y=metric, data=plotdf, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    plotdf.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    if ai==0:
        ax.legend_.remove()
        
ax.legend(bbox_to_anchor=(1.1, 1))
pl.subplots_adjust(right=0.7, wspace=0.5, hspace=0.2, top=0.8, bottom=0.2)
fig.suptitle(visual_area)

putils.label_figure(fig, data_id)

figname = 'split_%s_%s-test_classify_%iv%i_C-%.2f, %i-iters' % (face_feature, visual_area, m0, m100, C_value, n_iterations)
pl.savefig(os.path.join(decoding_dir, '%s.svg' %figname))


<IPython.core.display.Javascript object>

#### snapshot 1

In [272]:
skip = ['20190420_JC076_fov1', '20190507_JC083_fov1', '20190617_JC097_fov1']

fig, axn = pl.subplots(1, 2, figsize=(12,4), dpi=dpi)
for ai, (ax, metric) in enumerate(zip(axn.flat, ['heldout_aMI', 'heldout_test_score'])):

    # Create dataframe
    m_ = pd.concat([full_df[[metric]], lo_df[metric], hi_df[metric]], axis=1)
    m_.columns = ['all', 'low', 'high']
    m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})
    m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
    m_df = m_df[~m_df['datakey'].isin(skip)]
    
    # Plot each fov
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=m_df, ax=ax, 
                  palette='colorblind', s=10, alpha=0.5)
    sns.barplot(x='arousal', y=metric, data=m_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    if ai==0:
        ax.legend_.remove()
        
ax.legend(bbox_to_anchor=(1.1, 1))
pl.subplots_adjust(right=0.5, wspace=0.5, hspace=0.2, top=0.8, bottom=0.2)


figname = 'split_%s_%s-test_classify_%iv%i_C-%.2f, %i-iters' % (face_feature, visual_area, m0, m100, C_value, n_iterations)
pl.savefig(os.path.join(decoding_dir, '%s.svg' %figname))


<IPython.core.display.Javascript object>

#### snapshot 2

In [202]:
skip = ['20190420_JC076_fov1', '20190507_JC083_fov1']

fig, axn = pl.subplots(1, 2, figsize=(12,4), dpi=dpi)
for ai, (ax, metric) in enumerate(zip(axn.flat, ['heldout_aMI', 'heldout_test_score'])):

    # Create dataframe
    m_ = pd.concat([full_df[[metric]], lo_df[metric], hi_df[metric]], axis=1)
    m_.columns = ['all', 'low', 'high']
    m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})
    m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
    m_df = m_df[~m_df['datakey'].isin(skip)]
    
    # Plot each fov
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=m_df, ax=ax, 
                  palette='colorblind', s=10, alpha=0.5)
    sns.barplot(x='arousal', y=metric, data=m_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    if ai==0:
        ax.legend_.remove()
        
ax.legend(bbox_to_anchor=(1.1, 1))
pl.subplots_adjust(right=0.5, wspace=0.5, hspace=0.2, top=0.8, bottom=0.2)

<IPython.core.display.Javascript object>

In [80]:
skip = ['20190420_JC076_fov1', '20190507_JC083_fov1']

fig, axn = pl.subplots(1, 2, figsize=(12,4), dpi=dpi)
for ai, (ax, metric) in enumerate(zip(axn.flat, ['heldout_aMI', 'heldout_test_score'])):

    # Create dataframe
    m_ = pd.concat([full_df[[metric]], lo_df[metric], hi_df[metric]], axis=1)
    m_.columns = ['all', 'low', 'high']
    m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})
    m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
    m_df = m_df[~m_df['datakey'].isin(skip)]
    
    # Plot each fov
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=m_df, ax=ax, 
                  palette='colorblind', s=10)
    sns.barplot(x='arousal', y=metric, data=m_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    if ai==0:
        ax.legend_.remove()
        
ax.legend(bbox_to_anchor=(1.1, 1))
pl.subplots_adjust(right=0.5, wspace=0.5, hspace=0.2, top=0.8, bottom=0.2)

<IPython.core.display.Javascript object>

In [305]:
fig, ax = pl.subplots(figsize=(6,4), dpi=dpi)
sns.pointplot(x='arousal', y=metric, hue='datakey', data=m_df, ax=ax, palette='colorblind',
             s=10)
sns.barplot(x='arousal', y=metric, data=m_df, ax=ax, 
            edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
ax.legend(bbox_to_anchor=(1.1, 1))

pl.subplots_adjust(right=0.5)

<IPython.core.display.Javascript object>

In [124]:
# multi_comp_test='fdr_bh'
# # Annotate w stats
# print("********* [%s|%s] *********" % (experiment, metric))
# #y_loc = 16 if fov_means else 23
# statresults = aggr.do_mannwhitney(m_df, metric=metric, multi_comp_test=multi_comp_test)
# aggr.annotate_stats_areas(statresults, ax) #, y_loc=y_loc, offset=1)


# All areas

In [238]:
report_metric = 'heldout_test_score'
m0=0
m100=106
C_value=1
n_iterations = 100
skip = [] # ['20190502_JC076_fov1'] # ['20190420_JC076_fov1']
use_quartiles = True

d_list=[]; d_list_chance=[]; 
n_datakeys = len(dsets['datakey'].unique())
for di, ((visual_area, datakey), ds) in enumerate(dsets.groupby(['visual_area', 'datakey'])):
    if datakey in skip:
        continue
    if di % 5 == 0:
        print("%i of %i datakeys" % (int(di+1), n_datakeys))
        
    print("----- %s -----" % datakey)
    neuraldf = MEANS[datakey].copy() 
    
    # ------ STIMULUS INFO -----------------------------------------
    session, animalid, fov_ = datakey.split('_')
    fovnum = int(fov_[3:])
    obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
    sdf = obj.get_stimuli()
    
    # ------ PUPIL --------------------------------------------------
    pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()

    #### Resample pupil
    desired_nframes = 60 #len(out_ixs) # 60
    binned_pupil = resample_pupil_traces(pupiltraces, feature_name=face_feature,
                                         in_rate=20.0, out_rate=new_framerate, 
                                         min_nframes=desired_nframes)
    trials_ = sorted(pupiltraces['trial'].unique())
    frames_ = np.arange(0, desired_nframes)
    pupil_trialmat = pd.DataFrame(np.vstack([p[face_feature].values for trial, p in binned_pupil.groupby(['trial'])]),
                                  index=trials_, columns=frames_)
    pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
                                                                   'level_1': 'frame',
                                                                   0: 'pupil'})
    pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
    
    # ------ Split trials by quantiles ---------------------------------
    if use_quartiles:
        pupil_quantiles = pupildf.pupil.quantile([0.25,0.5,0.75])
        low_pupil_thr = pupil_quantiles[0.25]
        high_pupil_thr = pupil_quantiles[0.75]
        pupil_low = pupildf[pupildf['pupil']<=low_pupil_thr].copy()
        pupil_high = pupildf[pupildf['pupil']>=high_pupil_thr].copy()
    else:
        pupildf['quantile'] = pd.qcut(pupildf['pupil'], 3, labels=False)
        pupil_low = pupildf[pupildf['quantile']==0].copy()
        pupil_high = pupildf[pupildf['quantile']==2].copy()

    # Get trial indices of low/high pupil 
    low_trial_ixs = pupil_low['trial'].unique()
    high_trial_ixs = pupil_high['trial'].unique()
    all_trial_ixs = pupildf['trial'].unique()
    
    # Fit ALL
    tmp_d_list=[]
    tmp_shuf_list = []
    for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
        curr_data = neuraldf.loc[curr_trial_ixs].copy()
        iterdict, iterdict_chance = fit_classifier_bootstrap(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
                                      cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)
        iterdict['arousal'] = arousal
        iterdict_chance['arousal'] = arousal
        tmp_d_list.append(pd.DataFrame(iterdict, index=[i])) #df)
        tmp_shuf_list.append(pd.DataFrame(iterdict_chance, index=[i])) #df_chance)
        print("%s: %.2f (chance=%.2f)" % (arousal, iterdict[report_metric], iterdict_chance[report_metric]))

        i+=1
        
    tmp_df = pd.concat(tmp_d_list, axis=0)
    metadict = {'visual_area': visual_area, 'datakey': datakey}
    tmp_df = putils.add_meta_to_df(tmp_df, metadict)
    print(tmp_df.shape)
    tmp_chance = pd.concat(tmp_shuf_list, axis=0)
    tmp_chance = putils.add_meta_to_df(tmp_chance, metadict)
    
    d_list.append(tmp_df)
    d_list_chance.append(tmp_chance)

df = pd.concat(d_list, axis=0)
df_chance = pd.concat(d_list_chance, axis=0)

1 of 35 datakeys
----- 20190502_JC076_fov1 -----
Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
all: 0.54 (chance=0.50)
low: 0.58 (chance=0.51)
high: 0.55 (chance=0.49)
(3, 41)
----- 20190602_JC091_fov1 -----
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
all: 0.72 (chance=0.49)
low: 0.79 (chance=0.48)
high: 0.72 (chance=0.48)
(3, 41)
----- 20190605_JC090_fov1 -----
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
all: 0.53 (chance=0.49)
low: 0.62 (chance=0.50)
high: 0.61 (chance=0.50)
(3, 41)
----- 20190606_JC091_fov1 -----
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
all: 0.59 (chance=0.51)
low: 0.62 (chance=0.50)
high: 0.43 (chance=0.49)
(3, 41)
----- 20190607_JC091_fov1 -----
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
all: 0.54 (chance=0.50)
low: 0.48 (chance=0.48)
high: 0.47 (chance=0.54)
(3, 41)
6 of 35 datakeys
----- 20190609_JC099_fov1 -----
Creating blobs object [JC099|20190609|FOV1_zoom2p0x|tr

### Get cell counts

In [239]:
cell_counts = pd.concat([pd.DataFrame({'visual_area':visual_area, 
                                       'datakey': datakey, 
                                       'n_cells': MEANS[datakey].shape[1]-1}, index=[i]) \
                    for i, ((visual_area, datakey), g) in enumerate(dsets.groupby(['visual_area', 'datakey']))], axis=0)

### Save data

In [252]:
results_fpath = os.path.join(decoding_dir, 'results.pkl')
params_fpath = os.path.join(decoding_dir, 'params.json')

params = {'m0': m0, 'm100': m100, 'C_value': C_value, 'n_iterations': n_iterations}
results = {'df': df, 'df_chance': df_chance, 'cell_counts': cell_counts}

with open(results_fpath, 'wb') as f:
    pkl.dump(results, f, protocol=pkl.HIGHEST_PROTOCOL)
    
with open(params_fpath, 'w') as f:
    json.dump(params, f, indent=4)
    


In [279]:
# report_metric = 'heldout_test_score'
# m0=0
# m100=106
# C_value=1
# n_iterations = 100
# skip = [] # ['20190502_JC076_fov1'] # ['20190420_JC076_fov1']
# use_quartiles = True

# d_list=[]; d_list_chance=[]; 
# n_datakeys = len(dsets['datakey'].unique())

# for di, ((visual_area, datakey), ds) in enumerate(dsets.groupby(['visual_area', 'datakey'])):
#     if di % 5 == 0:
#         print("%i of %i datakeys" % (int(di+1), n_datakeys))
#     print("----- [%s] %s -----" % (visual_area, datakey))
#     neuraldf = MEANS[datakey].copy() 
    
#     # ------ STIMULUS INFO -----------------------------------------
#     session, animalid, fov_ = datakey.split('_')
#     fovnum = int(fov_[3:])
#     obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
#     sdf = obj.get_stimuli()
    
#     # ------ PUPIL --------------------------------------------------
#     pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()

#     #### Resample pupil
#     desired_nframes = 60 #len(out_ixs) # 60
#     binned_pupil = resample_pupil_traces(pupiltraces, feature_name=face_feature,
#                                          in_rate=20.0, out_rate=new_framerate, 
#                                          min_nframes=desired_nframes)
#     trials_ = sorted(pupiltraces['trial'].unique())
#     frames_ = np.arange(0, desired_nframes)
#     pupil_trialmat = pd.DataFrame(np.vstack([p[face_feature].values for trial, p in binned_pupil.groupby(['trial'])]),
#                                   index=trials_, columns=frames_)
#     pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
#                                                                    'level_1': 'frame',
#                                                                    0: 'pupil'})
#     pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
    
#     # ------ Split trials by quantiles ---------------------------------
#     if use_quartiles:
#         pupil_quantiles = pupildf.pupil.quantile([0.25,0.5,0.75])
#         low_pupil_thr = pupil_quantiles[0.25]
#         high_pupil_thr = pupil_quantiles[0.75]
#         pupil_low = pupildf[pupildf['pupil']<=low_pupil_thr].copy()
#         pupil_high = pupildf[pupildf['pupil']>=high_pupil_thr].copy()
#     else:
#         pupildf['quantile'] = pd.qcut(pupildf['pupil'], 3, labels=False)
#         pupil_low = pupildf[pupildf['quantile']==0].copy()
#         pupil_high = pupildf[pupildf['quantile']==2].copy()

#     # Get trial indices of low/high pupil 
#     low_trial_ixs = pupil_low['trial'].unique()
#     high_trial_ixs = pupil_high['trial'].unique()
#     all_trial_ixs = pupildf['trial'].unique()
    
#     tmp_d_list = []
#     tmp_shuf_list = []
#     for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
#         curr_data = neuraldf.loc[curr_trial_ixs].copy()
#         df_, ch_ = fit_svm(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
#                                 cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)
#         df_['arousal'] = [arousal for _ in np.arange(0, len(df_))]
#         ch_['arousal'] = [arousal for _ in np.arange(0, len(ch_))]

#         tmp_d_list.append(df_)
#         tmp_shuf_list.append(ch_)
#         print("%s: %.2f (chance=%.2f)" % (arousal, df_.mean()[report_metric], ch_.mean()[report_metric]))

#     tmp_df = pd.concat(tmp_d_list, axis=0)
#     tmp_chance = pd.concat(tmp_shuf_list, axis=0)

#     metadict={'visual_area': visual_area, 'datakey': datakey}
#     tmp_df = aggr.add_meta_to_df(tmp_df, metadict)
#     tmp_chance = aggr.add_meta_to_df(tmp_chance, metadict)

#     d_list.append(tmp_df)
#     d_list_chance.append(tmp_chance)

# df = pd.concat(d_list, axis=0)
# df_chance = pd.concat(d_list_chance, axis=0)

1 of 25 datakeys
----- [Li] 20190502_JC076_fov1 -----
Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
... getting stimulus info for: combined_blobs_static
all: 0.54 (chance=0.49)
low: 0.56 (chance=0.50)
high: 0.52 (chance=0.50)
----- [Li] 20190602_JC091_fov1 -----
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
... getting stimulus info for: combined_blobs_static
all: 0.73 (chance=0.51)
low: 0.78 (chance=0.51)
high: 0.72 (chance=0.50)
----- [Li] 20190605_JC090_fov1 -----
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
... getting stimulus info for: combined_blobs_static
all: 0.54 (chance=0.50)
low: 0.61 (chance=0.51)
high: 0.60 (chance=0.50)
----- [Li] 20190606_JC091_fov1 -----
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
... getting stimulus info for: combined_blobs_static
all: 0.60 (chance=0.49)
low: 0.63 (chance=0.51)
high: 0.43 (chance=0.50)
----- [Li] 20190609_JC099_fov1 -----
Creating blobs object [JC099|20190609|FOV1

In [259]:
min_ncells=50

too_few = cell_counts[cell_counts['n_cells']<min_ncells]['datakey'].unique()
too_few

array(['20190617_JC099_fov1', '20191111_JC120_fov1'], dtype=object)

In [260]:
'20190618_JC097_fov1' in cell_counts['datakey'].unique()


True

In [261]:
#few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
skip = ['20190507_JC083_fov1', '20190617_JC097_fov1', '20190510_JC083_fov1',
        '20190618_JC097_fov1', '20190508_JC083_fov1']
# skip=[]
skip.extend(too_few)

metric='heldout_test_score'
#metric='heldout_log2MI'

plotdf = df[~df['datakey'].isin(skip)]

ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,8), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat[::-1], plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                     s=10, join=True, scale=0.5)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    plotdf.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')

    ax.set_title(visual_area, loc='left', fontsize=16)
    #ax.axhline(y=0.5, color='k', linestyle=':')
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
    
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

putils.label_figure(fig, data_id)
figname = '%s_%s_by-area_min-%i-cells' % (metric, face_feature, min_ncells)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
print(decoding_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding', 'heldout_test_score_pupil_area_by-area_min-50-cells')


In [245]:
#few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
# skip = ['20190507_JC083_fov1', '20190617_JC097_fov1',
#         '20190618_JC097_fov1', '20190508_JC083_fov1']
skip=[]
skip.extend(too_few)

metric='heldout_test_score'
#metric='heldout_log2MI'

plotdf = df[~df['datakey'].isin(skip)]

ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,8), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat[::-1], plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                     s=10, join=True, scale=0.5)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    plotdf.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')

    ax.set_title(visual_area, loc='left', fontsize=16)
    #ax.axhline(y=0.5, color='k', linestyle=':')
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
    
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

<IPython.core.display.Javascript object>

In [286]:
cell_counts

Unnamed: 0,datakey,n_cells,visual_area
0,20190502_JC076_fov1,150,Li
1,20190602_JC091_fov1,241,Li
2,20190605_JC090_fov1,129,Li
3,20190606_JC091_fov1,136,Li
4,20190609_JC099_fov1,78,Li
5,20190617_JC099_fov1,25,Li
6,20191018_JC113_fov1,64,Li
7,20191105_JC117_fov1,79,Li
8,20191111_JC120_fov1,26,Li
9,20190430_JC078_fov1,187,Lm


# Load RF data

In [131]:
from pipeline.python.retinotopy import fit_2d_rfs as fitrf
from pipeline.python.rois.utils import load_roi_coords

In [132]:
reliable_only = True
#rfs_filter_by = None # Get all # match <EXP> datasets # 'drop_repeats'
rf_fit_thr = 0.05
fit_desc = fitrf.get_fit_desc(response_type=response_type)
reliable_str = 'reliable' if reliable_only else ''
rf_str = 'match%s_%s' % (experiment, reliable_str)
print(rf_str)


#### Get non-repeated FOV datasets
rf_dsets = sdata_exp[(sdata_exp['datakey'].isin(stim_datakeys))
                     & (sdata_exp['experiment'].isin(['rfs', 'rfs10']))].copy()
rf_dpaths, no_fits = rfutils.get_fit_dpaths(rf_dsets, traceid=traceid, fit_desc=fit_desc)
print("%i with no fits" % len(no_fits))

rfdf = rfutils.aggregate_rf_data(rf_dpaths, reliable_only=reliable_only, fit_desc=fit_desc,
                                            traceid=traceid, verbose=False)
rfdf = rfdf.reset_index(drop=True)

matchblobs_reliable
N dpaths: 31, N unfit: 0
N datasets included: 31, N sessions excluded: 4
0 with no fits


#### Get position info

In [133]:
aggr_rf_dir = os.path.join(aggregate_dir, 'receptive-fields', '%s__%s' % (traceid, fit_desc))
print(aggr_rf_dir)
rf_filter_by=None

#df_fpath = os.path.join(aggr_rf_dir, 'fits_and_coords_%s_%s_%s.pkl' % (rf_filter_by, reliable_str, rfname))
df_fpath =  os.path.join(aggr_rf_dir, 'fits_and_coords_%s_%s.pkl' % (rf_filter_by, reliable_str))
print(df_fpath)

get_positions = False
if os.path.exists(df_fpath) and get_positions is False:
    print("Loading existing RF coord conversions...")
    try:
        with open(df_fpath, 'rb') as f:
            df= pkl.load(f)
        rfdf = df['df']
    except Exception as e:
        get_positions = True
print(get_positions)

/n/coxfs01/julianarhee/aggregate-visual-areas/receptive-fields/traces001__fit-2dgaus_dff-no-cutoff
/n/coxfs01/julianarhee/aggregate-visual-areas/receptive-fields/traces001__fit-2dgaus_dff-no-cutoff/fits_and_coords_None_reliable.pkl
Loading existing RF coord conversions...
False


In [134]:
if get_positions:
    print("Calculating RF coord conversions...")
    pos_params = ['fov_xpos', 'fov_xpos_pix', 'fov_ypos', 'fov_ypos_pix', 'ml_pos','ap_pos']
    for p in pos_params:
        rfdf[p] = ''
    p_list=[]
    for (animalid, session, fovnum), g in rfdf.groupby(['animalid', 'session', 'fovnum']):
        fcoords = load_roi_coords(animalid, session, 'FOV%i_zoom2p0x' % fovnum, 
                                  traceid=traceid, create_new=False)

        for ei, e_df in g.groupby(['experiment']):
            cell_ids = e_df['cell'].unique()
            p_ = fcoords['roi_positions'].loc[cell_ids]
            for p in pos_params:
                rfdf[p][e_df.index] = p_[p].values
    # with open(df_fpath, 'wb') as f:
    #     pkl.dump(expdf, f, protocol=pkl.HIGHEST_PROTOCOL)

In [262]:
rfdf.head()

Unnamed: 0,r2,fwhm_x,fwhm_y,theta,x0,y0,animalid,cell,datakey,experiment,...,fwhm_avg,std_avg,area,datakey_ix,fov_xpos,fov_xpos_pix,fov_ypos,fov_ypos_pix,ml_pos,ap_pos
0,0.826039,23.640365,13.12577,0.117896,6.700889,-8.282818,JC076,0,20190502_JC076_fov1,rfs,...,18.383068,7.822582,176.519678,0,85.2665,37,218.928,115,753.872,1092.33
1,0.72507,43.796452,29.207447,0.234407,-22.165338,-2.970096,JC076,84,20190502_JC076_fov1,rfs,...,36.501949,15.532744,727.690433,0,638.347,277,605.382,318,367.418,539.253
2,0.701576,47.503138,24.790225,0.458457,15.301437,31.251545,JC076,115,20190502_JC076_fov1,rfs,...,36.146682,15.381567,669.91072,0,1034.72,449,788.139,414,184.661,142.879
3,0.7814,23.130716,44.809417,0.155088,2.496442,37.699467,JC076,120,20190502_JC076_fov1,rfs,...,33.970067,14.455348,589.620427,0,956.368,415,761.487,400,211.313,221.232
4,0.627347,32.13207,25.065036,5.892774,0.014677,17.244112,JC076,135,20190502_JC076_fov1,rfs,...,28.598553,12.169597,458.164233,0,1115.38,484,740.546,389,232.254,62.2215


## Calculate overlap with stimulus (all RFs)

In [142]:
rf_fit_params = ['cell', 'std_x', 'std_y', 'theta', 'x0', 'y0']

o_list=[]
for (visual_area, animalid, session, fovnum, datakey, rfname), g in rfdf.groupby(['visual_area', 'animalid', 'session', 'fovnum', 'datakey', 'experiment']):

    if datakey not in MEANS.keys(): # No blob data
        continue
        
    # Convert RF fit params to polygon
    g.index = g['cell'].values
    rf_polys = rfutils.rfs_to_polys(g[rf_fit_params])

    # Get stimulus info
    S = util.Session(animalid, session, 'FOV%i_zoom2p0x' % fovnum)
    stim_xpos, stim_ypos = S.get_stimulus_coordinates(experiments=[experiment])
    stim_sizes = S.get_stimulus_sizes(size_tested=[experiment])

    # Convert stimuli to polyon bounding boxes
    stim_polys = [(blob_sz, rfutils.stimsize_poly(blob_sz, xpos=stim_xpos, ypos=stim_ypos)) \
                  for blob_sz in stim_sizes[experiment]]
    
    # Get all pairwise overlaps (% of smaller ellipse that overlaps larger ellipse)
    overlaps = pd.concat([rfutils.get_proportion_overlap(rf_poly, stim_poly) \
                for stim_poly in stim_polys for rf_poly in rf_polys]).rename(columns={'row': 'cell', 'col': 'stim_size'})
    metadict={'visual_area': visual_area, 'animalid': animalid, 
              'session': session, 'fovnum': fovnum, 'datakey': datakey, 'rfname': rfname}
    o_ = putils.add_meta_to_df(overlaps, metadict)
    o_list.append(o_)

stim_overlaps = pd.concat(o_list, axis=0).reset_index(drop=True)

Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (-5, 8)
Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (15, 0)
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -8)
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -8)
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -8)
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -5)
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
Creating blobs obj

#### Select subset of RF data

In [136]:
r_list=[]
for datakey, expdf in dsets.groupby(['datakey']):
    # Get active blob cells
    exp_rids = sorted([r for r in MEANS[datakey].columns if putils.isnumber(r)])
    
    # Get current fov's RFs
    rdf = rfdf[rfdf['datakey']==datakey].copy()
    if len(rdf)==0:
        print("--- no RF! %s" % datakey)
        continue
    
    # If have both rfs/rfs10, pick the best one
    if len(rdf['experiment'].unique())>1:
        rf_rids = rdf[rdf['experiment']=='rfs']['cell'].unique()
        rf10_rids = rdf[rdf['experiment']=='rfs10']['cell'].unique()
        same_as_rfs = np.intersect1d(rf_rids, exp_rids)
        same_as_rfs10 = np.intersect1d(rf10_rids, exp_rids)
        rfname = 'rfs' if len(same_as_rfs) > len(same_as_rfs10) else 'rfs10'
        print("(%s) Selecting %s, overlappig rfs, %i | rfs10, %i (of %i cells)" 
              % (datakey, rfname, len(same_as_rfs), len(same_as_rfs10), len(exp_rids)))
        r_list.append(rdf[rdf['experiment']==rfname])
    else:
        r_list.append(rdf)
RFs = pd.concat(r_list, axis=0)


(20190602_JC091_fov1) Selecting rfs10, overlappig rfs, 33 | rfs10, 59 (of 241 cells)
(20190613_JC097_fov1) Selecting rfs10, overlappig rfs, 61 | rfs10, 88 (of 166 cells)
(20190616_JC097_fov1) Selecting rfs10, overlappig rfs, 98 | rfs10, 119 (of 296 cells)
(20190618_JC097_fov1) Selecting rfs10, overlappig rfs, 13 | rfs10, 24 (of 116 cells)
(20190622_JC085_fov1) Selecting rfs10, overlappig rfs, 26 | rfs10, 97 (of 203 cells)
(20191006_JC110_fov1) Selecting rfs10, overlappig rfs, 46 | rfs10, 72 (of 217 cells)


In [155]:
RFs[['visual_area', 'datakey']].drop_duplicates()['visual_area'].value_counts()

Li    9
Lm    8
V1    8
Name: visual_area, dtype: int64

## Get cells in common to both BLOBS-responsive and RF-fits

In [263]:
d_list = []
c_list = []
for (visual_area, datakey), rdf in stim_overlaps.groupby(['visual_area', 'datakey']):
    if datakey not in MEANS.keys():
        print("No <%s> for RF key: %s" % (experiment, datakey))
        continue
    
    # Get EXPERIMENT responsive cells
    exp_rids = [r for r in MEANS[datakey].columns if putils.isnumber(r)]
    
    # If have both rfs/rfs10, pick the best one
    if len(rdf['rfname'].unique())>1:
        rf_rids = rdf[rdf['rfname']=='rfs']['cell'].unique()
        rf10_rids = rdf[rdf['rfname']=='rfs10']['cell'].unique()
        same_as_rfs = np.intersect1d(rf_rids, exp_rids)
        same_as_rfs10 = np.intersect1d(rf10_rids, exp_rids)
        rfname = 'rfs' if len(same_as_rfs) > len(same_as_rfs10) else 'rfs10'
        print("(%s) Selecting %s, overlappig rfs, %i | rfs10, %i (of %i cells)" 
              % (datakey, rfname, len(same_as_rfs), len(same_as_rfs10), len(exp_rids)))
        curr_rfdf = rdf[rdf['rfname']==rfname].copy()
    else:
        curr_rfdf = rdf.copy()
    
    rf_rids = curr_rfdf['cell'].unique()
    # Get cells in common
    common_rids = np.intersect1d(exp_rids, rf_rids)
    print("[%s] %s, %i cells" % (visual_area, datakey, len(common_rids)))
    c_list.append(pd.DataFrame({'visual_area': visual_area, 'datakey': datakey, 
                                'rfname': rfname, 'n_cells': len(common_rids)}, index=[i]))
    
    d_list.append(rdf[rdf['cell'].isin(common_rids)].copy())
    
has_blobs = pd.concat(d_list, axis=0)
print(has_blobs.shape)

common_counts = pd.concat(c_list, axis=0)


[Li] 20190502_JC076_fov1, 15 cells
(20190602_JC091_fov1) Selecting rfs10, overlappig rfs, 33 | rfs10, 59 (of 241 cells)
[Li] 20190602_JC091_fov1, 59 cells
[Li] 20190605_JC090_fov1, 9 cells
[Li] 20190606_JC091_fov1, 26 cells
[Li] 20190607_JC091_fov1, 41 cells
[Li] 20190609_JC099_fov1, 22 cells
[Li] 20190612_JC099_fov1, 21 cells
[Li] 20190614_JC091_fov1, 52 cells
[Li] 20190617_JC099_fov1, 4 cells
[Li] 20191008_JC091_fov1, 20 cells
[Li] 20191018_JC113_fov1, 6 cells
[Li] 20191105_JC117_fov1, 0 cells
[Li] 20191111_JC120_fov1, 1 cells
[Lm] 20190430_JC078_fov1, 31 cells
[Lm] 20190504_JC078_fov1, 10 cells
[Lm] 20190506_JC080_fov1, 30 cells
[Lm] 20190508_JC083_fov1, 61 cells
[Lm] 20190509_JC078_fov1, 11 cells
[Lm] 20190512_JC083_fov1, 39 cells
[Lm] 20190513_JC078_fov1, 17 cells
[Lm] 20190517_JC083_fov1, 20 cells
[Lm] 20190525_JC084_fov1, 36 cells
[Lm] 20190603_JC080_fov1, 53 cells
(20190618_JC097_fov1) Selecting rfs10, overlappig rfs, 13 | rfs10, 24 (of 116 cells)
[Lm] 20190618_JC097_fov1, 24 c

In [264]:
has_blobs[['visual_area', 'datakey']].drop_duplicates()['visual_area'].value_counts()

Li    12
Lm    12
V1    11
Name: visual_area, dtype: int64

In [152]:
stim_overlaps.head()

Unnamed: 0,area_overlap,stim_size,perc_overlap,cell,animalid,session,rfname,datakey,visual_area,fovnum
0,0.778584,10,0.007786,0.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
1,100.0,10,1.0,84.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
2,100.0,10,1.0,115.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
3,96.249421,10,0.962494,120.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
4,100.0,10,1.0,135.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1


In [266]:
overlap_thr = 0.8
has_blobs[has_blobs['perc_overlap']>=overlap_thr][['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey,cell
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1
Li,241,241
Lm,273,273
V1,734,734


In [265]:
curr_min_pass = has_blobs[has_blobs['perc_overlap']>=overlap_thr][['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count()['cell'].min()
print("Min N pass: %i (overlap thr=%.2f)" % (curr_min_pass, overlap_thr))

Min N pass: 241 (overlap thr=0.80)


In [216]:
# SDF={}
# for datakey, g in globaldf.groupby(['datakey']):
#     session, animalid, fov_ = datakey.split('_')
#     fovnum = int(fov_[3:])
#     obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
#     sdf = obj.get_stimuli()
#     SDF[datakey] = sdf

In [269]:
overlap_thr=0.8
n_iterations=100
NCELLS =  60
min_ncells=20

#for overlap_thr, NCELLS in ncells_dict.items():
print("-------- Overlap: %.2f --------" % overlap_thr)
# Get list of eligible cells from all FOVs
# globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 



-------- Overlap: 0.80 --------


In [270]:
# for k, d in SDF.items():
#     if len(np.where(sdf['morphlevel']!=d['morphlevel'])[0]) > 0:
#         print(k)
#     if len(np.where(sdf['size']!=d['size'])[0]) > 0:
#         print(k)

In [271]:
overlap_thr=0.8
pass_dsets = has_blobs[(has_blobs['perc_overlap']>=overlap_thr)].copy()

cell_counts = pass_dsets[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area', 'datakey']).count().reset_index()
cell_counts = cell_counts.rename(columns={'cell': 'n_cells'})
cell_counts

Unnamed: 0,visual_area,datakey,n_cells
0,Li,20190502_JC076_fov1,14
1,Li,20190602_JC091_fov1,57
2,Li,20190605_JC090_fov1,8
3,Li,20190606_JC091_fov1,21
4,Li,20190607_JC091_fov1,39
5,Li,20190609_JC099_fov1,20
6,Li,20190612_JC099_fov1,18
7,Li,20190614_JC091_fov1,45
8,Li,20190617_JC099_fov1,1
9,Li,20191008_JC091_fov1,12


In [272]:
min_ncells =20
too_few_cells = cell_counts[cell_counts['n_cells'] < min_ncells]['datakey'].unique()
print("%i datasets, too few (min=%i cells)" % (len(too_few_cells), min_ncells))
#print too_few_cells

cell_counts[~cell_counts['datakey'].isin(too_few_cells)]

13 datasets, too few (min=20 cells)


Unnamed: 0,visual_area,datakey,n_cells
1,Li,20190602_JC091_fov1,57
3,Li,20190606_JC091_fov1,21
4,Li,20190607_JC091_fov1,39
5,Li,20190609_JC099_fov1,20
7,Li,20190614_JC091_fov1,45
12,Lm,20190430_JC078_fov1,24
14,Lm,20190506_JC080_fov1,27
15,Lm,20190508_JC083_fov1,52
17,Lm,20190512_JC083_fov1,33
20,Lm,20190525_JC084_fov1,30


In [273]:
pass_dsets.head()

Unnamed: 0,area_overlap,stim_size,perc_overlap,cell,animalid,session,rfname,datakey,visual_area,fovnum
1,100.0,10,1.0,84.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
2,100.0,10,1.0,115.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
3,96.249421,10,0.962494,120.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
4,100.0,10,1.0,135.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1
6,100.0,10,1.0,169.0,JC076,20190502,rfs,20190502_JC076_fov1,Li,1


In [274]:
curr_dkeys = pass_dsets['datakey'].unique()
curr_missing_dlc = [k for k in curr_dkeys if '%s_%s' % (k, experiment) not in PUPIL.keys()]
curr_missing_dlc


['20191008_JC091_fov1']

In [177]:
reload(dlcutils)

<module 'pipeline.python.classifications.dlc_utils' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/dlc_utils.py'>

In [178]:
no_dlc = []
for dkey in curr_missing_dlc:
    if '%s_%s' % (dkey, experiment) in PUPIL.keys():
        continue
    print("[%s] getting extracted feature: %s" % (dkey, face_feature))
    session, animalid, fov = dkey.split('_')
    fovnum = int(fov[3:])
    fov = 'FOV%i_zoom2p0x' % fovnum

    #### Get labels
    labels_dfile = glob.glob(os.path.join(rootdir, animalid, session, fov, 'combined_*%s*' % experiment, 'traces',
                          '%s*' % traceid, 'data_arrays', 'labels.npz'))[0]
    l = np.load(labels_dfile)
    labels = pd.DataFrame(data=l['labels_data'], columns=l['labels_columns'])

    #### Load pupil data
    facemeta, pupildata = dlcutils.load_pose_data(animalid, session, fovnum, experiment, dlc_results_dir, 
                                                  feature_list=[face_feature], epoch='trial_alignment', 
                                                  pre_ITI_ms=iti_pre_ms, post_ITI_ms=iti_post_ms)
    
    if pupildata is None:
        print("----- skipping %s (no dlc)" % dkey)
        no_dlc.append(dkey)
        continue
        
    #### Parse pupil data into traces
    pupiltraces = dlcutils.get_pose_traces(facemeta, pupildata, labels, feature='pupil_area')

    #### Add to dict
    datakey ='%s_%s_fov%i_%s' % (session, animalid, fovnum, experiment)  
    PUPIL[datakey] = pupiltraces


# This is a dict, keys are datakeys
with open(pupil_fpath, 'wb') as f:
    pkl.dump(PUPIL, f, protocol=pkl.HIGHEST_PROTOCOL)


[20191008_JC091_fov1] getting extracted feature: pupil_area
Loading pose data (dlc)
('[blobs] Found runs:', ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13'])
... finding movies for dset: 20191008_JC091_fov1_blobs
(0, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f1_20191008114954692819')
(1, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f2_20191008130145399159')
(2, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f3_20191008135135055724')
(3, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f4_20191008143825761217')
(4, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f5_20191008155442610578')
(5, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f6_20191008163153063257')
(6, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f7_20191008170323771409')
(7, '/n/coxfs01/2p-data/eyetracker_tmp/20191008_JC091_fov1_blobs_f8_20191008174953035553')
(8, '/n/coxfs01/2p-data/eyetracker_tmp

...curr run: 3 [20190511_JC083_fov1_blobs_f3DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 4 [20190511_JC083_fov1_blobs_f4DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 5 [20190511_JC083_fov1_blobs_f5DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
...curr run: 6 [20190511_JC083_fov1_blobs_f6DLC_resnet50_facetrackingJan25shuffle1_391800.h5]
... done parsing!
Parsing pose data with MW
Missing 1 trials total


In [275]:
report_metric = 'heldout_test_score'
m0=0
m100=106
C_value=1
n_iterations = 100
skip = []
use_quartiles = True

n_datakeys = len(pass_dsets['datakey'].unique())
metric = 'heldout_test_score'

d_list=[]; d_list_chance=[]; dkeys=[]; vkeys=[];with open(params_fpath, 'w') as f:
    json.dump(params, f, indent=4)
n_datakeys = len(pass_dsets['datakey'].unique())
for di, ((visual_area, datakey), curr_stimoverlaps) in enumerate(pass_dsets.groupby(['visual_area', 'datakey'])):
    if datakey in too_few_cells:
        print("(%s) Too few pass: %i"% (datakey, len(curr_stimoverlaps['cell'].unique())))
        continue

    if di % 5 == 0:
        print("%i of %i datakeys" % (int(di+1), n_datakeys))
        
    if '%s_%s' % (datakey, experiment) not in PUPIL.keys():
        print("Missing PUPIL: %s" % datakey)
        continue
        
    # ------ Neural ----------------------------------------------
    # Only select cells that pass conditions
    print("**** [%s] %s *****" % (visual_area, datakey))
    config_list = MEANS[datakey]['config']
    roi_list = curr_stimoverlaps['cell'].unique()
    ncells_t = MEANS[datakey].shape[1]-1
    neuraldf = MEANS[datakey][roi_list].copy() 
    neuraldf['config'] = config_list
    # sdf = SDF[datakey].copy()
    print("... (%s | %s) %i of %i cells" % (visual_area, datakey, len(roi_list), ncells_t))    
    
    # ------ STIMULUS INFO -----------------------------------------
    session, animalid, fov_ = datakey.split('_')
    fovnum = int(fov_[3:])
    obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
    sdf = obj.get_stimuli()
    
    # ------ PUPIL --------------------------------------------------
    pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()

    #### Resample pupil
    desired_nframes = 60 #len(out_ixs) # 60
    binned_pupil = resample_pupil_traces(pupiltraces, feature_name=face_feature,
                                         in_rate=20.0, out_rate=new_framerate, 
                                         min_nframes=desired_nframes, stim_on=stim_on)
    trials_ = sorted(pupiltraces['trial'].unique())
    frames_ = np.arange(0, desired_nframes)
    pupil_trialmat = pd.DataFrame(np.vstack([p[face_feature].values for trial, p in binned_pupil.groupby(['trial'])]),
                                  index=trials_, columns=frames_)
    pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
                                                                   'level_1': 'frame',
                                                                   0: 'pupil'})
    pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
    
    # ------ Split trials by quantiles ---------------------------------
    if use_quartiles:
        pupil_quantiles = pupildf.pupil.quantile([0.25,0.5,0.75])
        low_pupil_thr = pupil_quantiles[0.25]
        high_pupil_thr = pupil_quantiles[0.75]
        pupil_low = pupildf[pupildf['pupil']<=low_pupil_thr].copy()
        pupil_high = pupildf[pupildf['pupil']>=high_pupil_thr].copy()
    else:
        pupildf['quantile'] = pd.qcut(pupildf['pupil'], 3, labels=False)
        pupil_low = pupildf[pupildf['quantile']==0].copy()
        pupil_high = pupildf[pupildf['quantile']==2].copy()

    # Get trial indices of low/high pupil 
    low_trial_ixs = pupil_low['trial'].unique()
    high_trial_ixs = pupil_high['trial'].unique()
    all_trial_ixs = pupildf['trial'].unique()
    
    # Fit ALL
    tmp_d_list=[]
    tmp_shuf_list = []
    for arousal, curr_trial_ixs in zip(['all', 'low', 'high'], [all_trial_ixs, low_trial_ixs, high_trial_ixs]):
        curr_data = neuraldf.loc[curr_trial_ixs].copy()
        iterdict, iterdict_chance = fit_classifier_bootstrap(curr_data, sdf, m0=m0, m100=m100, n_iterations=n_iterations,
                                      cv_nfolds=cv_nfolds, test_size=test_size, C_value=C_value)

        iterdict['arousal'] = arousal
        iterdict_chance['arousal'] = arousal
        tmp_d_list.append(pd.DataFrame(iterdict, index=[i])) #df)
        tmp_shuf_list.append(pd.DataFrame(iterdict_chance, index=[i])) #df_chance)
        print("%s: %.2f (chance=%.2f)" % (arousal, iterdict[report_metric], iterdict_chance[report_metric]))

        i+=1
        
    tmp_df = pd.concat(tmp_d_list, axis=0)
    metadict = {'visual_area': visual_area, 'datakey': datakey}
    tmp_df = putils.add_meta_to_df(tmp_df, metadict)
    print(tmp_df.shape)
    tmp_chance = pd.concat(tmp_shuf_list, axis=0)
    tmp_chance = putils.add_meta_to_df(tmp_chance, metadict)
    
    d_list.append(tmp_df)
    d_list_chance.append(tmp_chance)

df = pd.concat(d_list, axis=0)
df_chance = pd.concat(d_list_chance, axis=0)

(20190502_JC076_fov1) Too few pass: 14
**** [Li] 20190602_JC091_fov1 *****
... (Li | 20190602_JC091_fov1) 57 of 241 cells
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
all: 0.74 (chance=0.49)
low: 0.76 (chance=0.49)
high: 0.70 (chance=0.50)
(3, 41)
(20190605_JC090_fov1) Too few pass: 8
**** [Li] 20190606_JC091_fov1 *****
... (Li | 20190606_JC091_fov1) 21 of 136 cells
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
all: 0.57 (chance=0.49)
low: 0.58 (chance=0.49)
high: 0.43 (chance=0.48)
(3, 41)
**** [Li] 20190607_JC091_fov1 *****
... (Li | 20190607_JC091_fov1) 39 of 260 cells
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
all: 0.62 (chance=0.49)
low: 0.50 (chance=0.49)
high: 0.45 (chance=0.53)
(3, 41)
6 of 35 datakeys
**** [Li] 20190609_JC099_fov1 *****
... (Li | 20190609_JC099_fov1) 20 of 78 cells
Creating blobs object [JC099|20190609|FOV1_zoom2p0x|traces001]
all: 0.62 (chance=0.51)
low: 0.57 (chance=0.54)
high: 0.49 (chance=0.49)
(3, 41)

### Save overlap results

In [276]:
results_fpath = os.path.join(decoding_dir, 'results_overlap-thr-%.2f.pkl' % overlap_thr)
# params_fpath = os.path.join(decoding_dir, 'params_overlap.json')

# params = {'m0': m0, 'm100': m100, 'C_value': C_value, 'n_iterations': n_iterations,
#          'overlap_thr': overlap_thr}
results = {'df': df, 'df_chance': df_chance, 'cell_counts': cell_counts, 
           'has_blobs': has_blobs, 'pass_dsets': pass_dsets}

with open(results_fpath, 'wb') as f:
    pkl.dump(results, f, protocol=pkl.HIGHEST_PROTOCOL)
    
# with open(params_fpath, 'w') as f:
#     json.dump(params, f, indent=4)
    


In [278]:
#few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
skip = ['20190507_JC083_fov1', '20190617_JC097_fov1', '20190510_JC083_fov1'] #,
       #'20190618_JC097_fov1', '20190508_JC083_fov1']
# skip=[]
skip.extend(too_few)

#metric='heldout_test_score'
metric='heldout_log2MI'

plotdf = df[~df['datakey'].isin(skip)]

ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,8), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat[::-1], plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                     s=10, join=True, scale=0.5)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    plotdf.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')

    ax.set_title(visual_area, loc='left', fontsize=16)
    #ax.axhline(y=0.5, color='k', linestyle=':')
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
    
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

putils.label_figure(fig, data_id)
figname = '%s_overlap-thr-%.2f_%s_by-area_min-%i-cells' % (metric, overlap_thr, face_feature, min_ncells)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
print(decoding_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/behavior-state/decoding', 'heldout_log2MI_overlap-thr-0.80_pupil_area_by-area_min-20-cells')


In [192]:
#few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
skip = ['20190507_JC083_fov1', '20190617_JC097_fov1', '20190510_JC083_fov1'] #,
       #'20190618_JC097_fov1', '20190508_JC083_fov1']
# skip=[]
skip.extend(too_few)

metric='heldout_test_score'
#metric='heldout_log2MI'

plotdf = df[~df['datakey'].isin(skip)]

ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,8), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat[::-1], plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                     s=10, join=True, scale=0.5)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    plotdf.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')

    ax.set_title(visual_area, loc='left', fontsize=16)
    #ax.axhline(y=0.5, color='k', linestyle=':')
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
    
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

cell_counts


<IPython.core.display.Javascript object>

In [237]:
metric = 'heldout_test_score'

d_list=[]; d_list_chance=[]; dkeys=[]; vkeys=[];
lo_list=[]; lo_list_chance=[]; 
hi_list=[]; hi_list_chance=[]; 

n_datakeys = len(pass_dsets['datakey'].unique())
for di, ((visual_area, datakey), ds) in enumerate(pass_dsets.groupby(['visual_area', 'datakey'])):
    if datakey in too_few_cells:
        print("(%s) Too few pass: %i"% (datakey, len(ds['cell'].unique())))
        continue

    if di % 5 == 0:
        print("%i of %i datakeys" % (int(di+1), n_datakeys))
    
    # ------ Neural ----------------------------------------------
    # Only select cells that pass conditions
    print("**** [%s] %s *****" % (visual_area, datakey))
    config_list = MEANS[datakey]['config']
    roi_list = ds['cell'].unique()
    ncells_t = MEANS[datakey].shape[1]-1
    neuraldf = MEANS[datakey][roi_list].copy() 
    neuraldf['config'] = config_list
    # sdf = SDF[datakey].copy()
    print("... (%s | %s) %i of %i cells" % (visual_area, datakey, len(roi_list), ncells_t))    
    
    # ------ PUPIL --------------------------------------------------
    pupiltraces = PUPIL['%s_%s' % (datakey, experiment)].copy()

    #### Resample pupil
    desired_nframes = 60 #len(out_ixs) # 60
    binned_pupil = resample_pupil_traces(pupiltraces, in_rate=20.0, out_rate=new_framerate, 
                                         min_nframes=desired_nframes)
    trials_ = sorted(pupiltraces['trial'].unique())
    frames_ = np.arange(0, desired_nframes)
    pupil_trialmat = pd.DataFrame(np.vstack([p['pupil'].values for trial, p in binned_pupil.groupby(['trial'])]),
                                  index=trials_, columns=frames_)
    pupil_r = pupil_trialmat.T.unstack().reset_index().rename(columns={'level_0': 'trial', 
                                                                   'level_1': 'frame',
                                                                   0: 'pupil'})
    pupildf = pd.concat([g.mean(axis=0) for t, g in pupil_r.groupby(['trial'])], axis=1).T
    
    # ------ Split trials by quantiles ---------------------------------
    pupil_quantiles = pupildf.pupil.quantile([0.25,0.5,0.75])
    low_pupil_thr = pupil_quantiles[0.25]
    high_pupil_thr = pupil_quantiles[0.75]
    pupil_low = pupildf[pupildf['pupil']<=low_pupil_thr].copy()
    pupil_high = pupildf[pupildf['pupil']>=high_pupil_thr].copy()

    # Get trial indices of low/high pupil 
    low_trial_ixs = sorted(pupil_low['trial'].unique())
    high_trial_ixs = sorted(pupil_high['trial'].unique())
    
    # Fit all
    curr_data = neuraldf.copy()
    df, df_chance = fit_svm(curr_data, sdf, m0=m0, m100=m100, C_value=C_value, n_iterations=n_iterations)
    print("all: %.2f (chance=%.2f)" % (df.mean()[metric], df_chance.mean()[metric]))
    d_list.append(df.mean(axis=0).T)
    d_list_chance.append(df_chance.mean(axis=0).T)
    
    # Fit LOW
    curr_data = neuraldf.loc[low_trial_ixs].copy()
    df, df_chance = fit_svm(curr_data, sdf, m0=m0, m100=m100, C_value=C_value, n_iterations=n_iterations)
    print("low: %.2f (chance=%.2f)" % (df.mean()[metric], df_chance.mean()[metric]))
    lo_list.append(df.mean(axis=0).T)
    lo_list_chance.append(df_chance.mean(axis=0).T)
    
    # Fit LOW
    curr_data = neuraldf.loc[high_trial_ixs].copy()
    df, df_chance = fit_svm(curr_data, sdf, m0=m0, m100=m100, C_value=C_value, n_iterations=n_iterations)
    print("high: %.2f (chance=%.2f)" % (df.mean()[metric], df_chance.mean()[metric]))
    hi_list.append(df.mean(axis=0).T)
    hi_list_chance.append(df_chance.mean(axis=0).T)
    
    dkeys.append(datakey)
    vkeys.append(visual_area)

(20190502_JC076_fov1) Too few pass: 14
**** [Li] 20190602_JC091_fov1 *****
... (Li | 20190602_JC091_fov1) 57 of 241 cells
all: 0.76 (chance=0.49)
low: 0.72 (chance=0.51)
high: 0.75 (chance=0.51)
(20190605_JC090_fov1) Too few pass: 8
**** [Li] 20190606_JC091_fov1 *****
... (Li | 20190606_JC091_fov1) 21 of 136 cells
all: 0.58 (chance=0.49)
low: 0.44 (chance=0.56)
high: 0.44 (chance=0.51)
**** [Li] 20190609_JC099_fov1 *****
... (Li | 20190609_JC099_fov1) 20 of 78 cells
all: 0.61 (chance=0.51)
low: 0.49 (chance=0.52)
high: 0.48 (chance=0.48)
(20190617_JC099_fov1) Too few pass: 1
(20191018_JC113_fov1) Too few pass: 5
(20191111_JC120_fov1) Too few pass: 1
**** [Lm] 20190430_JC078_fov1 *****
... (Lm | 20190430_JC078_fov1) 24 of 187 cells
all: 0.59 (chance=0.49)
low: 0.55 (chance=0.51)
high: 0.53 (chance=0.46)
(20190504_JC078_fov1) Too few pass: 8
11 of 24 datakeys
**** [Lm] 20190506_JC080_fov1 *****
... (Lm | 20190506_JC080_fov1) 27 of 226 cells
all: 0.67 (chance=0.51)
low: 0.70 (chance=0.51)

In [238]:
print(d_list[0].shape)
full_df = pd.concat(d_list, axis=1).T
full_chance = pd.concat(d_list_chance, axis=1).T
full_df['datakey'] = dkeys
full_chance['datakey'] = dkeys
full_df['visual_area'] = vkeys
full_chance['visual_area'] = vkeys

lo_df = pd.concat(lo_list, axis=1).T
lo_chance = pd.concat(lo_list_chance, axis=1).T
lo_df['datakey'] = dkeys
lo_chance['datakey'] = dkeys
lo_df['visual_area'] = vkeys
lo_chance['visual_area'] = vkeys

hi_df = pd.concat(hi_list, axis=1).T
hi_chance = pd.concat(hi_list_chance, axis=1).T
hi_df['datakey'] = dkeys
hi_chance['datakey'] = dkeys
hi_df['visual_area'] = vkeys
hi_chance['visual_area'] = vkeys


(12,)


In [254]:
few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
skip = ['20190507_JC083_fov1', '20190420_JC076_fov1']

metric='heldout_test_score'
#metric='heldout_aMI'

m_ = pd.concat([full_df[metric], lo_df[metric], hi_df[metric]], axis=1)
m_.columns = ['all', 'low', 'high']
# m_df['datakey'] = full_df['datakey']
m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})

m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
#m_df['visual_area'] = [full_df.loc[i]['visual_area'] for i in m_df['datakey']]
v_labels = pd.concat([full_df['visual_area'], lo_df['visual_area'], hi_df['visual_area']], axis=0).values
m_df['visual_area'] = v_labels

plotdf = m_df[~m_df['datakey'].isin(skip)]

# plot
ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,6), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat[::-1], plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                 s=10, join=True, scale=0.5)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')

    ax.set_title(visual_area, loc='left', fontsize=16)
    ax.axhline(y=0.5, color='k', linestyle=':')
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
    
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
#sns.despine(trim=True)

<IPython.core.display.Javascript object>

In [123]:
few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']
metric='heldout_test_score'
#metric='heldout_aMI'

m_ = pd.concat([full_df[metric], lo_df[metric], hi_df[metric]], axis=1)
m_.columns = ['all', 'low', 'high']
# m_df['datakey'] = full_df['datakey']
m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})

m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
#m_df['visual_area'] = [full_df.loc[i]['visual_area'] for i in m_df['datakey']]
v_labels = pd.concat([full_df['visual_area'], lo_df['visual_area'], hi_df['visual_area']], axis=0).values
m_df['visual_area'] = v_labels

plotdf = m_df[~m_df['datakey'].isin(few)]

# plot
ai=0
fig, axn = pl.subplots(1, 3, figsize=(8,6), dpi=dpi, sharex=True, sharey=True)
for ax, (visual_area, a_df) in zip(axn.flat, plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                 s=10, join=True)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    #ax.legend(bbox_to_anchor=(1.1, 1))
#     if ai!=2:
#         ax.legend_.remove()
#     else:
    ax.set_title(visual_area, loc='left', fontsize=16)
    
    ax.legend(bbox_to_anchor=(0.9, -0.5))
    ai+=1
pl.subplots_adjust(wspace=0.5, bottom=0.5, top=0.8)
sns.despine(trim=True)

<IPython.core.display.Javascript object>

In [202]:
few = [] #['20191018_JC113_fov1', '20190605_JC090_fov1']

In [203]:
metric='heldout_test_score'

m_ = pd.concat([full_df[metric], lo_df[metric], hi_df[metric]], axis=1)
m_.columns = ['all', 'low', 'high']
# m_df['datakey'] = full_df['datakey']
m_df = m_.unstack().reset_index().rename(columns={'level_0': 'arousal', 'level_1': 'datakey', 0: metric})

m_df['datakey'] = [full_df.loc[i]['datakey'] for i in m_df['datakey']]
#m_df['visual_area'] = [full_df.loc[i]['visual_area'] for i in m_df['datakey']]
v_labels = pd.concat([full_df['visual_area'], lo_df['visual_area'], hi_df['visual_area']], axis=0).values
m_df['visual_area'] = v_labels

plotdf = m_df[~m_df['datakey'].isin(few)]

# plot
fig, axn = pl.subplots(1, 3, figsize=(8,4), dpi=dpi, sharex=True, sharey=True)
for ax, (arousal, a_df) in zip(axn.flat, plotdf.groupby(['visual_area'])):
    sns.pointplot(x='arousal', y=metric, hue='datakey', data=a_df, ax=ax, palette='colorblind',
                 s=10, join=True)
    sns.barplot(x='arousal', y=metric, data=a_df, ax=ax, 
                edgecolor=('k', 'k', 'k'), facecolor=(1,1,1,0))
    m_df.apply(aggr.annotateBars, ax=ax, axis=1, fontsize=12, fontcolor='k')
    #ax.legend(bbox_to_anchor=(1.1, 1))
    ax.legend_.remove()
pl.subplots_adjust(wspace=0.5, bottom=0.2, top=0.8)
sns.despine(trim=True)

<IPython.core.display.Javascript object>

In [101]:
def filter_rois(has_blobs, overlap_thr=0.50):
    visual_areas=['V1', 'Lm', 'Li']
    nocells=[]; notrials=[];
    global_rois = dict((v, []) for v in visual_areas)
    roi_counters = dict((v, 0) for v in visual_areas)
    roidf = []
    datakeys = dict((v, []) for v in visual_areas)
    
    for (visual_area, datakey), g in has_blobs[has_blobs['perc_overlap']>=overlap_thr].groupby(['visual_area', 'datakey']):

        roi_counter = roi_counters[visual_area]
        datakeys[visual_area].append(datakey)

        #ddf.sort_values(by='config')
        roi_list = sorted([s for s in g['cell'].unique()]) #[int(r) for r in ddf.columns if r != 'config']

        # Reindex roi ids for global
        roi_ids = [i+roi_counter for i, r in enumerate(roi_list)]
        nrs = len(roi_list)

        global_rois[visual_area].extend(roi_ids)
        roidf.append(pd.DataFrame({'roi': roi_ids,
                                   'dset_roi': roi_list,
                                   'visual_area': [visual_area for _ in np.arange(0, nrs)],
                                   'datakey': [datakey for _ in np.arange(0, nrs)]}))

        # Update global roi id counter
        roi_counters[visual_area] += len(roi_ids)

    roidf = pd.concat(roidf, axis=0) #.groupby(['visual_area']).count()
    #print("%i datasets fail all thresholds:" % len(nocells), nocells)
    for k, v in global_rois.items():
        print(k, len(v))
        
    return roidf