In [1]:
import os
import json
import glob
import copy
import copy
import itertools
import pprint 
pp = pprint.PrettyPrinter(indent=4)

import numpy as np
import pylab as pl
import seaborn as sns
import pandas as pd
import statsmodels as sm
import cPickle as pkl

from scipy import stats as spstats

from pipeline.python.classifications import experiment_classes as util
from pipeline.python.classifications import aggregate_data_stats as aggr
from pipeline.python.classifications import rf_utils as rfutils
from pipeline.python import utils as putils

from pipeline.python.classifications import decode_by_ncells as dc
from pipeline.python.classifications import decode_utils as decutils
from pipeline.python.retinotopy import fit_2d_rfs as fitrf

from matplotlib.lines import Line2D
import matplotlib.patches as patches

In [2]:
%matplotlib notebook

In [3]:
# Set colors
visual_areas, area_colors = putils.set_threecolor_palette()
dpi = putils.set_plot_params()


In [4]:
n_iterations=100
overlap_thr=0.5
n_processes=1
responsive_test='nstds'
responsive_thr=10.
experiment='blobs'
C_value=None

options=['-E', experiment, '-R', responsive_test, '-n', n_processes, '-N', n_iterations, '-o', overlap_thr, '-C', C_value]

opts = dc.extract_options(options)
fov_type = 'zoom2p0x'
state = 'awake'
aggregate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas'

traceid = opts.traceid #'traces001'
response_type = opts.response_type #'dff'
responsive_test = opts.responsive_test #'nstds' # 'nstds' #'ROC' #None
responsive_thr = float(opts.responsive_thr) #10

# CV stuff
m0=int(opts.class_a) #0
m100=int(opts.class_b) #106
n_iterations=int(opts.n_iterations) #100 
n_processes=int(opts.n_processes) #2
overlap_thr = None if opts.overlap_thr in ['None', None] else float(opts.overlap_thr)

stim_filterby = None #'first'
has_gratings = experiment!='blobs'
g_str = 'hasgratings' if has_gratings else 'blobsonly'

C_value = opts.C_value
do_cv = C_value is None
C_value = None if do_cv else float(opts.C_value)
print('Classify Morph %i v %i\nN=%i iterations (%i proc), overlap=%.2f, C=%s' % (m0, m100, n_iterations, n_processes, overlap_thr, str(C_value)))


Classify Morph 0 v 106
N=100 iterations (1 proc), overlap=0.50, C=None


In [5]:
train_str = 'traintest_by-ncells_iter-%i' % (n_iterations)

# Set colors
visual_area, area_colors = putils.set_threecolor_palette()
dpi = putils.set_plot_params()

#### Responsive params
n_stds = None if responsive_test=='ROC' else 2.5 #None
response_str = '%s_%s-thr-%.2f' % (response_type, responsive_test, responsive_thr) 

#### Output dir
stats_dir = os.path.join(aggregate_dir, 'data-stats')
decoding_dir = os.path.join(aggregate_dir, 'decoding')


# Create data ID for labeling figures with data-types
filter_str = 'filter_%s_%s' % (stim_filterby, g_str)
data_id = '|'.join([traceid, filter_str, response_str])
print(data_id)


traces001|filter_None_blobsonly|dff_nstds-thr-10.00


#### Current output dir

In [6]:
dst_dir = os.path.join(aggregate_dir, 'decoding', 'by_ncells')
if not os.path.exists(dst_dir):
    os.makedirs(dst_dir)
    print("...making dir")
print(dst_dir)

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells


In [7]:
#### Get metadata for experiment type
sdata = aggr.get_aggregate_info(traceid=traceid, fov_type=fov_type, state=state)
edata, expmeta = aggr.experiment_datakeys(sdata, experiment=experiment,
                                      has_gratings=has_gratings, stim_filterby=stim_filterby)
    
# Get blob metadata only - and only if have RFs
dsets = pd.concat([g for k, g in edata.groupby(['animalid', 'session', 'fov']) if 
                        (experiment in g['experiment'].values 
                         and ('rfs' in g['experiment'].values or 'rfs10' in g['experiment'].values)) ])
dsets[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey
visual_area,Unnamed: 1_level_1
Li,13
Lm,12
V1,11


In [8]:
#### Check stimulus configs
stim_datakeys = dsets['datakey'].unique()
SDF = aggr.check_sdfs(stim_datakeys, traceid=traceid)

In [9]:
#### Load neural responses
trial_epoch='stimulus'
responsive_test='nstds' #'roc'
responsive_thr=10. #0.05
response_type='dff'

#### Check for equal trial counts
_, cells, MEANS = aggr.get_source_data(experiment, equalize_now=True, response_type=response_type,
                                      responsive_test=responsive_test, responsive_thr=responsive_thr, 
                                      trial_epoch=trial_epoch) 
cells = cells[cells['visual_area'].isin(visual_areas)]
cells.groupby(['visual_area']).count()

...loading: /n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-10.00_dff_stimulus.pkl
---equalizing now---
Segmentation, missing:
20190502_JC076_fov1
20191008_JC091_fov1


Unnamed: 0_level_0,cell,fov,animalid,session,datakey,fovnum
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,764,764,764,764,764,764
Lm,1888,1888,1888,1888,1888,1888
V1,2232,2232,2232,2232,2232,2232


In [10]:
#### Load RF fits -------------------------------------
rf_filter_by=None
reliable_only = True
rf_fit_thr = 0.05
# -----------------------------------------------------
rfdf = aggr.load_rfdf_and_pos(dsets, rf_filter_by=rf_filter_by, reliable_only=reliable_only, traceid=traceid)

N dpaths: 42, N unfit: 0
N datasets included: 42, N sessions excluded: 4


#### How many responsive cells?

In [11]:
# N2, R2 = aggr.get_neuraldata_and_rfdata_2(cells, rfdf, MEANS, stack=True, verbose=False)
N2 = aggr.get_neuraldata(cells, MEANS, stack=True)
N2[['visual_area', 'datakey','cell']].drop_duplicates()['visual_area'].value_counts()

V1    2232
Lm    1888
Li     764
Name: visual_area, dtype: int64

#### How many RF fits?

In [55]:
R2 = aggr.get_rfdata(cells, rfdf)
R2[['visual_area', 'datakey','cell']].drop_duplicates()['visual_area'].value_counts()

V1    773
Lm    319
Li    189
Name: visual_area, dtype: int64

#### How many with both?

In [51]:
NEURALDATA, RFDATA = aggr.get_common_cells_from_dataframes(N2, R2)

In [69]:
N2_r, selected_cells = aggr.match_neuraldata_distn(N2, src='Li')


             datakey  cell
visual_area               
Li               764   764
Lm               764   764
V1               764   764


In [80]:
selected_cells.shape

(2292, 6)

In [78]:
selected_cells[['visual_area', 'datakey', 'cell']].drop_duplicates().shape

(2292, 3)

# subset of blob responses with RF fits?

In [238]:
#neuraldf_dict, RFDATA = aggr.get_neuraldata_and_rfdata(cells, rfdf, MEANS)
stack_neuraldf=True
NEURALDATA, RFDATA = aggr.get_neuraldata_and_rfdata(cells, rfdf, MEANS, stack=stack_neuraldf)

# Stack
#NEURALDATA = aggr.neuraldf_dict_to_dataframe(neuraldf_dict)
NEURALDATA[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey,cell
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1
Li,189,189
Lm,319,319
V1,773,773


# match stimulus repsonse levels?

In [80]:
# max_ndata, dist_mean, dist_sigma = aggr.get_params_for_source_distn(NEURALDATA, src='Li')

In [239]:
# sel = aggr.generate_matched_distn(max_ndata, mean=dist_mean, sigma=dist_sigma, n_samples=189)
# N2, selected_cells = aggr.match_neuraldata_distn(NEURALDATA, src='Li')

In [240]:
# count_of_sel = selected_cells.groupby(['visual_area', 'datakey']).count().reset_index()
# count_of_sel.groupby(['visual_area']).sum()

In [144]:
# R2 = aggr.select_dataframe_subset(selected_cells, RFDATA)

In [145]:
# N2, R2 = aggr.match_distns_neuraldata_and_rfdata(NEURALDATA, RFDATA)  
# stim_overlaps = rfutils.calculate_overlaps(R2, experiment=experiment) 

In [166]:
# print(overlap_thr)
# pass_overlaps = stim_overlaps[stim_overlaps['perc_overlap']>=overlap_thr]


0.5


In [149]:
# print("N assigned in area: %i" % cells[(cells['visual_area']==visual_area) & (cells['datakey']==datakey)].shape[0])
# print("N cells responsive in fov: %i" % int(MEANS[datakey].shape[1]-1))
# print("N cells w/ RFs: %i" % len(rfdf[rfdf['datakey']==datakey]['cell'].unique()))
# print("N cells response w/ RFs: %i" % len(NEURALDATA[(NEURALDATA['visual_area']==visual_area) 
#                & (NEURALDATA['datakey']==datakey)]['cell'].unique()))

# print("N pass overlap: %i" % len(pass_overlaps[(pass_overlaps['visual_area']==visual_area) 
#                                          & (pass_overlaps['datakey']==datakey)]['cell'].unique()))


N assigned in area: 228
N cells responsive in fov: 228
N cells w/ RFs: 32
N cells response w/ RFs: 31
N pass overlap: 16


In [150]:
# rf_rois = R2[R2['datakey']==datakey]['cell'].unique()
# stim_rois = [r for r in MEANS[datakey].columns if putils.isnumber(r)]

# len(rf_rois), len(stim_rois), len(np.intersect1d(rf_rois, stim_rois))


(16, 228, 16)

In [184]:
# count_of_sel = pass_overlaps[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area', 'datakey']).count().reset_index()
# count_of_sel.groupby(['visual_area']).sum()
# #pass_overlaps.groupby(['visual_area', 'datakey']).count()

Unnamed: 0_level_0,cell
visual_area,Unnamed: 1_level_1
Li,181
Lm,183
V1,187


In [62]:
reload(decutils)

<module 'pipeline.python.classifications.decode_utils' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/decode_utils.pyc'>

In [83]:
stim_overlaps = rfutils.calculate_overlaps(R2, experiment=experiment) 

In [88]:
c = stim_overlaps[['visual_area', 'datakey', 'cell']].drop_duplicates()
c.groupby(['visual_area', 'datakey']).count().sum()

cell    1281
dtype: int64

In [102]:
c.shape

(1281, 3)

In [87]:
R2.groupby(['visual_area', 'datakey']).count()['animalid'].sum()

1281

In [241]:
N2, R2 = aggr.match_distns_neuraldata_and_rfdata(NEURALDATA, RFDATA)  
stim_overlaps = rfutils.calculate_overlaps(R2, experiment=experiment) 

             datakey  cell
visual_area               
Li               189   189
Lm               189   189
V1               189   189


In [96]:
remove_too_few=False
min_ncells=0
overlap_thr=0.0
# ----------------------------------
pass_overlaps = stim_overlaps[stim_overlaps['perc_overlap']>=overlap_thr]

globalcells, cell_counts = decutils.get_pooled_cells(stim_overlaps,overlap_thr=overlap_thr, 
                                                     remove_too_few=remove_too_few, min_ncells=min_ncells)
print(cell_counts)

             cell
visual_area      
Li            189
Lm            319
V1            773
('V1', 773)
('Lm', 319)
('Li', 189)
{'V1': 773, 'Lm': 319, 'Li': 189}


In [105]:
globalcells[['visual_area', 'datakey', 'dset_roi']].drop_duplicates().rename(columns={'dset_roi': 'cell'})

Unnamed: 0,visual_area,datakey,cell
0,Li,20190602_JC091_fov1,15.0
1,Li,20190602_JC091_fov1,16.0
2,Li,20190602_JC091_fov1,22.0
3,Li,20190602_JC091_fov1,23.0
4,Li,20190602_JC091_fov1,27.0
5,Li,20190602_JC091_fov1,29.0
6,Li,20190602_JC091_fov1,33.0
7,Li,20190602_JC091_fov1,34.0
8,Li,20190602_JC091_fov1,37.0
9,Li,20190602_JC091_fov1,38.0


#### 2. Try drawing from matched distn (assumes Gauss)

In [232]:
# # For each cell, get activity profile (averaged across trial reps)
# mean_ndata = NEURALDATA.groupby(['visual_area', 'datakey', 'cell', 'config']).mean().reset_index()
# # For each cell, get MAX across configs
# max_ndata = mean_ndata.groupby(['visual_area', 'datakey', 'cell']).max().reset_index()

# # Same but resampled distribution
# mean_ndata_r = N2.groupby(['visual_area', 'datakey', 'cell', 'config']).mean().reset_index()
# # For each cell, get MAX across configs
# max_ndata_r = mean_ndata_r.groupby(['visual_area', 'datakey', 'cell']).max().reset_index()


# # ----------------------------------------------
# fig, axn = pl.subplots(1,2)
# ax=axn[0]
# for visual_area, g in max_ndata.groupby(['visual_area']):
#     ax.hist(g['response'], label=visual_area, cumulative=False, histtype='step',
#            color=area_colors[visual_area])
# ax=axn[1]
# for visual_area, g in max_ndata_r.groupby(['visual_area']):
#     ax.hist(g['response'], label=visual_area, cumulative=False, histtype='step',
#            color=area_colors[visual_area])
# ax.legend()

In [233]:
# # Generate random distn of values from true distn
# selected_cells_rand = pd.concat([vdf.sample(min_ncells, replace=True) for visual_area, vdf 
#                                    in max_ndata.groupby(['visual_area'])], axis=0)

# fig, axn = pl.subplots(1, 2, figsize=(5,3))

# ax=axn[0]
# for visual_area, g in selected_cells_rand.groupby(['visual_area']):
#     vals = g['response'].values
#     ax.hist(vals, label=visual_area, histtype='step', color=area_colors[visual_area], lw=2)
# ax.set_title("src is self (n=%i samples)" % min_ncells)

# ax=axn[1]
# for visual_area, g in selected_cells.groupby(['visual_area']):
#     vals = g['response'].values
#     ax.hist(vals, label=visual_area, histtype='step', color=area_colors[visual_area], lw=2)
# ax.set_title("src distn = Li")

<IPython.core.display.Javascript object>

Text(0.5,1,u'src distn = Li')

In [None]:
globalcells

In [226]:
for k, g in RFDATA.groupby(['visual_area', 'datakey']):
    if len(g['cell'].unique()) < min_ncells:
        print(k)
    

(u'Li', '20190617_JC099_fov1')
(u'Li', '20191105_JC117_fov1')
(u'Li', '20191111_JC120_fov1')
(u'Lm', '20190504_JC078_fov1')
(u'Lm', '20190509_JC078_fov1')
(u'Lm', '20190627_JC091_fov1')
(u'Lm', '20191008_JC091_fov1')
(u'V1', '20190420_JC076_fov1')
(u'V1', '20190502_JC076_fov1')
(u'V1', '20190504_JC078_fov1')
(u'V1', '20190508_JC083_fov1')
(u'V1', '20190509_JC078_fov1')
(u'V1', '20190510_JC083_fov1')
(u'V1', '20190512_JC083_fov1')
(u'V1', '20190517_JC083_fov1')


In [235]:
RFDATA = R2.copy()
NEURALDATA = N2.copy()

In [223]:
incl_dkeys = NEURALDATA['visual_area']

<module 'pipeline.python.classifications.rf_utils' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/rf_utils.pyc'>

In [236]:
#### Get screen and stimulus info
# Note:  Some of RFDATA might have ncells < min_ncells if there is another visual area that datakey included with > min_ncells
screeninfo = putils.get_screen_dims() #aggr.get_aggregate_stimulation_info(curr_sdata) #, experiment='blobs')

# Plot
fig = rfutils.plot_all_rfs(RFDATA, screeninfo=screeninfo, cmap='cubehelix')
pl.suptitle("RF positions (+ CoM), Area-assigned cells (%s)" % experiment)
putils.label_figure(fig, data_id)
figname = 'CoM_label-fovs_common_to_blobs_and_rfs__blobs-%s__Li-distN-min-%i-cells' % (filter_str, min_ncells)
pl.savefig(os.path.join(dst_dir, '%s.svg' % figname))
print(dst_dir, figname)


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells', 'CoM_label-fovs_common_to_blobs_and_rfs__blobs-filter_None_blobsonly__Li-distN-min-5-cells')


#### Reshape stacked dataframe to dict

In [251]:
# # Double check that cells are common to both neural and rfs
# c_list=[]
# i=0
# for (visual_area, datakey, rfname), g in stim_overlaps.groupby(['visual_area', 'datakey', 'rfname']):
#     if datakey not in NEURALDATA[visual_area].keys():
#         print("no %s: %s" % (experiment, datakey))
#         continue
#     exp_rids = [r for r in NEURALDATA[visual_area][datakey].columns if putils.isnumber(r)]
#     rf_rids = sorted(g['cell'].unique())
#     common_rids = np.intersect1d(exp_rids, rf_rids)
#     if len(exp_rids)!=len(common_rids):
#         print("[%s] %s, %i common cells (%i, %i)" % (visual_area, datakey, len(common_rids), len(exp_rids), len(rf_rids)))
#     c_list.append(pd.DataFrame({'visual_area': visual_area, 'datakey': datakey, 
#                                 'rfname': rfname, 'n_cells': len(common_rids)}, index=[i])) 
#     i+=1    
# common_counts = pd.concat(c_list, axis=0)


# Threshold by overlap with stimuli
stim_overlaps = rfutils.calculate_overlaps(RFDATA, experiment=experiment)
pass_overlaps = stim_overlaps[stim_overlaps['perc_overlap']>=overlap_thr].copy()
common_counts = aggr.get_counts_by_datakey(pass_overlaps)

In [252]:
pass_overlaps = stim_overlaps[stim_overlaps['perc_overlap']>=overlap_thr].copy()
common_counts = aggr.get_counts_by_datakey(pass_overlaps)

f, ax = pl.subplots(dpi=dpi, figsize=(4,3))
sns.stripplot(x='visual_area', y='n_cells', data=common_counts, ax=ax,
             order=visual_areas, color='k', s=8, alpha=1)
sns.despine(bottom=True, trim=True, ax=ax)
ax.tick_params(which='both', axis='x', size=0)
ax.set_xlabel('')
pl.subplots_adjust(left=0.2, bottom=0.2, top=0.8)
ax.set_title("N assigned cells in each area with RF fits>%.2f" % rf_fit_thr, loc='left', fontsize=8)
putils.label_figure(f, data_id)

figname ='ncells_assigned_with_rfs_%s-%s__match-Li-distn' % (response_type, responsive_test)
pl.savefig(os.path.join(dst_dir, '%s.svg' % figname))
print(dst_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells', 'ncells_assigned_with_rfs_dff-nstds__match-Li-distn')


In [753]:
reload(decutils)

<module 'pipeline.python.classifications.decode_utils' from '/home/julianarhee/Repositories/2p-pipeline/pipeline/python/classifications/decode_utils.pyc'>

#### Filter out cells that ain't even lookin

In [255]:
remove_too_few = True
min_ncells=5
overlap_thr=0.5

####
globalcells, cell_counts = decutils.get_pooled_cells(stim_overlaps, overlap_thr=overlap_thr, 
                                                     remove_too_few=True, min_ncells=min_ncells)

print("Final cell counts after ROI-assign, RF-fit, and overlap-thr:")
print(globalcells[['visual_area', 'datakey', 'roi']].drop_duplicates()['visual_area'].value_counts())


             cell
visual_area      
Li            178
Lm            296
V1            764
('V1', 764)
('Lm', 296)
('Li', 178)
Final cell counts after ROI-assign, RF-fit, and overlap-thr:
V1    764
Lm    296
Li    178
Name: visual_area, dtype: int64


##### Decode
decode_vs_ncells(rfs_and_blobs, curr_datakeys, MEANS, sdf, train_str=train_str,
                n_iterations=n_iterations, overlap_thr=overlap_thr, 
                n_processes=n_processes, 
                test_split=test_split, cv_nfolds=cv_nfolds, C_value=C_value,# cv=cv, 
                class_a=m0, class_b=m100, data_id='%s|%s' % (traceid, fig_str), #data_id,
                dst_dir=decoding_dir)


In [256]:
# rfs_and_blobs # replaced by pass_overlap
# stim_datakeys # final stimulus datakeys, replaced with incl_datakeys
# MEANS - replaced with NEURALDATA
# sdf
# train_str='clf-by-ncells',


# n_iterations=100, overlap_thr=0.8, n_processes=1 # these are set above
test_split=0.2
cv_nfolds=5
C_value=None

class_a=0
class_b=106


In [257]:
cell_counts

{'Li': 178, 'Lm': 296, 'V1': 764}

In [258]:
min_cells_total = min(cell_counts.values())

reasonable_range = [2**i for i in np.arange(0, 10)]
incl_range = [i for i in reasonable_range if i<min_cells_total]
incl_range.append(min_cells_total)
NCELLS = incl_range
print("Testing NCELLS: %s" % str(NCELLS))

Testing NCELLS: [1, 2, 4, 8, 16, 32, 64, 128, 178]


#### Debug for 1 sample size

In [762]:
curr_ncells = 1
gdf = globalcells[globalcells['visual_area']=='V1'].copy() # globalcells for current visual_area
sdf = SDF[datakey]

In [763]:
for visual_area, gdf in globalcells.groupby(['visual_area']):
    print("... %s" % visual_area)
    for inum in np.arange(0, n_iterations):
        iterdf = decutils.do_fit(inum, sample_ncells=curr_ncells, global_rois=gdf, sdf=sdf,
                                    MEANS=NEURALDATA, df_is_split=True,
                                    C_value=C_value, test_split=test_split, cv_nfolds=cv_nfolds, 
                                    class_a=class_a, class_b=class_b)
        iterdf['visual_area'] = visual_area
        iterdf['n_cells'] = curr_ncells
        i_.append(iterdf)

... Li


IndexingError: Too many indexers

In [747]:
iter_results_df = pd.concat(i_, axis=0)
iter_results_df.groupby(['visual_area', 'n_cells']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,fit_time,heldout_MI,heldout_aMI,heldout_log2MI,heldout_test_score,score_time,test_score,train_score
visual_area,n_cells,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Li,128,0.0478,0.003134,0.244254,0.345724,0.352384,0.824138,0.000723,0.829939,0.939307
Lm,128,0.0307,0.002642,0.153731,0.21099,0.221787,0.7568,0.000623,0.761854,0.913499
V1,128,0.02998,0.003277,0.215486,0.303622,0.310881,0.805862,0.000774,0.812776,0.91328


#### Run small subset of NCELLS

In [260]:
sdf = SDF[SDF.keys()[-1]]

In [261]:
n_iterations=10
i_=[]
for curr_ncells in NCELLS: #[1, 4, 16, 64, 148]:
    for visual_area, gdf in globalcells.groupby(['visual_area']):
        print("... %s (n=%i)" % (visual_area, curr_ncells))
        for inum in np.arange(0, n_iterations):
            iterdf = decutils.do_fit(inum, sample_ncells=curr_ncells, global_rois=gdf, sdf=sdf,
                                        MEANS=NEURALDATA, df_is_split=True,
                                        C_value=C_value, test_split=test_split, cv_nfolds=cv_nfolds, 
                                        class_a=class_a, class_b=class_b)
            iterdf['visual_area'] = visual_area
            iterdf['n_cells'] = curr_ncells
            i_.append(iterdf)
iter_results_df = pd.concat(i_, axis=0)


... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1
... Li
... Lm
... V1


In [264]:
dict(iter_results_df.groupby(['n_cells']).mean().T)


{1: C                     3.440500
 fit_time              0.000864
 heldout_MI            0.010241
 heldout_aMI           0.007181
 heldout_log2MI        0.014775
 heldout_test_score    0.503342
 score_time            0.000308
 test_score            0.541918
 train_score           0.542539
 Name: 1, dtype: float64, 2: C                     4.370800
 fit_time              0.001174
 heldout_MI            0.010813
 heldout_aMI           0.004467
 heldout_log2MI        0.015599
 heldout_test_score    0.524425
 score_time            0.000315
 test_score            0.563752
 train_score           0.576255
 Name: 2, dtype: float64, 4: C                     8.027800
 fit_time              0.002640
 heldout_MI            0.020395
 heldout_aMI           0.019047
 heldout_log2MI        0.029424
 heldout_test_score    0.552453
 score_time            0.000331
 test_score            0.573042
 train_score           0.598572
 Name: 4, dtype: float64, 8: C                     38.793700
 fit_time       

In [265]:
# print(iter_results_df.shape, iterdf.shape)
# iter_results_df.mean()

iter_results_df.groupby(['visual_area', 'n_cells']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,fit_time,heldout_MI,heldout_aMI,heldout_log2MI,heldout_test_score,score_time,test_score,train_score
visual_area,n_cells,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Li,1,0.1306,0.000807,0.016409,0.017626,0.023673,0.485679,0.000314,0.546133,0.54829
Li,2,10.1602,0.001456,0.006373,-0.001385,0.009194,0.482989,0.000319,0.542921,0.558989
Li,4,11.3302,0.00398,0.021543,0.021165,0.031081,0.551897,0.000339,0.567739,0.597683
Li,8,110.134,0.091345,0.025798,0.025312,0.037219,0.576034,0.000376,0.61596,0.665467
Li,16,0.163,0.001451,0.040147,0.045803,0.05792,0.624138,0.000372,0.672897,0.725764
Li,32,2.107,0.010553,0.129784,0.177136,0.187238,0.718966,0.000449,0.725768,0.81649
Li,64,10.018,0.196515,0.183682,0.256291,0.264997,0.784483,0.00059,0.772829,0.884159
Li,128,0.0172,0.005119,0.217599,0.305533,0.313929,0.805172,0.001529,0.807384,0.92436
Li,178,0.0055,0.005787,0.266826,0.377863,0.384948,0.834483,0.001372,0.845582,0.935016
Lm,1,0.1405,0.000799,0.006719,0.001894,0.009693,0.501713,0.000301,0.540479,0.536202


In [266]:
metric = 'heldout_test_score'
fig, ax = pl.subplots()
for visual_area, g in iter_results_df.groupby(['visual_area']):
    means_by_ncells = g.groupby(['n_cells']).mean().reset_index()
    ncells = means_by_ncells['n_cells'].values
    means_ = means_by_ncells[metric].values
    print(visual_area, ncells)
    stds_ =  g.groupby(['n_cells']).sem().reset_index()[metric]
    
    ax.plot(ncells, means_, color=area_colors[visual_area])
    ax.errorbar(ncells, means_, yerr=stds_, color=area_colors[visual_area])

ax.set_ylim([0.3, 1.0])

<IPython.core.display.Javascript object>

(u'Li', array([  1,   2,   4,   8,  16,  32,  64, 128, 178]))
(u'Lm', array([  1,   2,   4,   8,  16,  32,  64, 128, 178]))
(u'V1', array([  1,   2,   4,   8,  16,  32,  64, 128, 178]))


(0.3, 1.0)

In [797]:
ncells

array([  1,   4,  16,  64, 148])

In [210]:
means_by_ncells = g.groupby(['n_cells']).mean().reset_index()
means_by_ncells

Unnamed: 0,n_cells,C,fit_time,heldout_MI,heldout_aMI,heldout_log2MI,heldout_test_score,score_time,test_score,train_score
0,1,0.62038,0.000616,0.007958,0.003273,0.011481,0.498424,0.000217,0.53293,0.540627
1,4,1.06912,0.001014,0.027896,0.029533,0.040246,0.565531,0.000249,0.586634,0.607042
2,16,24.82624,0.042093,0.074357,0.095681,0.107274,0.669788,0.000367,0.666689,0.730654
3,64,0.04564,0.00234,0.163145,0.22579,0.235368,0.766021,0.000543,0.76122,0.875193
4,178,0.01144,0.004182,0.226619,0.318771,0.326942,0.812361,0.000984,0.815719,0.948196


In [25]:
reload(dc)

<module 'pipeline.python.classifications.decode_by_ncells' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/decode_by_ncells.py'>

# Plot all results

In [12]:
reload(dc)

<module 'pipeline.python.classifications.decode_by_ncells' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/2p-pipeline/pipeline/python/classifications/decode_by_ncells.pyc'>

In [11]:
match_str

NameError: name 'match_str' is not defined

In [13]:
dst_dir

'/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells'

In [87]:
trial_epoch

'stimulus'

In [107]:
analysis_type = 'by_ncells'
trial_epoch='plushalf'

match_distns=True

response_type = 'dff'
responsive_test='nstds' #'nstds'
responsive_thr = 5.0

overlap_thr=0.
C_value=None
n_iterations = 100
# -----------------------------------------------------------------------------------

C_str = 'tuneC' if C_value is None else 'C-%.2f' % float(C_value) 
match_str = 'matchdistns_' if match_distns else ''
overlap_str='no-rfs' if overlap_thr is None else 'overlap-%.1f' % overlap_thr

tmpid = dc.create_results_id(prefix=analysis_type, response_type=response_type,
                     responsive_test=responsive_test,
                     visual_area=visual_area,trial_epoch=trial_epoch,
                     C_value=C_value, overlap_thr=overlap_thr)
_, results_subid, tepoch = tmpid.split('__')
#results_subid = '__'.join(tmpid.split('__')[1:])

#subdir = '%s%s_%s_overlap-%.1f_iter-%i' % (match_str, response_type, responsive_test, overlap_thr, n_iterations)
subdir = '%s%s_iter-%i' % (match_str, results_subid, n_iterations)
data_id = '%s__%s' % (subdir, tepoch)

assert os.path.exists(os.path.join(dst_dir, subdir)), "Results <%s> does not exist" % subdir
curr_src_dir = os.path.join(dst_dir, subdir)
print(curr_src_dir)

# load
r_=[]
for visual_area in visual_areas:
    tmp_files = glob.glob(os.path.join(curr_src_dir, '%s_%s_%s*_%s_*.pkl' % (analysis_type, visual_area, C_str, trial_epoch)))
    for f in tmp_files:
        with open(f, 'rb') as f:
            res = pkl.load(f)
        res['visual_area'] = [visual_area for _ in np.arange(0, res.shape[0])]
        r_.append(res)
results = pd.concat(r_, axis=0)
results.shape

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells/matchdistns_dff-nstds_overlap-0.0_iter-100


(2700, 11)

In [105]:
sorted(glob.glob(os.path.join(dst_dir, 'matchdistns_dff*')))

['/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells/matchdistns_dff-nstds_no-rfs_iter-100',
 '/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells/matchdistns_dff-nstds_overlap-0.0_iter-100']

In [86]:
os.listdir(curr_src_dir)

['by_ncells_Li_tuneC__dff-nstds_overlap-0.0_1.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_2.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_1.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_4.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_2.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_8.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_128.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_256.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_4.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_8.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_64.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_16.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_128.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_256.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_16.pkl',
 'by_ncells_V1_tuneC__dff-nstds_overlap-0.0_64.pkl',
 'by_ncells_Li_tuneC__dff-nstds_overlap-0.0_32.pkl',
 'matchdistns_dff-nstds_overlap-0.0_iter-100_heldout_test_score_2.svg',
 'by_ncells_V1_tuneC__dff-nstds

In [72]:
'__'.join(tmpid.split('__')[1:])

'dff-nstds_overlap-0.0__stimulus'

In [108]:
# -----------------------------------------------------------------
metric='heldout_test_score'
#metric='heldout_aMI'
fig, ax = pl.subplots(dpi=dpi, figsize=(5,4))
fig.patch.set_alpha(1)

for visual_area, iterdf in results.groupby(['visual_area']):
    ncells = [k for k, v in iterdf.groupby(['n_cells'])]
    means = [v.mean()[metric] for k, v in iterdf.groupby(['n_cells'])]
    stds = [v.sem()[metric] for k, v in iterdf.groupby(['n_cells'])]
    plot_str='%s (%.2f)' % (visual_area, max(means))
    ax.plot(ncells, means, color= area_colors[visual_area], label=plot_str)
    ax.errorbar(ncells, means, yerr=stds, color= area_colors[visual_area])

ax.legend(bbox_to_anchor=(1.0, 1))
if 'test_score' in metric:
    ax.set_ylim([0.3, 1])
    ax.axhline(y=0.5, c='k', linestyle=':')
    ax.set_ylabel('Classifier accuracy (%)')
else:
    ax.set_ylabel(metric)
ax.set_xticks(ncells)
ax.set_xticklabels(ncells)

ax.set_xlabel('N cells')
sns.despine(trim=True, offset=2, ax=ax)
ax.set_title('A/B, overlap=%s (%s, n=%i)' % (overlap_str, C_str, n_iterations))

putils.label_figure(fig, subdir)
#ax.set_xlim([0, 30])

pl.subplots_adjust(left=0.2, bottom=0.2, right=0.7, top=0.8)

figname='%s_%s' % (metric, data_id)
#pl.savefig(os.path.join(curr_src_dir, '%s.svg' % figname))
print(curr_src_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/by_ncells/matchdistns_dff-nstds_overlap-0.0_iter-100', 'heldout_test_score_matchdistns_dff-nstds_overlap-0.0_iter-100__plushalf')


In [46]:
stds

[0.0769299640665852,
 0.07150358521430734,
 0.09775005712647954,
 0.09918952354126463,
 0.06850716677566468,
 0.08906922040885619,
 0.0658807254039542,
 0.0796318995851141,
 0.053539431151238354]

In [96]:
input_dfile = os.path.join(curr_src_dir, 'input_dataframes.pkl')
with open(input_dfile, 'rb') as f:
    indata = pkl.load(f)
    
indata.keys()

Index([u'datakey', u'dset_roi', u'roi', u'visual_area', u'animalid',
       u'session', u'fovnum'],
      dtype='object')

In [97]:
indata.groupby(['visual_area', 'datakey']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,dset_roi,roi,animalid,session,fovnum
visual_area,datakey,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,20190602_JC091_fov1,66,66,66,66,66
Li,20190606_JC091_fov1,22,22,22,22,22
Li,20190607_JC091_fov1,50,50,50,50,50
Li,20190609_JC099_fov1,32,32,32,32,32
Li,20190612_JC099_fov1,22,22,22,22,22
Li,20190614_JC091_fov1,64,64,64,64,64
Lm,20190430_JC078_fov1,29,29,29,29,29
Lm,20190506_JC080_fov1,23,23,23,23,23
Lm,20190508_JC083_fov1,50,50,50,50,50
Lm,20190512_JC083_fov1,25,25,25,25,25


In [98]:
indata.groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey,dset_roi,roi,animalid,session,fovnum
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,256,256,256,256,256,256
Lm,262,262,262,262,262,262
V1,254,254,254,254,254,254


# Check datasets

In [43]:
responsive_test='nstds'
responsive_thr=5
match_distns=False
response_type='dff'
trial_epoch='plushalf'
overlap_thr = 0

curr_visual_area=None
curr_datakey=None

In [40]:
has_rfs = overlap_thr is not None
overlap_str = 'no-rfs' if overlap_thr is None else 'overlap-%.1f' % overlap_thr

In [41]:
_, cells, MEANS = aggr.get_source_data(experiment, 
                    equalize_now=True, zscore_now=True,
                    response_type=response_type, responsive_test=responsive_test, 
                    responsive_thr=responsive_thr, trial_epoch=trial_epoch, use_all=False,
                    visual_area=None if match_distns else curr_visual_area, 
                    datakey=None if match_distns else curr_datakey)
cells = cells[cells['visual_area'].isin(['V1', 'Lm', 'Li'])]
stack_neuraldf = match_distns==True


...loading: /n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-5.00_dff_plushalf.pkl
---equalizing now---
Segmentation, missing:
20190502_JC076_fov1
20191008_JC091_fov1


In [44]:
#### Load RFs
if has_rfs: 
    print("~~~~~~~~~~~~~~~~Loading RFs~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    rf_fit_desc = fitrf.get_fit_desc(response_type=response_type)
    reliable_str = 'reliable'
    #rf_str = 'match%s_%s' % (experiment, reliable_str)
    # Get position info for RFs 
    rfdf = aggr.load_rfdf_and_pos(dsets, rf_filter_by=None, 
                                    reliable_only=True, traceid=traceid)
    # RF dataframes
    NEURALDATA, RFDATA = aggr.get_neuraldata_and_rfdata(cells, rfdf, MEANS, 
                                            stack=stack_neuraldf)
else:
    print("~~~~~~~~~~~~~~~~No Receptive Fields~~~~~~~~~~~~~~~~~~~~~~~~~")
    # EXP dataframes 
    NEURALDATA = aggr.get_neuraldata(cells, MEANS, stack=stack_neuraldf)

if match_distns:
    print("~~~~~~~~~~~~~~~~Matching max %s distNs~~~~~~~~~~~~~~~~~~~~~" % response_type)
    NEURALDATA, matched_distn_cells = aggr.match_neuraldata_distn(NEURALDATA, src='Li')
    if has_rfs:
        RFDATA = aggr.select_dataframe_subset(matched_distn_cells, RFDATA)
dist_str = 'matchdist_' if match_distns else ''

if NEURALDATA is None or RFDATA is None:
    print("There is no data. Aborting.")

~~~~~~~~~~~~~~~~Loading RFs~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
N dpaths: 42, N unfit: 0
N datasets included: 42, N sessions excluded: 4


In [47]:
remove_too_few=False
min_ncells=5

if has_rfs:
    print("~~~~~~~~~~~~~~~~Calculating overlaps (thr=%.2f)~~~~~~~~~~~~~" % overlap_thr)
    # Calculate overlap with stimulus
    stim_overlaps = rfutils.calculate_overlaps(RFDATA, experiment=experiment)
    # Filter cells
    globalcells, cell_counts = aggr.get_pooled_cells(stim_overlaps,
                                        remove_too_few=remove_too_few, 
                                        overlap_thr=0 if overlap_thr is None else overlap_thr,
                                        min_ncells=min_ncells)
else:
    if match_distns:
        globalcells, cell_counts = aggr.global_cells(matched_distn_cells,
                                        remove_too_few=remove_too_few, 
                                        min_ncells=min_ncells, return_counts=True)
    else:
        globalcells, cell_counts = aggr.global_cells(cells,
                                        remove_too_few=remove_too_few, 
                                        min_ncells=min_ncells, return_counts=True)

~~~~~~~~~~~~~~~~Calculating overlaps (thr=0.00)~~~~~~~~~~~~~
393 of 393 cells pass overlap (thr=0.00)


In [48]:
globalcells.groupby(['visual_area', 'datakey']).count() #.drop_duplicates()['visual_area'].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,dset_roi,roi,animalid,session,fovnum
visual_area,datakey,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,20190602_JC091_fov1,66,66,66,66,66
Li,20190606_JC091_fov1,22,22,22,22,22
Li,20190607_JC091_fov1,50,50,50,50,50
Li,20190609_JC099_fov1,32,32,32,32,32
Li,20190612_JC099_fov1,22,22,22,22,22
Li,20190614_JC091_fov1,64,64,64,64,64
Li,20190617_JC099_fov1,7,7,7,7,7
Li,20191018_JC113_fov1,8,8,8,8,8
Li,20191105_JC117_fov1,4,4,4,4,4
Li,20191111_JC120_fov1,4,4,4,4,4


In [29]:
globalcells.groupby(['visual_area', 'datakey']).count() #.drop_duplicates()['visual_area'].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,dset_roi,roi,animalid,session,fovnum
visual_area,datakey,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,20190602_JC091_fov1,58,58,58,58,58
Li,20190606_JC091_fov1,16,16,16,16,16
Li,20190607_JC091_fov1,31,31,31,31,31
Li,20190609_JC099_fov1,21,21,21,21,21
Li,20190612_JC099_fov1,13,13,13,13,13
Li,20190614_JC091_fov1,40,40,40,40,40
Li,20191018_JC113_fov1,6,6,6,6,6
Lm,20190430_JC078_fov1,17,17,17,17,17
Lm,20190506_JC080_fov1,21,21,21,21,21
Lm,20190508_JC083_fov1,35,35,35,35,35


In [35]:
globalcells.groupby(['visual_area', 'datakey']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,dset_roi,roi,animalid,session,fovnum
visual_area,datakey,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Li,20190602_JC091_fov1,58,58,58,58,58
Li,20190606_JC091_fov1,16,16,16,16,16
Li,20190607_JC091_fov1,31,31,31,31,31
Li,20190609_JC099_fov1,21,21,21,21,21
Li,20190612_JC099_fov1,13,13,13,13,13
Li,20190614_JC091_fov1,40,40,40,40,40
Li,20191018_JC113_fov1,6,6,6,6,6
Lm,20190430_JC078_fov1,17,17,17,17,17
Lm,20190506_JC080_fov1,21,21,21,21,21
Lm,20190508_JC083_fov1,35,35,35,35,35


# --------------------------------------------------------------------------------------------------------
# Scratch Space
# --------------------------------------------------------------------------------------------------------

# Dataset info

In [4]:
#### Set trace ID and FOV/state type
traceid = 'traces001'
fov_type = 'zoom2p0x'
state = 'awake'
aggregate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas'

#### Responsive params
responsive_test = 'nstds' # 'nstds' #'ROC' #None
nstd_thr=10
response_type = 'dff'

#### some strings and whatnot
responsive_thr = nstd_thr if responsive_test=='nstds' else 0.05 #0.05 #None
n_stds = None if responsive_test=='ROC' else 2.5 #None
response_str = '%s_resptest-%s_respthr-%.2f' % (response_type, responsive_test, responsive_thr) 

In [5]:
#### Create data ID for labeling figures with data-types
experiment = 'blobs'
print(response_str)

dff_resptest-nstds_respthr-10.00


# Output dir

In [6]:
#### Output dir
stats_dir = os.path.join(aggregate_dir, 'data-stats')

decoding_dir = os.path.join(aggregate_dir, 'decoding')
print(decoding_dir)

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding


# Get metadata for experiment type

In [7]:
experiment = 'blobs'

In [8]:
# Get all data sets
sdata = aggr.get_aggregate_info(traceid=traceid, fov_type=fov_type, state=state)
sdata[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey
visual_area,Unnamed: 1_level_1
Li,26
Lm,22
V1,18


# Exclude datasets without RFs

In [9]:
# Get blob metadata only - and only if have RFs
sdata_exp = pd.concat([g for k, g in sdata.groupby(['animalid', 'session', 'fov']) if 
                        (experiment in g['experiment'].values 
                         and ('rfs' in g['experiment'].values or 'rfs10' in g['experiment'].values)) ])
sdata_exp[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey
visual_area,Unnamed: 1_level_1
Li,13
Lm,12
V1,11


# Drop duplicates and whatnot fovs

In [10]:
stim_filterby = 'first'

if experiment=='blobs':
    has_gratings=False
    g_str = 'hasgratings' if has_gratings else 'blobsonly'
else:
    g_str = 'gratingsonly'
filter_str = 'stim-filter-%s_%s_%s' % (stim_filterby, g_str, response_str)

In [11]:
# Drop duplicates and whatnot fovs
if experiment=='blobs':
    exp_dkeys = aggr.get_blob_datasets(filter_by=stim_filterby, has_gratings=has_gratings, as_dict=True)
elif experiment == 'gratings':
    exp_dkeys = aggr.get_gratings_datasets(filter_by=stim_filterby, as_dict=True)

for k, v in exp_dkeys.items():
    print('%s: %i datasets' % (k, len(v)))

dictkeys = [d for d in list(itertools.chain(*exp_dkeys.values()))]
stim_datakeys = ['%s_%s_fov%i' % (s.split('_')[0], s.split('_')[1], 
                   sdata[(sdata['animalid']==s.split('_')[1]) & (sdata['session']==s.split('_')[0])]['fovnum'].unique()[0]) for s in dictkeys]
expmeta = dict((k, [dv for dv in stim_datakeys for vv in v if vv in dv]) for k, v in exp_dkeys.items())

V1: 8 datasets
Lm: 8 datasets
Li: 9 datasets


In [12]:
#### Create data ID for labeling figures with data-types
data_id = '|'.join([traceid, filter_str])
print(data_id)


traces001|stim-filter-first_blobsonly_dff_resptest-nstds_respthr-10.00


# Load neural responses

In [13]:
glob.glob(os.path.join(stats_dir, 'aggr_%s_trialmeans_*%s*_%s_stimulus.pkl' 
                                               % (experiment, responsive_test, response_type)))

['/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-10.00_dff_stimulus.pkl',
 '/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-15.00_dff_stimulus.pkl']

In [14]:
aggr_trialmeans_dfile = glob.glob(os.path.join(stats_dir, 
                                'aggr_%s_trialmeans_*%s*_%s_stimulus.pkl' 
                                               % (experiment, responsive_test, response_type)))[0]

print(aggr_trialmeans_dfile)
with open(aggr_trialmeans_dfile, 'rb') as f:
    MEANS = pkl.load(f)
    

/n/coxfs01/julianarhee/aggregate-visual-areas/data-stats/aggr_blobs_trialmeans_traces001_nstds-thr-10.00_dff_stimulus.pkl


In [15]:
# aggr_baselines_dfile = glob.glob(os.path.join(stats_dir, 
#                                 'aggr_%s_trialmeans_*%s*_%s_baseline.pkl'
#                                               % (experiment, responsive_test, response_type)))[1]

# print(aggr_baselines_dfile)
# with open(aggr_baselines_dfile, 'rb') as f:
#     BAS = pkl.load(f)

## Check that all datasets have same stim configs

In [16]:
SDF={}
for datakey in stim_datakeys:
    session, animalid, fov_ = datakey.split('_')
    fovnum = int(fov_[3:])
    obj = util.Objects(animalid, session, 'FOV%i_zoom2p0x' %  fovnum, traceid=traceid)
    sdf = obj.get_stimuli()
    SDF[datakey] = sdf

nonpos_params = [p for p in sdf.columns if p not in ['xpos', 'ypos', 'position']] 
assert all([all(sdf[nonpos_params]==d[nonpos_params]) for k, d in SDF.items()]), "Incorrect stimuli..."

Creating blobs object [JC084|20190522|FOV1_zoom2p0x|traces001]
Creating blobs object [JC085|20190622|FOV1_zoom2p0x|traces001]
Creating blobs object [JC097|20190613|FOV1_zoom2p0x|traces001]
Creating blobs object [JC097|20190616|FOV1_zoom2p0x|traces001]
Creating blobs object [JC097|20190617|FOV1_zoom2p0x|traces001]
Creating blobs object [JC110|20191006|FOV1_zoom2p0x|traces001]
Creating blobs object [JC076|20190420|FOV1_zoom2p0x|traces001]
Creating blobs object [JC083|20190507|FOV1_zoom2p0x|traces001]
Creating blobs object [JC084|20190525|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190627|FOV1_zoom2p0x|traces001]
Creating blobs object [JC097|20190618|FOV1_zoom2p0x|traces001]
Creating blobs object [JC083|20190512|FOV1_zoom2p0x|traces001]
Creating blobs object [JC078|20190504|FOV1_zoom2p0x|traces001]
Creating blobs object [JC078|20190430|FOV1_zoom2p0x|traces001]
Creating blobs object [JC080|20190506|FOV1_zoom2p0x|traces001]
Creating blobs object [JC083|20190508|FOV1_zoom2p0x|tra

# Get screen and stimulus info

In [17]:
screeninfo = putils.get_screen_dims() #aggr.get_aggregate_stimulation_info(curr_sdata) #, experiment='blobs')
screenright = float(screeninfo['azimuth_deg']/2)
screenleft = -1*screenright #float(screeninfo['screen_right'].unique())
screentop = float(screeninfo['altitude_deg']/2)
screenbottom = -1*screentop
screenaspect = float(screeninfo['resolution'][0]) / float(screeninfo['resolution'][1])


In [18]:
screeninfo

{'altitude_deg': 67.323,
 'azimuth_deg': 119.5564,
 'deg_per_pixel': (0.06226895833333333, 0.062336111111111106),
 'resolution': [1920, 1080]}

# Load RF fits

In [19]:
from pipeline.python.retinotopy import fit_2d_rfs as fitrf
from pipeline.python.rois.utils import load_roi_coords
reload(rfutils)

<module 'pipeline.python.classifications.rf_utils' from '/home/julianarhee/Repositories/2p-pipeline/pipeline/python/classifications/rf_utils.pyc'>

In [20]:
reliable_only = True

rf_fit_thr = 0.05
fit_desc = fitrf.get_fit_desc(response_type=response_type)
reliable_str = 'reliable' if reliable_only else ''
rf_str = 'match%s_%s' % (experiment, reliable_str)
print(rf_str)

#### Get non-repeated FOV datasets
rf_dsets = sdata_exp[(sdata_exp['datakey'].isin(stim_datakeys))
                     & (sdata_exp['experiment'].isin(['rfs', 'rfs10']))].copy()
rf_dpaths, no_fits = rfutils.get_fit_dpaths(rf_dsets, traceid=traceid, fit_desc=fit_desc)
print("%i with no fits" % len(no_fits))

rfdf = rfutils.aggregate_rf_data(rf_dpaths, reliable_only=reliable_only, fit_desc=fit_desc,
                                            traceid=traceid, verbose=False)
rfdf = rfdf.reset_index(drop=True)

matchblobs_reliable
N dpaths: 31, N unfit: 0
N datasets included: 31, N sessions excluded: 4
0 with no fits


# Get position info for RFs

In [21]:
aggr_rf_dir = os.path.join(aggregate_dir, 'receptive-fields', '%s__%s' % (traceid, fit_desc))
print(aggr_rf_dir)
rf_filter_by=None

#df_fpath = os.path.join(aggr_rf_dir, 'fits_and_coords_%s_%s_%s.pkl' % (rf_filter_by, reliable_str, rfname))
df_fpath =  os.path.join(aggr_rf_dir, 'fits_and_coords_%s_%s.pkl' % (rf_filter_by, reliable_str))
print(df_fpath)

get_positions = False
if os.path.exists(df_fpath) and get_positions is False:
    print("Loading existing RF coord conversions...")
    try:
        with open(df_fpath, 'rb') as f:
            df= pkl.load(f)
        rfdf = df['df']
    except Exception as e:
        get_positions = True
print(get_positions)

/n/coxfs01/julianarhee/aggregate-visual-areas/receptive-fields/traces001__fit-2dgaus_dff-no-cutoff
/n/coxfs01/julianarhee/aggregate-visual-areas/receptive-fields/traces001__fit-2dgaus_dff-no-cutoff/fits_and_coords_None_reliable.pkl
Loading existing RF coord conversions...
False


In [22]:
if get_positions:
    print("Calculating RF coord conversions...")
    pos_params = ['fov_xpos', 'fov_xpos_pix', 'fov_ypos', 'fov_ypos_pix', 'ml_pos','ap_pos']
    for p in pos_params:
        rfdf[p] = ''
    p_list=[]
    for (animalid, session, fovnum), g in rfdf.groupby(['animalid', 'session', 'fovnum']):
        fcoords = load_roi_coords(animalid, session, 'FOV%i_zoom2p0x' % fovnum, 
                                  traceid=traceid, create_new=False)

        for ei, e_df in g.groupby(['experiment']):
            cell_ids = e_df['cell'].unique()
            p_ = fcoords['roi_positions'].loc[cell_ids]
            for p in pos_params:
                rfdf[p][e_df.index] = p_[p].values
    # with open(df_fpath, 'wb') as f:
    #     pkl.dump(expdf, f, protocol=pkl.HIGHEST_PROTOCOL)

In [23]:
[r for r in MEANS.keys() if r not in rfdf['datakey'].unique() ]

['20190422_JC076_fov1']

# Select RFs, whichever (rfs/rfs10) in common with blob rids

In [24]:
r_list=[]
for datakey, expdf in MEANS.items(): #corrs.groupby(['datakey']):
    # Get active blob cells
    exp_rids = [r for r in expdf.columns if putils.isnumber(r)] #sorted(np.union1d(expdf['row'].unique(), expdf['col'].unique()))    
    
    # Get current fov's RFs
    rdf = rfdf[rfdf['datakey']==datakey].copy()
    
    # If have both rfs/rfs10, pick the best one
    if len(rdf['experiment'].unique())>1:
        rf_rids = rdf[rdf['experiment']=='rfs']['cell'].unique()
        rf10_rids = rdf[rdf['experiment']=='rfs10']['cell'].unique()
        same_as_rfs = np.intersect1d(rf_rids, exp_rids)
        same_as_rfs10 = np.intersect1d(rf10_rids, exp_rids)
        rfname = 'rfs' if len(same_as_rfs) > len(same_as_rfs10) else 'rfs10'
        print("%s: Selecting %s, overlappig rfs, %i | rfs10, %i (of %i cells)" 
              % (datakey, rfname, len(same_as_rfs), len(same_as_rfs10), len(exp_rids)))
        r_list.append(rdf[rdf['experiment']==rfname])
    else:
        r_list.append(rdf)
RFs = pd.concat(r_list, axis=0)


20190613_JC097_fov1: Selecting rfs10, overlappig rfs, 61 | rfs10, 88 (of 166 cells)
20190622_JC085_fov1: Selecting rfs10, overlappig rfs, 26 | rfs10, 97 (of 203 cells)
20190618_JC097_fov1: Selecting rfs10, overlappig rfs, 13 | rfs10, 24 (of 116 cells)
20190602_JC091_fov1: Selecting rfs10, overlappig rfs, 33 | rfs10, 59 (of 241 cells)
20190616_JC097_fov1: Selecting rfs10, overlappig rfs, 98 | rfs10, 119 (of 296 cells)
20191006_JC110_fov1: Selecting rfs10, overlappig rfs, 46 | rfs10, 72 (of 217 cells)


In [25]:
RFs[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey
visual_area,Unnamed: 1_level_1
Li,13
Lm,12
V1,11


In [26]:
print("All RFs-----------------------------------")
pp.pprint(rfdf[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count())
print("RFs with blobs -----------------------------------")
pp.pprint(RFs[['visual_area', 'datakey']].drop_duplicates().groupby(['visual_area']).count())

All RFs-----------------------------------
             datakey
visual_area         
Li                24
Lm                21
V1                16
RFs with blobs -----------------------------------
             datakey
visual_area         
Li                13
Lm                12
V1                11


# Plot

In [27]:
visual_areas = ['V1', 'Lm', 'Li']
fig, axn = pl.subplots(1,3, figsize=(10,6), dpi=dpi)
for visual_area, v_df in RFs.groupby(['visual_area']):
    ai = visual_areas.index(visual_area)
    ax = axn[ai]
    dcolors = sns.color_palette('cubehelix', n_colors=len(v_df['datakey'].unique()))
    for di, (datakey, d_df) in enumerate(v_df.groupby(['datakey'])):
        
        exp_rids = [r for r in MEANS[datakey] if putils.isnumber(r)] #sorted(np.union1d(expdf['row'].unique(), expdf['col'].unique()))    
        rf_rids = d_df['cell'].unique()
        common_to_rfs_and_blobs = np.intersect1d(rf_rids, exp_rids)
        curr_df = d_df[d_df['cell'].isin(common_to_rfs_and_blobs)].copy()
        
        sns.scatterplot('x0', 'y0', data=curr_df, ax=ax, color=dcolors[di],
                        #palette=dcolors, #area_colors[visual_area],
                       s=10, marker='o', alpha=0.5) #, edgecolor='k') #area_colors[visual_area])

        x = curr_df['x0'].values
        y=curr_df['y0'].values
        
        ncells_rfs = len(rf_rids)
        ncells_common = len(common_to_rfs_and_blobs) #curr_df.shape[0]
        m=np.ones(curr_df['x0'].shape)
        cgx = np.sum(x*m)/np.sum(m)
        cgy = np.sum(y*m)/np.sum(m)
        #print('The center of mass: (%.2f, %.2f)' % (cgx, cgy))
        ax.plot(cgx, cgy, marker='+', markersize=20, color=dcolors[di], #area_colors[visual_area],
                label='%s (%s, %i/%i)' % (visual_area, datakey, ncells_common, ncells_rfs), lw=3)
        
    ax.set_title(visual_area)
    ax.legend(bbox_to_anchor=(0.95, -0.4), fontsize=8) #1))

for ax in axn:
    ax.set_xlim([screenleft, screenright])
    ax.set_ylim([screenbottom, screentop])
    ax.set_aspect('equal')
    ax.set_ylabel('')
    ax.set_xlabel('')
    
pl.suptitle("RF positions (+ CoM), responsive cells (%s)" % experiment)
pl.subplots_adjust(top=0.9, bottom=0.4)


putils.label_figure(fig, data_id)
figname = 'CoM_label-fovs_common_to_blobs_and_rfs'
pl.savefig(os.path.join(aggr_rf_dir, '%s.svg' % figname))
print(aggr_rf_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/receptive-fields/traces001__fit-2dgaus_dff-no-cutoff', 'CoM_label-fovs_common_to_blobs_and_rfs')


# Calculate overlap with stimulus

#### Test with example FOV

In [35]:
experiment = 'blobs'

# TEST
animalid = 'JC110'
session = '20191006'
fovnum = 1

datakey = '%s_%s_fov%i' % (session, animalid, fovnum)
print(datakey)
S = util.Session(animalid, session, 'FOV%i_zoom2p0x' % fovnum)
stim_xpos, stim_ypos = S.get_stimulus_coordinates(experiments=[experiment])
stim_sizes = S.get_stimulus_sizes(size_tested=[experiment])

20191006_JC110_fov1
Creating blobs object [JC110|20191006|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (30, 0)
Creating blobs object [JC110|20191006|FOV1_zoom2p0x|traces001]


In [36]:
rf_fit_params = ['cell', 'std_x', 'std_y', 'theta', 'x0', 'y0']

g = RFs[RFs['datakey']==datakey].copy()

# Convert RF fit params to polygon
g.index = g['cell'].values
rf_polys = rfutils.rfs_to_polys(g[rf_fit_params])

# Convert stimuli to polyon bounding boxes
stim_polys = [(blob_sz, rfutils.stimsize_poly(blob_sz, xpos=stim_xpos, ypos=stim_ypos)) \
                  for blob_sz in stim_sizes[experiment]]
    
# Concatenate into dataframe
perc_overlap = pd.concat([rfutils.get_proportion_overlap(rf_poly, stim_poly) \
                for stim_poly in stim_polys for rf_poly in rf_polys]).rename(columns={'row': 'cell', 'col': 'stim_size'})

In [36]:
perc_overlap

Unnamed: 0,area_overlap,stim_size,perc_overlap,cell
0,100.000000,10,1.000000,4.0
0,100.000000,10,1.000000,5.0
0,100.000000,10,1.000000,6.0
0,100.000000,10,1.000000,11.0
0,100.000000,10,1.000000,12.0
0,100.000000,10,1.000000,18.0
0,100.000000,10,1.000000,19.0
0,100.000000,10,1.000000,23.0
0,100.000000,10,1.000000,27.0
0,100.000000,10,1.000000,29.0


In [69]:
fig, axn = pl.subplots(1,5, figsize=(15,3))

p_rf = rf_polys[0][1]

p_stim = stim_polys[0]
for stim_ix, p_stim in enumerate(stim_polys):
    ax = axn[stim_ix]

    #perc_overlap = rfutils.get_proportion_overlap(rf_polys[0], (stim_ix, p_stim))
    perc_overlap = rfutils.get_proportion_overlap(rf_poly, p_stim) 
    
    #print(perc_overlap)rfutils.get_proportion_overlap(rf_poly, stim_poly)

    x, y = p_rf.exterior.xy
    ax.plot(x, y)
    ax.set_title(perc_overlap['perc_overlap'].values, fontsize=8)
    print(stim_ix, perc_overlap['perc_overlap'].values)

    x, y = p_stim[1].exterior.xy
    ax.plot(x, y)
    ax.set_aspect('equal')
    
pl.subplots_adjust(wspace=0.5)

<IPython.core.display.Javascript object>

(0, array([ 1.]))
(1, array([ 1.]))
(2, array([ 0.93254434]))
(3, array([ 0.71950976]))
(4, array([ 0.84616861]))


In [67]:
perc_overlap

Unnamed: 0,area_overlap,col,perc_overlap,row
0,1273.943458,50,0.846169,384.0


### For all RFs measured, calculate overlap with stimuli

In [28]:
rf_fit_params = ['cell', 'std_x', 'std_y', 'theta', 'x0', 'y0']

o_list=[]
for (visual_area, animalid, session, fovnum, datakey), g in RFs.groupby(['visual_area', 'animalid', 'session', 'fovnum', 'datakey']):
#for (visual_area, animalid, session, fovnum, datakey, rfname), g in rfdf.groupby(['visual_area', 'animalid', 'session', 'fovnum', 'datakey', 'experiment']):
   
    if datakey not in MEANS.keys():
        continue
    
    # Convert RF fit params to polygon
    g.index = g['cell'].values
    rf_polys = rfutils.rfs_to_polys(g[rf_fit_params])

    S = util.Session(animalid, session, 'FOV%i_zoom2p0x' % fovnum)
    stim_xpos, stim_ypos = S.get_stimulus_coordinates(experiments=[experiment])
    stim_sizes = S.get_stimulus_sizes(size_tested=[experiment])

    # Convert stimuli to polyon bounding boxes
    stim_polys = [(blob_sz, rfutils.stimsize_poly(blob_sz, xpos=stim_xpos, ypos=stim_ypos)) \
                  for blob_sz in stim_sizes[experiment]]
    
    # Get all pairwise overlaps (% of smaller ellipse that overlaps larger ellipse)
    overlaps = pd.concat([rfutils.get_proportion_overlap(rf_poly, stim_poly) \
                for stim_poly in stim_polys for rf_poly in rf_polys]).rename(columns={'row': 'cell', 'col': 'stim_size'})
    metadict={'visual_area': visual_area, 'animalid': animalid, 'rfname': rfname,
              'session': session, 'fovnum': fovnum, 'datakey': datakey}
    o_ = putils.add_meta_to_df(overlaps, metadict)
    o_list.append(o_)

stim_overlaps = pd.concat(o_list, axis=0).reset_index(drop=True)

Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (-5, 8)
Creating blobs object [JC076|20190502|FOV1_zoom2p0x|traces001]
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (15, 0)
Creating blobs object [JC090|20190605|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -8)
Creating blobs object [JC091|20190602|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -8)
Creating blobs object [JC091|20190606|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (20, -5)
Creating blobs object [JC091|20190607|FOV1_zoom2p0x|traces001]
Creating blobs object [JC091|20190614|FOV1_zoom2p0x|traces001]
Stimuli presented at coords: (10, -5)
Creating blobs object [JC091|20190614|FOV1_zoom2p0x|traces001]
Creating blobs obj

In [71]:
# sns.catplot(x='visual_area', y='perc_overlap', col='stim_size', data=stim_overlaps,
#            kind='kdeplot', palette=area_colors, order=visual_areas)

fig, axn = pl.subplots(1, 5, figsize=(9, 3), sharex=True, sharey=True, dpi=dpi)
for ax, (sz, sg) in zip(axn.flat, stim_overlaps[['perc_overlap', 'visual_area', 'stim_size']].groupby(['stim_size'])):
    for v, vg in sg.groupby(['visual_area']):
        
        ax.hist(vg['perc_overlap'].values, color=area_colors[v], label=v,
                facecolor='none', edgecolor=area_colors[v], lw=2, alpha=1,
                density=True, cumulative=True, histtype='step')
    ax.set_title('%i deg' % (sz))
fig.text(0.5, 0.05, '% overlap')
fig.text(0.0, 0.5, 'density', rotation=90, va='center')

ax.legend(bbox_to_anchor=(1.1, 1))
pl.subplots_adjust(wspace=0.5, top=0.7, right=0.8, bottom=0.2, left=0.1)

putils.label_figure(fig, data_id)
fig.text(0., 0.85, 'RF overlaps with stimuli, by size')

<IPython.core.display.Javascript object>

Text(0,0.85,u'RF overlaps with stimuli, by size')

In [39]:
sns.catplot(x='visual_area', y='perc_overlap', col='stim_size', data=stim_overlaps,
           kind='violin', palette=area_colors, order=visual_areas)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x7efe0ab15750>

# Get data common to RFs + blobs

In [29]:
c_list=[]

i=0
d_list = []
for (visual_area, datakey, rfname), g in stim_overlaps.groupby(['visual_area', 'datakey', 'rfname']):
    if datakey not in MEANS.keys():
        print("no %s: %s" % (experiment, datakey))
        continue
        
    exp_rids = [r for r in MEANS[datakey].columns if putils.isnumber(r)]
    rf_rids = sorted(g['cell'].unique())
    common_rids = np.intersect1d(exp_rids, rf_rids)
    print("[%s] %s, (%s) %i common cells" % (visual_area, datakey, rfname, len(common_rids)))

    c_list.append(pd.DataFrame({'visual_area': visual_area, 'datakey': datakey, 
                                'rfname': rfname, 'n_cells': len(common_rids)}, index=[i]))
    
    d_list.append(g[g['cell'].isin(common_rids)].copy())
    i+=1
    
rfs_and_blobs = pd.concat(d_list, axis=0)
print(rfs_and_blobs.shape)
   
common_counts = pd.concat(c_list, axis=0)
    

[Li] 20190502_JC076_fov1, (rfs10) 15 common cells
[Li] 20190602_JC091_fov1, (rfs10) 59 common cells
[Li] 20190605_JC090_fov1, (rfs10) 9 common cells
[Li] 20190606_JC091_fov1, (rfs10) 26 common cells
[Li] 20190607_JC091_fov1, (rfs10) 41 common cells
[Li] 20190609_JC099_fov1, (rfs10) 22 common cells
[Li] 20190612_JC099_fov1, (rfs10) 21 common cells
[Li] 20190614_JC091_fov1, (rfs10) 52 common cells
[Li] 20190617_JC099_fov1, (rfs10) 4 common cells
[Li] 20191008_JC091_fov1, (rfs10) 20 common cells
[Li] 20191018_JC113_fov1, (rfs10) 6 common cells
[Li] 20191105_JC117_fov1, (rfs10) 0 common cells
[Li] 20191111_JC120_fov1, (rfs10) 1 common cells
[Lm] 20190430_JC078_fov1, (rfs10) 31 common cells
[Lm] 20190504_JC078_fov1, (rfs10) 10 common cells
[Lm] 20190506_JC080_fov1, (rfs10) 30 common cells
[Lm] 20190508_JC083_fov1, (rfs10) 61 common cells
[Lm] 20190509_JC078_fov1, (rfs10) 11 common cells
[Lm] 20190512_JC083_fov1, (rfs10) 39 common cells
[Lm] 20190513_JC078_fov1, (rfs10) 17 common cells
[Lm] 

In [30]:
common_counts.groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey,n_cells,rfname
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Li,13,13,13
Lm,12,12,12
V1,11,11,11


In [31]:
rfs_and_blobs[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count()

Unnamed: 0_level_0,datakey,cell
visual_area,Unnamed: 1_level_1,Unnamed: 2_level_1
Li,276,276
Lm,338,338
V1,736,736


In [37]:
stim_overlaps[['visual_area', 'datakey']].drop_duplicates()['visual_area'].value_counts()

Li    13
Lm    12
V1    11
Name: visual_area, dtype: int64

In [38]:
RFs[['visual_area', 'datakey']].drop_duplicates()['visual_area'].value_counts()

Li    13
Lm    12
V1    11
Name: visual_area, dtype: int64

In [32]:
overlap_int = 0.2
overlap_thr_values = np.arange(0, 1+overlap_int, overlap_int)

for overlap_thr in overlap_thr_values:
    pdf = rfs_and_blobs[rfs_and_blobs['perc_overlap']>=overlap_thr].copy()
    print("Thr: %.2f --------------" % overlap_thr)
    print(pdf[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count())

Thr: 0.00 --------------
             datakey  cell
visual_area               
Li               276   276
Lm               338   338
V1               736   736
Thr: 0.20 --------------
             datakey  cell
visual_area               
Li               264   264
Lm               337   337
V1               736   736
Thr: 0.40 --------------
             datakey  cell
visual_area               
Li               254   254
Lm               322   322
V1               736   736
Thr: 0.60 --------------
             datakey  cell
visual_area               
Li               248   248
Lm               302   302
V1               736   736
Thr: 0.80 --------------
             datakey  cell
visual_area               
Li               241   241
Lm               272   272
V1               734   734
Thr: 1.00 --------------
             datakey  cell
visual_area               
Li               217   217
Lm               210   210
V1               664   664


# Set output dir

In [34]:
overlap_dir = os.path.join(decoding_dir, 'match_RF_overlap')
if not os.path.exists(overlap_dir):
    os.makedirs(overlap_dir)
print(overlap_dir)

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/match_RF_overlap


# Linear separability, by RF overlap

In [33]:
import scipy.stats as spstats
import sklearn.metrics as skmetrics
import random

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn import preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

In [35]:
def computeMI(x, y):
    sum_mi = 0.0
    x_value_list = np.unique(x)
    y_value_list = np.unique(y)
    Px = np.array([ len(x[x==xval])/float(len(x)) for xval in x_value_list ]) #P(x)
    Py = np.array([ len(y[y==yval])/float(len(y)) for yval in y_value_list ]) #P(y)
    for i in xrange(len(x_value_list)):
        if Px[i] ==0.:
            continue
        sy = y[x == x_value_list[i]]
        if len(sy)== 0:
            continue
        pxy = np.array([len(sy[sy==yval])/float(len(y))  for yval in y_value_list]) #p(x,y)
        t = pxy[Py>0.]/Py[Py>0.] /Px[i] # log(P(x,y)/( P(x)*P(y))
        sum_mi += sum(pxy[t>0]*np.log2( t[t>0]) ) # sum ( P(x,y)* log(P(x,y)/( P(x)*P(y)) )
    return sum_mi

In [36]:
def filter_rois(rfs_and_blobs, overlap_thr=0.50, return_counts=False):
    visual_areas=['V1', 'Lm', 'Li']
    
    nocells=[]; notrials=[];
    global_rois = dict((v, []) for v in visual_areas)
    roi_counters = dict((v, 0) for v in visual_areas)
    
    roidf = []
    datakeys = dict((v, []) for v in visual_areas)
    for (visual_area, datakey), g in rfs_and_blobs[rfs_and_blobs['perc_overlap']>=overlap_thr].groupby(['visual_area', 'datakey']):

        roi_counter = roi_counters[visual_area]
        datakeys[visual_area].append(datakey)

        roi_list = sorted(g['cell'].unique()) #[int(r) for r in ddf.columns if r != 'config']

        # Reindex roi ids for global
        roi_ids = [i+roi_counter for i, r in enumerate(roi_list)]
        nrs = len(roi_list)

        global_rois[visual_area].extend(roi_ids)
        
        roidf.append(pd.DataFrame({'roi': roi_ids,
                                   'dset_roi': roi_list,
                                   'visual_area': [visual_area for _ in np.arange(0, nrs)],
                                   'datakey': [datakey for _ in np.arange(0, nrs)]}))

        # Update global roi id counter
        roi_counters[visual_area] += len(roi_ids)

    roidf = pd.concat(roidf, axis=0) #.groupby(['visual_area']).count()
    for k, v in global_rois.items():
        print(k, len(v))
    
    if return_counts:
        return roidf, roi_counters
    else:
        return roidf

In [37]:
def get_trials_for_N_cells(curr_ncells, gdf, MEANS):
    '''
    Randomly select N cells from global roi list (gdf), get cell's responses to all trials.
    
    gdf = dataframe (subset of global_rois dataframe), contains 
    - all rois for a given visual area
    - corresponding within-datakey roi IDs
    '''

    # Get current global RIDs
    ncells_t = gdf.shape[0]                      
    roi_ids = np.array(gdf['roi'].values.copy()) 

    # Random sample w/ replacement
    rand_ixs = np.array([random.randint(0, ncells_t-1) for _ in np.arange(0, curr_ncells)])
    curr_roi_list = roi_ids[rand_ixs]
    curr_roidf = gdf[gdf['roi'].isin(curr_roi_list)].copy()

    # Make sure equal num trials per condition for all dsets
    # --- Get fewest total n trials among selected dsets
    #fewest_ntrials_total = np.min([MEANS[k].shape[0] for k in curr_roidf['datakey'].unique()])
    # Get fewest N trials in a given cond for the datasets with fewest trials
    #keys_with_few = [k for k in curr_roidf['datakey'].unique() if MEANS[k].shape[0]==fewest_ntrials_total]
    min_ntrials_by_config = min([MEANS[k]['config'].value_counts().min() for k in curr_roidf['datakey'].unique()])

    # Get data samples for these cells
    d_list=[]; c_list=[];
    for datakey, dkey_rois in curr_roidf.groupby(['datakey']):
        # Get subset of trials per cond to match min N trials
        tmpd_list=[]
        for cfg, trialmat in MEANS[datakey].groupby(['config']):
            trial_ixs = trialmat.index.tolist() #[0]) # Get indices of trials in current dataset
            np.random.shuffle(trial_ixs)                # Shuffle them to get random order
            curr_cfg_trials = trialmat.loc[trial_ixs[0:min_ntrials_by_config]].copy() # Select min_ntrials randomly
            tmpd_list.append(curr_cfg_trials)        # Add current trials of current config to list
        tmpd = pd.concat(tmpd_list, axis=0) 

        # For each RID sample belonging to current dataset, get RID order
        sampled_cells = pd.concat([dkey_rois[dkey_rois['roi']==globalid][['roi', 'dset_roi']] \
                                         for globalid in curr_roi_list])
        sampled_dset_rois = sampled_cells['dset_roi'].values
        sampled_global_rois = sampled_cells['roi'].values

        # Get trial responses (some columns are repeats)
        curr_roidata = tmpd[sampled_dset_rois].copy().reset_index(drop=True)
        curr_roidata.columns = sampled_global_rois # Rename ROI columns to global-rois
        config_list = tmpd['config'].reset_index(drop=True)  # Get configs on selected trials
        d_list.append(curr_roidata)
        c_list.append(config_list)
    curr_neuraldf = pd.concat(d_list, axis=1).reset_index(drop=True)

    cfg_df = pd.concat(c_list, axis=1)
    cfg_df = cfg_df.T.drop_duplicates().T
    assert cfg_df.shape[0]==curr_neuraldf.shape[0], "Bad trials"
    assert cfg_df.shape[1]==1, "Bad configs"
    df = pd.concat([curr_neuraldf, cfg_df], axis=1)

    return df

In [38]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn import svm

def tune_C(sample_data, target_labels, scoring_metric='accuracy', cv_nfolds=5, test_split=0.2, verbose=False):
    
    train_data, test_data, train_labels, test_labels = train_test_split(sample_data, target_labels,
                                                                        test_size=test_split)
    
    #### DATA - Fit classifier
    scaler = StandardScaler()
    scaler.fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)

    # Set the parameters by cross-validation
    tuned_parameters = [{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}]

    #scores = ['accuracy', 'precision_macro', 'recall_macro']
    scoring = ('accuracy') #, 'precision_macro', 'recall_macro')
    # scoring_metric = 'accuracy' 
    results ={}
    #for scorer in scoring:
    scorer = scoring_metric
    
    if verbose:
        print("# Tuning hyper-parameters for %s" % scorer)
    #print()
    clf = GridSearchCV(
        svm.SVC(kernel='linear'), tuned_parameters, scoring=scorer, cv=cv_nfolds #scoring='%s_macro' % score
    )
    clf.fit(train_data, train_labels)
    if verbose:
        print("Best parameters set found on development set:")
        print(clf.best_params_)
    if verbose:
        print("Grid scores on development set (scoring=%s):" % scoring_metric)
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))

    y_true, y_pred = test_labels, clf.predict(test_data)
    if verbose:
        print("Detailed classification report:")
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print(classification_report(y_true, y_pred))
    test_score = clf.score(test_data, test_labels)
    if verbose:
        print("Held out test score: %.2f" % test_score)
    results.update({'%s' % scorer: {'C': clf.best_params_['C'], 'test_score': test_score}})
    return results #clf.best_params_



In [39]:
def fit_svm(zdata, targets, test_split=0.2, cv_nfolds=5, verbose=False, cv=True, C_value=None):

    # sizes = sorted(targets['group'].unique())

    #### For each transformation, split trials into 80% and 20%
    train_data, test_data, train_labels, test_labels = train_test_split(zdata, targets['label'].values, 
                                                        test_size=test_split, stratify=targets['group'])
    #print("CV:", cv)
    #### Cross validate (tune C w/ train data)
    if cv:
        cv_results = tune_C(train_data, train_labels, scoring_metric='accuracy', cv_nfolds=cv_nfolds, 
                           test_split=test_split, verbose=verbose)
        C_value = cv_results['accuracy']['C']
    else:
        assert C_value is not None, "Provide value for hyperparam C..."

    #### Fit SVM
    scaler = StandardScaler().fit(train_data)
    train_data = scaler.transform(train_data)
    trained_svc = svm.SVC(kernel='linear', C=C_value, random_state=10)
    scores = cross_validate(trained_svc, train_data, train_labels, cv=5,
                            scoring=('precision_macro', 'recall_macro', 'accuracy'),
                            return_train_score=True)
    iterdict = dict((s, values.mean()) for s, values in scores.items())
    trained_svc = svm.SVC(kernel='linear', C=C_value, random_state=10).fit(train_data, train_labels)
        
    #### DATA - Test with held-out data
    test_data = scaler.transform(test_data)
    test_score = trained_svc.score(test_data, test_labels)

    #### DATA - Calculate MI
    predicted_labels = trained_svc.predict(test_data)
    mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
    ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
    log2_mi = computeMI(test_labels, predicted_labels)
    iterdict.update({'heldout_test_score': test_score, 
                     'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi,
                     'C': C_value})
    # ------------------------------------------------------------------
    # Shuffle LABELS to calculate chance level
    train_labels_chance = train_labels.copy()
    np.random.shuffle(train_labels_chance)
    test_labels_chance = test_labels.copy()
    np.random.shuffle(test_labels_chance)

    #### CHANCE - Fit classifier
    chance_svc = svm.SVC(kernel='linear', C=C_value, random_state=10)
    scores_chance = cross_validate(chance_svc, train_data, train_labels_chance, cv=5,
                            scoring=('precision_macro', 'recall_macro', 'accuracy'),
                            return_train_score=True)
    iterdict_chance = dict((s, values.mean()) for s, values in scores_chance.items())

    # CHANCE - Test with held-out data
    trained_svc_chance = chance_svc.fit(train_data, train_labels_chance)
    test_score_chance = trained_svc_chance.score(test_data, test_labels_chance)  

    # Chance - Calculate MI
    predicted_labels = trained_svc_chance.predict(test_data)
    mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
    ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
    log2_mi = computeMI(test_labels, predicted_labels)

    iterdict_chance.update({'heldout_test_score': test_score_chance, 
                            'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi})

    return iterdict, iterdict_chance


In [40]:
def do_fit(iter_num, global_rois=None, MEANS=None, sdf=None, sample_ncells=None,
           C_value=None, test_size=0.2, cv_nfolds=5, class_a=0, class_b=106):
    #[gdf, MEANS, sdf, sample_ncells, cv] * n_times)
    '''
    Resample w/ replacement from pooled cells (across datasets). Assumes 'sdf' is same for all datasets.
    Do n_iterations, return mean/sem/std over iterations as dict of results.
    Classes (class_a, class_b) should be the actual labels of the target (i.e., value of morph level)
    '''
    #iter_list=[]
    #chance_list=[]
    #for iteration in np.arange(0, n_iterations): #n_iterations):
    
    # Get new sample set
    curr_data = get_trials_for_N_cells(sample_ncells, global_rois, MEANS)

    #### Select train/test configs for clf A vs B
    object_configs = sdf[sdf['morphlevel'].isin([class_a, class_b])].index.tolist() 
    curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
    sample_data = curr_data[curr_data['config'].isin(object_configs)]

    #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
    zdata = sample_data.drop('config', 1) #sample_data[curr_roi_list].copy()
    #zdata = (data - data.mean()) / data.std()

    #### Get labels
    targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
    targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
    targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]

    #### Fit
    curr_iter, _ = fit_svm(zdata, targets, cv=cv, C_value=C_value,
                                          test_split=test_split, cv_nfolds=cv_nfolds)

    return pd.DataFrame(curr_iter, index=[iter_num])


In [41]:
import multiprocessing as mp
from functools import partial
from contextlib import contextmanager

@contextmanager
def poolcontext(*args, **kwargs):
    pool = multiprocessing.Pool(*args, **kwargs)
    yield pool
    pool.terminate()
    pool.join()

def initializer(terminating_):
    # This places terminating in the global namespace of the worker subprocesses.
    # This allows the worker function to access `terminating` even though it is
    # not passed as an argument to the function.
    global terminating
    terminating = terminating_

def pool_bootstrap(global_rois, MEANS, sdf, sample_ncells, n_iterations=50, n_processes=1):   
    #try:
    results = []# None
    terminating = mp.Event()

    pool = mp.Pool(initializer=initializer, initargs=(terminating, ), processes=n_processes)  
    try:
        print("... n: %i (%i procs)" % (sample_ncells, n_processes))
        func = partial(do_fit, global_rois=global_rois, MEANS=MEANS, sdf=sdf, sample_ncells=sample_ncells)
        results = pool.map_async(func, range(n_iterations)).get(99999999)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print("**interupt")
        pool.terminate()
        print("***Terminating!")
    finally:
        pool.close()
        pool.join()

    return results

In [42]:
import datetime


## Run for 1 overlap_thr, 1 iter, select M0 / M100

In [43]:
min_ncells = 20

overlap_thr=0.8
not_in_stimkeys =[]
too_few = []
for (visual_area, datakey), g in rfs_and_blobs[rfs_and_blobs['perc_overlap']>=overlap_thr].groupby(['visual_area', 'datakey']):
    if len(g['cell'].unique()) < min_ncells:
        print(datakey, len(g['cell'].unique()))
        too_few.append(datakey)
    if datakey not in stim_datakeys:
        not_in_stimkeys.append(datakey)


('20190502_JC076_fov1', 14)
('20190605_JC090_fov1', 8)
('20190612_JC099_fov1', 18)
('20190617_JC099_fov1', 1)
('20191008_JC091_fov1', 12)
('20191018_JC113_fov1', 5)
('20191111_JC120_fov1', 1)
('20190504_JC078_fov1', 8)
('20190509_JC078_fov1', 6)
('20190513_JC078_fov1', 9)
('20190517_JC083_fov1', 15)
('20190627_JC091_fov1', 5)
('20190501_JC076_fov1', 11)


In [74]:
rfs_and_blobs[rfs_and_blobs['perc_overlap']<0.5]['visual_area'].value_counts()


Lm    304
Li    166
V1     86
Name: visual_area, dtype: int64

In [59]:
overlap_thr=0.5
filter_fovs = True
remove_too_few = False

filter_str = 'filter-repeat-fovs' if filter_fovs else 'all-fovs'
filter_str = '%s_%s' % (filter_str, 'remove-few') if remove_too_few else filter_str
print(filter_str)
if filter_fovs:
    curr_dkeys = [s for s in stim_datakeys if s not in too_few] if remove_too_few else stim_datakeys
else:
    curr_dkeys = rfs_and_blobs['datakey'].unique()
print(len(stim_datakeys), len(curr_dkeys))

globalcells_df, cell_counts = filter_rois(rfs_and_blobs[rfs_and_blobs['datakey'].isin(curr_dkeys)], 
                                overlap_thr=overlap_thr, return_counts=True)

filter-repeat-fovs
(25, 25)
('V1', 652)
('Lm', 222)
('Li', 133)


In [62]:
m0=0
m100=106
n_iterations=500
print(m0, m100, '%i iters' % n_iterations)

# CV stuff
test_split=0.2
cv_nfolds=5
C_value=None
cv=True

# Set overlap amount
overlap_thr_int = 0.2
overlap_thr_values = np.arange(0, 1+overlap_thr_int, overlap_thr_int)

# Make sure have SAME N trials total
keys_with_min_reps = [k for k, v in MEANS.items() if v['config'].value_counts().min() < 29]
print(globalcells_df[~globalcells_df['datakey'].isin(keys_with_min_reps)]['visual_area'].value_counts())
filt_globaldf = globalcells_df[~globalcells_df['datakey'].isin(keys_with_min_reps)]

(0, 106, '500 iters')
V1    652
Lm    222
Li    116
Name: visual_area, dtype: int64


In [63]:
if overlap_thr==0:
    NCELLS = [2, 4, 8, 16, 32, 64, 82, 123, 186, 237, 448, 556, 652]
else:
    #NCELLS = [2, 4, 8, 16, 32, 64, 82, 112, 164, 201, 448, 556, 652]
    NCELLS = [2, 4, 8, 16, 32, 64, 116, 222, 448, 652]

print("NCELLS: %s" % (str(NCELLS)))
ncells_dict = dict((k, NCELLS) for k in overlap_thr_values)

NCELLS: [2, 4, 8, 16, 32, 64, 116, 222, 448, 652]


In [64]:
n_processes = 2

popdf = []
#popdf_chance = []

#for overlap_thr, NCELLS in ncells_dict.items():
print("-------- Overlap: %.2f --------" % overlap_thr)
i=0
for visual_area, global_rois in filt_globaldf.groupby(['visual_area']):
#gdf = filt_globaldf[filt_globaldf['visual_area']==visual_area].copy()

    for sample_ncells in NCELLS: #[0::2]:
        print("... [%s] popn size: %i" % (visual_area, sample_ncells))
        if sample_ncells > cell_counts[visual_area]:
            continue
            
        iter_list = pool_bootstrap(global_rois, MEANS, sdf, sample_ncells, 
                                   n_iterations=n_iterations, n_processes=n_processes)

        # DATA - get mean across iters
        iter_results = pd.concat(iter_list, axis=0)
        iterd = dict(iter_results.mean())
        iterd.update( dict(('%s_std' % k, v) for k, v in \
                              zip(iter_results.std().index, iter_results.std().values)) )
        iterd.update( dict(('%s_sem' % k, v) for k, v in \
                              zip(iter_results.sem().index, iter_results.sem().values)) )
        iterd.update({'n_units': sample_ncells, 'overlap': overlap_thr, 'visual_area': visual_area})

        popdf.append(pd.DataFrame(iterd, index=[i]))
        #popdf_chance.append(pd.DataFrame(iterd_chance, index=[i]))
        i += 1

pooled = pd.concat(popdf, axis=0)
#pooled_chance = pd.concat(popdf_chance, axis=0)
#print(pooled.shape, pooled_chance.shape)
pooled.head()

-------- Overlap: 0.50 --------
... [Li] popn size: 2
... n: 2 (2 procs)
... [Li] popn size: 4
... n: 4 (2 procs)
... [Li] popn size: 8
... n: 8 (2 procs)
... [Li] popn size: 16
... n: 16 (2 procs)
... [Li] popn size: 32
... n: 32 (2 procs)
... [Li] popn size: 64
... n: 64 (2 procs)
... [Li] popn size: 116
... n: 116 (2 procs)
... [Li] popn size: 222
... [Li] popn size: 448
... [Li] popn size: 652
... [Lm] popn size: 2
... n: 2 (2 procs)
... [Lm] popn size: 4
... n: 4 (2 procs)
... [Lm] popn size: 8
... n: 8 (2 procs)
... [Lm] popn size: 16
... n: 16 (2 procs)
... [Lm] popn size: 32
... n: 32 (2 procs)
... [Lm] popn size: 64
... n: 64 (2 procs)
... [Lm] popn size: 116
... n: 116 (2 procs)
... [Lm] popn size: 222
... n: 222 (2 procs)
... [Lm] popn size: 448
... [Lm] popn size: 652
... [V1] popn size: 2
... n: 2 (2 procs)
... [V1] popn size: 4
... n: 4 (2 procs)
... [V1] popn size: 8
... n: 8 (2 procs)
... [V1] popn size: 16
... n: 16 (2 procs)
... [V1] popn size: 32
... n: 32 (2 procs)


Unnamed: 0,C,C_sem,C_std,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,...,train_accuracy,train_accuracy_sem,train_accuracy_std,train_precision_macro,train_precision_macro_sem,train_precision_macro_std,train_recall_macro,train_recall_macro_sem,train_recall_macro_std,visual_area
0,16.466518,4.912238,109.840989,0.00194,0.000319,0.007133,0.020176,0.001416,0.031656,0.020125,...,0.57481,0.002409,0.053858,0.513423,0.006869,0.153603,0.569504,0.002572,0.057515,Li
1,5.625676,2.099634,46.949248,0.001985,0.000322,0.007207,0.029979,0.001636,0.036574,0.032393,...,0.616642,0.002619,0.058554,0.595115,0.005569,0.124519,0.613698,0.002725,0.060939,Li
2,24.59782,6.281696,140.462999,0.018267,0.004671,0.104447,0.04525,0.001871,0.041827,0.053698,...,0.667348,0.002243,0.05015,0.66694,0.003277,0.07328,0.665613,0.002308,0.051608,Li
3,13.2895,4.057835,90.735949,0.033715,0.010351,0.231457,0.070528,0.002355,0.052669,0.090584,...,0.72631,0.001736,0.038821,0.730132,0.001664,0.037201,0.725335,0.001769,0.039546,Li
4,4.38184,0.834053,18.65,0.036454,0.007123,0.159265,0.097976,0.002536,0.056717,0.130923,...,0.789517,0.001445,0.032316,0.791695,0.001395,0.0312,0.788943,0.001459,0.032631,Li


In [52]:
datestr = datetime.datetime.now().strftime("%Y%m%d")
train_str = 'traintest_by-ncells_iter-%i' % (n_iterations)

pooled_outfile = os.path.join(decoding_dir, '%s_overlap-%.2f_results_%s.pkl' % (train_str, overlap_thr, datestr))
params_outfile = os.path.join(decoding_dir, '%s_overlap-%.2f_params_%s.json' % (train_str, overlap_thr, datestr))

with open(pooled_outfile, 'wb') as f:
    pkl.dump({'pooled': pooled}, f, protocol=pkl.HIGHEST_PROTOCOL)
    
params = {'test_split':0.2, 'cv_nfolds':5, 'C_value':None, 'cv':True,
          'n_iterations': n_iterations, 'overlap_thr': overlap_thr}
with open(params_outfile, 'w') as f:
    json.dump(params, f,  indent=4, sort_keys=True)
    
print(pooled_outfile)
print(params_outfile)


/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/traintest_by-ncells_iter-500_overlap-0.80_results_20200926.pkl
/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/traintest_by-ncells_iter-500_overlap-0.80_params_20200926.json


In [50]:
#zoom=True

lw=2
capsize=2
#metric='heldout_aMI' #'heldout_test_score'

for metric in ['heldout_aMI', 'heldout_log2MI', 'heldout_test_score']:
    for zoom in [True,False]:
        fig, ax = pl.subplots(figsize=(5,4), sharex=True, sharey=True, dpi=dpi)
        for ai, (visual_area, g) in enumerate(pooled.groupby(['visual_area'])):
            mean_scores = g.sort_values(by='n_units')[metric]
            std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
            n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
            ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                    alpha=1, lw=lw,
                    label='%s' % (visual_area))
            ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                        capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                        lw=lw)
        ax.legend(bbox_to_anchor=(1., 1))
        ax.set_title(overlap_thr)
        ax.set_xlabel("N units")
        ax.set_ylabel(metric)

        if metric=='heldout_test_score':
            ax.set_ylim([0.4, 1.0])
        ax.set_ylabel(metric)

        zoom_str=''
        if zoom:
            ax.set_xlim([0, 124])
            zoom_str = 'zoom'

        sns.despine(trim=True, offset=4)
        pl.subplots_adjust(right=0.7, left=0.2, wspace=0.5, bottom=0.2, top=0.8)


        putils.label_figure(fig, data_id)
        figname = '%s__%s%s_overlap-thr=%.2f_%s' % (train_str, metric, zoom_str, overlap_thr, filter_str)
        pl.savefig(os.path.join(decoding_dir, '%s_%s.svg' % (figname, datestr)))
        print(decoding_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_aMIzoom_overlap-thr=0.80_filter-repeat-fovs')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_aMI_overlap-thr=0.80_filter-repeat-fovs')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_log2MIzoom_overlap-thr=0.80_filter-repeat-fovs')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_log2MI_overlap-thr=0.80_filter-repeat-fovs')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_test_scorezoom_overlap-thr=0.80_filter-repeat-fovs')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'traintest_by-ncells_iter-500__heldout_test_score_overlap-thr=0.80_filter-repeat-fovs')


In [313]:
popdf = []
popdf_chance = []
i = 0

#for overlap_thr, NCELLS in ncells_dict.items():
print("-------- Overlap: %.2f --------" % overlap_thr)

# Get list of eligible cells from all FOVs
for visual_area, gdf in filt_globaldf.groupby(['visual_area']):
    for sample_ncells in NCELLS: #[0::2]:
        print("... [%s] popn size: %i" % (visual_area, sample_ncells))
        if sample_ncells > cell_counts[visual_area]:
            continue
        iterd, iterd_chance = resample_data_fit_classifier(gdf, MEANS, sdf, sample_ncells=sample_ncells,
                                                           C_value=C_value, test_size=test_split, cv_nfolds=cv_nfolds,
                                                           class_a=m0, class_b=m100, 
                                                           n_iterations=n_iterations)

        iterd.update({'n_units': sample_ncells, 'overlap': overlap_thr, 'visual_area': visual_area})
        iterd_chance.update({'n_units': sample_ncells, 'overlap': overlap_thr, 'visual_area': visual_area})

        popdf.append(pd.DataFrame(iterd, index=[i]))
        popdf_chance.append(pd.DataFrame(iterd_chance, index=[i]))
        i += 1

pooled = pd.concat(popdf, axis=0)
pooled_chance = pd.concat(popdf_chance, axis=0)
print(pooled.shape, pooled_chance.shape)
pooled.head()

-------- Overlap: 0.80 --------
... [Li] popn size: 2
... [Li] popn size: 4
... [Li] popn size: 8
... [Li] popn size: 16
... [Li] popn size: 32
... [Li] popn size: 64
... [Li] popn size: 82
... [Li] popn size: 102
... [Li] popn size: 148
... [Li] popn size: 186
... [Li] popn size: 256
... [Li] popn size: 448
... [Li] popn size: 610
... [Lm] popn size: 2
... [Lm] popn size: 4
... [Lm] popn size: 8
... [Lm] popn size: 16
... [Lm] popn size: 32
... [Lm] popn size: 64
... [Lm] popn size: 82
... [Lm] popn size: 102
... [Lm] popn size: 148
... [Lm] popn size: 186
... [Lm] popn size: 256
... [Lm] popn size: 448
... [Lm] popn size: 610
... [V1] popn size: 2
... [V1] popn size: 4
... [V1] popn size: 8
... [V1] popn size: 16
... [V1] popn size: 32
... [V1] popn size: 64
... [V1] popn size: 82
... [V1] popn size: 102
... [V1] popn size: 148
... [V1] popn size: 186
... [V1] popn size: 256
... [V1] popn size: 448
... [V1] popn size: 610
((31, 39), (31, 39))


Unnamed: 0,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,...,train_accuracy,train_accuracy_sem,train_accuracy_std,train_neg_mean_squared_error,train_neg_mean_squared_error_sem,train_neg_mean_squared_error_std,train_r2,train_r2_sem,train_r2_std,visual_area
0,0.008256,2.5e-05,0.000253,0.012733,0.002021,0.020214,0.007258,0.002872,0.028724,0.01837,...,0.523601,0.003444,0.03444,-5352.823309,38.696644,386.966438,-0.907139,0.013803,0.138028,Li
1,0.008552,2.3e-05,0.000232,0.014421,0.00229,0.022896,0.008191,0.003396,0.033956,0.020805,...,0.535539,0.003376,0.033759,-5218.680408,37.931839,379.318387,-0.859211,0.013499,0.13499,Li
2,0.009405,5.7e-05,0.000572,0.021756,0.002912,0.029115,0.018923,0.004302,0.043019,0.031387,...,0.573207,0.00419,0.041899,-4795.443025,47.077679,470.776792,-0.708283,0.016729,0.167287,Li
3,0.013747,0.000111,0.001111,0.027711,0.0035,0.035002,0.027662,0.005147,0.051475,0.039978,...,0.614939,0.004112,0.041118,-4326.542887,46.199963,461.999625,-0.541617,0.016478,0.164778,Li
4,0.018535,0.000415,0.00415,0.051287,0.004501,0.045012,0.062212,0.006605,0.066045,0.073991,...,0.700284,0.003792,0.037924,-3367.606197,42.611536,426.115361,-0.199629,0.015184,0.151839,Li


In [289]:
#zoom=True

#metric = 'heldout_test_score' #'heldout_aMI' #'heldout_test_score'
overlap_thr=0.8

res_df = pooled.copy() #pooled[pooled['overlap_thr']==overlap_thr].copy()
chance_df = pooled_chance.copy() #[pooled_chance['overlap_thr']==overlap_thr].copy()

lw = 2
capsize=2

for zoom in [True, False]:
    for metric in ['heldout_aMI', 'heldout_log2MI', 'heldout_test_score']:
        metric_base = metric #'heldout_test' if metric=='heldout_test_score' else metric
        
        fig, ax = pl.subplots(figsize=(5,4), sharex=True, sharey=True, dpi=dpi)
        for ai, (visual_area, g) in enumerate(res_df.groupby(['visual_area'])):
            mean_scores = g.sort_values(by='n_units')[metric]
            std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
            n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
            ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                    alpha=1, lw=lw,
                    label='%s' % (visual_area))
            ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                        capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                        lw=lw)
        ax.legend(bbox_to_anchor=(1., 1))
        ax.set_title(overlap_thr)
        ax.set_xlabel("N units")
        ax.set_ylabel(metric)

        for ai, (visual_area, g) in enumerate(chance_df.groupby(['visual_area'])):
            mean_scores = g.sort_values(by='n_units')[metric]
            std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
            n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
            ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
                    alpha=1, lw=lw, linestyle=':', 
                    label=None)
            ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                        capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                        lw=lw, linestyle=':')
        if metric=='heldout_test_score':
            ax.set_ylim([0.4, 1.0])
        ax.set_ylabel(metric)

        if zoom:
            ax.set_xlim([0, 128])
            zoom_str = 'zoom_'
        else:
            zoom_str=''

        sns.despine(trim=True, offset=4)
        pl.subplots_adjust(right=0.75, left=0.2, wspace=0.5, bottom=0.2, top=0.8)

        analysis_str = '%s|%i-iters_C%.2f_compare-overlaps' % (response_str, n_iterations, C_value)

        putils.label_figure(fig, analysis_str)

        figname = '_sameNtrials_%s_%s%s_overlap%.2f_%ivs%i_split-test-%.2f_%i-fold-cv_C%.2f_niter%i__%s' % (filter_str, zoom_str, metric, overlap_thr, m0, m100, test_split, cv_nfolds, C_value, n_iterations, response_str)
        pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))

        print(decoding_dir, figname)

<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_zoom_heldout_aMI_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_zoom_heldout_log2MI_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_zoom_heldout_test_score_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_heldout_aMI_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_heldout_log2MI_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', '_sameNtrials_filter-repeat-fovs_heldout_test_score_overlap0.80_0vs106_split-test-0.20_5-fold-cv_C1.00_niter100__dff_resptest-nstds_respthr-10.00')


In [None]:
response_str

In [233]:
screeninfo

{'altitude_deg': 67.323,
 'azimuth_deg': 119.5564,
 'deg_per_pixel': (0.06226895833333333, 0.062336111111111106),
 'resolution': [1920, 1080]}

# Cycle thru all visual areas

In [174]:
overlap_thr=0.8
    
popdf = []
popdf_chance = []
i = 0

#for overlap_thr, NCELLS in ncells_dict.items():
print("-------- Overlap: %.2f --------" % overlap_thr)

# Get list of eligible cells from all FOVs
globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 
#globaldf = filter_rois(has_blobs, overlap_thr=overlap_thr) 


for curr_ncells in [20, 40, 80, 100]: #NCELLS:
    for visual_area, gdf in globaldf.groupby(['visual_area']):
        print("... [%s] popn size: %i" % (visual_area, curr_ncells))

        iter_results=[]
        iter_results_chance=[]
        for iteration in np.arange(0, n_iterations): #n_iterations):
            
            curr_data = get_trials_for_N_cells(curr_ncells, gdf, MEANS)

            #### Select train/test configs for clf A vs B
            object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
            curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
            sample_data = curr_data[curr_data['config'].isin(object_configs)]

            #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
            data = sample_data[curr_roi_list]
            zdata = (data - data.mean()) / data.std()

            #### Get labels
            targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
            targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
            targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]
            
            #### Fit
            curr_iter, curr_iter_chance = fit_svm(zdata, targets, C_value=1.0, test_split=0.2)
            iter_results.append(pd.DataFrame(curr_iter, index=[iteration]))
            iter_results_chance.append(pd.DataFrame(curr_iter_chance, index=[iteration]))

        # DATA - get mean across iters
        iter_results = pd.concat(iter_results, axis=0)
        iterdict = dict(iter_results.mean())
        iterdict.update( dict(('%s_std' % k, v) for k, v in \
                              zip(iter_results.std().index, iter_results.std().values)) )
        iterdict.update( dict(('%s_sem' % k, v) for k, v in \
                              zip(iter_results.sem().index, iter_results.sem().values)) )
        iterdict.update({'n_units': curr_ncells, 
                         'overlap': overlap_thr,
                         'visual_area': visual_area})
        popdf.append(pd.DataFrame(iterdict, index=[i]))

        # CHANCE - get mean across iters
        iter_results_chance = pd.concat(iter_results_chance, axis=0)
        iterdict_chance = dict(iter_results_chance.mean())
        iterdict_chance.update( dict(('%s_std' % k, v) for k, v in \
                              zip(iter_results_chance.std().index, iter_results_chance.std().values)) )
        iterdict_chance.update( dict(('%s_sem' % k, v) for k, v in \
                              zip(iter_results_chance.sem().index, iter_results_chance.sem().values)) )
        iterdict_chance.update({'n_units': curr_ncells, 
                                'overlap': overlap_thr,
                                'visual_area': visual_area})
        popdf_chance.append(pd.DataFrame(iterdict_chance, index=[i]))
        i += 1

pooled = pd.concat(popdf, axis=0)
pooled_chance = pd.concat(popdf_chance, axis=0)
print(pooled.shape, pooled_chance.shape)
pooled.head()

-------- Overlap: 0.80 --------
('V1', 572)
('Lm', 185)
('Li', 100)
{'V1': 572, 'Lm': 185, 'Li': 100}
... [Li] popn size: 20
... [Lm] popn size: 20
... [V1] popn size: 20
... [Li] popn size: 40
... [Lm] popn size: 40
... [V1] popn size: 40
... [Li] popn size: 80
... [Lm] popn size: 80
... [V1] popn size: 80
... [Li] popn size: 100
... [Lm] popn size: 100
... [V1] popn size: 100
((12, 39), (12, 39))


Unnamed: 0,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,...,train_accuracy,train_accuracy_sem,train_accuracy_std,train_neg_mean_squared_error,train_neg_mean_squared_error_sem,train_neg_mean_squared_error_std,train_r2,train_r2_sem,train_r2_std,visual_area
0,0.012825,0.000153,0.001526,0.084204,0.005645,0.056454,0.110287,0.00829,0.082901,0.121481,...,0.753935,0.003685,0.03685,-2764.786598,41.404411,414.044107,0.014907,0.014734,0.147337,Li
1,0.01431,0.000103,0.001032,0.049176,0.004341,0.043415,0.05908,0.006362,0.063624,0.070946,...,0.715883,0.003049,0.030495,-3192.343603,34.263902,342.639019,-0.13728,0.012193,0.121926,Lm
2,0.013875,9.6e-05,0.000958,0.067498,0.004451,0.044508,0.086056,0.006525,0.065246,0.097379,...,0.746677,0.003333,0.03333,-2846.336993,37.449029,374.490286,-0.014178,0.013338,0.133383,V1
3,0.013824,0.00018,0.001803,0.106442,0.005841,0.058411,0.142634,0.008603,0.086033,0.153563,...,0.827296,0.002714,0.02714,-1940.501668,30.494494,304.944938,0.308511,0.010876,0.108759,Li
4,0.01751,0.000138,0.001375,0.064833,0.004603,0.046032,0.08214,0.006764,0.067636,0.093534,...,0.794026,0.002396,0.023961,-2314.326118,26.922625,269.226252,0.17531,0.009589,0.095894,Lm


In [162]:
pooled_outfile = os.path.join(decoding_dir, 'results_overlap-%.2f.pkl' % overlap_thr)
with open(pooled_outfile, 'wb') as f:
    pkl.dump(pooled, f, protocol=pkl.HIGHEST_PROTOCOL)
    
chance_outfile = os.path.join(decoding_dir, 'shuffled_overlap-%.2f.pkl' % overlap_thr)
with open(chance_outfile, 'wb') as f:
    pkl.dump(pooled_chance, f, protocol=pkl.HIGHEST_PROTOCOL)
    

In [172]:
pooled.columns

Index([u'fit_time', u'fit_time_sem', u'fit_time_std', u'heldout_MI',
       u'heldout_MI_sem', u'heldout_MI_std', u'heldout_aMI',
       u'heldout_aMI_sem', u'heldout_aMI_std', u'heldout_log2MI',
       u'heldout_log2MI_sem', u'heldout_log2MI_std', u'heldout_test_score',
       u'heldout_test_score_sem', u'heldout_test_score_std', u'n_units',
       u'overlap', u'score_time', u'score_time_sem', u'score_time_std',
       u'test_accuracy', u'test_accuracy_sem', u'test_accuracy_std',
       u'test_neg_mean_squared_error', u'test_neg_mean_squared_error_sem',
       u'test_neg_mean_squared_error_std', u'test_r2', u'test_r2_sem',
       u'test_r2_std', u'train_accuracy', u'train_accuracy_sem',
       u'train_accuracy_std', u'train_neg_mean_squared_error',
       u'train_neg_mean_squared_error_sem',
       u'train_neg_mean_squared_error_std', u'train_r2', u'train_r2_sem',
       u'train_r2_std', u'visual_area'],
      dtype='object')

In [175]:
#metric = 'heldout_test_score' #'heldout_aMI' #'heldout_test_score'
metric = 'heldout_aMI' #'heldout_aMI' #'heldout_test_score'

metric_base = metric #'heldout_test' if metric=='heldout_test_score' else metric

lw = 2
capsize=2

fig, ax = pl.subplots(figsize=(5,4), sharex=True, sharey=True, dpi=dpi)
for ai, (visual_area, g) in enumerate(pooled.groupby(['visual_area'])):
    mean_scores = g.sort_values(by='n_units')[metric]
    std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
    n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
    ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
            alpha=1, lw=lw,
            label='%s' % (visual_area))
    ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                lw=lw)
ax.legend(bbox_to_anchor=(1., 1))
ax.set_title(overlap_thr)
ax.set_xlabel("N units")
ax.set_ylabel(metric)


for ai, (visual_area, g) in enumerate(pooled_chance.groupby(['visual_area'])):
    mean_scores = g.sort_values(by='n_units')[metric]
    std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
    n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
    ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
            alpha=1, lw=lw, linestyle=':', 
            label=None)
    ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                lw=lw, linestyle=':')
if metric=='heldout_test_score':
    ax.set_ylim([0.4, 1.0])
ax.set_ylabel(metric)


sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.75, left=0.2, wspace=0.5, bottom=0.2, top=0.8)

analysis_str = '%s|%i-iters_C%.2f_compare-overlaps' % (response_str, n_iterations, C_value)

putils.label_figure(fig, analysis_str)
   
#figname = '%s_%ivs%i_split-test-%.2f_%i-fold-cv_overlap%.2f_C%.2f_niter%i__%s' % (metric, m0, m100, test_split, cv_nfolds, overlap_thr, C_value, n_iterations, response_str)
#pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))

print(decoding_dir, figname)


<IPython.core.display.Javascript object>

('/n/coxfs01/julianarhee/aggregate-visual-areas/decoding', 'CoM_label-fovs_common_to_blobs_and_rfs')


# Get all overlaps, all iters

In [85]:
NCELLS

[4, 8, 16, 32, 64, 128, 168]

In [86]:
overlap_int=0.2
overlap_thr_values = np.arange(0, 1+overlap_int, overlap_int)
overlap_thr_values

array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ])

In [87]:
ncells_dict = dict((k, NCELLS) for k in overlap_thr_values)
ncells_dict

{0.0: [4, 8, 16, 32, 64, 128, 168],
 0.20000000000000001: [4, 8, 16, 32, 64, 128, 168],
 0.40000000000000002: [4, 8, 16, 32, 64, 128, 168],
 0.60000000000000009: [4, 8, 16, 32, 64, 128, 168],
 0.80000000000000004: [4, 8, 16, 32, 64, 128, 168],
 1.0: [4, 8, 16, 32, 64, 128, 168]}

In [88]:
n_iterations

100

In [89]:
#if load_popdata:
    
popdf = []
popdf_chance = []
i = 0
for overlap_thr, NCELLS in ncells_dict.items():
    print("-------- Overlap: %.2f --------" % overlap_thr)
    # Get list of eligible cells from all FOVs
    globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 


    for curr_ncells in NCELLS:
        for visual_area, gdf in globaldf.groupby(['visual_area']):
            print("... [%s] popn size: %i" % (visual_area, curr_ncells))

            iter_results=[]
            iter_results_chance=[]
            for iteration in np.arange(0, n_iterations): #n_iterations):

                curr_data = get_trials_for_N_cells(curr_ncells, gdf, MEANS)

                #### Select train/test configs for clf A vs B
                object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
                curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
                sample_data = curr_data[curr_data['config'].isin(object_configs)]

                #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
                data = sample_data[curr_roi_list]
                zdata = (data - data.mean()) / data.std()

                #### Get labels
                targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
                targets['label'] = [sdf['morphlevel'][cfg] for cfg in targets['config'].values]
                targets['group'] = [sdf['size'][cfg] for cfg in targets['config'].values]

                #### Fit
                curr_iter, curr_iter_chance = fit_svm(zdata, targets, C_value=1.0, test_split=0.2)
                iter_results.append(pd.DataFrame(curr_iter, index=[iteration]))
                iter_results_chance.append(pd.DataFrame(curr_iter_chance, index=[iteration]))

            # DATA - get mean across iters
            iter_results = pd.concat(iter_results, axis=0)
            iterdict = dict(iter_results.mean())
            iterdict.update( dict(('%s_std' % k, v) for k, v in \
                                  zip(iter_results.std().index, iter_results.std().values)) )
            iterdict.update( dict(('%s_sem' % k, v) for k, v in \
                                  zip(iter_results.sem().index, iter_results.sem().values)) )
            iterdict.update({'n_units': curr_ncells, 
                             'overlap': overlap_thr,
                             'visual_area': visual_area})
            popdf.append(pd.DataFrame(iterdict, index=[i]))

            # CHANCE - get mean across iters
            iter_results_chance = pd.concat(iter_results_chance, axis=0)
            iterdict_chance = dict(iter_results_chance.mean())
            iterdict_chance.update( dict(('%s_std' % k, v) for k, v in \
                                  zip(iter_results_chance.std().index, iter_results_chance.std().values)) )
            iterdict_chance.update( dict(('%s_sem' % k, v) for k, v in \
                                  zip(iter_results_chance.sem().index, iter_results_chance.sem().values)) )
            iterdict_chance.update({'n_units': curr_ncells, 
                                    'overlap': overlap_thr,
                                    'visual_area': visual_area})
            popdf_chance.append(pd.DataFrame(iterdict_chance, index=[i]))
            i += 1

pooled = pd.concat(popdf, axis=0)
pooled_chance = pd.concat(popdf_chance, axis=0)
print(pooled.shape, pooled_chance.shape)
pooled.head()

-------- Overlap: 0.00 --------
('V1', 652)
('Lm', 237)
('Li', 142)
{'V1': 652, 'Lm': 237, 'Li': 142}
... [Li] popn size: 4
... [Lm] popn size: 4
... [V1] popn size: 4
... [Li] popn size: 8
... [Lm] popn size: 8
... [V1] popn size: 8
... [Li] popn size: 16
... [Lm] popn size: 16
... [V1] popn size: 16
... [Li] popn size: 32
... [Lm] popn size: 32
... [V1] popn size: 32
... [Li] popn size: 64
... [Lm] popn size: 64
... [V1] popn size: 64
... [Li] popn size: 128
... [Lm] popn size: 128
... [V1] popn size: 128
... [Li] popn size: 168
... [Lm] popn size: 168
... [V1] popn size: 168
-------- Overlap: 1.00 --------
('V1', 473)
('Lm', 155)
('Li', 89)
{'V1': 473, 'Lm': 155, 'Li': 89}
... [Li] popn size: 4
... [Lm] popn size: 4
... [V1] popn size: 4
... [Li] popn size: 8
... [Lm] popn size: 8
... [V1] popn size: 8
... [Li] popn size: 16
... [Lm] popn size: 16
... [V1] popn size: 16
... [Li] popn size: 32
... [Lm] popn size: 32
... [V1] popn size: 32
... [Li] popn size: 64
... [Lm] popn size: 64

Unnamed: 0,fit_time,fit_time_sem,fit_time_std,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,...,train_accuracy,train_accuracy_sem,train_accuracy_std,train_neg_mean_squared_error,train_neg_mean_squared_error_sem,train_neg_mean_squared_error_std,train_r2,train_r2_sem,train_r2_std,visual_area
0,0.004631,0.000146,0.001462,0.033441,0.003553,0.035533,0.034441,0.005263,0.052634,0.048245,...,0.616248,0.005384,0.053843,-4311.836682,60.497972,604.979716,-0.536624,0.021543,0.215431,Li
1,0.006269,0.000151,0.001513,0.021921,0.003462,0.03462,0.019172,0.005073,0.050735,0.031625,...,0.605438,0.004147,0.041474,-4433.302853,46.600598,466.005979,-0.579652,0.016599,0.165995,Lm
2,0.005198,0.000147,0.001474,0.028299,0.003141,0.031408,0.028716,0.004608,0.046081,0.040827,...,0.614874,0.003806,0.038059,-4327.276421,42.76286,427.628604,-0.542153,0.015284,0.152842,V1
3,0.006957,0.00017,0.001704,0.052132,0.004871,0.048708,0.062061,0.007204,0.072043,0.07521,...,0.667021,0.004869,0.048688,-3741.347,54.706396,547.063964,-0.333138,0.019471,0.194715,Li
4,0.008349,0.000101,0.00101,0.033749,0.003508,0.035082,0.03649,0.00514,0.051398,0.04869,...,0.634245,0.003304,0.033044,-4109.621918,37.128067,371.28067,-0.464166,0.013217,0.132167,Lm


In [90]:
pooled_decoding_results = os.path.join(decoding_dir, 'pooled_%i-iter_%iv%i_stratified-folds_perc_overlap__%s.pkl' % (n_iterations, m0, m100, response_str))

with open(pooled_decoding_results, 'wb') as f:
    pkl.dump({'pooled': pooled, 'pooled_chance': pooled_chance}, f, protocol=pkl.HIGHEST_PROTOCOL)
print("Saved results:\n--->%s" % pooled_decoding_results)

Saved results:
--->/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/pooled_100-iter_0v106_stratified-folds_perc_overlap__dff_resptest-nstds_respthr-10.00.pkl


## split by v area

In [1121]:
metric = 'heldout_test_score'
metric_base = 'heldout_test' if metric=='heldout_test_score' else metric

lw = 2
capsize=2

overlap_levels = sorted(ncells_dict.keys())
overlap_alphas = dict((v, a) for v, a in zip(overlap_levels, np.linspace(0.2, 0.99, len(overlap_levels))))

fig, axes = pl.subplots(1, 3, figsize=(15,5), sharex=True, sharey=True, dpi=dpi)
ai = 0
for visual_area, pgroup in pooled.groupby(['visual_area']):
    ax = axes[ai]
    for overlap_l, g in pgroup.groupby(['overlap']):
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw,
                label='%.1f' % (overlap_l))
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=overlap_alphas[overlap_l], 
                    lw=lw)
    if ai==0:
        ax.set_ylabel("test accuracy")
    
    ax.legend(bbox_to_anchor=(1.0, 1))
    ax.set_title(visual_area)
    ai += 1
    ax.set_xlabel("N units")

ai=0
for visual_area, pgroup in pooled_chance.groupby(['visual_area']):
    ax = axes[ai]
    for overlap_l, g in pgroup.groupby(['overlap']):
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw, linestyle=':', 
                label=None)
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color='k', #area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=overlap_alphas[overlap_l], 
                    lw=lw, linestyle=':')
        
    ai += 1
    
ax.set_ylim([0.4, 0.8])
sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.8, left=0.09, wspace=0.5, bottom=0.2)

analysis_str = '%i-iters_C%.2f_compare-overlaps' % (n_iterations, C_value)
putils.label_figure(fig, analysis_str) 

figname = '%s_compare-overlaps_split-visual-area_C%.2f_niter%i__%s' % (metric, C_value, n_iterations, response_str)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))

print(figname)

<IPython.core.display.Javascript object>

heldout_test_score_compare-overlaps_split-visual-area_C1.00_niter50__dff_resptest-nstds_respthr-0.05


## Plot visual areas together, split by overlap_thr

In [91]:
metric = 'heldout_aMI' #'heldout_test_score'
metric_base = 'heldout_test' if metric=='heldout_test_score' else metric

lw = 2
capsize=2

overlap_levels = sorted(ncells_dict.keys())
overlap_alphas = dict((v, a) for v, a in zip(overlap_levels, np.linspace(0.2, 0.99, len(overlap_levels))))


fig, axn = pl.subplots(1, 6, figsize=(16,3), sharex=True, sharey=True, dpi=dpi)
for ci, (overlap_l, pgroup) in enumerate(pooled.groupby(['overlap'])):
    for ai, (visual_area, g) in enumerate(pgroup.groupby(['visual_area'])):
        ax = axn[ci]
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw,
                label='%s' % (visual_area))
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                    lw=lw)
    ax.set_ylabel('')
    if ci==5:
        ax.legend(bbox_to_anchor=(1.8, 1))
    ax.set_title(overlap_l)
    ax.set_xlabel("N units")
    ax.set_ylabel('classifier accuracy')

for ci, (overlap_l, pgroup) in enumerate(pooled_chance.groupby(['overlap'])):
    for ai, (visual_area, g) in enumerate(pgroup.groupby(['visual_area'])):
        ax = axn[ci]
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw, linestyle=':', 
                label=None)
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                    lw=lw, linestyle=':')
    if metric=='heldout_test_score':
        ax.set_ylim([0.4, 1.0])
    if ci==0:
        ax.set_ylabel(metric)
    else:
        ax.set_ylabel('')

sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.75, left=0.09, wspace=0.5, bottom=0.2, top=0.8)

analysis_str = '%i-iters_C%.2f_compare-overlaps' % (n_iterations, C_value)
putils.label_figure(fig, analysis_str)
   

figname = '%s_perc_overlap_statified-folds_C%.2f_niter%i__%s' % (metric, C_value, n_iterations, response_str)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
#pl.savefig(os.path.join(decoding_dir, '%s.png' % figname))

print(figname)

<IPython.core.display.Javascript object>

heldout_aMI_perc_overlap_statified-folds_C1.00_niter100__dff_resptest-nstds_respthr-10.00


In [1002]:
os.path.join(decoding_dir, 'pooled_%i-iter_%iv%i_stratified-folds_perc_overlap__%s.pkl' % (n_iterations, m0, m100, response_str))


'/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/linear-separability'

In [92]:
NCELLS


[4, 8, 16, 32, 64, 128, 168]

In [1126]:
overlap_palette = sns.color_palette('cubehelix', n_colors=len(overlap_thr_values))

fig, axn = pl.subplots(1,3, figsize=(12,4), sharex=True, sharey=True)

for ai, (ax, (visual_area, g)) in enumerate(zip(axn.flat, pooled.groupby(['visual_area']))):
    
    sns.scatterplot('train_accuracy', 'test_accuracy', hue='overlap', data=g, ax=ax,
                   palette=overlap_palette)
    ax.set_xlim([0.5, 1.2])
    ax.set_ylim([0.5, 1.2])
    #ax.set_aspect('equal')
    
    if ai<2:
        ax.legend_.remove()
        
ax.legend(bbox_to_anchor=(1.1, 1))

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fd8b2cf0c50>

In [130]:
metric = 'heldout_test_score' #'heldout_test_score'
#metric_base = 'heldout_test' if metric=='heldout_test_score' else metric

lw = 2
capsize=2

overlap_levels = sorted(ncells_dict.keys())
overlap_alphas = dict((v, a) for v, a in zip(overlap_levels, np.linspace(0.2, 0.99, len(overlap_levels))))


fig, axn = pl.subplots(1, 6, figsize=(16,3), sharex=True, sharey=True, dpi=dpi)
for ci, (overlap_l, pgroup) in enumerate(pooled.groupby(['overlap'])):
    for ai, (visual_area, g) in enumerate(pgroup.groupby(['visual_area'])):
        ax = axn[ci]
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw,
                label='%s' % (visual_area))
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                    lw=lw)
    ax.set_ylabel('')
    if ci==5:
        ax.legend(bbox_to_anchor=(1.8, 1))
    ax.set_title(overlap_l)
    ax.set_xlabel("N units")
    ax.set_ylabel('classifier accuracy')
    ax.set_xlim([0, 100])

for ci, (overlap_l, pgroup) in enumerate(pooled_chance.groupby(['overlap'])):
    for ai, (visual_area, g) in enumerate(pgroup.groupby(['visual_area'])):
        ax = axn[ci]
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
                alpha=overlap_alphas[overlap_l], lw=lw, linestyle=':', 
                label=None)
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                    lw=lw, linestyle=':')
    if metric=='heldout_test_score':
        ax.set_ylim([0.4, 1.0])
    if ci==0:
        ax.set_ylabel(metric)
    else:
        ax.set_ylabel('')

sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.75, left=0.09, wspace=0.5, bottom=0.2, top=0.8)

analysis_str = '%i-iters_C%.2f_compare-overlaps' % (n_iterations, C_value)
putils.label_figure(fig, analysis_str)
   

#figname = '%s_perc_overlap_statified-folds_C%.2f_niter%i__%s' % (metric, C_value, n_iterations, response_str)
#pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
#pl.savefig(os.path.join(decoding_dir, '%s.png' % figname))

print(figname)

<IPython.core.display.Javascript object>

filter-repeat-fovs_heldout_test_score_0vs106_split-test-0.20_5-fold-cv_overlap0.80_C1.00_niter500__dff_resptest-nstds_respthr-10.00


In [None]:
#zoom=True

#metric = 'heldout_test_score' #'heldout_aMI' #'heldout_test_score'
#metric = 'heldout_aMI' #'heldout_aMI' #'heldout_test_score'

#metric_base = metric #'heldout_test' if metric=='heldout_test_score' else metric
overlap_thr=0.8

res_df = pooled[pooled['overlap_thr']==overlap_thr].copy()
chance_df = pooled_chance[pooled_chance['overlap_thr']==overlap_thr].copy()

lw = 2
capsize=2

for zoom in [True, False]:
    for metric in ['heldout_aMI', 'heldout_log2MI', 'heldout_test_score']:
        metric_base = metric #'heldout_test' if metric=='heldout_test_score' else metric
        
        fig, ax = pl.subplots(figsize=(5,4), sharex=True, sharey=True, dpi=dpi)
        for ai, (visual_area, g) in enumerate(res_df.groupby(['visual_area'])):
            mean_scores = g.sort_values(by='n_units')[metric]
            std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
            n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
            ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                    alpha=1, lw=lw,
                    label='%s' % (visual_area))
            ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                        capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                        lw=lw)
        ax.legend(bbox_to_anchor=(1., 1))
        ax.set_title(overlap_thr)
        ax.set_xlabel("N units")
        ax.set_ylabel(metric)

        for ai, (visual_area, g) in enumerate(chance_df.groupby(['visual_area'])):
            mean_scores = g.sort_values(by='n_units')[metric]
            std_scores = g.sort_values(by='n_units')['%s_sem' % metric]
            n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
            ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
                    alpha=1, lw=lw, linestyle=':', 
                    label=None)
            ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                        capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                        lw=lw, linestyle=':')
        if metric=='heldout_test_score':
            ax.set_ylim([0.4, 1.0])
        ax.set_ylabel(metric)

        if zoom:
            ax.set_xlim([0, 100])
            zoom_str = 'zoom_'
        else:
            zoom_str=''

        sns.despine(trim=True, offset=4)
        pl.subplots_adjust(right=0.75, left=0.2, wspace=0.5, bottom=0.2, top=0.8)

        analysis_str = '%s|%i-iters_C%.2f_compare-overlaps' % (response_str, n_iterations, C_value)

        putils.label_figure(fig, analysis_str)

        figname = '%s_%s%s_overlap%.2f_%ivs%i_split-test-%.2f_%i-fold-cv_C%.2f_niter%i__%s' % (filter_str, zoom_str, metric, overlap_thr, m0, m100, test_split, cv_nfolds, C_value, n_iterations, response_str)
        pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))

        print(decoding_dir, figname)

# Select 1 overlap_thr and do more fine-grained sampling of NCELLS

In [1141]:
overlap_thr = 0.8
#overlap_thr_values = np.arange(0, 1+overlap_int, overlap_int)

pdf = has_blobs[has_blobs['perc_overlap']>=overlap_thr].copy()
print("Thr: %.2f --------------" % overlap_thr)
print(pdf[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count())

NCELLS = [2, 4, 8, 16, 32, 48, 56, 64, 72, 80, 100]


Thr: 0.80 --------------
             datakey  cell
visual_area               
Li               100   100
Lm               185   185
V1               572   572


In [1142]:
print(overlap_thr, NCELLS)
n_iterations=500

(0.8, [2, 4, 8, 16, 32, 48, 56, 64, 72, 80, 100])


In [1143]:
#if load_popdata:
    popdf = []
popdf_chance = []
overlap_thr=0.8
i = 0

print("-------- Overlap: %.2f --------" % overlap_thr)
# Get list of eligible cells from all FOVs
globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 

for curr_ncells in NCELLS:
    for visual_area, gdf in globaldf.groupby(['visual_area']):
        print("... [%s] popn size: %i" % (visual_area, curr_ncells))

        iter_results=[]; iter_test_results=[];
        iter_results_chance=[]; iter_test_results_chance=[];
        for iteration in np.arange(0, n_iterations):
            # Get random selection of NCELLS for current iter (from global ROI list)
            ncells_t = gdf.shape[0]
            curr_rids = np.array(sorted(gdf['roi'].values.copy()))
            # Random w/ replacement
            rand_ixs = np.array([random.randint(0, ncells_t-1) for x in range(curr_ncells)]) #np.random.shuffle(curr_rids)
            curr_roi_list = curr_rids[rand_ixs] #curr_rids[0:curr_ncells]
            curr_roidf = gdf[gdf['roi'].isin(curr_roi_list)].copy()

            # Make sure equal num trials per condition for all dsets
            # Get fewest total n trials among dsets
            fewest_ntrials = np.min([MEANS[k].shape[0] for k in curr_roidf['datakey'].unique()])
            # Get trial counts per cond for dset w. fewest trials
            min_ntrials = []
            keys_with_few = [k for k in curr_roidf['datakey'].unique() if MEANS[k].shape[0]==fewest_ntrials]
            for k in keys_with_few:
                currd = MEANS[k].sort_values(by='config') #[dgroup['dset_roi']]
                min_ntrials.append(currd.groupby(['config']).count().min().min())
            # set min N trials per condition
            min_ntrials = min(min_ntrials)

            # Get data samples for these cells
            d_list=[]
            for datakey, dgroup in curr_roidf.groupby(['datakey']):
                currd = MEANS[datakey].sort_values(by='config') #[dgroup['dset_roi']]
                min_ntrials = min([min_ntrials, currd.groupby(['config']).count().min().min()])
                # Get subset of trials per cond to match min N trials
                tmp_d=[]
                for cfg, dmat in currd.groupby(['config']):
                    if dmat.shape[0] == min_ntrials:
                        tmp_d.append(dmat)
                    else:
                        tixs = np.arange(0, dmat.shape[0])
                        np.random.shuffle(tixs)
                        currd_subset = dmat.iloc[tixs[0:min_ntrials]].copy()
                        tmp_d.append(currd_subset)
                d_ = pd.concat(tmp_d, axis=0) 
                config_list = d_['config'].values     # Get configs on selected trials
                currd = d_[dgroup['dset_roi']].copy() # Data should only include ROI columns
                currd = currd.reset_index(drop=True)  # Reset trial indices 
                currd.columns = dgroup['roi'].values  # Rename ROI columns to global-rois
                d_list.append(currd)
            curr_data = pd.concat(d_list, axis=1)
            curr_data['config'] = config_list 

            #### Select train/test configs for clf A vs B
            object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
            curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
            sample_data = curr_data[curr_data['config'].isin(object_configs)]

            #### Equalize df/f across neurons:  Normalize each neuron to have same (zero) mean, (unit) SD across stimuli
            data = sample_data[curr_roi_list]
            zdata = (data - data.mean()) / data.std()

            #### Get labels
            targets = pd.DataFrame(sample_data['config'].copy(), columns=['config'])
            targets['label'] = [sdf.loc[cfg]['morphlevel'][0] for cfg in targets.values]

            stimdf = sdf[sdf['morphlevel'].isin([m0, m100])]
            sizes = sorted(stimdf['size'].unique())

            #### For each transformation, split trials into 80% and 20%
            train_data=[]; test_data=[]; 
            train_labels=[]; test_labels=[]; 
            train_data_chance=[]; test_data_chance=[]; 
            train_labels_chance=[]; test_labels_chance=[]; 
            for sz in sizes: 
                curr_configs = sdf[sdf['size']==sz].index.tolist()
                curr_trials = np.array(targets[targets['config'].isin(curr_configs)].index.tolist())
                xi = zdata.loc[curr_trials]
                yi = targets.loc[curr_trials]['label'].values

                # Split train/test
                train_x, test_x, train_y, test_y = train_test_split(xi, yi, test_size=0.2)
                train_data.append(train_x)
                test_data.append(test_x)
                train_labels.append(train_y)
                test_labels.append(test_y)

            # Aggregate TRAIN/TEST data        
            train_data = np.vstack(train_data)
            train_labels = np.hstack(train_labels)
            test_data = np.vstack(test_data)
            test_labels = np.hstack(test_labels)

            #### DATA - Fit classifier
            svc = LinearSVC(multi_class='ovr', C=C_value, penalty='l2')
            scores = cross_validate(svc, train_data, train_labels, cv=5,
                                    scoring=('r2', 'neg_mean_squared_error', 'accuracy'),
                                    return_train_score=True)
            iterdict = dict((s, values.mean()) for s, values in scores.items())

            #### DATA - Test with held-out data
            trained_svc = LinearSVC(multi_class='ovr', C=C_value, penalty='l2').fit(train_data, train_labels)
            test_score = trained_svc.score(test_data, test_labels)
            
            #### DATA - Calculate MI
            predicted_labels = trained_svc.predict(test_data)
            mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
            ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
            log2_mi = computeMI(test_labels, predicted_labels)
            
            iterdict.update({'heldout_test_score': test_score, 
                             'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi})
            iter_results.append(pd.DataFrame(iterdict, index=[iteration]))
            
            # ------------------------------------------------------------------
            # Shuffle LABELS to calculate chance level
            train_labels_chance = train_labels.copy()
            np.random.shuffle(train_labels_chance)
            test_labels_chance = test_labels.copy()
            np.random.shuffle(test_labels_chance)

            #### CHANCE - Fit classifier
            svc_chance = LinearSVC(multi_class='ovr', C=C_value, penalty='l2')
            scores_chance = cross_validate(svc, train_data, train_labels_chance, cv=5,
                                    scoring=('r2', 'neg_mean_squared_error', 'accuracy'),
                                    return_train_score=True)
            iterdict_chance = dict((s, values.mean()) for s, values in scores_chance.items())

            # CHANCE - Test with held-out data
            trained_svc_chance = LinearSVC(multi_class='ovr', C=C_value, penalty='l2').fit(train_data, train_labels_chance)
            test_score_chance = trained_svc_chance.score(test_data, test_labels_chance)  
            
            # Chance - Calculate MI
            predicted_labels = trained_svc_chance.predict(test_data)
            mi = skmetrics.mutual_info_score(test_labels, predicted_labels)
            ami = skmetrics.adjusted_mutual_info_score(test_labels, predicted_labels)
            log2_mi = computeMI(test_labels, predicted_labels)
            
            iterdict_chance.update({'heldout_test_score': test_score_chance, 
                                    'heldout_MI': mi, 'heldout_aMI': ami, 'heldout_log2MI': log2_mi})
            iter_results_chance.append(pd.DataFrame(iterdict_chance, index=[iteration]))

        # DATA - get mean across iters
        iter_results = pd.concat(iter_results, axis=0)
        iterdict = dict(iter_results.mean())
        iterdict.update({'n_units': curr_ncells, 
                         'overlap': overlap_thr,
                         'visual_area': visual_area,
                          'heldout_test_sem': spstats.sem(iter_results['heldout_test_score'].values),
                          'heldout_test_std': float(iter_results['heldout_test_score'].std()),
                          'heldout_MI_sem': spstats.sem(iter_results['heldout_MI'].values),
                          'heldout_MI_std': float(iter_results['heldout_MI'].std()),
                          'heldout_aMI_sem': spstats.sem(iter_results['heldout_aMI'].values),
                          'heldout_aMI_std': float(iter_results['heldout_aMI'].std()),
                          'heldout_log2MI_sem': spstats.sem(iter_results['heldout_log2MI'].values),
                          'heldout_log2MI_std': float(iter_results['heldout_log2MI'].std())})
        popdf.append(pd.DataFrame(iterdict, index=[i]))

        # CHANCE - get mean across iters
        iter_results_chance = pd.concat(iter_results_chance, axis=0)
        iterdict_chance = dict(iter_results_chance.mean())
        iterdict_chance.update({'n_units': curr_ncells, 
                                'overlap': overlap_thr,
                                'visual_area': visual_area,
                              'heldout_test_sem': spstats.sem(iter_results_chance['heldout_test_score'].values),
                              'heldout_test_std': float(iter_results_chance['heldout_test_score'].std()),
                              'heldout_MI_sem': spstats.sem(iter_results_chance['heldout_MI'].values),
                              'heldout_MI_std': float(iter_results_chance['heldout_MI'].std()),
                              'heldout_aMI_sem': spstats.sem(iter_results_chance['heldout_aMI'].values),
                              'heldout_aMI_std': float(iter_results_chance['heldout_aMI'].std()),
                              'heldout_log2MI_sem': spstats.sem(iter_results_chance['heldout_log2MI'].values),
                              'heldout_log2MI_std': float(iter_results_chance['heldout_log2MI'].std())})
        popdf_chance.append(pd.DataFrame(iterdict_chance, index=[i]))
        i += 1
results = pd.concat(popdf, axis=0)
results_chance = pd.concat(popdf_chance, axis=0)
print(results.shape, results_chance.shape)
results.head()

-------- Overlap: 0.80 --------
('V1', 572)
('Lm', 185)
('Li', 100)
... [Li] popn size: 2
... [Lm] popn size: 2
... [V1] popn size: 2
... [Li] popn size: 4
... [Lm] popn size: 4
... [V1] popn size: 4
... [Li] popn size: 8
... [Lm] popn size: 8
... [V1] popn size: 8
... [Li] popn size: 16
... [Lm] popn size: 16
... [V1] popn size: 16
... [Li] popn size: 32
... [Lm] popn size: 32
... [V1] popn size: 32
... [Li] popn size: 48
... [Lm] popn size: 48
... [V1] popn size: 48
... [Li] popn size: 56
... [Lm] popn size: 56
... [V1] popn size: 56
... [Li] popn size: 64
... [Lm] popn size: 64
... [V1] popn size: 64
... [Li] popn size: 72
... [Lm] popn size: 72
... [V1] popn size: 72
... [Li] popn size: 80
... [Lm] popn size: 80
... [V1] popn size: 80
... [Li] popn size: 100
... [Lm] popn size: 100
... [V1] popn size: 100
((33, 23), (33, 23))


Unnamed: 0,fit_time,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,heldout_log2MI_sem,heldout_log2MI_std,...,n_units,overlap,score_time,test_accuracy,test_neg_mean_squared_error,test_r2,train_accuracy,train_neg_mean_squared_error,train_r2,visual_area
0,0.00365,0.020443,0.001229,0.027485,0.017418,0.001805,0.040362,0.029493,0.001773,0.039652,...,2,0.8,0.000512,0.561056,-4931.974762,-0.757373,0.581073,-4707.06923,-0.677117,Li
1,0.004448,0.01692,0.001019,0.022787,0.012203,0.0015,0.033538,0.024411,0.00147,0.032875,...,2,0.8,0.000518,0.545976,-5101.414273,-0.81796,0.568528,-4848.021922,-0.72754,Lm
2,0.003726,0.021042,0.001225,0.027384,0.018354,0.001798,0.040195,0.030358,0.001767,0.039507,...,2,0.8,0.000524,0.55562,-4993.058108,-0.779308,0.578534,-4735.59429,-0.687442,V1
3,0.00508,0.037578,0.001785,0.039924,0.042524,0.002622,0.058638,0.054213,0.002576,0.057598,...,4,0.8,0.000526,0.593985,-4561.980516,-0.625561,0.626014,-4202.102657,-0.497224,Li
4,0.005957,0.021228,0.001163,0.026011,0.018572,0.001708,0.038181,0.030626,0.001678,0.037526,...,4,0.8,0.000521,0.562647,-4914.093122,-0.751028,0.599295,-4502.316516,-0.604191,Lm


#### plot MI - overlap_thr fixed

In [1154]:
metric = 'heldout_aMI' #'heldout_aMI' #'heldout_test_score'
metric_base = 'heldout_test' if metric=='heldout_test_score' else metric

lw = 2
capsize=2

fig, ax = pl.subplots(figsize=(5,4), sharex=True, sharey=True, dpi=dpi)
for ai, (visual_area, g) in enumerate(results.groupby(['visual_area'])):
    mean_scores = g.sort_values(by='n_units')[metric]
    std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
    n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
    ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
            alpha=overlap_alphas[overlap_l], lw=lw,
            label='%s' % (visual_area))
    ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                lw=lw)
ax.legend(bbox_to_anchor=(1., 1))
ax.set_title(overlap_thr)
ax.set_xlabel("N units")
ax.set_ylabel(metric)


for ai, (visual_area, g) in enumerate(results_chance.groupby(['visual_area'])):
    mean_scores = g.sort_values(by='n_units')[metric]
    std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
    n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
    ax.plot(n_units_per, mean_scores, color='k', #area_colors[visual_area], 
            alpha=overlap_alphas[overlap_l], lw=lw, linestyle=':', 
            label=None)
    ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                lw=lw, linestyle=':')
if metric=='heldout_test_score':
    ax.set_ylim([0.4, 1.0])
ax.set_ylabel(metric)


sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.75, left=0.2, wspace=0.5, bottom=0.2, top=0.8)

analysis_str = '%i-iters_C%.2f_compare-overlaps' % (n_iterations, C_value)
putils.label_figure(fig, analysis_str)
   
figname = '%s_lin-sep_overlap%.2f_C%.2f_niter%i__%s' % (metric, overlap_thr, C_value, n_iterations, response_str)
pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
# #pl.savefig(os.path.join(decoding_dir, '%s.png' % figname))

print(figname)

<IPython.core.display.Javascript object>

heldout_aMI_lin-sep_overlap0.80_C1.00_niter500__dff_resptest-nstds_respthr-0.05


In [1098]:
results.head()

Unnamed: 0,fit_time,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,heldout_log2MI_sem,heldout_log2MI_std,...,n_units,overlap,score_time,test_accuracy,test_neg_mean_squared_error,test_r2,train_accuracy,train_neg_mean_squared_error,train_r2,visual_area
0,0.003581,0.017625,0.002604,0.018415,0.013306,0.003855,0.027258,0.025427,0.003757,0.026568,...,2,0.8,0.000538,0.570314,-4827.954641,-0.720135,0.589152,-4616.292795,-0.644598,Li
1,0.004261,0.019583,0.003882,0.027446,0.015988,0.00572,0.04045,0.028252,0.0056,0.039597,...,2,0.8,0.00052,0.549545,-5061.31773,-0.80379,0.573573,-4791.333748,-0.70742,Lm
2,0.003904,0.01686,0.003525,0.024925,0.012421,0.005144,0.036373,0.024323,0.005085,0.03596,...,2,0.8,0.000546,0.561047,-4932.073987,-0.757595,0.583879,-4675.531498,-0.666071,V1
3,0.005282,0.034933,0.005187,0.036678,0.038543,0.007593,0.053692,0.050398,0.007483,0.052915,...,4,0.8,0.000534,0.596552,-4533.14714,-0.615229,0.627538,-4184.987095,-0.491034,Li
4,0.006122,0.023208,0.003956,0.027977,0.021617,0.005797,0.040994,0.033482,0.005708,0.040362,...,4,0.8,0.000522,0.574038,-4786.113244,-0.705895,0.606195,-4424.792566,-0.576986,Lm


## Compare information metrics

In [1155]:

lw = 2
capsize=2

fig, axn = pl.subplots(1, 3, figsize=(10,3), sharex=True, sharey=True, dpi=dpi)

metric = 'heldout_MI'
for ax, metric in zip(axn.flat, ['heldout_MI', 'heldout_aMI', 'heldout_log2MI']):
    metric_base = 'heldout_test' if metric=='heldout_test_score' else metric
    #ax = axn[0]
    for ai, (visual_area, g) in enumerate(results.groupby(['visual_area'])):
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                alpha=1, lw=lw, label='%s' % (visual_area))
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, lw=lw, label=None, alpha=1) #overlap_alphas[overlap_l], 
    ax.set_title(overlap_thr)
    ax.set_xlabel("N units")
    ax.set_ylabel(metric)
    
    for ai, (visual_area, g) in enumerate(results_chance.groupby(['visual_area'])):
        mean_scores = g.sort_values(by='n_units')[metric]
        std_scores = g.sort_values(by='n_units')['%s_sem' % metric_base]
        n_units_per = g.groupby(['n_units'])[metric].mean().index.tolist()
        ax.plot(n_units_per, mean_scores, color=area_colors[visual_area], 
                alpha=1, lw=lw, linestyle=':', label=None)
        ax.errorbar(n_units_per, mean_scores, yerr=std_scores, color=area_colors[visual_area], 
                    capthick=lw, capsize=capsize, label=None, alpha=1, #overlap_alphas[overlap_l], 
                    lw=lw, linestyle=':')
    #ax.set_ylim([0.4, 1.0])
    ax.set_ylabel(metric)

axn[-1].legend(bbox_to_anchor=(1.6, 1))
sns.despine(trim=True, offset=4)
pl.subplots_adjust(right=0.75, left=0.15, wspace=0.5, bottom=0.2, top=0.8)

analysis_str = '%i-iters_C%.2f_compare-overlaps' % (n_iterations, C_value)
putils.label_figure(fig, analysis_str)
   
# figname = 'lin-sep_overlap%.2f_C%.2f_niter%i__%s' % (overlap_thrC_value, n_iterations, response_str)
# pl.savefig(os.path.join(decoding_dir, '%s.svg' % figname))
# #pl.savefig(os.path.join(decoding_dir, '%s.png' % figname))

# print(figname)

<IPython.core.display.Javascript object>

In [1102]:
results.groupby(['visual_area', 'n_units']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,fit_time,heldout_MI,heldout_MI_sem,heldout_MI_std,heldout_aMI,heldout_aMI_sem,heldout_aMI_std,heldout_log2MI,heldout_log2MI_sem,heldout_log2MI_std,...,heldout_test_sem,heldout_test_std,overlap,score_time,test_accuracy,test_neg_mean_squared_error,test_r2,train_accuracy,train_neg_mean_squared_error,train_r2
visual_area,n_units,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Li,2,0.003581,0.017625,0.002604,0.018415,0.013306,0.003855,0.027258,0.025427,0.003757,0.026568,...,0.010659,0.075372,0.8,0.000538,0.570314,-4827.954641,-0.720135,0.589152,-4616.292795,-0.644598
Li,4,0.005282,0.034933,0.005187,0.036678,0.038543,0.007593,0.053692,0.050398,0.007483,0.052915,...,0.010504,0.074277,0.8,0.000534,0.596552,-4533.14714,-0.615229,0.627538,-4184.987095,-0.491034
Li,8,0.007254,0.044066,0.0063,0.04455,0.05187,0.009241,0.06534,0.063574,0.009089,0.064271,...,0.011563,0.081766,0.8,0.000517,0.627318,-4187.451913,-0.492223,0.674686,-3655.231318,-0.302422
Li,16,0.01159,0.083034,0.006016,0.042542,0.108867,0.008813,0.062314,0.119792,0.00868,0.061375,...,0.007461,0.052759,0.8,0.000523,0.66087,-3810.463386,-0.357912,0.732227,-3008.697744,-0.072133
Li,32,0.012882,0.105491,0.010048,0.071051,0.141849,0.014744,0.104258,0.152191,0.014496,0.102505,...,0.008857,0.06263,0.8,0.000514,0.702122,-3346.957592,-0.192593,0.806556,-2173.53627,0.225591
Li,64,0.013936,0.116153,0.009318,0.065889,0.15724,0.013708,0.096933,0.167573,0.013443,0.095057,...,0.008904,0.06296,0.8,0.000607,0.69507,-3426.192093,-0.221384,0.888305,-1255.001218,0.552666
Li,100,0.019367,0.115751,0.009491,0.06711,0.156581,0.013931,0.098509,0.166993,0.013692,0.096819,...,0.009712,0.068675,0.8,0.000968,0.692313,-3457.167234,-0.232269,0.968633,-352.44467,0.87439
Lm,2,0.004261,0.019583,0.003882,0.027446,0.015988,0.00572,0.04045,0.028252,0.0056,0.039597,...,0.012623,0.089258,0.8,0.00052,0.549545,-5061.31773,-0.80379,0.573573,-4791.333748,-0.70742
Lm,4,0.006122,0.023208,0.003956,0.027977,0.021617,0.005797,0.040994,0.033482,0.005708,0.040362,...,0.012075,0.085382,0.8,0.000522,0.574038,-4786.113244,-0.705895,0.606195,-4424.792566,-0.576986
Lm,8,0.008209,0.037167,0.005817,0.041135,0.041846,0.008507,0.060156,0.053621,0.008393,0.059345,...,0.012838,0.090779,0.8,0.000521,0.590292,-4603.473623,-0.640174,0.648745,-3946.703192,-0.406078


In [None]:
luminance_dir = os.path.join(aggregate_dir, 'luminance-analysis')

overlaps_datafile = os.path.join(luminance_dir, 'overlaps-all-RFs.pkl')
assert os.path.exists(overlaps_datafile), "OVERLAPS NOT FOUND."
with open(overlaps_datafile, 'rb') as f:
    overlaprois = pkl.load(f)



# Test generalization

In [None]:
exp = util.Objects(animalid, session, fov, traceid=traceid, rootdir=rootdir)
# Get stimulus conditions
sdf = exp.get_stimuli()
sdf = reformat_morph_values(sdf)
sizes = np.array(sorted(sdf['size'].unique()))
morphs = np.array(sorted(sdf['morphlevel'].unique()))

In [1156]:
overlap_thr=0.8
print("-------- Overlap: %.2f --------" % overlap_thr)
# Get list of eligible cells from all FOVs
globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 


-------- Overlap: 0.80 --------
('V1', 572)
('Lm', 185)
('Li', 100)


In [1230]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler


In [1372]:
overlap_thr = 0.8
#overlap_thr_values = np.arange(0, 1+overlap_int, overlap_int)

pdf = has_blobs[has_blobs['perc_overlap']>=overlap_thr].copy()
print("Thr: %.2f --------------" % overlap_thr)
print(pdf[['visual_area', 'datakey', 'cell']].drop_duplicates().groupby(['visual_area']).count())

#NCELLS = [2, 4, 8, 16, 32, 48, 56, 64, 72, 80, 100]
NCELLS = [2, 4, 8, 16, 32, 64, 82, 100]

print(overlap_thr, NCELLS)
n_iterations=500

Thr: 0.80 --------------
             datakey  cell
visual_area               
Li               100   100
Lm               185   185
V1               572   572
(0.8, [2, 4, 8, 16, 32, 64, 82, 100])


In [1373]:
#if load_popdata:
    
res_list=[]
i = 0
print("-------- Overlap: %.2f --------" % overlap_thr)
# Get list of eligible cells from all FOVs
globaldf = filter_rois(has_blobs[has_blobs['datakey'].isin(stim_datakeys)], overlap_thr=overlap_thr) 

for curr_ncells in NCELLS:
    for visual_area, gdf in globaldf.groupby(['visual_area']):
        print("... [%s] popn size: %i" % (visual_area, curr_ncells))
        r_list=[]
        for iteration in np.arange(0, n_iterations):
            # Get random selection of NCELLS for current iter (from global ROI list)
            ncells_t = gdf.shape[0]
            curr_rids = np.array(sorted(gdf['roi'].values.copy()))

            # Random w/ replacement
            rand_ixs = np.array([random.randint(0, ncells_t-1) for x in range(curr_ncells)]) #np.random.shuffle(curr_rids)
            curr_roi_list = curr_rids[rand_ixs] #curr_rids[0:curr_ncells]
            curr_roidf = gdf[gdf['roi'].isin(curr_roi_list)].copy() # Get rois/datasets that correspond to these global rids

            # Make sure equal num trials per condition for all dsets
            if not exact_trials:
                # Get fewest total n trials among dsets
                fewest_ntrials = np.min([MEANS[k].shape[0] for k in curr_roidf['datakey'].unique()])
                # Get trial counts per cond for dset w. fewest trials
                min_ntrials = []
                keys_with_few = [k for k in curr_roidf['datakey'].unique() if MEANS[k].shape[0]==fewest_ntrials]
                for k in keys_with_few:
                    currd = MEANS[k].sort_values(by='config') #[dgroup['dset_roi']]
                    min_ntrials.append(currd.groupby(['config']).count().min().min())
                # set min N trials per condition
                min_ntrials = min(min_ntrials)

            # For each cell, get is particular trials by loading the dataset it came from
            d_list=[]
            for datakey, dgroup in curr_roidf.groupby(['datakey']):
                currd = MEANS[datakey].sort_values(by='config') #[dgroup['dset_roi']]
                min_ntrials = min([min_ntrials, currd.groupby(['config']).count().min().min()])
                if exact_trials:
                    tmp_d = currd.copy()
                else:
                    # get subset of trials per cond to match min N trials
                    tmp_d=[]
                    for cfg, dmat in currd.groupby(['config']):
                        if dmat.shape[0] == min_ntrials:
                            tmp_d.append(dmat)
                        else:
                            tixs = np.arange(0, dmat.shape[0])
                            np.random.shuffle(tixs)
                            currd_subset = dmat.iloc[tixs[0:min_ntrials]].copy()
                            tmp_d.append(currd_subset)
                    d_ = pd.concat(tmp_d, axis=0)     # Get current cell's trials
                config_list = d_['config'].values     # Get configs on selected trials
                currd = d_[dgroup['dset_roi']].copy() # Data should only include ROI columns
                currd = currd.reset_index(drop=True)  # Reset trial indices 
                currd.columns = dgroup['roi'].values  # Rename ROI columns to global-rois
                d_list.append(currd)
            #curr_data = pd.concat(d_list, axis=1)
            #curr_data['config'] = config_list 
            sample_data = pd.concat(d_list, axis=1)
            sample_labels = config_list

            #### Select train/test configs for clf A vs B
            # object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
            curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
            #sample_data = curr_data[curr_data['config'].isin(object_configs)]

            #-----------------
            stimdf = sdf.copy()
            tested_sizes = sorted(sdf['size'].unique())
            tested_morphs = sorted(sdf['morphlevel'].unique())

            #% Specify train/test conditions:     
            class_name = 'morphlevel'
            class_types = [0, 106]
            restrict_transform = True
            constant_transform = 'size'

            # C = 1e3
            m0 = 0
            m100 = 106

            # Go thru all training sizes, then test on non-trained sizes
            for train_transform in tested_sizes:
                # Get train configs
                train_configs = stimdf[((stimdf[class_name].isin(class_types))\
                                        & (stimdf[constant_transform]==train_transform))].index.tolist()

                # Get corresponding indices into data array
                train_ixs = [i for i, l in enumerate(sample_labels) if l in train_configs]

                # Get data and labels
                X = sample_data.iloc[train_ixs].values #[train_configs]
                y = np.array([stimdf[class_name][c] for c in sample_labels[train_ixs]])

                train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2)

                
                # Select generalization-test set
                untrained_class_types = [c for c in stimdf[class_name].unique() if c not in class_types]
                test_configs = stimdf[( (stimdf[constant_transform]!=train_transform)\
                                       & (stimdf[class_name].isin(class_types)) )].index.tolist()
                test_ixs = [i for i, l in enumerate(sample_labels) if l in test_configs]
                X_test = sample_data.iloc[test_ixs].values
                y_test_labels = sample_labels[test_ixs]
                test_transforms = stimdf[stimdf[constant_transform]!=train_transform][constant_transform].unique()

                #% Train/validation split
                n_splits = 5 #len(X)
                kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0) #KFold(n_splits=n_splits)
                kf.get_n_splits(train_x) #(X)

                # Do K-fold CV for current stimulus configs
                scores=[]
                test_scores = dict((tv, []) for tv in tested_sizes)
                for train_index, validate_index in kf.split(train_x, train_y): #(X, y): #kf.split(X):

                    # Get current train/validate split data
#                     X_train, X_validate = X[train_index], X[validate_index]
#                     y_train, y_validate = y[train_index], y[validate_index]
                    X_train, X_validate = train_x[train_index], train_x[validate_index]
                    y_train, y_validate = train_y[train_index], train_y[validate_index]
        
                    scaler = preprocessing.StandardScaler().fit(X_train)
                    X_train_transformed = scaler.transform(X_train)

                    # Fit SVM
                    trained_svc = LinearSVC(multi_class='ovr', C=C_value).fit(X_train_transformed, y_train)

                    # Validate with held-out data
                    X_validate_transformed = scaler.transform(X_validate)
                    curr_score = trained_svc.score(X_validate_transformed, y_validate)  
                    y_pred = trained_svc.predict(X_validate_transformed)

                    # Add current fold score
                    test_scores[train_transform].append(curr_score)

                    # Test (generalization) to each size and update test scores
                    for test_transform in sorted(test_transforms):
                        curr_test_ixs = [i for i, tl in enumerate(y_test_labels)\
                                         if stimdf[constant_transform][tl]==test_transform]

                        X_test_transformed = scaler.transform(X_test[curr_test_ixs, :])
                        y_test_curr_labels = [stimdf['morphlevel'][tc] for tc in y_test_labels[curr_test_ixs]]
                        curr_score_test = trained_svc.score(X_test_transformed, y_test_curr_labels)
                        test_scores[test_transform].append( curr_score_test )

                test_df = pd.DataFrame(dict((k, np.mean(v)) for k, v in test_scores.items()), index=[train_transform])
                test_df['iteration'] = iteration
                test_df['train_cond'] = train_transform
                r_list.append(test_df)
        iter_res = pd.concat(r_list, axis=0)
        #iter_res = res.groupby(['train_cond', 'iteration']).mean().reset_index()
        iter_res['visual_area'] = [visual_area for _ in np.arange(0, len(iter_res))]
        iter_res['n_cells'] = [curr_ncells for _ in np.arange(0, len(iter_res))]
        res_list.append(iter_res)
        

print(len(res_list))

res = pd.concat(res_list)
print(res.shape)

-------- Overlap: 0.80 --------
('V1', 572)
('Lm', 185)
('Li', 100)
... [Li] popn size: 2
... [Lm] popn size: 2
... [V1] popn size: 2
... [Li] popn size: 4
... [Lm] popn size: 4
... [V1] popn size: 4
... [Li] popn size: 8
... [Lm] popn size: 8
... [V1] popn size: 8
... [Li] popn size: 16
... [Lm] popn size: 16
... [V1] popn size: 16
... [Li] popn size: 32
... [Lm] popn size: 32
... [V1] popn size: 32
... [Li] popn size: 64
... [Lm] popn size: 64
... [V1] popn size: 64
... [Li] popn size: 82
... [Lm] popn size: 82
... [V1] popn size: 82
... [Li] popn size: 100
... [Lm] popn size: 100
... [V1] popn size: 100


In [1456]:
n_iterations = 3

In [1460]:
i=0
r_list=[]
for iteration in np.arange(0, n_iterations):
    # Get random selection of NCELLS for current iter (from global ROI list)
    ncells_t = gdf.shape[0]
    curr_rids = np.array(sorted(gdf['roi'].values.copy()))

    # Random w/ replacement
    rand_ixs = np.array([random.randint(0, ncells_t-1) for x in range(curr_ncells)]) #np.random.shuffle(curr_rids)
    curr_roi_list = curr_rids[rand_ixs] #curr_rids[0:curr_ncells]
    curr_roidf = gdf[gdf['roi'].isin(curr_roi_list)].copy() # Get rois/datasets that correspond to these global rids

    # Make sure equal num trials per condition for all dsets
    # if not exact_trials:
    # Get fewest total n trials among dsets
    fewest_ntrials = np.min([MEANS[k].shape[0] for k in curr_roidf['datakey'].unique()])
    # Get trial counts per cond for dset w. fewest trials
    min_ntrials = []
    keys_with_few = [k for k in curr_roidf['datakey'].unique() if MEANS[k].shape[0]==fewest_ntrials]
    for k in keys_with_few:
        currd = MEANS[k].sort_values(by='config') #[dgroup['dset_roi']]
        min_ntrials.append(currd.groupby(['config']).count().min().min())
    # set min N trials per condition
    min_ntrials = min(min_ntrials)

    # For each cell, get is particular trials by loading the dataset it came from
    d_list=[]
    for datakey, dgroup in curr_roidf.groupby(['datakey']):
        currd = MEANS[datakey].sort_values(by='config') #[dgroup['dset_roi']]
        min_ntrials = min([min_ntrials, currd.groupby(['config']).count().min().min()])
        #if exact_trials:
        #    tmp_d = currd.copy()
        #else:
        # Get subset of trials per cond to match min N trials
        tmp_d=[]
        for cfg, dmat in currd.groupby(['config']):
            if dmat.shape[0] == min_ntrials:
                tmp_d.append(dmat)
            else:
                tixs = np.arange(0, dmat.shape[0])
                np.random.shuffle(tixs)
                currd_subset = dmat.iloc[tixs[0:min_ntrials]].copy()
                tmp_d.append(currd_subset)
        d_ = pd.concat(tmp_d, axis=0)     # Get current cell's trials
        config_list = d_['config'].values     # Get configs on selected trials
        currd = d_[dgroup['dset_roi']].copy() # Data should only include ROI columns
        currd = currd.reset_index(drop=True)  # Reset trial indices 
        currd.columns = dgroup['roi'].values  # Rename ROI columns to global-rois
        d_list.append(currd)
    #curr_data = pd.concat(d_list, axis=1)
    #curr_data['config'] = config_list 
    sample_data = pd.concat(d_list, axis=1)
    sample_labels = config_list

    #### Select train/test configs for clf A vs B
    # object_configs = sdf[sdf['morphlevel'].isin([m0, m100])].index.tolist() 
    curr_roi_list = [int(c) for c in curr_data.columns if c != 'config']
    #sample_data = curr_data[curr_data['config'].isin(object_configs)]

    #-----------------
    stimdf = sdf.copy()
    tested_sizes = sorted(sdf['size'].unique())
    tested_morphs = sorted(sdf['morphlevel'].unique())

    #% Specify train/test conditions:     
    class_name = 'morphlevel'
    class_types = [0, 106]
    restrict_transform = True
    constant_transform = 'size'

    # C = 1e3
    m0 = 0
    m100 = 106

    # Go thru all training sizes, then test on non-trained sizes
    for train_transform in tested_sizes:
        # Get train configs
        train_configs = stimdf[((stimdf[class_name].isin(class_types))\
                                & (stimdf[constant_transform]==train_transform))].index.tolist()

        # Get corresponding indices into data array
        train_ixs = [i for i, l in enumerate(sample_labels) if l in train_configs]

        # Get data and labels
        X = sample_data.iloc[train_ixs].values #[train_configs]
        y = np.array([stimdf[class_name][c] for c in sample_labels[train_ixs]])
        
        # Save test set for train transform
        train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2)

        # Select generalization-test set
        untrained_class_types = [c for c in stimdf[class_name].unique() if c not in class_types]
        test_configs = stimdf[( (stimdf[constant_transform]!=train_transform)\
                               & (stimdf[class_name].isin(class_types)) )].index.tolist()
        test_ixs = [i for i, l in enumerate(sample_labels) if l in test_configs]
        X_test = sample_data.iloc[test_ixs].values
        y_test_labels = sample_labels[test_ixs]
        test_transforms = stimdf[stimdf[constant_transform]!=train_transform][constant_transform].unique()

        #% Train/validation split
        n_splits = 3 #len(X)
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0) #KFold(n_splits=n_splits)
        kf.get_n_splits(train_x) #(X)

        test_scores = dict((tv, []) for tv in tested_sizes)
        
        # Do K-fold CV for current stimulus configs
        cv_scores=[]
        for train_index, validate_index in kf.split(train_x, train_y): #(X, y): #kf.split(X):
            # Get current train/validate split data
#                     X_train, X_validate = X[train_index], X[validate_index]
#                     y_train, y_validate = y[train_index], y[validate_index]
            X_train, X_validate = train_x[train_index], train_x[validate_index]
            y_train, y_validate = train_y[train_index], train_y[validate_index]

            scaler = preprocessing.StandardScaler().fit(X_train)
            X_train_transformed = scaler.transform(X_train)

            # Fit SVM
            trained_svc = LinearSVC(multi_class='ovr', C=C_value).fit(X_train_transformed, y_train)

            # Validate with held-out data
            X_validate_transformed = scaler.transform(X_validate)
            cv_score = trained_svc.score(X_validate_transformed, y_validate)  
            #y_pred = trained_svc.predict(X_validate_transformed)
            cv_scores.append(cv_score)

        # Save CV and test scores
        mean_cv_score = np.mean(cv_scores)
        
        # Train clf on all (train) data
        trained_svc = LinearSVC(multi_class='ovr', C=C_value).fit(train_x, train_y)
        heldout_train_score = trained_svc.score(test_x, test_y)  
        
        # test_scores[train_transform].append(curr_score)

        # Test (generalization) to each size and update test scores
        for test_transform in sorted(test_transforms):
            curr_test_ixs = [i for i, tl in enumerate(y_test_labels)\
                             if stimdf[constant_transform][tl]==test_transform]

            X_test_transformed = scaler.transform(X_test[curr_test_ixs, :])
            y_test_curr_labels = [stimdf['morphlevel'][tc] for tc in y_test_labels[curr_test_ixs]]
            curr_score_test = trained_svc.score(X_test_transformed, y_test_curr_labels)
            test_scores[test_transform].append( curr_score_test )


        test_scores[train_transform] = heldout_train_score
        tdf = pd.concat([pd.DataFrame({'test_cond': k, 'test_score': np.mean(v)}, index=[i]) \
                   for k, v in test_scores.items()], axis=0)
        tdf['cv_score'] = [mean_cv_score for _ in np.arange(0, len(tdf))]
        tdf['iteration'] = [iteration for _ in np.arange(0, len(tdf))]
        tdf['train_cond'] = [train_transform for _ in np.arange(0, len(tdf))]
        i+=1

        #test_df = pd.DataFrame(dict((k, np.mean(v)) for k, v in test_scores.items()), index=[train_transform])
        #test_df['iteration'] = iteration
        #test_df['train_cond'] = train_transform
        r_list.append(tdf.reset_index(drop=True))

In [1461]:
print(len(r))
r = pd.concat(r_list)
print(r.shape)
r.head()

75
(75, 5)


Unnamed: 0,test_cond,test_score,cv_score,iteration,train_cond
0,40.0,0.448276,0.476389,0,10.0
1,10.0,0.833333,0.476389,0,10.0
2,20.0,0.517241,0.476389,0,10.0
3,50.0,0.517241,0.476389,0,10.0
4,30.0,0.431034,0.476389,0,10.0


In [1464]:
r.groupby(['train_cond', 'test_cond']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,test_score,cv_score,iteration
train_cond,test_cond,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10.0,10.0,0.583333,0.702579,1
10.0,20.0,0.557471,0.702579,1
10.0,30.0,0.41954,0.702579,1
10.0,40.0,0.5,0.702579,1
10.0,50.0,0.534483,0.702579,1
20.0,10.0,0.551724,0.686177,1
20.0,20.0,0.777778,0.686177,1
20.0,30.0,0.83908,0.686177,1
20.0,40.0,0.752874,0.686177,1
20.0,50.0,0.775862,0.686177,1


In [1385]:
res.groupby(['train_cond']).count()

Unnamed: 0_level_0,10.0,20.0,30.0,40.0,50.0,iteration,visual_area,n_cells
train_cond,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10.0,12000,12000,12000,12000,12000,12000,12000,12000
20.0,12000,12000,12000,12000,12000,12000,12000,12000
30.0,12000,12000,12000,12000,12000,12000,12000,12000
40.0,12000,12000,12000,12000,12000,12000,12000,12000
50.0,12000,12000,12000,12000,12000,12000,12000,12000


In [1386]:
res.groupby(['visual_area', 'train_cond', 'iteration', 'n_cells']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,10.0,20.0,30.0,40.0,50.0
visual_area,train_cond,iteration,n_cells,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Li,10.0,0,2,0.566667,0.606897,0.513793,0.624138,0.613793
Li,10.0,0,4,0.556667,0.548276,0.610345,0.541379,0.568966
Li,10.0,0,8,0.620000,0.644828,0.582759,0.593103,0.582759
Li,10.0,0,16,0.593333,0.557692,0.584615,0.488462,0.530769
Li,10.0,0,32,0.530000,0.589655,0.620690,0.651724,0.582759
Li,10.0,0,64,0.573333,0.582759,0.510345,0.531034,0.493103
Li,10.0,0,82,0.653333,0.615385,0.576923,0.557692,0.569231
Li,10.0,0,100,0.623333,0.657692,0.634615,0.653846,0.546154
Li,10.0,1,2,0.703333,0.603448,0.693103,0.631034,0.613793
Li,10.0,1,4,0.643333,0.520690,0.517241,0.506897,0.503448


In [1387]:
meandf = res.groupby(['visual_area', 'n_cells', 'train_cond']).mean().reset_index()

meandf[meandf['n_cells']==100]

Unnamed: 0,visual_area,n_cells,train_cond,10.0,20.0,30.0,40.0,50.0,iteration
35,Li,100,10.0,0.622267,0.597957,0.619528,0.578224,0.520182,249.5
36,Li,100,20.0,0.618292,0.67072,0.685196,0.652398,0.638937,249.5
37,Li,100,30.0,0.612711,0.666957,0.751507,0.740051,0.669888,249.5
38,Li,100,40.0,0.603901,0.634257,0.739493,0.768947,0.745054,249.5
39,Li,100,50.0,0.55734,0.626703,0.679067,0.737358,0.7014,249.5
75,Lm,100,10.0,0.52248,0.536097,0.556034,0.547131,0.569414,249.5
76,Lm,100,20.0,0.525862,0.569893,0.643297,0.572766,0.528366,249.5
77,Lm,100,30.0,0.537034,0.629359,0.70422,0.669138,0.644103,249.5
78,Lm,100,40.0,0.509145,0.575434,0.673779,0.707907,0.750166,249.5
79,Lm,100,50.0,0.518759,0.576069,0.627724,0.695966,0.813753,249.5


In [1424]:
for ncells in NCELLS:

    fig, axn = pl.subplots(1, 5, figsize=(8,3), sharey=True, sharex=True)

    for ax, (tcond, traindf) in zip(axn.flat, (res[res['n_cells']==ncells].groupby(['train_cond']))):
        for visual_area, v_df in traindf.groupby(['visual_area']):
            means = v_df.mean()[tested_sizes]
            # print(means.values)
            ax.plot(tested_sizes, means.values, 
                   color=area_colors[visual_area], alpha=0.5)

            ax.errorbar(tested_sizes, means.values, yerr=v_df[tested_sizes].std(), capsize=5, lw=3,
                   color=area_colors[visual_area], alpha=0.5)
            ax.set_ylim([0.4, 1])

            ax.plot(tcond, means[tcond], 'o', color=area_colors[visual_area], markersize=10)
    fig.suptitle(ncells)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1407]:
spstats.sem(v_df[tested_sizes])

array([ 0.00213105,  0.00313161,  0.00237023,  0.00194066,  0.00244481])

In [1422]:
fig, axn = pl.subplots(1, 5, figsize=(8,3), sharey=True, sharex=True)

    for ax, (tcond, traindf) in zip(axn.flat, (res[res['n_cells']==ncells].groupby(['train_cond']))):
        for visual_area, v_df in traindf.groupby(['visual_area']):
            means = v_df.mean()[tested_sizes]
            # print(means.values)
            ax.plot(tested_sizes, means.values, 
                   color=area_colors[visual_area], alpha=0.5)

            ax.errorbar(tested_sizes, means.values, yerr=v_df[tested_sizes].std(), capsize=5, lw=3,
                   color=area_colors[visual_area], alpha=0.5)
            ax.set_ylim([0.4, 1])

            ax.plot(tcond, means[tcond], 'o', color=area_colors[visual_area], markersize=10)
    fig.suptitle(ncells)

10.0    0.002131
20.0    0.003132
30.0    0.002370
40.0    0.001941
50.0    0.002445
dtype: float64

In [1429]:
res[res['train_cond']==50].groupby(['n_cells', 'visual_area']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,10.0,20.0,30.0,40.0,50.0,iteration,train_cond
n_cells,visual_area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,Li,0.546525,0.549205,0.569532,0.569921,0.569234,249.5,50.0
2,Lm,0.512468,0.525767,0.530065,0.563154,0.589091,249.5,50.0
2,V1,0.503806,0.532294,0.564314,0.576856,0.607513,249.5,50.0
4,Li,0.544108,0.557034,0.589919,0.592929,0.580822,249.5,50.0
4,Lm,0.518287,0.53051,0.544892,0.583309,0.629044,249.5,50.0
4,V1,0.501355,0.538276,0.580536,0.612316,0.641693,249.5,50.0
8,Li,0.560511,0.577638,0.616656,0.625477,0.613333,249.5,50.0
8,Lm,0.522712,0.539898,0.564462,0.60705,0.666387,249.5,50.0
8,V1,0.501614,0.552315,0.604851,0.655354,0.68672,249.5,50.0
16,Li,0.555031,0.588138,0.629946,0.647784,0.638853,249.5,50.0


In [1431]:
curr_mean = res[res['train_cond']==50].groupby(['n_cells', 'visual_area']).mean().reset_index()
curr_mean

Unnamed: 0,n_cells,visual_area,10.0,20.0,30.0,40.0,50.0,iteration,train_cond
0,2,Li,0.546525,0.549205,0.569532,0.569921,0.569234,249.5,50.0
1,2,Lm,0.512468,0.525767,0.530065,0.563154,0.589091,249.5,50.0
2,2,V1,0.503806,0.532294,0.564314,0.576856,0.607513,249.5,50.0
3,4,Li,0.544108,0.557034,0.589919,0.592929,0.580822,249.5,50.0
4,4,Lm,0.518287,0.53051,0.544892,0.583309,0.629044,249.5,50.0
5,4,V1,0.501355,0.538276,0.580536,0.612316,0.641693,249.5,50.0
6,8,Li,0.560511,0.577638,0.616656,0.625477,0.613333,249.5,50.0
7,8,Lm,0.522712,0.539898,0.564462,0.60705,0.666387,249.5,50.0
8,8,V1,0.501614,0.552315,0.604851,0.655354,0.68672,249.5,50.0
9,16,Li,0.555031,0.588138,0.629946,0.647784,0.638853,249.5,50.0


In [1433]:
for visual_area, v_df in curr_mean.groupby(['visual_area']):
    v_df

Unnamed: 0,n_cells,visual_area,10.0,20.0,30.0,40.0,50.0,iteration,train_cond
2,2,V1,0.503806,0.532294,0.564314,0.576856,0.607513,249.5,50.0
5,4,V1,0.501355,0.538276,0.580536,0.612316,0.641693,249.5,50.0
8,8,V1,0.501614,0.552315,0.604851,0.655354,0.68672,249.5,50.0
11,16,V1,0.498886,0.55764,0.626551,0.688114,0.720453,249.5,50.0
14,32,V1,0.50302,0.569164,0.657378,0.733877,0.766887,249.5,50.0
17,64,V1,0.499428,0.593021,0.70951,0.799683,0.837853,249.5,50.0
20,82,V1,0.503545,0.5986,0.725131,0.822317,0.86294,249.5,50.0
23,100,V1,0.497559,0.596938,0.729876,0.832759,0.876147,249.5,50.0


In [None]:
fig, axn = pl.subplots(1,5)
sns.pointplot(x='n_cells', y=1)