In [1]:
import os
import glob
import importlib
import numpy as np
import pandas as pd
import pylab as pl
import seaborn as sns

In [2]:
import analyze2p.gratings.utils as gutils
import analyze2p.plotting as pplot
import analyze2p.arousal.dlc_utils as dlcutils
import analyze2p.aggregate_datasets as aggr
import analyze2p.utils as hutils

import analyze2p.decoding as dec

  **kwargs


In [3]:
# ---- Plot styling ----------------------------------------------------
# Area labels and their colours, as returned by the project palette helper.
visual_areas, area_colors = pplot.set_threecolor_palette()
pplot.set_plot_params(labelsize=6, lw_axes=0.25)
# Same light-grey RGB triplet for every visual area.
bw_colors = {area: [0.7] * 3 for area in visual_areas}

# ---- Trace ID and FOV/state type -------------------------------------
aggregate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas'
rootdir = '/n/coxfs01/2p-data'
fov_type, state, traceid = 'zoom2p0x', 'awake', 'traces001'
# ----------------------------------------

In [4]:
%matplotlib notebook

In [5]:
experiment = 'blobs'
trial_epoch = 'stimulus'
responsive_test = 'ROC'
# ----------------------------------------
response_type = 'dff'
# Pick the responsivity threshold that goes with the selected test.
# Every supported test is handled explicitly: previously any other value fell
# through to `responsive_thr = responsive_thr`, which is a NameError on a
# fresh kernel (or silently reuses a stale value from an earlier run).
if responsive_test == 'nstds':
    responsive_thr = 7.0 if experiment == 'rfs' else 10.
elif responsive_test == 'ROC':
    responsive_thr = 0.05
else:
    raise ValueError("No default responsive_thr for responsive_test=%r "
                     "(expected 'nstds' or 'ROC')" % (responsive_test,))
# Human-readable description of the response selection, e.g.
# 'dff_responsive-ROC-thr0.05'; combined with the trace ID to label figures.
resp_desc = '%s_responsive-%s-thr%.2f' \
                % (response_type, responsive_test, responsive_thr)
data_id = '|'.join([traceid, resp_desc])
print(data_id)
# ------------------------------------------------------------------
# Load all metadata and assigned cells
sdata, cells0 = aggr.get_aggregate_info(visual_areas=visual_areas, return_cells=True)
# 'rfs' is stored under two experiment names; everything else maps to itself.
experiment_list = ['rfs10', 'rfs'] if experiment == 'rfs' else [experiment]
# Independent copy of the rows belonging to the selected experiment(s).
meta = sdata[sdata.experiment.isin(experiment_list)].copy()

traces001|dff_responsive-ROC-thr0.05
/n/coxfs01/julianarhee/aggregate-visual-areas/dataset_info_assigned.pkl
Segmentation: missing 13 dsets


In [6]:
# C_value is forwarded to the decoding helpers (aggregate ID and result
# loading); test_split and cv_nfolds are not referenced in the visible cells.
C_value = 1.0
test_split = 0.2
cv_nfolds = 5

# Chance accuracy: 1/8 for the gratings experiment, 0.5 for anything else.
if experiment == 'gratings':
    chance_level = 1/8.
else:
    chance_level = 0.5

In [7]:
analysis_type = 'by_ncells'
test_type = 'size_single'
class_name = 'morphlevel'
match_rfs = True
overlap_thr = None

# Folder name for the test type; 'default' stands in when none is selected.
test_str = test_type if test_type is not None else 'default'
# <aggregate_dir>/decoding/py3_<analysis_type>/<class_name>/<test_str>
basedir = os.path.join(
    aggregate_dir,
    'decoding',
    'py3_%s' % analysis_type,
    class_name,
    '%s' % test_str,
)
basedir_is_missing = not os.path.exists(basedir)
if basedir_is_missing:
    # Create the directory tree on first use and say so.
    os.makedirs(basedir)
    print('making dirs')
print(basedir)


/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single


## Output dirs

In [8]:
# Identifier for this combination of decoding settings (used in file names
# and figure labels).
aggr_id = dec.create_aggregate_id(
    C_value=C_value,
    trial_epoch=trial_epoch,
    responsive_test=responsive_test,
    match_rfs=match_rfs,
    overlap_thr=overlap_thr,
)
# `and` binds tighter than `or`, so the rule is:
#   (overlap_thr is a list/array)  OR  match_rfs   ->  'controls' folder
#   otherwise                                      ->  folder named by aggr_id
curr_dst_dir = os.path.join(
    basedir,
    'controls'
    if ((overlap_thr is not None
         and isinstance(overlap_thr, (list, np.ndarray)))
        or match_rfs)
    else aggr_id,
)
if not os.path.exists(curr_dst_dir):
    os.makedirs(curr_dst_dir)
print(curr_dst_dir, aggr_id, sep='\n')

FIGDIR = os.path.join(aggregate_dir, 'FIGURES')

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls
dff-ROC__stimulus__matchRF__C1.00


## Check input data

In [9]:
import pickle as pkl  # public module; uses the C accelerator when available

# Input-cell files are keyed by the aggregate ID without its last '__' field.
input_id = '__'.join(aggr_id.split('__')[0:-1])
input_pattern = os.path.join(basedir, 'files',
                             'inputcells-*_%s.pkl' % input_id)
# glob returns matches in arbitrary order; sort so that the row order of the
# concatenated table is reproducible from run to run.
input_files = sorted(glob.glob(input_pattern))
print(len(input_files))
if not input_files:
    # Fail with the pattern that was searched instead of pandas'
    # "No objects to concatenate".
    raise FileNotFoundError('No input-cell files match: %s' % input_pattern)

# NOTE(review): pickle can execute arbitrary code on load -- only read files
# written by this project's own pipeline.
i_ = []
for ipath in input_files:
    with open(ipath, 'rb') as f:
        icells = pkl.load(f)
    i_.append(icells)
# Stack the per-file tables row-wise into one table.
inputcells = pd.concat(i_, axis=0)

3


In [10]:
# Show which columns are available in the concatenated input-cell table.
inputcells.columns

Index(['datakey', 'cell', 'x0', 'y0', 'fwhm_x', 'fwhm_y', 'theta', 'offset',
       'amplitude', 'r2', 'std_x', 'std_y', 'fwhm_avg', 'std_avg', 'area',
       'fx', 'fy', 'ratio_xy', 'theta_c', 'theta_Mm_deg', 'major_axis',
       'minor_axis', 'anisotropy', 'theta_Mm_c', 'aniso_index', 'visual_area',
       'experiment', 'global_ix'],
      dtype='object')

In [11]:
# Strip plot of `fwhm_avg` for the input cells, one column per visual area,
# saved as an SVG in the current destination directory.
fig, ax = pl.subplots(figsize=(3,3))

# One small marker per cell, coloured and ordered by visual area.
sns.stripplot(x='visual_area', y='fwhm_avg', data=inputcells, ax=ax, s=2,
              palette=area_colors, order=visual_areas, zorder=-1000)
# sns.pointplot(x='visual_area', y='fwhm_avg', data=inputcells, ax=ax, 
#              color='k', order=visual_areas, scale=0.5, errwidth=0.5, join=False,
#              markers='_', zorder=1000, ci='sd', estimator=np.median)
# Fixed y-range; points outside (10, 40) are not visible.
ax.set_ylim((10, 40))
pl.subplots_adjust(left=0.2, right=0.9, bottom=0.1, top=0.8)
# Stamp the figure with the data and aggregate identifiers.
pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))
# sns.boxplot(x='visual_area', y='fwhm_avg', data=inputcells, ax=ax, 
#              color='w', order=visual_areas, fliersize=0)
# Hide x tick marks and the x-axis label/spine; area names remain as tick labels.
ax.tick_params(which='both', axis='x', size=0)
ax.set_xlabel('')
sns.despine(bottom=True, trim=True)
fig.text(0.01, 0.85, 'Input cell popn (%s, %s)' % (analysis_type, test_type), 
         fontsize=8)
figname = 'input_cell_distribution__%s' % aggr_id
# pl.savefig writes the current pyplot figure (the one created above).
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls input_cell_distribution__dff-ROC__stimulus__matchRF__C1.00


In [12]:
# Display the aggregate ID currently in use.
aggr_id

'dff-ROC__stimulus__matchRF__C1.00'

## Load

In [13]:
max_ncells = 96 # if match_rfs and (responsive_test=='ROC')

experiment = 'gratings' if class_name=='ori' else 'blobs'

# Settings shared by both loads; only `match_rfs` differs between them.
load_kws = dict(
    experiment=None,
    analysis_type=analysis_type,
    test_type=test_type,
    traceid=traceid,
    trial_epoch=trial_epoch,
    responsive_test=responsive_test,
    C_value=C_value,
    break_correlations=False,
    overlap_thr=overlap_thr,
)
rf_labels = {True: 'matched', False: 'all'}

# Per-iteration means, first for RF-matched cells, then for all cells.
m_ = []
for match_rf_flag in [True, False]:
    iterdf, missing_ = dec.load_iterdf(meta, class_name,
                                       match_rfs=match_rf_flag, **load_kws)

    # Keep population sizes up to max_ncells, always excluding 169.
    sample_sizes = [
        s for s in sorted(iterdf['n_cells'].unique())
        if s <= max_ncells and s != 169
    ]
    iterdf = iterdf[iterdf['n_cells'].isin(sample_sizes)]
    # Group means.  The list is rebuilt on every pass on purpose: the helper
    # may modify the list it receives -- TODO confirm.
    grouper = ['visual_area', 'condition', 'iteration']
    mean_df0 = dec.average_within_iterations_by_ncells(
        iterdf,
        analysis_type=analysis_type,
        test_type=test_type,
        grouper=grouper,
    )
    # Tag the rows with the RF-selection they came from.
    mean_df0['rfs'] = rf_labels[match_rf_flag]
    mean_df0['match_rfs'] = match_rf_flag
    m_.append(mean_df0)


(Li) Found 9 paths
(Lm) Found 9 paths
(V1) Found 10 paths
    checking for break-corrs
(Li) Found 0 paths
(Lm) Found 0 paths
(V1) Found 0 paths
['visual_area', 'condition', 'iteration', 'n_cells', 'novel']
(Li) Found 10 paths
(Lm) Found 10 paths
(V1) Found 10 paths
    checking for break-corrs
(Li) Found 0 paths
(Lm) Found 0 paths
(V1) Found 0 paths
['visual_area', 'condition', 'iteration', 'n_cells', 'novel']


In [25]:
# Iteration ids are taken from the most recently loaded `iterdf`.
n_iters = 1 + iterdf['iteration'].max()
# Population sizes that get a tick label in the curve plots.
xlabels = [1, 32, 64, 96, max_ncells]
# Stack the matched/all tables and renumber the rows from 0.
mean_df = pd.concat(m_, axis=0, ignore_index=True)

# Quick look at which levels are present.
if test_type is not None:
    print("novel:", mean_df['novel'].unique())
print("conditions:", mean_df['condition'].unique())
print("rfs:", mean_df['rfs'].unique())

# Text version of the boolean `novel` flag, used for line styles and legends.
is_novel = mean_df['novel']
mean_df['novel_labels'] = None
mean_df.loc[is_novel, 'novel_labels'] = 'novel'
mean_df.loc[~is_novel, 'novel_labels'] = 'trained'

novel: [False  True]
conditions: ['data' 'shuffled']
rfs: ['matched' 'all']


In [26]:
# Sanity check: number of non-null entries per column for every combination
# of area, condition, train/test transform, novel flag, RF selection and
# population size.
mean_df.groupby(['visual_area', 'condition', 'train_transform', 'test_transform', 'novel', 'rfs', 'n_cells']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,iteration,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,n_trials,randi_cells,intact,match_rfs,novel_labels
visual_area,condition,train_transform,test_transform,novel,rfs,n_cells,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Li,data,30.0,30.0,False,all,1,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,2,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,4,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,8,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,16,500,500,500,500,500,500,500,500,500,500,500,500,500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V1,shuffled,30.0,30.0,False,matched,8,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,16,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,32,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,64,500,500,500,500,500,500,500,500,500,500,500,500,500


In [27]:
print_index = ['visual_area','rfs', 'novel']

# Mean held-out accuracy of the real (non-shuffled) data, laid out with one
# column per population size.
#
# The score column is selected *before* averaging: `groupby(...).mean()` on
# the whole frame also tries to average the string columns, which raises a
# TypeError on pandas >= 2.0 (and wastes work on older versions).  Passing
# `values` as a list keeps the ('heldout_test_score', n_cells) column layout
# and keeps leftover label columns out of the pivot.
if test_type is not None:
    score_table = mean_df[(mean_df.condition=='data')]\
            .groupby(['visual_area', 'n_cells', 'novel', 'rfs'])['heldout_test_score']\
            .mean().reset_index()\
            .pivot_table(index=print_index, columns='n_cells',
                         values=['heldout_test_score'])
else:
    score_table = mean_df[(mean_df.condition=='data')]\
            .groupby(['visual_area', 'n_cells', 'rfs'])['heldout_test_score']\
            .mean().reset_index()\
            .pivot_table(index=['visual_area'], columns='n_cells',
                         values=['heldout_test_score'])
print("EXP: %s (class=%s): %s" % (experiment, class_name, aggr_id))
score_table

EXP: blobs (class=morphlevel): dff-ROC__stimulus__matchRF__C1.00


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,n_cells,1,2,4,8,16,32,64,96
visual_area,rfs,novel,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Li,all,False,0.514566,0.521792,0.541963,0.555068,0.568285,0.587394,0.638783,0.670803
Li,all,True,0.512347,0.52039,0.528657,0.539456,0.54823,0.565075,0.594145,0.618094
Li,matched,False,0.535555,0.555703,0.585391,0.607603,0.643817,0.682259,0.765552,0.812609
Li,matched,True,0.536457,0.547944,0.57163,0.593906,0.626433,0.654182,0.71358,0.747681
Lm,all,False,0.518542,0.535697,0.553942,0.574979,0.58903,0.600667,0.666364,0.705648
Lm,all,True,0.513441,0.522489,0.531605,0.537958,0.548986,0.560324,0.588188,0.607315
Lm,matched,False,0.529367,0.538167,0.555733,0.5789,0.5946,0.624233,0.6765,0.705933
Lm,matched,True,0.520351,0.527039,0.538333,0.554007,0.565181,0.579734,0.612484,0.640829
V1,all,False,0.535391,0.559851,0.5817,0.6156,0.6486,0.6796,0.751233,0.8019
V1,all,True,0.520353,0.526662,0.543042,0.562101,0.577155,0.59576,0.637393,0.668683


In [28]:
# Second colour per visual area; used below for the RF size-matched curves
# and their legend entries.
area_colors2 = {'V1': 'darkmagenta', 'Lm': 'saddlebrown', 'Li': 'darkblue'}

In [29]:
# Global matplotlib setting: font size of legend titles.
pl.rcParams['legend.title_fontsize'] = 8


In [38]:
# Held-out accuracy as a function of population size: one panel per visual
# area, with separate curves for all cells vs. RF size-matched cells and
# line styles distinguishing trained vs. novel conditions.
metric='heldout_test_score'

# Real (non-shuffled) data only.
plotd = mean_df[mean_df.condition=='data']

#cond_colors = {'novel': 'cornflowerblue', 'trained': [0.3]*3}
# Dash patterns per label: '' for 'trained' and (1,1) for 'novel'.
cond_styles = {'trained': '', 'novel': (1,1)}

fig, axn = pl.subplots(1,3, figsize=(7, 3), sharex=True, sharey=True)

for vi, (va, df_) in enumerate(plotd.groupby('visual_area')):
    # Panel position follows the order of `visual_areas`, not groupby order.
    ai = visual_areas.index(va)
    ax=axn[ai]; ax.set_title(va);
    # Curves for all cells (area colour) ...
    sns.lineplot(x='n_cells', y=metric, data=df_[df_.rfs=='all'], ax=ax,
            style='novel_labels', dashes=cond_styles, ci='sd', err_style='bars',
            color=area_colors[va])
    # ... and for RF size-matched cells (second colour), on the same axes.
    sns.lineplot(x='n_cells', y=metric, data=df_[df_.rfs=='matched'],ax=ax,
            style='novel_labels', dashes=cond_styles, ci='sd', err_style='bars',
            color=area_colors2[va])
    # Dotted reference line at chance.
    ax.axhline(y=chance_level, ls=':', c='k', lw=0.5)
    ax.set_ylim([0.4, 1])
    # Debug output: handles of the legend seaborn attached to this panel.
    # NOTE(review): `legendHandles` was renamed `legend_handles` in newer
    # Matplotlib releases -- confirm against the installed version.
    print(ai, ax.legend_.legendHandles)
    if ai==1:
        # Keep a single line-style legend (first two handles only), placed to
        # the right of the panels; presumably the trained/novel entries of the
        # first lineplot call -- TODO confirm.
        leg_h = ax.legend_.legendHandles[0:2]
        print(len(leg_h))
        leg = ax.legend(handles=leg_h, bbox_to_anchor=(2.2,1.), loc='upper left', 
                  frameon=False)
        leg._legend_box.align = "left"
    else:
        # Drop seaborn's automatic legend on the other panels.
        ax.legend_.remove()
# Shared axis formatting: square panels, ticks at every sampled population
# size, tick labels only for the sizes listed in `xlabels`.
for ax in axn:
    ax.set_yticks(np.linspace(0.4, 1, 4))
    ax.set_ylim([0.4,1])
    ax.set_xticks(sample_sizes)
    ax.set_box_aspect(1)
    ax.set_xticklabels([i if i in xlabels else '' for i in sample_sizes])
sns.despine(trim=True)

# Colour key for the all-cells curves, attached to the last panel.
leg_h2 = pplot.custom_legend_markers(colors=[area_colors[v] for v in visual_areas], 
                                     labels=visual_areas, markers='.')
leg = axn[-1].legend(handles=leg_h2, bbox_to_anchor=(1,0.8), loc='upper left', 
               frameon=False, title='all cells')
leg._legend_box.align = "left"

# Colour key for the RF size-matched curves, attached to the first panel but
# anchored far to the right (x=3.4 in that panel's axes coordinates).
leg_h3 = pplot.custom_legend_markers(colors=[area_colors2[v] for v in visual_areas], 
                                     labels=visual_areas, markers='.')
leg = axn[0].legend(handles=leg_h3, bbox_to_anchor=(3.4,0.45), loc='upper left', 
               frameon=False, title='RF size-matched', fontsize=6)
leg._legend_box.align = "left"

pl.subplots_adjust(left=0.1, right=0.85, bottom=0.15, wspace=0.2, top=0.8)
pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))

figname = 'accuracy_byncell_curves_matchRF-v-all_train-v-novel'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

2 [<matplotlib.lines.Line2D object at 0x2b0dab14dad0>, <matplotlib.lines.Line2D object at 0x2b0dab156210>, <matplotlib.lines.Line2D object at 0x2b0dab156910>, <matplotlib.lines.Line2D object at 0x2b0dab15f050>]
1 [<matplotlib.lines.Line2D object at 0x2b0dab12d650>, <matplotlib.lines.Line2D object at 0x2b0dab12dd50>, <matplotlib.lines.Line2D object at 0x2b0dab37e210>, <matplotlib.lines.Line2D object at 0x2b0dab37e910>]
2
0 [<matplotlib.lines.Line2D object at 0x2b0dab2cf050>, <matplotlib.lines.Line2D object at 0x2b0dab2cf750>, <matplotlib.lines.Line2D object at 0x2b0dab2cfe50>, <matplotlib.lines.Line2D object at 0x2b0dab2b4510>]
/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls accuracy_byncell_curves_matchRF-v-all_train-v-novel


In [39]:
# itertools.cycle is used further down to alternate bar hatch patterns.
import itertools
# Display the population-size cap currently in use.
max_ncells

96

In [40]:
# Real (non-shuffled) data at the largest population size only.
# `.copy()` makes this an independent frame: a later cell adds a column to
# `plotd`, and doing that on a bare boolean-mask slice of `mean_df` triggers
# pandas' SettingWithCopyWarning.
plotd = mean_df[(mean_df.condition=='data') & (mean_df.n_cells==max_ncells)].copy()
plotd.head()


Unnamed: 0,visual_area,condition,iteration,n_cells,novel,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,train_transform,test_transform,n_trials,randi_cells,intact,rfs,match_rfs,novel_labels
14,Li,data,0,96,False,0.001201,0.000293,0.74,1.0,0.825,1.0,3726.2,30.0,30.0,38.0,2538.0,True,matched,True,trained
15,Li,data,0,96,True,0.001201,0.000293,0.74,1.0,0.706579,1.0,3726.2,30.0,30.0,38.0,2538.0,True,matched,True,novel
30,Li,data,1,96,False,0.000758,0.00031,0.853333,1.0,0.825,1.0,4638.4,30.0,30.0,38.0,7280.0,True,matched,True,trained
31,Li,data,1,96,True,0.000758,0.00031,0.853333,1.0,0.751316,1.0,4638.4,30.0,30.0,38.0,7280.0,True,matched,True,novel
46,Li,data,2,96,False,0.00074,0.000296,0.793333,1.0,0.85,1.0,4090.0,30.0,30.0,38.0,8617.0,True,matched,True,trained


In [43]:
%matplotlib notebook
# Bar plot of held-out accuracy at the largest population size, grouped by
# visual area, with one bar per (RF selection, trained/novel) combination.

# Combined condition label, e.g. 'matched_novel'.  `.assign` returns a new
# frame instead of writing a column into what may be a slice of `mean_df`
# (the in-place assignment raised SettingWithCopyWarning).
plotd = plotd.assign(
    plotcond=['%s_%s' % (r, n)
              for r, n in plotd[['rfs', 'novel_labels']].values])

# Grey level encodes the RF selection; trained vs. novel share a colour and
# are told apart by hatching below.
matchrf_color = [0.5]*3
all_color = [0.8]*3
plot_palette = {'all_trained': all_color, 'all_novel': all_color,
                'matched_trained': matchrf_color, 'matched_novel': matchrf_color}
keys = ['all_trained', 'all_novel', 'matched_trained', 'matched_novel']

fig, ax = pl.subplots(figsize=(5,4))
sns.barplot(x='visual_area', y=metric, data=plotd, ax=ax,
           hue='plotcond', hue_order=keys, palette=plot_palette, 
           edgecolor='w', order=visual_areas, ci='sd', errwidth=0.5)
# legends
leg_h = pplot.custom_legend_markers(colors=[all_color, matchrf_color],
                                    labels=['all cells', 'RF size-matched'],
                                    use_patch=True)
leg_h2 = pplot.custom_legend_markers(colors=['k', 'k'],
                                    labels=['trained', 'novel'],markers=None,
                                    linestyles=['-', '--'], use_patch=False)
ax.legend(handles=leg_h+leg_h2, bbox_to_anchor=(0.99,1), 
          loc='upper left', frameon=False)

# Hatch every other run of bars.  A run is one bar per visual area, so its
# length follows `visual_areas` instead of a hard-coded 3.
# Assumes ax.patches is ordered hue level by hue level (in `keys` order), so
# the hatched runs are the '*_novel' conditions -- TODO confirm for the
# installed seaborn version.
bar_locs = len(visual_areas)
hatches = itertools.cycle(['', '//'])
for i, bar in enumerate(ax.patches):
    if i % bar_locs == 0:
        hatch = next(hatches)
    bar.set_hatch(hatch)
ax.set_aspect(2, anchor='SW')
ax.set_ylim([0, 1])
ax.tick_params(which='both', axis='x', size=0)
ax.set_xlabel('')
sns.despine(bottom=True, trim=True)
pl.subplots_adjust(left=0.1, right=0.8, bottom=0.1, top=0.8)
fig.text(0.01, 0.85,\
         'Accuracy, train cond | RF cond (n=%i cells, SD over %i iters)' \
         % (max_ncells, n_iters))

pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))

figname = 'compare_performance_matchRF-v-all_train-v-novel'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


<IPython.core.display.Javascript object>

In [44]:
# Per-iteration accuracy difference (trained minus novel) for every
# combination of visual area, data condition and RF selection.
d_list = []
by_group = plotd.sort_values(by='iteration')\
                .groupby(['visual_area', 'condition', 'rfs'])
for (va, data_cond, rf_cond), cg in by_group:
    # Order both halves by iteration so the arrays pair up position by
    # position (this relies on both halves covering the same iterations).
    novel_rows = cg[cg.novel].sort_values(by='iteration')
    trained_rows = cg[~cg.novel].sort_values(by='iteration')
    df_ = pd.DataFrame({
        'trained_minus_novel': (trained_rows['heldout_test_score'].values
                                - novel_rows['heldout_test_score'].values),
        'visual_area': va,
        'condition': data_cond,
        'rfs': rf_cond,
        'iteration': novel_rows['iteration'].values,
    })
    d_list.append(df_)
diffdf = pd.concat(d_list, axis=0)

In [45]:
# Empirical CDFs of the trained-minus-novel accuracy difference: one panel
# per visual area, one curve per RF selection ('rfs').
# fig, ax = pl.subplots()
g = sns.displot(hue='rfs', x='trained_minus_novel',  col='visual_area',
           data=diffdf, height=2, col_order=visual_areas, #hue_norm=True,
            kind='ecdf', 
           )
pl.subplots_adjust(left=0.1, right=0.9, bottom=0.2, top=0.8)

pplot.label_figure(g.fig, '%s\n%s' % (data_id, aggr_id))

figname = 'diff_trained-minus-novel_matchrf-v-all'
# pl.savefig writes the current pyplot figure.
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))

<IPython.core.display.Javascript object>

In [46]:
# Same comparison as histograms (not saved to disk): one panel per visual
# area, coloured by RF selection.
sns.displot(hue='rfs', x='trained_minus_novel',  col='visual_area',
           data=diffdf, height=2, col_order=visual_areas, #hue_norm=True,
            kind='hist'
           )

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2b0daab10890>

In [49]:
# Restrict the difference table to a single visual area.
# Note: `plotd` is re-bound here; from this point on it holds rows of
# `diffdf`, not the accuracy table used by the earlier plotting cells.
va='Lm'
plotd = diffdf[diffdf.visual_area==va]

In [62]:
# Compare the matched-RF result against the spread of the all-cells
# differences for the selected area.
all_vals = plotd[plotd['rfs']=='all']['trained_minus_novel'].values

fig, ax = pl.subplots()
# Histogram of the all-cells trained-minus-novel differences.
sns.histplot(x='trained_minus_novel', data=plotd[plotd['rfs']=='all'], ax=ax,
            color=[0.8]*3)

# Lower/upper bounds from the project helper, drawn as black vertical lines.
# NOTE(review): presumably an empirical 67% interval of `all_vals` -- confirm
# against hutils.get_empirical_ci.
c_lo, c_hi = hutils.get_empirical_ci(all_vals, ci=0.67)
ax.axvline(x=c_lo, color='k')
ax.axvline(x=c_hi, color='k')

# sns.histplot(x='trained_minus_novel', data=plotd[plotd['rfs']=='matched'], ax=ax)
# Despite its name, `mean_v` holds the MEDIAN of the matched-RF differences.
mean_v = plotd[plotd['rfs']=='matched']['trained_minus_novel'].median()
ax.axvline(x=mean_v, color='orange')

<IPython.core.display.Javascript object>

<matplotlib.lines.Line2D at 0x2b0dac54eed0>

In [56]:
# Mean (not median) trained-minus-novel difference for the RF-matched cells
# of the selected area.
plotd[plotd['rfs']=='matched']['trained_minus_novel'].mean()


0.06510402298850573