In [1]:
import os
import glob
import importlib
import numpy as np
import pandas as pd
import pylab as pl
import seaborn as sns

In [2]:
import analyze2p.gratings.utils as gutils
import analyze2p.plotting as pplot
import analyze2p.arousal.dlc_utils as dlcutils
import analyze2p.aggregate_datasets as aggr
import analyze2p.utils as hutils

import analyze2p.decoding as dec

  **kwargs


In [3]:
# --- Plot appearance ---------------------------------------------------
# The palette helper returns the ordered area names plus a per-area color lookup.
visual_areas, area_colors = pplot.set_threecolor_palette()
pplot.set_plot_params(lw_axes=0.25, labelsize=6)
# Uniform light-gray alternative palette: one RGB list per visual area.
bw_colors = {area: [0.7] * 3 for area in visual_areas}

# --- Trace ID and FOV/state type ---------------------------------------
traceid = 'traces001'
fov_type = 'zoom2p0x'
state = 'awake'
# Data locations (absolute paths on the analysis cluster).
rootdir = '/n/coxfs01/2p-data'
aggregate_dir = '/n/coxfs01/julianarhee/aggregate-visual-areas'
# ------------------------------------------------------------------------

In [4]:
# Select the interactive "notebook" matplotlib backend for all figures below.
%matplotlib notebook

In [5]:
experiment = 'blobs'
trial_epoch = 'stimulus'
responsive_test = 'ROC'
# ----------------------------------------
response_type = 'dff'
# The responsiveness threshold is tied to the test used to select cells.
if responsive_test == 'nstds':
    responsive_thr = 7.0 if experiment == 'rfs' else 10.
elif responsive_test == 'ROC':
    responsive_thr = 0.05
else:
    # The old fallback re-read `responsive_thr`, which is undefined on a fresh
    # kernel (NameError) or silently reuses a stale value from earlier runs.
    raise ValueError("No responsive_thr defined for responsive_test=%r"
                     % (responsive_test,))
# data_id is stamped onto every figure via pplot.label_figure.
resp_desc = '%s_responsive-%s-thr%.2f' \
                % (response_type, responsive_test, responsive_thr)
data_id = '|'.join([traceid, resp_desc])
print(data_id)
# ------------------------------------------------------------------
# Load all metadata and assigned cells
sdata, cells0 = aggr.get_aggregate_info(visual_areas=visual_areas, return_cells=True)
# 'rfs' covers both receptive-field experiment names; otherwise use the single name.
experiment_list = ['rfs10', 'rfs'] if experiment == 'rfs' else [experiment]
meta = sdata[sdata.experiment.isin(experiment_list)].copy()

traces001|dff_responsive-ROC-thr0.05
/n/coxfs01/julianarhee/aggregate-visual-areas/dataset_info_assigned.pkl
Segmentation: missing 13 dsets


In [6]:
# Decoding settings. Only C_value is referenced elsewhere in the visible
# notebook; test_split / cv_nfolds are presumably used by the decoding runs
# themselves -- TODO confirm.
C_value = 1.0
test_split = 0.2
cv_nfolds = 5

# Chance accuracy: 1-in-8 for gratings, 0.5 for every other experiment.
if experiment == 'gratings':
    chance_level = 1/8.
else:
    chance_level = 0.5

In [43]:
analysis_type='by_ncells'
test_type='size_single'
class_name = 'morphlevel'
match_rfs = True
overlap_thr= 0

test_str = 'default' if test_type is None else test_type
basedir = os.path.join(aggregate_dir, 'decoding', 'py3_%s' % analysis_type,
                      class_name, '%s' % test_str)
if not os.path.exists(basedir):
    os.makedirs(basedir)
    print('making dirs')
print(basedir)


/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single


## Output dirs

In [44]:
# String tag for this parameter combination (used in file and figure names).
aggr_id = dec.create_aggregate_id(
    C_value=C_value,
    trial_epoch=trial_epoch,
    responsive_test=responsive_test,
    match_rfs=match_rfs,
    overlap_thr=overlap_thr,
)

# All figures from this notebook are saved under <basedir>/controls/match_rfs.
curr_dst_dir = os.path.join(basedir, 'controls', 'match_rfs')
if not os.path.exists(curr_dst_dir):
    os.makedirs(curr_dst_dir)
print(curr_dst_dir, aggr_id, sep='\n')

FIGDIR = os.path.join(aggregate_dir, 'FIGURES')

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs
dff-ROC__stimulus__matchRF__C1.00


## Check input data

In [45]:
import _pickle as pkl

# Input-cell files are keyed by the aggregate ID without its last '__' field
# (e.g. 'dff-ROC__stimulus__matchRF__C1.00' -> 'dff-ROC__stimulus__matchRF').
input_id = '__'.join(aggr_id.split('__')[0:-1])
# Sort so the concatenation order does not depend on filesystem listing order.
input_files = sorted(glob.glob(os.path.join(basedir, 'files',
                                     'inputcells-*_%s.pkl' % input_id)))
print(len(input_files))
if len(input_files) == 0:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError("No inputcells-*_%s.pkl files found in %s"
                            % (input_id, os.path.join(basedir, 'files')))
i_ = []
for ipath in input_files:
    with open(ipath, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        icells = pkl.load(f)
    i_.append(icells)
# One table holding the input cells from every file found.
inputcells = pd.concat(i_, axis=0)

3


In [46]:
# Re-import analyze2p.decoding so edits to the module are picked up without restarting the kernel.
importlib.reload(dec)

<module 'analyze2p.decoding' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/rat-2p-area-characterizations/analyze2p/decoding.py'>

In [47]:
# Strip plot of the 'fwhm_avg' column of the input cells, one column of points
# per visual area.
fig, ax = pl.subplots(figsize=(3, 3))

sns.stripplot(data=inputcells, x='visual_area', y='fwhm_avg',
              order=visual_areas, palette=area_colors,
              s=2, zorder=-1000, ax=ax)
ax.set_ylim((10, 40))
pl.subplots_adjust(left=0.2, right=0.9, bottom=0.1, top=0.8)
# Stamp the data / aggregate IDs on the figure.
pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))

# Categorical x-axis: hide tick marks, axis label and the bottom spine.
ax.tick_params(which='both', axis='x', size=0)
ax.set_xlabel('')
sns.despine(bottom=True, trim=True)

fig.text(0.01, 0.85,
         'Input cell popn (%s, %s)' % (analysis_type, test_type),
         fontsize=8)

figname = 'input_cell_distribution__%s' % aggr_id
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs input_cell_distribution__dff-ROC__stimulus__matchRF__C1.00


In [12]:
# Display the aggregate ID currently in use.
aggr_id

'dff-ROC__stimulus__matchRF__C1.00'

## Load

In [183]:
# Largest population size (n_cells) kept for the comparisons below.
max_ncells = 96
# n_cells values that are always excluded from sample_sizes.
special_cases = [169, 120, 46]
experiment = 'gratings' if class_name == 'ori' else 'blobs'
i_ = []
# Load decoding results twice: RF-size-matched cells and all cells.
for match_rf_flag in [True, False]:
    # NOTE(review): `experiment` is computed above but None is passed here, and
    # overlap_thr is 0 only for the unmatched load -- confirm both are intended.
    iterdf_, missing_ = dec.load_iterdf(meta, class_name,
                          experiment=None,
                          analysis_type=analysis_type,
                          test_type=test_type,
                          traceid=traceid,
                          trial_epoch=trial_epoch, responsive_test=responsive_test,
                          C_value=C_value, break_correlations=False,
                          match_rfs=match_rf_flag,
                          overlap_thr=0 if match_rf_flag is False else None)

    # Keep population sizes up to max_ncells, minus the special cases.
    # sample_sizes from the last pass is reused later for x-ticks.
    sample_sizes = [s for s in sorted(iterdf_['n_cells'].unique())
                    if s <= max_ncells and s not in special_cases]
    # Copy the filtered rows: adding columns to a boolean-mask slice triggers
    # pandas' SettingWithCopyWarning and may not modify what you expect.
    iterdf_ = iterdf_[iterdf_['n_cells'].isin(sample_sizes)].copy()
    iterdf_['rfs'] = 'matched' if match_rf_flag else 'all'
    iterdf_['match_rfs'] = match_rf_flag
    i_.append(iterdf_)
iterdf = pd.concat(i_, axis=0, ignore_index=True)

(Li) Found 8 paths
(Lm) Found 8 paths
(V1) Found 8 paths
(Li) Found 10 paths
(Lm) Found 10 paths
(V1) Found 11 paths


In [184]:
    
# Average scores within each iteration, separately for the two RF conditions,
# then stack the results into one frame tagged by 'rfs'.
m_ = []
for rf_cond, iterdf_ in iterdf.groupby('rfs'):
    grouper = ['visual_area', 'condition', 'iteration']
    mean_ = dec.average_within_iterations_by_ncells(
        iterdf_,
        analysis_type=analysis_type,
        test_type=test_type,
        grouper=grouper,
    )
    m_.append(mean_.assign(rfs=rf_cond))
mean_df = pd.concat(m_, axis=0, ignore_index=True)

In [185]:
# Iterations are numbered from 0, so the count is max + 1.
n_iters = 1 + iterdf['iteration'].max()
# Population sizes that get a visible x tick label in the curve plots.
xlabels = [1, 32, 64, 96, max_ncells]

# Quick sanity check of the factor levels present.
if test_type is not None:
    print("novel:", mean_df['novel'].unique())
print("conditions:", mean_df['condition'].unique())
print("rfs:", mean_df['rfs'].unique(), mean_df['match_rfs'].unique())

# String version of the boolean 'novel' flag, used as a plotting style key.
mean_df['novel_labels'] = mean_df['novel'].map({True: 'novel', False: 'trained'})

novel: [False  True]
conditions: ['data' 'shuffled']
rfs: ['all' 'matched'] [False  True]


In [186]:
# Row counts per group: each combination should contain one row per iteration.
mean_df.groupby(['visual_area', 'condition', 'train_transform', 'test_transform', 'novel', 'rfs', 'n_cells']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,iteration,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,n_trials,randi_cells,intact,match_rfs,novel_labels
visual_area,condition,train_transform,test_transform,novel,rfs,n_cells,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Li,data,30.0,30.0,False,all,1,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,2,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,4,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,8,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,data,30.0,30.0,False,all,16,500,500,500,500,500,500,500,500,500,500,500,500,500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V1,shuffled,30.0,30.0,False,matched,8,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,16,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,32,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,shuffled,30.0,30.0,False,matched,64,500,500,500,500,500,500,500,500,500,500,500,500,500


In [187]:
print_index = ['visual_area', 'rfs', 'novel']

# Mean held-out accuracy (real data only) per group, pivoted so that columns
# are population sizes. The metric column is selected BEFORE .mean(): averaging
# every column first fails on the string columns in pandas >= 2.0.
if test_type is not None:
    score_table = mean_df[(mean_df.condition == 'data')]\
            .groupby(['visual_area', 'n_cells', 'novel', 'rfs'])['heldout_test_score']\
            .mean().reset_index()\
            .pivot_table(index=print_index, columns='n_cells',
                         values=['heldout_test_score'])
else:
    # No train/test-transform split: one row per visual area. Naming the value
    # column keeps the string 'rfs' column out of the aggregation.
    score_table = mean_df[(mean_df.condition == 'data')]\
            .groupby(['visual_area', 'n_cells', 'rfs'])['heldout_test_score']\
            .mean().reset_index()\
            .pivot_table(index=['visual_area'], columns='n_cells',
                         values=['heldout_test_score'])
print("EXP: %s (class=%s): %s" % (experiment, class_name, aggr_id))
score_table

EXP: blobs (class=morphlevel): dff-ROC__stimulus__matchRF__C1.00


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score,heldout_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,n_cells,1,2,4,8,16,32,64,96
visual_area,rfs,novel,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Li,all,False,0.52502,0.544831,0.570656,0.598471,0.61577,0.645717,0.724856,0.772612
Li,all,True,0.523391,0.537498,0.556059,0.582581,0.598081,0.623237,0.674318,0.707698
Li,matched,False,0.535555,0.555703,0.585391,0.607603,0.643817,0.682259,0.765552,0.812609
Li,matched,True,0.536457,0.547944,0.57163,0.593906,0.626433,0.654182,0.71358,0.747681
Lm,all,False,0.534767,0.5456,0.5615,0.589667,0.614267,0.6409,0.702533,0.743033
Lm,all,True,0.517668,0.522439,0.53199,0.541983,0.552917,0.563745,0.595391,0.614047
Lm,matched,False,0.529367,0.538167,0.555733,0.5789,0.5946,0.624233,0.6765,0.705933
Lm,matched,True,0.520351,0.527039,0.538333,0.554007,0.565181,0.579734,0.612484,0.640829
V1,all,False,0.542438,0.560095,0.5892,0.619867,0.653233,0.6947,0.768233,0.815267
V1,all,True,0.520938,0.532741,0.55136,0.564369,0.583514,0.606193,0.6488,0.678997


In [188]:
# Second per-area palette, used below for the RF-size-matched condition.
area_colors2 = {'V1': 'darkmagenta', 'Lm': 'saddlebrown', 'Li': 'darkblue'}

In [189]:
# Global matplotlib setting: font size of legend titles.
pl.rcParams['legend.title_fontsize'] = 8


In [190]:
# Accuracy vs. population size, one panel per visual area. Each panel overlays
# the all-cells curves (area_colors) and the RF-size-matched curves
# (area_colors2), with line style separating trained from novel conditions.
metric='heldout_test_score'

# Real-data scores only (shuffled controls are excluded).
plotd = mean_df[mean_df.condition=='data']

#cond_colors = {'novel': 'cornflowerblue', 'trained': [0.3]*3}
# Dash spec per 'novel_labels' value: '' draws a solid line, (1,1) an on/off dash pattern.
cond_styles = {'trained': '', 'novel': (1,1)}

fig, axn = pl.subplots(1,3, figsize=(7, 3), sharex=True, sharey=True)

for vi, (va, df_) in enumerate(plotd.groupby('visual_area')):
    # Panel position follows the order of visual_areas, not the groupby order.
    ai = visual_areas.index(va)
    ax=axn[ai]; ax.set_title(va);
    # All cells
    sns.lineplot(x='n_cells', y=metric, data=df_[df_.rfs=='all'], ax=ax,
            style='novel_labels', dashes=cond_styles, ci='sd', err_style='bars',
            color=area_colors[va])
    # RF-size-matched cells
    sns.lineplot(x='n_cells', y=metric, data=df_[df_.rfs=='matched'],ax=ax,
            style='novel_labels', dashes=cond_styles, ci='sd', err_style='bars',
            color=area_colors2[va])
    # Reference line at chance accuracy
    ax.axhline(y=chance_level, ls=':', c='k', lw=0.5)
    ax.set_ylim([0.4, 1])
    # Debug output: lists the auto-generated legend handles for this panel.
    print(ai, ax.legend_.legendHandles)
    if ai==1:
        # Keep only the first two auto-legend handles on the middle panel
        # and move that legend outside the axes.
        leg_h = ax.legend_.legendHandles[0:2]
        print(len(leg_h))
        leg = ax.legend(handles=leg_h, bbox_to_anchor=(2.2,1.), loc='upper left', 
                  frameon=False)
        leg._legend_box.align = "left"
    else:
        ax.legend_.remove()
for ax in axn:
    ax.set_yticks(np.linspace(0.4, 1, 4))
    ax.set_ylim([0.4,1])
    # sample_sizes is the list left over from the data-loading cell.
    ax.set_xticks(sample_sizes)
    ax.set_box_aspect(1)
    # Label only the population sizes listed in xlabels; other ticks stay blank.
    ax.set_xticklabels([i if i in xlabels else '' for i in sample_sizes])
sns.despine(trim=True)

# Color key for the all-cells curves, attached to the last panel.
leg_h2 = pplot.custom_legend_markers(colors=[area_colors[v] for v in visual_areas], 
                                     labels=visual_areas, markers='.')
leg = axn[-1].legend(handles=leg_h2, bbox_to_anchor=(1,0.8), loc='upper left', 
               frameon=False, title='all cells')
leg._legend_box.align = "left"

# Color key for the RF-size-matched curves; attached to the first panel but
# anchored far to the right (x=3.4 in that panel's axes coordinates).
leg_h3 = pplot.custom_legend_markers(colors=[area_colors2[v] for v in visual_areas], 
                                     labels=visual_areas, markers='.')
leg = axn[0].legend(handles=leg_h3, bbox_to_anchor=(3.4,0.45), loc='upper left', 
               frameon=False, title='RF size-matched', fontsize=6)
leg._legend_box.align = "left"

pl.subplots_adjust(left=0.1, right=0.85, bottom=0.15, wspace=0.2, top=0.8)
pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))

figname = 'accuracy_byncell_curves_matchRF-v-all_train-v-novel'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

2 [<matplotlib.lines.Line2D object at 0x2ac50bc608d0>, <matplotlib.lines.Line2D object at 0x2ac50bc3bdd0>, <matplotlib.lines.Line2D object at 0x2ac50bc33190>, <matplotlib.lines.Line2D object at 0x2ac50bc21510>]
1 [<matplotlib.lines.Line2D object at 0x2ac50bbbcc50>, <matplotlib.lines.Line2D object at 0x2ac50bbb3a10>, <matplotlib.lines.Line2D object at 0x2ac50bc56990>, <matplotlib.lines.Line2D object at 0x2ac50bc44a50>]
2
0 [<matplotlib.lines.Line2D object at 0x2ac509372610>, <matplotlib.lines.Line2D object at 0x2ac509971dd0>, <matplotlib.lines.Line2D object at 0x2ac50a8f5210>, <matplotlib.lines.Line2D object at 0x2ac50b98e110>]
/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs accuracy_byncell_curves_matchRF-v-all_train-v-novel


In [191]:
# itertools is used by the bar-plot cell below to cycle hatch patterns.
import itertools
# Display the population size used for the comparisons that follow.
max_ncells

96

In [192]:
# Real-data scores at the largest population size. Copy the selection so that
# columns can be added to it later without writing into a slice of mean_df
# (the source of the SettingWithCopyWarning seen further down).
plotd = mean_df[(mean_df.condition=='data') & (mean_df.n_cells==max_ncells)].copy()
plotd.head()


Unnamed: 0,visual_area,condition,iteration,n_cells,novel,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,train_transform,test_transform,n_trials,randi_cells,intact,match_rfs,rfs,novel_labels
14,Li,data,0,96,False,0.045807,0.000376,0.866667,1.0,0.7,1.0,5157.4,30.0,30.0,38.0,9286.0,True,False,all,trained
15,Li,data,0,96,True,0.045807,0.000376,0.866667,1.0,0.7,1.0,5157.4,30.0,30.0,38.0,9286.0,True,False,all,novel
30,Li,data,1,96,False,0.000752,0.000299,0.753333,1.0,0.775,1.0,5587.6,30.0,30.0,38.0,287.0,True,False,all,trained
31,Li,data,1,96,True,0.000752,0.000299,0.753333,1.0,0.707895,1.0,5587.6,30.0,30.0,38.0,287.0,True,False,all,novel
46,Li,data,2,96,False,0.000739,0.000299,0.78,1.0,0.875,1.0,4615.4,30.0,30.0,38.0,2775.0,True,False,all,trained


In [193]:
%matplotlib notebook
plotd['plotcond'] = ['%s_%s' %  (r, n) for r, n in plotd[['rfs', 'novel_labels']].values]

matchrf_color = [0.5]*3
all_color = [0.8]*3
plot_palette = {'all_trained': all_color, 'all_novel': all_color,
                'matched_trained': matchrf_color, 'matched_novel': matchrf_color}
keys = ['all_trained', 'all_novel', 'matched_trained', 'matched_novel']

fig, ax = pl.subplots(figsize=(5,4))
sns.barplot(x='visual_area', y=metric, data=plotd, ax=ax,
           hue='plotcond', hue_order=keys, palette=plot_palette, 
           edgecolor='w', order=visual_areas, ci='sd', errwidth=0.5)
# legends
leg_h = pplot.custom_legend_markers(colors=[all_color, matchrf_color],
                                    labels=['all cells', 'RF size-matched'],
                                    use_patch=True)
leg_h2 = pplot.custom_legend_markers(colors=['k', 'k'],
                                    labels=['trained', 'novel'],markers=None,
                                    linestyles=['-', '--'], use_patch=False)
ax.legend(handles=leg_h+leg_h2, bbox_to_anchor=(0.99,1), 
          loc='upper left', frameon=False)

bar_locs = 3
hatches = itertools.cycle(['', '//'])
for i, bar in enumerate(ax.patches):
    if i % bar_locs == 0:
        hatch = next(hatches)
    bar.set_hatch(hatch)
ax.set_aspect(2, anchor='SW')
ax.set_ylim([0, 1])
ax.tick_params(which='both', axis='x', size=0)
ax.set_xlabel('')
sns.despine(bottom=True, trim=True)
pl.subplots_adjust(left=0.1, right=0.8, bottom=0.1, top=0.8)
fig.text(0.01, 0.85,\
         'Accuracy, train cond | RF cond (n=%i cells, SD over %i iters)' \
         % (max_ncells, n_iters))

pplot.label_figure(fig, '%s\n%s' % (data_id, aggr_id))

figname = 'compare_performance_matchRF-v-all_train-v-novel'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


<IPython.core.display.Javascript object>

In [194]:
# Real-data rows at the largest population size, as an independent copy
# re-indexed from 0; then show the number of rows in each group.
data_ = mean_df.loc[(mean_df['condition'] == 'data')
                    & (mean_df['n_cells'] == max_ncells)]\
               .copy().reset_index(drop=True)
data_.groupby(['visual_area', 'novel', 'rfs']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,condition,iteration,n_cells,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,train_transform,test_transform,n_trials,randi_cells,intact,match_rfs,novel_labels
visual_area,novel,rfs,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Li,False,all,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,False,matched,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,True,all,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,True,matched,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,False,all,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,False,matched,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,True,all,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,True,matched,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,False,all,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,False,matched,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500


In [195]:
# Histograms of held-out accuracy for trained vs. novel conditions: top row
# uses all cells, bottom row RF-size-matched cells, one column per area.
metric = 'heldout_test_score'
v1 = 'novel'
v2 = 'trained'
data_['novel_label'] = [v1 if v else v2 for v in data_['novel'].values]
fig, axn = pl.subplots(2, 3, figsize=(7, 5), sharex=True, sharey=True)

for (va, rf_), vg in data_.groupby(['visual_area', 'rfs']):
    ri = 1 if rf_ == 'matched' else 0
    vi = visual_areas.index(va)
    ax = axn[ri, vi]
    # Novel scores take the area color (darker palette when matched); trained is gray.
    c1 = area_colors2[va] if rf_ == 'matched' else area_colors[va]
    c2 = [0.5] * 3
    sns.histplot(data=vg, x='heldout_test_score', hue='novel_label', ax=ax,
                 palette={v1: c1, v2: c2}, fill=True, edgecolor=None)

    # Empirical CI bounds: novel first, then trained.
    for subset_, color_ in ((vg[vg.novel], c1), (vg[~(vg.novel)], c2)):
        c_lo, c_hi = hutils.get_empirical_ci(subset_[metric].values)
        ax.axvline(x=c_lo, color=color_)
        ax.axvline(x=c_hi, color=color_)

    # Sig test: fraction of trained scores that fall below the mean novel score.
    meas_mean = vg[(vg.novel)]['heldout_test_score'].mean()
    trained_dist = np.sort(vg[~(vg.novel)]['heldout_test_score'].values)
    n_below = np.count_nonzero(trained_dist < meas_mean)
    pv = n_below / len(trained_dist)
    if pv < 0.01:
        sig_str = '**'
    elif pv < 0.05:
        sig_str = '*'
    else:
        sig_str = 'n.s.'

    ax.set_title('%s (%s, p=%.2f)' % (va, sig_str, pv))

pl.subplots_adjust(left=0.1, right=0.9, bottom=0.2, top=0.8, wspace=0.3, hspace=0.5)
pplot.label_figure(fig, data_id)

figname = 'distns_95CI_test_trained-v-novel'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)


<IPython.core.display.Javascript object>

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs distns_95CI_test_trained-v-novel


In [100]:
# Display the directory figures are currently being saved to.
curr_dst_dir

'/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs'

In [149]:
# Re-import analyze2p.decoding so edits to the module are picked up without restarting the kernel.
importlib.reload(dec)

<module 'analyze2p.decoding' from '/net/coxfs01/srv/export/coxfs01/share_root/2p-pipeline/repos/rat-2p-area-characterizations/analyze2p/decoding.py'>

# Is this difference significantly greater than chance?

In [209]:
# Start from the per-iteration results (iterdf, not the within-iteration
# averages in mean_df): real data at the largest population size.
data_ = iterdf.loc[(iterdf.condition == 'data')
                   & (iterdf.n_cells == max_ncells)].copy()

# Run the trained-vs-novel permutation test separately per RF condition and
# stack the outputs, tagging each with its 'rfs' label.
d_list = []
for rf_cond, data_df in data_.groupby('rfs'):
    diff_ = dec.permutation_test_trained_v_novel(data_df)
    diff_['rfs'] = rf_cond
    d_list.append(diff_.reset_index(drop=True))
diffs = pd.concat(d_list, axis=0, ignore_index=True)

In [230]:
# For each visual area (columns) and RF condition (rows: all, then matched),
# overlay the true and shuffled distributions of the trained-minus-novel
# difference and report a one-sided p-value in the panel title.
shuff_color=[0.8]*3

fig, axn = pl.subplots(2,3, figsize=(7,5), sharex=True, sharey=True)

for (va, rf_), vg in diffs.groupby(['visual_area', 'rfs']):
    ri=1 if rf_=='matched' else 0
    vi = visual_areas.index(va)
    ax=axn[ri, vi]

    # Matched panels use the second per-area palette.
    curr_colors= area_colors2.copy() if rf_=='matched' else area_colors.copy()
    # common_norm=False: each hue level is normalized to sum to 1 on its own.
    sns.histplot(x='difference', hue='trained_v_novel', data=vg, ax=ax, alpha=0.5,
                palette={'shuffled': shuff_color, 'true': curr_colors[va]},
                stat='probability', legend=True, common_norm=False,
                edgecolor='w')
    
    # p-value: fraction of shuffled differences at least as large as the mean
    # true difference.
    meas_mean = vg[vg.trained_v_novel=='true']['difference'].mean()
    ax.axvline(x=meas_mean, color=curr_colors[va])
    shuff_dist = sorted(vg[vg.trained_v_novel=='shuffled']['difference'].values)
    # shuff_dist is a plain list; the comparison is elementwise only because
    # meas_mean is a NumPy scalar.
    n_extreme = len(np.where(shuff_dist>=meas_mean)[0])
    pv = n_extreme/len(shuff_dist)
    if pv<0.05:
        sig_str='**' if pv<0.01 else '*'
    else:
        sig_str='n.s.'
    ax.set_title('%s (%s, p=%.2f)' % (va, sig_str, pv), loc='left', fontsize=7)
    
    # Dotted lines mark the empirical CI bounds of the shuffled distribution.
    c_lo, c_hi = hutils.get_empirical_ci(shuff_dist)
    ax.axvline(x=c_lo, color=[0.5]*3, ls=':')
    ax.axvline(x=c_hi, color=[0.5]*3, ls=':')
    # Only the top-row 'Li' panel keeps a legend, drawn with generic gray patches.
    if ri==0 and va=='Li':
        leg_h = pplot.custom_legend_markers(colors=[[0.5]*3, shuff_color],
                                           labels=['true', 'shuffled'], 
                                           use_patch=True)
        ax.legend(handles=leg_h, bbox_to_anchor=(1,1), loc='upper left', frameon=False,
                 title=None)
    else:
        ax.legend_.remove()
pl.subplots_adjust(left=0.1, right=0.9, bottom=0.2, top=0.8, wspace=0.3, hspace=0.5)
fig.text(0.01, 0.85,\
         "Permutation test on differences (Trained-Novel), n=%i cells" \
         % max_ncells, fontsize=8)

pplot.label_figure(fig, data_id)
figname = 'permutation_test_trained-v-novel-diffs'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls/match_rfs permutation_test_trained-v-novel-diffs


In [236]:
# Summary statistics of the true (unshuffled) differences per visual area and RF condition.
diffs[diffs.trained_v_novel=='true'].groupby(['visual_area', 'rfs'])['difference'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
visual_area,rfs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Li,all,500.0,0.064914,0.064052,-0.197368,0.02404,0.065765,0.108995,0.234343
Li,matched,500.0,0.064928,0.056877,-0.093421,0.023684,0.068421,0.106579,0.198684
Lm,all,500.0,0.128987,0.054466,-0.050575,0.09533,0.131609,0.164727,0.288218
Lm,matched,500.0,0.065104,0.055064,-0.100862,0.028664,0.06408,0.101509,0.242816
V1,all,500.0,0.13627,0.053083,-0.034483,0.103305,0.138362,0.171336,0.287069
V1,matched,500.0,0.14004,0.0435,-0.012356,0.113937,0.139655,0.169325,0.262356


In [234]:
# Display the permutation-test table (true and shuffled differences for both RF conditions).
diffs

Unnamed: 0,visual_area,n_cells,iteration,difference,trained_v_novel,rfs
0,Li,96,0,-1.110223e-16,true,all
1,Lm,96,0,1.238506e-01,true,all
2,V1,96,0,4.252874e-02,true,all
3,Li,96,1,6.710526e-02,true,all
4,Lm,96,1,1.488506e-01,true,all
...,...,...,...,...,...,...
5995,Lm,96,498,-7.097701e-02,shuffled,matched
5996,V1,96,498,1.418103e-01,shuffled,matched
5997,Li,96,499,1.085526e-02,shuffled,matched
5998,Lm,96,499,-2.528736e-02,shuffled,matched


In [135]:
# Inspect the shuffled (null) differences for RF-matched V1 only.
# Every mask term is built from `diffs`; the old third term used `vg`, i.e.
# whatever frame a previous cell left behind (NameError on a fresh kernel,
# and a mask misaligned with `diffs` otherwise).
vg = diffs[(diffs.visual_area=='V1') & (diffs.rfs=='matched')
          & (diffs.trained_v_novel=='shuffled')].copy()
# Two panels are created; only the first is drawn into here.
fig, axn = pl.subplots(1,2)
ax=axn[0]
sns.histplot(x='difference', data=vg, hue='trained_v_novel', ax=ax)
pl.subplots_adjust(bottom=0.3)

<IPython.core.display.Javascript object>

In [126]:
# Row counts of the current `vg` selection per RF condition and true/shuffled label.
vg.groupby(['rfs', 'trained_v_novel']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,visual_area,n_cells,iteration,difference
rfs,trained_v_novel,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
matched,shuffled,500,500,500,500
matched,true,500,500,500,500


In [127]:
# Display the shuffled rows of the current `vg` selection.
vg[vg.trained_v_novel=='shuffled']

Unnamed: 0,visual_area,n_cells,iteration,difference,trained_v_novel,rfs
4502,V1,96,0,-0.085057,shuffled,matched
4505,V1,96,1,0.172989,shuffled,matched
4508,V1,96,2,0.186782,shuffled,matched
4511,V1,96,3,0.011494,shuffled,matched
4514,V1,96,4,-0.101149,shuffled,matched
...,...,...,...,...,...,...
5987,V1,96,495,-0.133908,shuffled,matched
5990,V1,96,496,-0.148276,shuffled,matched
5993,V1,96,497,-0.121264,shuffled,matched
5996,V1,96,498,-0.139655,shuffled,matched


In [75]:
# Plot the trained-condition held-out scores in ascending order.
# NOTE(review): needs `vg` to have 'novel' and 'heldout_test_score' columns,
# i.e. the `vg` left over from a loop over `data_`/`mean_df` groups, not the
# `vg` built from `diffs` just above -- this cell depends on execution order.
pl.figure()
pl.plot(vg[~(vg.novel)].sort_values(by='heldout_test_score')['heldout_test_score'].values)


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2ac50a34f050>]

In [32]:
# Real-data, within-iteration-averaged scores at the largest population size.
# This rebinds `data_df`, which is also the loop variable of the permutation-test cell.
data_df = mean_df[(mean_df.condition=='data') & (mean_df.n_cells==max_ncells)]


In [33]:
# Row counts per visual area, RF condition and novel flag.
data_df.groupby(['visual_area', 'rfs', 'novel']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,condition,iteration,n_cells,fit_time,score_time,test_score,train_score,heldout_test_score,C,randi,train_transform,test_transform,n_trials,randi_cells,intact,match_rfs,novel_labels
visual_area,rfs,novel,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Li,all,False,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,all,True,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,matched,False,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Li,matched,True,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,all,False,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,all,True,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,matched,False,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
Lm,matched,True,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,all,False,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500
V1,all,True,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500


In [36]:
# Per-iteration difference (trained minus novel held-out score) for every
# visual area and RF condition, collected into `diffdf`.
d_list = []
for (va, rf_cond), cg in data_df.groupby(['visual_area', 'rfs']):
    novel_rows = cg[cg.novel].sort_values(by='iteration')
    trained_rows = cg[~cg.novel].sort_values(by='iteration')
    # The subtraction pairs rows by position, so both subsets must cover
    # exactly the same iterations in the same order.
    if not np.array_equal(novel_rows['iteration'].values,
                          trained_rows['iteration'].values):
        raise ValueError("Trained/novel iterations do not match for %s (%s)"
                         % (va, rf_cond))
    # Named `score_diffs` so it no longer overwrites the `diffs` DataFrame
    # produced by the permutation-test cell.
    score_diffs = (trained_rows['heldout_test_score'].values
                   - novel_rows['heldout_test_score'].values)
    df_ = pd.DataFrame({'trained_minus_novel': score_diffs})
    df_['visual_area'] = va
    df_['condition'] = 'data'
    df_['rfs'] = rf_cond
    df_['iteration'] = novel_rows['iteration'].values
    d_list.append(df_)
diffdf = pd.concat(d_list, axis=0)

In [37]:
# Empirical CDFs of the trained-minus-novel difference: one column per visual
# area, with the two RF conditions overlaid as hue.
# fig, ax = pl.subplots()
g = sns.displot(hue='rfs', x='trained_minus_novel',  col='visual_area',
           data=diffdf, height=2, col_order=visual_areas, #hue_norm=True,
            kind='ecdf', 
           )
pl.subplots_adjust(left=0.1, right=0.9, bottom=0.2, top=0.8)

# displot creates its own figure, reachable as g.fig.
pplot.label_figure(g.fig, '%s\n%s' % (data_id, aggr_id))

figname = 'diff_trained-minus-novel_matchrf-v-all'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))

<IPython.core.display.Javascript object>

In [38]:
# Same differences as histograms; rows are split by 'condition' (diffdf only
# ever sets it to 'data', so there is a single row). Not saved to disk.
sns.displot(hue='rfs', x='trained_minus_novel',  col='visual_area',
           data=diffdf, height=2, col_order=visual_areas, #hue_norm=True,
            kind='hist', row='condition'
           )

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2ac509248350>

In [50]:
# Per area: histogram of the all-cells differences with its empirical CI and
# mean, plus the mean of the RF-matched differences for comparison.
# NOTE: rebinds the notebook-wide `metric` name.
metric = 'trained_minus_novel'
fig, axn = pl.subplots(1, 3, figsize=(8, 3))

for va, vg in diffdf.groupby('visual_area'):
    ax = axn[visual_areas.index(va)]
    all_rows = vg[vg['rfs'] == 'all']
    sns.histplot(data=all_rows, x=metric, ax=ax,
                 color=[0.8] * 3, label='all distn')
    # Dotted lines: empirical CI bounds of the all-cells distribution.
    c_lo, c_hi = hutils.get_empirical_ci(all_rows[metric].values, ci=0.95)
    for bound_ in (c_lo, c_hi):
        ax.axvline(x=bound_, color='k', ls=':')
    # Solid black line: all-cells mean.
    mean_v0 = all_rows[metric].mean()
    ax.axvline(x=mean_v0, color='k', label='all')

    # Colored line: mean of the RF-matched differences.
    mean_v = vg.loc[vg['rfs'] == 'matched', metric].mean()
    ax.axvline(x=mean_v, color=area_colors[va], label='matched')
    ax.set_title(va, loc='left')
    ax.legend(bbox_to_anchor=(1, 1), loc='lower right', frameon=False)

pl.subplots_adjust(bottom=0.25, right=0.8, left=0.1, wspace=0.5, top=0.8)

<IPython.core.display.Javascript object>

In [51]:
# Copy of the 'data' rows of diffdf, then the number of rows per visual area and RF condition.
df_ = diffdf[(diffdf['condition']=='data')].copy()
df_.groupby(['visual_area', 'rfs']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,trained_minus_novel,condition,iteration
visual_area,rfs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Li,all,500,500,500
Li,matched,500,500,500
Lm,all,500,500,500
Lm,matched,500,500,500
V1,all,500,500,500
V1,matched,500,500,500


In [61]:
# Label-shuffle test: is the mean trained-minus-novel difference different
# between all cells and RF-size-matched cells? Top row shows the shuffled null
# with the true value; bottom row shows the two original distributions and
# their empirical CI bounds.
metric = 'trained_minus_novel'  # set here so the cell does not rely on an earlier cell
v1 = 'all'
v2 = 'matched'
c1 = [0.5]*3
c2 = 'r'
n_shuffles = 1000
fig, axn = pl.subplots(2, 3, figsize=(8,4))
# Group by a plain string key: a one-element list key yields 1-tuples in
# pandas >= 2.0, which would break visual_areas.index(va).
for va, vg in diffdf.groupby('visual_area'):
    # Shuffle test
    a_ = vg[(vg.rfs=='matched')][metric]
    b_ = vg[(vg.rfs=='all')][metric]
    true_ = b_.mean() - a_.mean()
    combined_ = pd.concat([a_, b_], axis=0, ignore_index=True).values
    # Split the shuffled pool at the real size of the first group instead of
    # a hard-coded 500.
    n_first = len(a_)
    shuff_ = []
    # NOTE: np.random is unseeded, so the null distribution varies run to run.
    for n in range(n_shuffles):
        np.random.shuffle(combined_)
        i = np.mean(combined_[0:n_first])
        v = np.mean(combined_[n_first:])
        shuff_.append(i-v)
    shuff = np.array(shuff_)

    ai = visual_areas.index(va)
    ax = axn[0, ai]
    sns.histplot(shuff, color=[0.8]*3, ax=ax, label='shuffled')
    ax.axvline(x=true_, color='r', label='true')
    # Fraction of shuffles above / below the true difference.
    print(len(np.where(shuff>true_)[0])/float(n_shuffles))
    print(len(np.where(shuff<true_)[0])/float(n_shuffles))
    ax.set_xlim([-0.05, 0.05])
    ax.axvline(x=0, color='k', ls=':')

    ax.set_title(va)

    # Plot original distns with CI
    ax = axn[1, ai]
    sns.histplot(x=metric, hue='rfs', data=vg, ax=ax,
                palette={v1: c1, v2: c2}, fill=True, edgecolor=None)
    c_lo, c_hi = hutils.get_empirical_ci(vg[vg.rfs==v1][metric].values)
    ax.axvline(x=c_lo, color=c1)
    ax.axvline(x=c_hi, color=c1)

    c_lo, c_hi = hutils.get_empirical_ci(vg[(vg.rfs==v2)][metric].values)
    ax.axvline(x=c_lo, color=c2)
    ax.axvline(x=c_hi, color=c2)
    ax.set_xlim([-0.3, 0.3])
    # Drop seaborn's hue legend on every panel; a custom one is attached to
    # the last column after the loop. (A bare ax.legend() here produced the
    # "No handles with labels found" warning.)
    ax.legend_.remove()

ax = axn[0, 2]
ax.legend(bbox_to_anchor=(1,1), loc='upper left', frameon=False)

ax = axn[1,2]
leg_h = pplot.custom_legend_markers(colors=[c1, c2], labels=[v1, v2])
ax.legend(handles=leg_h, bbox_to_anchor=(1,1), loc='upper left', frameon=False)
fig.text(0.01, 0.9, "Distns of differences [trained-novel]")

# One layout call holding the final values of the previously stacked adjustments.
pl.subplots_adjust(left=0.1, right=0.8, bottom=0.25, top=0.75, wspace=0.5, hspace=0.5)

pplot.label_figure(fig, data_id)

figname = 'shuffle_test_DIFF_distn'
pl.savefig(os.path.join(curr_dst_dir, '%s.svg' % figname))
print(curr_dst_dir, figname)

<IPython.core.display.Javascript object>

1.0
0.0


No handles with labels found to put in legend.


0.0
1.0
0.985
0.015
/n/coxfs01/julianarhee/aggregate-visual-areas/decoding/py3_by_ncells/morphlevel/size_single/controls shuffle_test_DIFF_distn


In [70]:
# Mean trained-minus-novel difference per visual area, with all-cells and
# RF-matched bars side by side and 'sd' error bars.
rfmatch_colors = dict(all=[0.7] * 3, matched=[0.3] * 3)
fig, ax = pl.subplots()

sns.barplot(data=diffdf, x='visual_area', y='trained_minus_novel',
            hue='rfs', order=visual_areas, palette=rfmatch_colors,
            ci='sd', ax=ax)
pl.subplots_adjust(left=0.1, right=0.8, bottom=0.2)

<IPython.core.display.Javascript object>