In [1]:
from os.path import join, dirname, exists
from os import makedirs

from thesis_v2 import dir_dict
import pandas as pd


# Path of the pickled results table ('main_result.pkl') that this notebook loads and analyses.
f_main_result = join(dir_dict['analyses'], 'yuanyuan_8k_a_3day_refactored', '20200725', 'main_result.pkl')

In [2]:
# Load the results table from disk.
# NOTE(review): unpickling can execute arbitrary code; presumably this file is produced by
# this project's own pipeline -- only load pickles from a trusted source.
df_main_result = pd.read_pickle(f_main_result)

In [3]:
# Show the loaded table; pandas abbreviates long frames in the displayed output.
df_main_result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,cc2_normed_avg,cc2_raw_avg,cc_raw_avg,num_param
act_fn,ff_1st_bn_before_act,loss_type,model_seed,num_layer,out_channel,rcnn_bl_cls,readout_type,train_keep,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
relu,True,mse,0,2,8,1,cm-avg,1280,0.380000,0.182549,0.389889,17453
relu,False,mse,0,2,8,1,cm-avg,1280,0.357125,0.170239,0.379368,17461
relu,True,mse,0,2,8,2,cm-avg,1280,0.400852,0.190568,0.404994,18045
relu,False,mse,0,2,8,2,cm-avg,1280,0.390532,0.187182,0.398958,18053
relu,True,mse,0,2,8,3,cm-avg,1280,0.397727,0.188654,0.403129,18061
...,...,...,...,...,...,...,...,...,...,...,...,...
softplus,False,poisson,1,4,32,1,legacy,5120,0.666608,0.306528,0.537318,48621
softplus,True,poisson,1,5,32,1,legacy,5120,0.671754,0.308807,0.539876,57869
softplus,False,poisson,1,5,32,1,legacy,5120,0.669742,0.307882,0.538928,57901
softplus,True,poisson,1,6,32,1,legacy,5120,0.674378,0.310332,0.540986,67149


In [4]:
# iterate over three metrics. cc2_normed, cc2_raw, cc_raw.

# iterate over each train_size.

# plot

def main_loop(df_in):
    """Run the per-training-size analysis once for each performance metric.

    For every metric column, a two-column view (that metric renamed to
    'perf', plus 'num_param') is built and handed to `loop_over_train_size`.
    The metric name is printed before each pass.
    """
    metric_names = ('cc2_normed_avg', 'cc2_raw_avg', 'cc_raw_avg')
    for metric_name in metric_names:
        print(metric_name)
        selected = df_in.loc[:, [metric_name, 'num_param']]
        # Downstream helpers expect the metric under the generic name 'perf'.
        as_perf = selected.rename(columns={metric_name: 'perf'})
        loop_over_train_size(as_perf)

def loop_over_train_size(df_in):
    """Process each distinct 'train_keep' value of the index separately.

    For every value, the matching rows are selected (dropping the
    'train_keep' level), sorted by index, and passed to `process_one_case`.
    The value is printed before each pass.
    """
    train_sizes = df_in.index.get_level_values('train_keep').unique()
    for size in train_sizes:
        print(size)
        subset = df_in.xs(size, level='train_keep')
        process_one_case(subset.sort_index())

In [5]:
from scipy.stats import pearsonr
import numpy as np

In [6]:
def check_model_seeds(df_in):
    """Sanity-check that the two model seeds (0 and 1) cover the same configs.

    Asserts that exactly seeds {0, 1} are present in the 'model_seed' index
    level, that both seeds have identical indices for 'perf', and that
    'num_param' is identical between them. Prints the mean/std of 'perf'
    for each seed and the Pearson correlation between the two seeds.
    """
    seeds_present = set(df_in.index.get_level_values('model_seed').unique())
    assert seeds_present == {0, 1}

    def _column_for_seed(column, seed):
        # One column restricted to one seed, with the seed level dropped.
        return df_in[column].xs(seed, level='model_seed').sort_index()

    perf_seed0 = _column_for_seed('perf', 0)
    perf_seed1 = _column_for_seed('perf', 1)
    assert perf_seed0.index.equals(perf_seed1.index)
    values_seed0, values_seed1 = perf_seed0.values, perf_seed1.values

    for seed, values in ((0, values_seed0), (1, values_seed1)):
        print(f'seed={seed}, mean {values.mean()} std {values.std()}')
    print('corr', pearsonr(values_seed0, values_seed1)[0])

    # The parameter count must not depend on the seed.
    params_seed0 = _column_for_seed('num_param', 0)
    params_seed1 = _column_for_seed('num_param', 1)
    assert params_seed0.equals(params_seed1)

def avg_out_seed(df_in):
    """Collapse the 'model_seed' index level.

    'perf' is averaged across seeds; 'num_param' is taken from seed 0.
    Returns a DataFrame with columns 'perf' and 'num_param', indexed by the
    remaining index levels (sorted).
    """
    perf_by_config = (
        df_in['perf']
        .unstack('model_seed')
        .mean(axis=1)
        .sort_index()
        .rename('perf')
    )
    params_by_config = (
        df_in['num_param']
        .xs(0, level='model_seed')
        .sort_index()
        .rename('num_param')
    )
    assert perf_by_config.index.equals(params_by_config.index)

    combined = pd.concat([perf_by_config, params_by_config], axis='columns')
    # Concatenation must not have re-aligned or reordered the rows.
    for part in (perf_by_config, params_by_config):
        assert combined.index.equals(part.index)
    return combined

def process_ff_models(df_in):
    """Build one summary table per (out_channel, num_layer) combination.

    Only rows with ``rcnn_bl_cls == 1`` are used (presumably the
    feed-forward models, going by this function's name -- confirm).

    For each distinct (out_channel, num_layer) pair the 'readout_type' level
    is averaged out, giving a frame indexed by the remaining index levels
    with exactly two columns, 'perf' and 'num_param'.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Columns 'perf' and 'num_param'; the index must contain the levels
        'rcnn_bl_cls', 'out_channel', 'num_layer' and 'readout_type'.

    Returns
    -------
    dict
        Maps ``(out_channel, num_layer)`` tuples to the per-combination frame.

    Raises
    ------
    AssertionError
        If 'num_param' differs between readout types of the same config.
    """
    data = df_in.xs(1, level='rcnn_bl_cls')

    index_out_channel = data.index.get_level_values('out_channel').values
    index_num_layer = data.index.get_level_values('num_layer').values

    # Distinct (out_channel, num_layer) rows, sorted; .tolist() yields plain Python scalars.
    data_channel_layer = np.asarray([index_out_channel, index_num_layer]).T
    unique_channel_layer = np.unique(data_channel_layer, axis=0).tolist()

    data_dict = dict()

    for c_this, l_this in unique_channel_layer:
        key_this = (c_this, l_this)
        df_this = data.xs(key=key_this, level=('out_channel', 'num_layer'))
        # One column per readout type, for both 'perf' and 'num_param'.
        df_this = df_this.unstack('readout_type')
        perf = df_this['perf']
        num_param = df_this['num_param']
        assert perf.shape == num_param.shape
        num_readout = perf.shape[1]
        if num_readout > 1:
            # take average to remove card-to-card variance.
            # remove NAs due to incomplete configs.
            perf = perf.mean(axis=1, skipna=True)
            # Every config must report a single parameter count across readout types.
            assert (num_param.nunique(axis=1, dropna=True) == 1).all(), \
                f'num_param differs across readout types for {key_this}'
            num_param = num_param.mean(axis=1, skipna=True)
        else:
            # Single readout type: squeeze the one-column frames to Series so
            # the output columns are labelled 'perf'/'num_param' rather than
            # by the readout type name.
            perf = perf.iloc[:, 0]
            num_param = num_param.iloc[:, 0]
        assert perf.index.equals(num_param.index)

        perf = perf.rename('perf')
        num_param = num_param.rename('num_param')
        ret = pd.concat([perf, num_param], axis='columns')
        assert ret.index.equals(perf.index)
        assert ret.index.equals(num_param.index)

        data_dict[key_this] = ret
    return data_dict
        
        
        
        
        
        
        
        
        
        
    
    
#     assert data.shape[1] == 3
    
#     print()
    
#     for idx, case in data.iterrows():
#         case_val = case.values
#         assert case_val.shape == (3,)
        # get non nan values
#         case_non_nan = case_val[~np.isnan(case)]
#         assert case_non_nan.size > 0
#         case_non_nan_debug = np.full_like(case_non_nan, fill_value=case_non_nan[0])
#         if not np.allclose(case_non_nan, case_non_nan_debug, atol=1e-3):
#             print(idx, case)
#             print(case_non_nan, case_non_nan_debug)
#         assert np.allclose(case_non_nan, case_non_nan_debug, atol=1e-3)

# Note: results for different readout types can differ slightly, probably due to card-to-card variance.
# I checked one such case by hand:

# 5120, cc_mean_avg
# (relu, False, poisson, 2, 32)
# cm-avg     0.499929
# cm-last    0.496784
# legacy          NaN
# Name: (relu, False, poisson, 2, 32), dtype: float64
# [0.49992943 0.49678396] [0.49992943 0.49992943]

# for cm-avg
# check files
# models/yuanyuan_8k_a_3day/maskcnn_polished_with_rcnn_k_bl/
# s_selegacy/in_sz50/out_ch32/num_l2/k_l19/k_p3/ptavg/bn_a_fcFalse/actrelu/r_c1/r_psize1/r_ptypeNone/
# r_acccummean/ff1st_True/ff1stbba_False/rp_none/sc0.01/sm0.000005/lpoisson/m_se0/stats_best.json
#
# corr_mean: 0.5004660408667474
# "best_phase": 2, "best_epoch": 50, "early_stopping_loss": 0.813396155834198
# 
# and
#
# models/yuanyuan_8k_a_3day/maskcnn_polished_with_rcnn_k_bl/
# s_selegacy/in_sz50/out_ch32/num_l2/k_l19/k_p3/ptavg/bn_a_fcFalse/actrelu/r_c1/r_psize1/r_ptypeNone/
# r_acccummean/ff1st_True/ff1stbba_False/rp_none/sc0.01/sm0.000005/lpoisson/m_se1/stats_best.json
#
# corr_mean: 0.49939281448219086
# "best_phase": 1, "best_epoch": 1150, "early_stopping_loss": 0.8130963444709778


# for cm-last
# check files
# .... r_acccummean_last/ff1st_True/ff1stbba_False/sc0.01/sm0.000005/lpoisson/m_se0/stats_best.json
# 
# "corr_mean": 0.49417510516371543
# "best_phase": 2, "best_epoch": 150, "early_stopping_loss": 0.8139971494674683
#
# and
#
# .... r_acccummean_last/ff1st_True/ff1stbba_False/sc0.01/sm0.000005/lpoisson/m_se1/stats_best.json
#
# corr_mean: 0.49939281448219086
# {"best_phase": 1, "best_epoch": 1150, "early_stopping_loss": 0.8130963444709778


# In this case, cm-avg and cm-last give the same result for seed=1 but different results for seed=0.


def process_one_case(df_in):
    """Analyse one (metric, train size) slice of the results.

    Prints the input shape, checks consistency between model seeds,
    averages the seeds out, then builds the per-(out_channel, num_layer)
    feed-forward tables and prints each key with its table shape.
    """
    print(df_in.shape)
    # for each metric.

    # Step 1: compare seed=0 against seed=1 as a sanity check.
    # The larger the training size is, the more stable across seeds.
    check_model_seeds(df_in)

    # Step 2: average over model seeds.
    seed_averaged = avg_out_seed(df_in)
    print(seed_averaged.shape)

    # Step 3: for each combination
    #    (ff, PROPER cm-avg x num_layer, PROPER cm-last x num_layer) x (out_channel, num_layer)
    #    compute the average. Make sure each one has the SAME number of settings
    #    (handle cm-avg/cm-last ambiguity for ff).
    ff_tables = process_ff_models(seed_averaged)
    for channel_layer, table in ff_tables.items():
        print(channel_layer, table.shape)

    # Step 4 (TODO): create a mapping between ff (out, num_layer) and similarly sized PROPER recurrents.

    # Step 5 (TODO): plot/table! Maybe have both combined / separate results.

    # KEEP the number of parameters the same, so that things are comparable.

In [7]:
# Run the whole analysis: every metric, and within each metric every training-set size.
main_loop(df_main_result)

cc2_normed_avg
1280
(1872, 2)
seed=0, mean 0.4665990525514411 std 0.07990275148459684
seed=1, mean 0.4688627050028689 std 0.0783478402817805
corr 0.919645772049287
(936, 2)
(2, 4) (8, 2)
(2, 5) (8, 2)
(2, 6) (8, 2)
(4, 4) (8, 2)
(4, 5) (8, 2)
(4, 6) (8, 2)
(8, 2) (8, 2)
(8, 3) (8, 2)
(8, 4) (8, 2)
(8, 5) (8, 2)
(8, 6) (8, 2)
(16, 2) (8, 2)
(16, 3) (8, 2)
(16, 4) (8, 2)
(16, 5) (8, 2)
(16, 6) (8, 2)
(32, 2) (8, 2)
(32, 3) (8, 2)
(32, 4) (8, 2)
(32, 5) (8, 2)
(32, 6) (8, 2)
2560
(1872, 2)
seed=0, mean 0.5758502908285674 std 0.05274786024254501
seed=1, mean 0.5755912712916434 std 0.05441454150695219
corr 0.957336951512258
(936, 2)
(2, 4) (8, 2)
(2, 5) (8, 2)
(2, 6) (8, 2)
(4, 4) (8, 2)
(4, 5) (8, 2)
(4, 6) (8, 2)
(8, 2) (8, 2)
(8, 3) (8, 2)
(8, 4) (8, 2)
(8, 5) (8, 2)
(8, 6) (8, 2)
(16, 2) (8, 2)
(16, 3) (8, 2)
(16, 4) (8, 2)
(16, 5) (8, 2)
(16, 6) (8, 2)
(32, 2) (8, 2)
(32, 3) (8, 2)
(32, 4) (8, 2)
(32, 5) (8, 2)
(32, 6) (8, 2)
5120
(1872, 2)
seed=0, mean 0.6228860885579299 std 0.0503630