In [1]:
# this file collects the main result to be shown in the paper.

# basically, FF vs recurrent (cm-avg) vs recurrent (cm-last) with similar numbers of parameters

In [2]:
from os.path import join, dirname, exists
from os import makedirs

from thesis_v2 import dir_dict
from thesis_v2.submission import utils
from thesis_v2.configs.model.maskcnn_polished_with_rcnn_k_bl import (
    explored_models_20200801_gaya_generator,
    keygen
)

from thesis_v2.training_extra.misc import count_params
from thesis_v2.training.training_aux import load_training_results

from thesis_v2.models.maskcnn_polished_with_rcnn_k_bl.builder import load_modules

# NOTE(review): presumably registers the project's custom modules so that
# `build_net` (used below) can construct these models -- confirm against the
# builder module. Must run before any model is built.
load_modules()

import json
from copy import deepcopy
import pandas as pd

In [3]:
from torchnetjson.builder import build_net
import numpy as np

In [4]:
# get cc_max
# from strflab.stats import cc_max
# from thesis_v2.data.prepared.yuanyuan_8k import get_neural_data_per_trial

# cc_max_all_neurons = cc_max(get_neural_data_per_trial(('042318', '043018','051018',)))
# assert cc_max_all_neurons.shape == (79,)

In [5]:
def collect_main_result():
    """Collect native test performance and parameter counts for every explored model.

    Iterates over ``explored_models_20200801_gaya_generator(with_source=True)``,
    which yields ``(src, param)`` pairs. For each model this function:

    * checks that ``param`` has 26 entries and that the hyper-parameters listed
      in ``fixed_keys`` hold their expected constant values;
    * loads the training results twice -- first without the model, to read the
      model config, then with a freshly built network so that the trainable
      parameters can be counted via ``count_params``;
    * reads the per-neuron test correlations (asserted to have shape ``(14,)``);
    * merges ``yhat_reduce_pick`` and ``rcnn_acc_type`` into a single
      ``readout_type`` entry (``'cm-avg'``, ``'cm-last'`` or ``'legacy'``) and
      checks it against the source tag ``src``.

    Non-native results are not collected here; they are used in deeper analysis
    but not in the main result. The normalized metric (``cc2_normed_avg``) is
    not computed because ``cc_max`` is not loaded in this notebook.

    Returns
    -------
    pandas.DataFrame
        One row per model, indexed by the (sorted) hyper-parameters that are
        not in ``fixed_keys``, with columns ``cc2_raw_avg``, ``cc_raw_avg``
        and ``num_param``. The index is verified to be unique.

    Raises
    ------
    AssertionError
        If any of the sanity checks on a model's parameters or results fails.
    KeyError
        If the ``(yhat_reduce_pick, rcnn_acc_type)`` pair is not a known one.
    ValueError
        If the generator yields no models (or the index contains duplicates,
        raised by ``set_index(verify_integrity=True)``).
    """
    # hyper-parameters that are identical across all explored models
    fixed_keys = {
        'kernel_size_l23': 3,
        'kernel_size_l1': 9,
        'ff_1st_block': True,
        'pooling_ksize': 3,
        'pooling_type': 'avg',
        'bn_after_fc': False,
        'rcnn_bl_psize': 1,
        'rcnn_bl_ptype': None,
        'input_size': 63,
        'split_seed': 'legacy',
        'dataset_prefix': 'gaya',
        'model_prefix': 'maskcnn_polished_with_rcnn_k_bl',
        'scale_name': '0.01',
        'scale': '0.01',
        'smoothness_name': '0.000005',
        'smoothness': '0.000005',
    }

    # (yhat_reduce_pick, rcnn_acc_type) -> readout_type
    readout_type_map = {
        ('none', 'cummean'): 'cm-avg',
        (-1, 'cummean_last'): 'cm-last',
        (-1, 'cummean'): 'legacy',
    }

    rows_all = []
    # names of the varying hyper-parameters; they become the DataFrame index
    index_keys = None

    for idx, (src, param_src) in enumerate(explored_models_20200801_gaya_generator(with_source=True)):
        assert len(param_src) == 26
        # work on a copy: the dict is edited below and the generator's own
        # object must not be modified behind its back.
        param = dict(param_src)

        if idx % 100 == 0:
            print(idx)

        # some parameters that won't change.
        for k_fix, v_fix in fixed_keys.items():
            assert param[k_fix] == v_fix

        # the remaining (varying) parameters look like
        # {'yhat_reduce_pick': 'none', 'train_keep': 1280, 'model_seed': 0,
        #  'act_fn': 'relu', 'loss_type': 'mse', 'out_channel': 8, 'num_layer': 2,
        #  'rcnn_bl_cls': 1,
        #  'rcnn_acc_type': 'cummean', 'ff_1st_bn_before_act': True}

        # load model to get param count
        key = keygen(**{k: v for k, v in param.items() if k not in {'scale', 'smoothness'}})
        # load twice: the first pass only provides the model config needed to
        # build the network, the second pass loads the results with the model.
        result = load_training_results(key, return_model=False)
        result = load_training_results(key, return_model=True, model=build_net(result['config_extra']['model']))
        num_param = count_params(result['model'])

        cc_native = np.asarray(result['stats_best']['stats']['test']['corr'])
        assert cc_native.shape == (14,)

        # replace 'yhat_reduce_pick' + 'rcnn_acc_type' with 'readout_type'
        readout_raw = param['yhat_reduce_pick'], param['rcnn_acc_type']
        if readout_raw == (-1, 'cummean'):
            # this should only happen for deep FF models, where this does not matter.
            assert param['rcnn_bl_cls'] == 1
            assert src == 'deep-ff'

        readout_type = readout_type_map[readout_raw]
        if readout_type == 'legacy':
            assert src == 'deep-ff'
        else:
            assert src == readout_type

        del param['yhat_reduce_pick']
        del param['rcnn_acc_type']
        param['readout_type'] = readout_type

        # (a cross-check of cc_native against a per-model eval.json used to
        # live here; it is disabled for this dataset.)

        assert param['train_keep'] in {1900 // 2, 1900, 3800}

        # add result
        row_this = {k: v for k, v in param.items() if k not in fixed_keys}
        # every parameter must be accounted for: all fixed ones are dropped and
        # two readout parameters were merged into one.
        assert len(row_this) == len(param_src) - len(fixed_keys) - 1, (
            'unexpected set of varying parameters', sorted(row_this)
        )
        index_keys = sorted(row_this)

        row_this['num_param'] = num_param
        row_this['cc_raw_avg'] = cc_native.mean()
        row_this['cc2_raw_avg'] = (cc_native ** 2).mean()

        rows_all.append(row_this)

    if not rows_all:
        raise ValueError('explored_models_20200801_gaya_generator yielded no models')

    df_this = pd.DataFrame(rows_all, columns=sorted(rows_all[0]))
    # verify_integrity guards against two models sharing the same configuration
    df_this = df_this.set_index(keys=index_keys, verify_integrity=True)
    return df_this

In [6]:
# On-disk cache for the collected table.
f_main_result = join(
    dir_dict['analyses'], 'gaya_refactored', '20200801', 'main_result.pkl',
)

# Only run the collection when no cached pickle exists yet.
if not exists(f_main_result):
    makedirs(dirname(f_main_result), exist_ok=True)
    collect_main_result().to_pickle(f_main_result)

# Always read back from disk, so a fresh run and a cached run both end up
# with the table exactly as it was pickled.
df_main_result = pd.read_pickle(f_main_result)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700


In [7]:
# Show the collected table: one row per model configuration (the index was
# verified to be unique); the rendered output elides the middle rows.
df_main_result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,cc2_raw_avg,cc_raw_avg,num_param
act_fn,ff_1st_bn_before_act,loss_type,model_seed,num_layer,out_channel,rcnn_bl_cls,readout_type,train_keep,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
relu,True,mse,0,2,8,1,cm-avg,950,0.358683,0.579923,6438
relu,False,mse,0,2,8,1,cm-avg,950,0.342887,0.566550,6446
relu,True,mse,0,2,8,2,cm-avg,950,0.384929,0.608562,7030
relu,False,mse,0,2,8,2,cm-avg,950,0.382683,0.606576,7038
relu,True,mse,0,2,8,3,cm-avg,950,0.376820,0.599995,7046
...,...,...,...,...,...,...,...,...,...,...,...
softplus,False,poisson,1,4,32,1,legacy,3800,0.498975,0.697397,36046
softplus,True,poisson,1,5,32,1,legacy,3800,0.492938,0.692489,45294
softplus,False,poisson,1,5,32,1,legacy,3800,0.493143,0.692411,45326
softplus,True,poisson,1,6,32,1,legacy,3800,0.496192,0.694490,54574
