In [1]:
from sys import path
from os import makedirs
from os.path import relpath, realpath, abspath, join, exists, dirname
from itertools import product
from functools import partial

import numpy as np
import pandas as pd
import h5py

# Let pandas display up to 999 rows of a DataFrame before truncating the output.
pd.set_option('display.max_rows', 999)

In [2]:
from torchnetjson.builder import build_net

In [3]:
from thesis_v2 import dir_dict
from thesis_v2.data import load_data_helper
from thesis_v2.training.training_aux import load_training_results
from thesis_v2.training_extra.misc import count_params
from thesis_v2.models.maskcnn_polished_with_local_pcn.builder import load_modules

# NOTE(review): presumably registers the model's custom modules so that `build_net` can resolve
# them when rebuilding networks from their JSON config -- confirm in the builder module.
load_modules()

In [4]:
# Put the training-script folder at the front of `sys.path` so that the two modules imported
# below (presumably located in that folder -- confirm) can be found.
# NOTE: re-running this cell prepends the same entry again; the duplicates are harmless.
folder_to_check = 'scripts/training/yuanyuan_8k_a_3day/maskcnn_polished_with_local_pcn'
path.insert(0, join(dir_dict['root'], folder_to_check))
# These imports must stay after the `path.insert` call above.
from submit_certain_configs import param_iterator_obj
from key_utils import keygen

In [5]:
from thesis_v2.training_extra.data import generate_datasets
from thesis_v2.training_extra.evaluation import eval_fn_wrapper as eval_fn_wrapper_neural
from thesis_v2.training.training import eval_wrapper
from thesis_v2.data.prepared.yuanyuan_8k import get_data, get_neural_data

In [6]:
# compute ccmax
from strflab.stats import cc_max
from thesis_v2.data.prepared.yuanyuan_8k import get_neural_data_per_trial

# Apply `cc_max` to the per-trial neural data of the three listed datasets.
# NOTE(review): `cc_max` presumably estimates a per-neuron ceiling on the achievable
# correlation from trial-to-trial variability -- confirm against strflab.
cc_max_all_neurons = cc_max(get_neural_data_per_trial(
    ('042318', '043018','051018',))
                           )
# The result must be a flat vector of 79 values.
assert cc_max_all_neurons.shape == (79,)

In [7]:
from torch.backends import cudnn
import torch
# Keep cuDNN enabled, but ask for deterministic algorithms and switch off the benchmark
# autotuner, so that repeated evaluations of the same model are reproducible.
cudnn.enabled = True
cudnn.deterministic = True
cudnn.benchmark = False

# Memo table for `get_data_cached`, keyed by (input_size, seed).
_data_cache = {}


def get_data_cached(input_size, seed):
    """Return the `get_data` result for ``(input_size, seed)``, loading it at most once.

    The first request for a given pair calls ``get_data`` with the fixed
    arguments used throughout this notebook and remembers the result in
    ``_data_cache``; later requests return that same stored object.
    """
    cache_key = (input_size, seed)
    try:
        return _data_cache[cache_key]
    except KeyError:
        pass

    loaded = get_data(
        'a',
        200,
        input_size,
        ('042318', '043018', '051018'),
        scale=0.5,
        seed=seed,
    )
    _data_cache[cache_key] = loaded
    return loaded

In [8]:
from scipy.stats import pearsonr

In [9]:
def load_everything():
    """Evaluate every configured model and store its responses as ``.npy`` files.

    For each parameter set yielded by ``param_iterator_obj.generate()`` the
    trained model is rebuilt and run on the ``train``, ``val`` and ``test``
    subsets. The resulting ``(num_stimuli, 79)`` response matrix of each subset
    is written to ``<dir_dict['analyses']>/responses_.../<key>/<subset>.npy``.

    Subsets whose file already exists are skipped and counted as "old"; newly
    computed ones are counted as "new". If all three files of a configuration
    exist, the model and the data are not loaded at all. Progress (every 50
    configurations) and both totals are printed.

    A file is written only after every consistency check for that subset has
    passed, and it is written atomically (temporary file + rename), so a failed
    check or an interrupted run can never leave behind a file that a later run
    would mistake for a finished, validated result.

    Returns:
        None. The function is called for its side effects only.

    Raises:
        AssertionError: if a parameter set deviates from the fixed settings
            expected here, if an array has an unexpected shape, or if the
            re-computed correlations disagree with those stored at training
            time.
    """
    # local import: only needed for the atomic rename when saving.
    from os import replace

    dir_to_save = join(dir_dict['analyses'], 'responses_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs')
    makedirs(dir_to_save, exist_ok=True)

    subsets = ('train', 'val', 'test')
    # expected number of rows in the response matrix of each subset.
    shape_dict = {
        'train': 5120,
        'val': 1280,
        'test': 1600,
    }

    old_case = 0
    new_case = 0
    for idx, param in enumerate(param_iterator_obj.generate()):
        if idx % 50 == 0:
            print(idx)

        # sanity checks on the settings that are fixed across all configurations.
        assert len(param) == 23
        assert param['split_seed'] == 'legacy'
        assert param['out_channel'] == 16
        assert param['num_layer'] == 2
        assert param['kernel_size_l1'] == 9
        assert param['pooling_ksize'] == 3
        assert param['pooling_type'] == 'avg'

        # `scale` and `smoothness` are not part of the key.
        key = keygen(**{k: v for k, v in param.items() if k not in {'scale', 'smoothness'}})

        files_to_store = {subset: join(dir_to_save, key, subset + '.npy') for subset in subsets}
        subsets_todo = [subset for subset in subsets if not exists(files_to_store[subset])]
        old_case += len(subsets) - len(subsets_todo)
        if not subsets_todo:
            # everything for this configuration is on disk already;
            # skip the expensive model / data loading.
            continue

        # load twice: the first time to get the model config, the second time
        # to load the trained weights into a freshly built network.
        result = load_training_results(key, return_model=False)
        result = load_training_results(key, return_model=True, model=build_net(result['config_extra']['model']))

        cc_raw = np.asarray(result['stats_best']['stats']['test']['corr'])
        assert cc_raw.shape == (79,)

        datasets = get_data_cached(param['input_size'], param['split_seed'])
        datasets = {
            'X_train': datasets[0].astype(np.float32),
            'y_train': datasets[1],
            'X_val': datasets[2].astype(np.float32),
            'y_val': datasets[3],
            'X_test': datasets[4].astype(np.float32),
            'y_test': datasets[5],
        }

        # all three subsets are evaluated below; keep the training set unshuffled
        # so that the stored responses line up with the stored labels.
        datasets_all = generate_datasets(
            **datasets,
            per_epoch_train=False,
            shuffle_train=False,
        )

        # move the model to the GPU once per configuration.
        model = result['model'].cuda()

        for subset in subsets_todo:
            new_case += 1

            result_on_the_go = eval_wrapper(model,
                                            datasets_all[subset],
                                            'cuda',
                                            1,
                                            partial(eval_fn_wrapper_neural, loss_type=param['loss_type']),
                                            (lambda dummy1,dummy2,dummy3: torch.tensor(0.0)),
                                            return_responses=True
                                           )

            ret = np.asarray(result_on_the_go['corr'])

            responses = np.concatenate([x[0] for x in result_on_the_go['responses']], axis=0)

            assert responses.shape == (shape_dict[subset], 79)

            if subset in {'test', 'val'}:
                # the re-evaluated score must match the one recorded during training.
                assert abs(result['stats_best']['stats'][subset]['corr_mean'] - result_on_the_go['corr_mean']) < 1e-5
                assert abs(result['stats_best']['stats'][subset]['corr_mean'] - ret.mean()) < 1e-5

                # another check.
                # make sure order of data aligns.
                label = datasets[f'y_{subset}']
                assert responses.shape == label.shape
                assert responses.ndim == 2
                assert responses.shape[1] == 79
                corr_each = np.array(
                    [pearsonr(yhat, y)[0] if np.std(yhat) > 1e-5 and np.std(y) > 1e-5 else 0 for yhat, y in
                          zip(responses.T, label.T)])
                assert corr_each.shape == ret.shape
                # not equal because in `eval_fn_wrapper_neural`, the label is in float32, while here it's in float64.
                assert np.allclose(ret, corr_each)

            # store the responses, only now that all checks have passed.
            # Write to a temporary file first and rename it into place, so that
            # an interrupted run never leaves a truncated `.npy` behind.
            file_to_store_this = files_to_store[subset]
            makedirs(dirname(file_to_store_this), exist_ok=True)
            tmp_file = file_to_store_this + '.tmp'
            with open(tmp_file, 'wb') as f_out:
                np.save(f_out, responses)
            replace(tmp_file, file_to_store_this)

    print('new', new_case)
    print('old', old_case)

In [10]:
# NOTE: `load_everything` has no return statement, so `df` is `None` here; the call is made
# for its side effects (writing the response files and printing progress and counts).
df = load_everything()

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
new 6912
old 0
