This notebook verifies that the feature extraction for `maskcnn_polished_with_local_pcn`, added in <https://github.com/leelabcnbc/thesis-yimeng-v2/commit/92c22a57c8bc72286eec4d9d03e2acb79ab5ab26>, works as expected.

In [1]:
# First, check that adding the lambda modules does not change the model's responses at all.


from torchnetjson.builder import build_net
from thesis_v2.training.training_aux import load_training_results
from thesis_v2 import dir_dict
from thesis_v2.models.maskcnn_polished_with_local_pcn.builder import load_modules

# Presumably registers the project's custom modules so that `build_net` can
# resolve them when the model is rebuilt below — TODO confirm against the builder.
load_modules()

In [2]:
from sys import path
from os.path import join
# Folder (relative to the project root) of the training scripts for this model.
folder_to_check = 'scripts/training/yuanyuan_8k_a_3day/maskcnn_polished_with_local_pcn'
# Put that folder at the front of the import path; this must happen before the
# two imports below, which presumably resolve to modules living in that folder.
path.insert(0, join(dir_dict['root'], folder_to_check))
from submit_certain_configs import param_iterator_obj
from key_utils import keygen

In [3]:
import numpy as np

def get_one_model():
    """Load the trained model for the first generated parameter set.

    Only the first parameter set yielded by ``param_iterator_obj.generate()``
    is used. A few of its fields are sanity-checked, then the matching
    training results are loaded together with the rebuilt model.

    Returns:
        dict with three entries:
            ``'key'``: the key passed to ``load_training_results``;
            ``'param'``: the parameter set that was used;
            ``'result'``: whatever ``load_training_results`` returns when
            called with ``return_model=True``.

    Raises:
        RuntimeError: if the parameter iterator yields nothing.
        AssertionError: if the parameter set does not have the expected values.
    """
    # A private sentinel distinguishes "nothing generated" from any real value.
    nothing = object()
    param = next(iter(param_iterator_obj.generate()), nothing)
    if param is nothing:
        raise RuntimeError('param_iterator_obj.generate() produced no parameter sets')

    # Sanity checks on the configuration being inspected.
    # `model_seed` is not checked here.
    assert len(param) == 23
    assert param['split_seed'] == 'legacy'
    assert param['out_channel'] == 16
    assert param['num_layer'] == 2
    assert param['kernel_size_l1'] == 9
    assert param['pooling_ksize'] == 3
    assert param['pooling_type'] == 'avg'

    # 'scale' and 'smoothness' are left out of the key arguments.
    key = keygen(**{k: v for k, v in param.items() if k not in {'scale', 'smoothness'}})

    # Load twice: the first pass (without the model) only provides the stored
    # model config, which is needed to build the network for the second pass.
    result_ = load_training_results(key, return_model=False)
    result_ = load_training_results(key, return_model=True, model=build_net(result_['config_extra']['model']))

    print(param)

    return {
        'key': key,
        'param': param,
        'result': result_,
    }
# Load one trained model together with its key, parameters and training results.
result = get_one_model()

OrderedDict([('split_seed', 'legacy'), ('model_seed', 0), ('act_fn', 'relu'), ('loss_type', 'mse'), ('input_size', 50), ('out_channel', 16), ('num_layer', 2), ('kernel_size_l1', 9), ('pooling_ksize', 3), ('pooling_type', 'avg'), ('bn_before_act', True), ('bn_after_fc', False), ('scale_name', '0.01'), ('scale', '0.01'), ('smoothness_name', '0.000005'), ('smoothness', '0.000005'), ('pcn_bn', True), ('pcn_bn_post', False), ('pcn_bypass', False), ('pcn_cls', 0), ('pcn_final_act', True), ('pcn_no_act', False), ('pcn_bias', True)])


In [4]:
# Display the loaded model as the cell output.
result['result']['model']

JSONNet(
  (moduledict): ModuleDict(
    (act0): ReLU()
    (act1): ReLU()
    (bn0): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn_input): BatchNorm2d(1, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (conv0): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1), bias=False)
    (conv1): PcConvBp(
      (lambda_out): LambdaSingle()
      (lambda_pred): LambdaSingle()
      (FFconv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act_fn): ReLU(inplace)
    )
    (fc): FactoredLinear2D()
    (final_act): ReLU()
    (pooling): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
)

In [5]:
from thesis_v2.data.prepared.yuanyuan_8k import get_data
from thesis_v2.training_extra.data import generate_datasets
from thesis_v2.training.training import eval_wrapper

from functools import partial
from thesis_v2.training_extra.evaluation import eval_fn_wrapper as eval_fn_wrapper_neural
import torch


def eval_again(param, model, stats_best):
    """Re-evaluate `model` on the test split and compare with stored results.

    The data is fetched again with `get_data`, the test set is evaluated on
    the GPU through `eval_wrapper`, and the resulting per-output correlations
    must match the ones stored in `stats_best` to within 1e-6.

    Args:
        param: parameter set; `input_size`, `split_seed` and `loss_type` are read.
        model: the network to evaluate; it is moved to CUDA.
        stats_best: stored statistics; `['stats']['test']['corr']` is the reference.

    Returns:
        dict with the test inputs (`'X_test'`, cast to float32) and the test
        targets (`'y_test'`).

    Raises:
        AssertionError: if the correlation shapes are not (79,) or the
            correlations differ from the reference by 1e-6 or more.
    """
    splits = get_data('a', 200, param['input_size'], ('042318', '043018', '051018'), scale=0.5,
                      seed=param['split_seed'])

    # Inputs are cast to float32; targets are passed through untouched.
    split_arrays = dict(
        X_train=splits[0].astype(np.float32),
        y_train=splits[1],
        X_val=splits[2].astype(np.float32),
        y_val=splits[3],
        X_test=splits[4].astype(np.float32),
        y_test=splits[5],
    )

    # Keep hold of the raw test arrays before they are wrapped.
    kept_for_caller = {name: split_arrays[name] for name in ('X_test', 'y_test')}

    # Only the test set is needed out of everything that gets generated.
    test_set = generate_datasets(
        per_epoch_train=True, shuffle_train=True,
        **split_arrays,
    )['test']

    def _zero_loss(dummy1, dummy2, dummy3):
        # Placeholder that ignores its arguments and always yields a zero tensor.
        return torch.tensor(0.0)

    eval_fn = partial(eval_fn_wrapper_neural, loss_type=param['loss_type'])
    fresh_stats = eval_wrapper(model.cuda(), test_set, 'cuda', 1, eval_fn, _zero_loss)

    corrs = np.asarray(fresh_stats['corr'])
    corr_ref = np.asarray(stats_best['stats']['test']['corr'])
    assert corrs.shape == corr_ref.shape == (79,)

    largest_gap = np.abs(corrs - corr_ref).max()
    assert largest_gap < 1e-6

    return kept_for_caller
    

# Re-evaluate the loaded model against its stored test correlations and keep
# the returned test inputs/targets for the feature checks that follow.
datasets_for_debug = eval_again(result['param'], result['result']['model'], result['result']['stats_best'])

In [6]:
# Shared settings read by the feature-extraction and checking cells.
global_vars = dict(
    # HDF5 file the extracted features are written to and read back from.
    feature_file=join(dir_dict['features'], 'maskcnn_polished_with_local_pcn', 'debug.hdf5'),
    # Passed to `extract_features` as `augment_config`.
    augment_config=dict(
        # `check_outputs` looks up the position of 'final' in this list,
        # so the order of the names matters.
        module_names=['bottomup', 'topdown', 'final'],
        # Maps module paths inside the model to the short names listed above.
        name_mapping={
            'moduledict.conv1.lambda_out': 'bottomup',
            'moduledict.conv1.lambda_pred': 'topdown',
            'moduledict.final_act': 'final',
        },
    ),
)

from os import makedirs
from os.path import dirname
from thesis_v2.feature_extraction.extraction import extract_features
import h5py
from torch import tensor

def augment_modules(model, datasets_to_extract, file_to_save, grp_name='features'):
    """Extract features from `model` and store them in an HDF5 group.

    If `grp_name` already exists in the file, nothing is extracted, so the
    function can be re-run without redoing the work.

    NOTE: the group is created before extraction starts, so a run that fails
    midway leaves the group behind and later calls will skip extraction;
    delete the group (or the file) by hand in that case.

    Args:
        model: the network handed to `extract_features`.
        datasets_to_extract: input data; wrapped in a 1-tuple for `extract_features`.
        file_to_save: path of the HDF5 file; created if it does not exist.
        grp_name: name of the group inside the file that receives the features.
    """
    augment_config = global_vars['augment_config']

    # `dirname` is '' for a bare file name, and `makedirs('')` would raise.
    parent_dir = dirname(file_to_save)
    if parent_dir:
        makedirs(parent_dir, exist_ok=True)

    # Open in append mode explicitly: the default mode is read-only in
    # h5py >= 3.0, which would fail both on a missing file and on create_group.
    with h5py.File(file_to_save, 'a') as f_feature:
        if grp_name in f_feature:
            # Features were already extracted on a previous run.
            return

        grp = f_feature.create_group(grp_name)

        extract_features(model, (datasets_to_extract,),
                         # move each batch's first element to the GPU as a tensor
                         preprocessor=lambda x: (tensor(x[0]).cuda(),),
                         output_group=grp,
                         batch_size=50,
                         augment_config=augment_config,
                         # mostly for replicating old results
                         deterministic=True
                         )
            
# Extract features from the model (switched to eval mode) on the test inputs
# and store them in the debug feature file.
augment_modules(result['result']['model'].eval(), datasets_for_debug['X_test'],
                global_vars['feature_file'])

In [7]:
# Time to verify that the saved features make sense.

# Let's check the `final` output first; this one is easy to check.
def check_outputs(feature_file, corr_ref, y_data, loss_type, grp_name='features'):
    """Check that the saved `final` features reproduce the reference correlations.

    Args:
        feature_file: path of the HDF5 file holding the extracted features.
        corr_ref: reference correlations, expected to have shape (79,).
        y_data: targets, expected to have shape (1600, 79).
        loss_type: forwarded to `eval_fn_wrapper_neural`.
        grp_name: name of the group inside the file that holds the features.

    Raises:
        AssertionError: if any shape is unexpected, or the recomputed
            correlations differ from `corr_ref` by 1e-6 or more.
    """
    # The dataset is named after the position of 'final' among the module names.
    final_position = global_vars['augment_config']['module_names'].index('final')
    dataset_name = '{}.0'.format(final_position)

    with h5py.File(feature_file, 'r') as f_feature:
        saved_group = f_feature[grp_name]
        y_hat = saved_group[dataset_name][()]
    assert y_hat.shape == y_data.shape == (1600, 79)

    # Score the saved outputs the same way the evaluation code does.
    scores = eval_fn_wrapper_neural(yhat_all=[[y_hat]], y_all=[[y_data]], loss_type=loss_type)
    corr = np.asarray(scores['corr'])
    assert corr_ref.shape == corr.shape == (79,)

    largest_gap = np.abs(corr - corr_ref).max()
    assert largest_gap < 1e-6
    
# Compare the saved `final` features against the stored test correlations,
# using the test targets kept from the re-evaluation step.
check_outputs(global_vars['feature_file'], np.asarray(result['result']['stats_best']['stats']['test']['corr']),
              datasets_for_debug['y_test'], result['param']['loss_type'])