This notebook explores how to build a feedforward approximator for the recurrent features extracted by `scripts/feature_extraction/yuanyuan_8k_a/maskcnn_polished_with_rcnn_k_bl/20200218.py`.

In [1]:
# common libs
import numpy as np
import h5py

from sys import path
from os.path import join


from thesis_v2 import dir_dict

from torchnetjson.builder import build_net
from thesis_v2.training.training_aux import load_training_results

In [2]:
# copied from that file.
from thesis_v2.models.maskcnn_polished_with_rcnn_k_bl.builder import load_modules

# Notebook-wide settings shared by the cells below.
global_vars = {
    # directory that holds the per-model feature files (`<script key>.hdf5`) read by `fetch_data`.
    'feature_file_dir': join(dir_dict['features'],
                             'maskcnn_polished_with_rcnn_k_bl',
                             '20200218'),
    'augment_config': {
        # friendly layer names; a name's position in this list is the index returned by
        # `get_layer_idx`, which prefixes the dataset names looked up in the feature file.
        'module_names': ['layer0', 'layer1', 'layer2'],
        # model submodule path -> friendly layer name.
        'name_mapping': {
            'moduledict.bl_stack.input_capture': 'layer0',
            'moduledict.bl_stack.capture_list.0': 'layer1',
            'moduledict.bl_stack.capture_list.1': 'layer2',
        }
    }
}

from thesis_v2.configs.model.maskcnn_polished_with_rcnn_k_bl import (
    explored_models_20200218,
    script_keygen,
    keygen
)

# NOTE(review): presumably registers the custom modules of this model family so that
# `build_net` can construct the network later on — confirm against the builder module.
load_modules()

In [3]:
# show the resolved settings (in particular the feature directory on this machine).
print(global_vars)

# utils
def get_layer_idx(friendly_name):
    """Return the position of `friendly_name` in the configured module name list.

    Raises `ValueError` (from `list.index`) when the name is not configured.
    """
    module_names = global_vars['augment_config']['module_names']
    return module_names.index(friendly_name)

{'feature_file_dir': '/my_data/thesis-yimeng-v2/results/features/maskcnn_polished_with_rcnn_k_bl/20200218', 'augment_config': {'module_names': ['layer0', 'layer1', 'layer2'], 'name_mapping': {'moduledict.bl_stack.input_capture': 'layer0', 'moduledict.bl_stack.capture_list.0': 'layer1', 'moduledict.bl_stack.capture_list.1': 'layer2'}}}


In [4]:
def good_model_param(param):
    """Tell whether `param` describes the one configuration studied in this notebook.

    The selected configuration uses the maximal number of channels, so that
    out-of-memory problems show up here first.
    """
    # checked in this order; evaluation stops at the first mismatch.
    wanted = {
        'rcnn_bl_cls': 4,
        'kernel_size_l23': 3,
        'num_layer': 3,
        'out_channel': 32,
    }
    return all(param[name] == value for name, value in wanted.items())


def get_all_model_params():
    """Collect the explored model configurations accepted by `good_model_param`.

    Returns a dict keyed by the script key of each configuration; every value
    holds the model key under 'key' and the raw parameter dict under 'param'.
    """
    # parameters that are left out when building the model key.
    not_in_key = {'scale', 'smoothness'}

    selected = dict()
    for param in explored_models_20200218().generate():
        # let's use a fully recurrent one for debugging.
        if good_model_param(param):
            key_kwargs = {name: value for name, value in param.items() if name not in not_in_key}
            model_key = keygen(**key_kwargs)
            script_key = script_keygen(**param)
            selected[script_key] = {'key': model_key, 'param': param}

    return selected

In [5]:
# all model params
# script key -> {'key': model key, 'param': parameter dict} for every selected configuration.
all_params_dict = get_all_model_params()

# script key of the single model inspected below; it indexes `all_params_dict`
# and is also the stem of the feature file name.
model_to_check = 's_selegacy+in_sz50+out_ch32+num_l3+k_l19+k_p3+ptavg+bn_a_fcFalse+actrelu+r_c4+r_psize1+r_ptypeNone+r_acccummean+ff1st_True+ff1stbba_True+sc0.01+sm0.000005+lmse+m_se0'

In [6]:
# function to extract the stack input (`layer0`, index 0) as well as the `layer1` and `layer2` outputs at every stored index.

def fetch_data(feature_file, grp_name):
    """Read the captured features of one group from an HDF5 feature file.

    Datasets are looked up as `<layer index>.<step>`, where the layer index
    comes from `get_layer_idx`. `layer1` and `layer2` must have the same
    number of stored steps, and more than one.

    Returns a dict with
      'in': the `layer0` dataset at step 0,
      'inter_list': the `layer1` datasets, one per step,
      'out_list': the `layer2` datasets, one per step.
    Summary statistics of everything loaded are printed as a side effect.
    """
    def dataset_name(friendly_name, step):
        return str(get_layer_idx(friendly_name)) + f'.{step}'

    def count_steps(group, friendly_name):
        return sum(1 for name in group if name.startswith(str(get_layer_idx(friendly_name)) + '.'))

    def summarize(arr):
        return (arr.shape, arr.mean(), arr.std(), arr.min(), arr.max())

    # read every sample; narrow this slice to inspect a subset only.
    slice_to_check = slice(None)
    with h5py.File(feature_file, 'r') as f_feature:
        grp = f_feature[grp_name]
        num_bottom_up = count_steps(grp, 'layer2')
        assert num_bottom_up > 1
        assert num_bottom_up == count_steps(grp, 'layer1')

        steps = range(num_bottom_up)
        pcn_in = grp[dataset_name('layer0', 0)][slice_to_check]
        pcn_inter_list = [grp[dataset_name('layer1', step)][slice_to_check] for step in steps]
        pcn_out_list = [grp[dataset_name('layer2', step)][slice_to_check] for step in steps]

    print(summarize(pcn_in))
    print([summarize(arr) for arr in pcn_inter_list])
    print([summarize(arr) for arr in pcn_out_list])

    return {
        'in': pcn_in,
        'inter_list': pcn_inter_list,
        'out_list': pcn_out_list,
    }

In [7]:
# load the `X_train` group of the selected model's feature file (`<model_to_check>.hdf5`).
data_returned = fetch_data(join(global_vars['feature_file_dir'], model_to_check + '.hdf5'), 'X_train')

((5120, 32, 42, 42), 0.00097621273, 0.13129921, -1.5368805, 1.6458784)
[((5120, 32, 42, 42), 0.30902508, 0.5585249, 0.0, 13.410167), ((5120, 32, 42, 42), 0.38653353, 0.5509103, 0.0, 10.5434675), ((5120, 32, 42, 42), 0.38611388, 0.5902634, 0.0, 10.2964525), ((5120, 32, 42, 42), 0.38038015, 0.58427274, 0.0, 11.386213)]
[((5120, 32, 42, 42), 0.8971239, 1.1501049, 0.0, 28.444695), ((5120, 32, 42, 42), 0.9062065, 1.3005654, 0.0, 23.221088), ((5120, 32, 42, 42), 0.87318933, 1.303688, 0.0, 23.964273), ((5120, 32, 42, 42), 0.8348414, 1.2768589, 0.0, 22.642118)]


In [8]:
# load the model
def load_model(key):
    """Rebuild the network stored under `key`, move it to the GPU and set eval mode.

    The training results are loaded twice: once without a model to recover the
    architecture config, and once more with a network built from that config.
    """
    # first pass: only the saved architecture description is needed.
    arch_config = load_training_results(key, return_model=False)['config_extra']['model']
    # second pass: hand a freshly built network to the loader and take back its 'model' entry.
    loaded = load_training_results(key, return_model=True, model=build_net(arch_config))
    net = loaded['model']
    net.cuda()
    net.eval()
    return net

# the network identified by `model_to_check`, on GPU and in eval mode.
model = load_model(all_params_dict[model_to_check]['key'])

In [9]:
# the idea is, given idx1 and idx2, predict out_list[idx2] - out_list[idx1] given (in, inter_list[idx1], out_list[idx1]).

from numpy.linalg import norm
from torch.backends import cudnn
import torch
# ask cuDNN for deterministic algorithms and turn off its auto-tuner, so that replaying
# the network below can be compared against the stored features.
cudnn.deterministic = True
cudnn.benchmark = False
def check_similarity(d1, d2, tol=1e-5):
    """Assert that `d1` matches the reference `d2` up to a relative error of `tol`.

    Prints the relative error `norm(d1 - d2) / norm(d2)` followed by the
    largest absolute element-wise difference.

    A reference with zero norm makes the ratio undefined (0/0 gives NaN, which
    would fail the check even for identical inputs). In that case the error is
    reported as 0.0 when the inputs are identical and as infinity otherwise.

    Raises `AssertionError` when the shapes differ or the relative error is
    not below `tol`.
    """
    assert d1.shape == d2.shape
    abs_err = norm(d1 - d2)
    ref_norm = norm(d2)
    if ref_norm > 0:
        norm_diff = abs_err / ref_norm
    else:
        # zero (or NaN) reference norm: only an exact match counts as similar.
        norm_diff = 0.0 if abs_err == 0 else float('inf')
    print(norm_diff)
    print(abs(d1 - d2).max())
    assert norm_diff < tol

def debug_result(model_,
                 in_,
                 inter_,
                 out_,
                 idx_diff,
                 # this determines which BN layer to use.
                 time_start,
                ):
    """Replay `idx_diff` recurrent steps of the model's `bl_stack` on the GPU.

    Starting from the layer states `inter_` (first layer) and `out_` (second
    layer), every step feeds `in_` to the first layer and the first layer's
    fresh output to the second one; each layer output then goes through the
    batch norm layer at index `t * n_layer + layer_idx` and the stack's
    activation function, for `t` in `time_start .. time_start + idx_diff - 1`.

    Returns the final second-layer state minus `out_`, as a numpy array.
    Only stacks with exactly two layers are supported (asserted).
    """
    assert idx_diff > 0
    assert time_start >= 0

    print(in_.shape, inter_.shape, out_.shape)

    stack = model_.moduledict['bl_stack']

    assert stack.n_layer == 2

    with torch.no_grad():
        # states[k] always holds the most recent output of layer k.
        states = [torch.tensor(inter_).cuda(), torch.tensor(out_).cuda()]
        stack_input = torch.tensor(in_).cuda()
        for t in range(time_start, time_start + idx_diff):
            for layer_idx in range(stack.n_layer):
                recurrent_layer = stack.layer_list[layer_idx]
                batch_norm = stack.bn_layer_list[t * stack.n_layer + layer_idx]
                # the first layer reads the stack input; later layers read the layer below,
                # which has already been updated in this time step.
                feed = stack_input if layer_idx == 0 else states[layer_idx - 1]
                updated = recurrent_layer(feed, states[layer_idx])
                states[layer_idx] = stack.act_fn(batch_norm(updated))
    return states[-1].cpu().numpy() - out_

def check_result(model_, data_dict, num_sample=32):
    """Check that replaying the recurrent stack reproduces the stored features.

    For every pair `idx1 < idx2` of stored outputs, the reference difference
    `out_list[idx2] - out_list[idx1]` is compared (via `check_similarity`)
    against the difference obtained by `debug_result`, which replays
    `idx2 - idx1` steps starting from the stored states at `idx1`.

    Parameters
    ----------
    model_ : the full model; `debug_result` reads its `bl_stack` module.
    data_dict : dict with 'in', 'inter_list' and 'out_list', as returned by `fetch_data`.
    num_sample : number of leading samples to check (default 32).

    Raises `AssertionError` (from `check_similarity`) on the first mismatch.
    """
    out_list = data_dict['out_list']
    num_out = len(out_list)

    for idx1 in range(num_out):
        start_out = out_list[idx1][:num_sample]
        for idx2 in range(idx1 + 1, num_out):
            print((idx1, idx2))
            result_ref = out_list[idx2][:num_sample] - start_out
            print(result_ref.mean(), result_ref.std(), result_ref.min(), result_ref.max())
            result_debug = debug_result(model_, data_dict['in'][:num_sample],
                                        data_dict['inter_list'][idx1][:num_sample],
                                        start_out,
                                        idx2 - idx1,
                                        # the step from index idx1 to idx1 + 1 is replayed with
                                        # time index idx1 + 1 (selects the batch norm layers).
                                        idx1 + 1)
            check_similarity(result_ref, result_debug)

# all ok: `check_similarity` asserts on every (idx1, idx2) pair, so reaching the end
# means each replayed difference matched the stored one.
check_result(model, data_returned)

(0, 1)
-0.006521218 0.6765042 -11.364738 8.701018
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0
(0, 2)
-0.03389127 0.8185497 -13.730714 9.92229
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0
(0, 3)
-0.06841364 0.91917163 -15.834566 10.484059
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0
(1, 2)
-0.027370047 0.47159806 -7.076529 5.2973967
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0
(1, 3)
-0.061892413 0.6678318 -9.848972 7.0623784
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0
(2, 3)
-0.034522373 0.39061153 -4.7467127 4.2083216
(32, 32, 42, 42) (32, 32, 42, 42) (32, 32, 42, 42)
0.0
0.0


In [10]:
# now time to get a model to train it.
# simple stuff. conv + relu.
# maybe with BN.

# two kinds of models

# BN + conv + ReLU + BN
# conv + ReLU + BN
# I may want to constrain the first BN a bit,
# say, all in_ channels share the same scale and bias; same goes with out1 channels.

# some concerns: stats are different for `out_` at different iterations.
# but let's ignore it for now.
from thesis_v2.models.feature_approximation.builder import (
    gen_local_pcn_recurrent_feature_approximator
)

from thesis_v2.training_extra.feature_approximation.opt import get_feature_approximation_opt_config
from thesis_v2.training_extra.feature_approximation.training import train_one

def handle_one_case(*,
                    data_dict,
                    kernel_size,
                    note,
                    batchnorm_pre=True,
                    batchnorm_post=True,
                    act_fn='relu',
                   ):
    """Train one feedforward approximator of the recurrent computation.

    The network is trained to map the channel-wise concatenation of
    (`in`, `inter_list[0]`, `out_list[0]`) to `out_list[2] - out_list[0]`.

    Parameters
    ----------
    data_dict : dict with 'in', 'inter_list' and 'out_list', as returned by `fetch_data`.
    kernel_size : forwarded to `gen_local_pcn_recurrent_feature_approximator`.
    note : free-form tag that becomes part of the result key.
    batchnorm_pre, batchnorm_post : become part of the result key.
        NOTE(review): they are not forwarded to the architecture builder here,
        so they currently only label the run — confirm whether that is intended.
    act_fn : forwarded to the architecture builder; also part of the result key.

    Returns whatever `train_one` returns.

    Raises `ValueError` when `data_dict['out_list']` has fewer than 3 entries.
    """
    # A single fixed (idx1, idx2) pair is used: the function to be modeled depends
    # not only on idx1 but also on idx2 - idx1, so pairs must not be mixed.
    idx1, idx2 = 0, 2

    num_out = len(data_dict['out_list'])
    if num_out <= idx2:
        raise ValueError(
            f'need at least {idx2 + 1} recurrent outputs to approximate '
            f'step {idx1} -> {idx2}, got {num_out}'
        )

    # prepare dataset
    print((idx1, idx2))
    x_train = np.concatenate(
        [data_dict['in'], data_dict['inter_list'][idx1], data_dict['out_list'][idx1]],
        axis=1,
    )
    # using the difference (`data_dict['out_list'][idx2] - data_dict['out_list'][idx1]`)
    #    or data_dict['out_list'][idx2] makes little difference.
    y_train = data_dict['out_list'][idx2] - data_dict['out_list'][idx1]

    print((x_train.shape, y_train.shape))
    print(y_train.mean(), y_train.std())

    dataset_this = {
        'X_train': x_train,
        'y_train': y_train,
    }

    def gen_cnn_partial(in_shape, in_y_shape):
        """Build the architecture for per-sample input/target shapes (3 dims each)."""
        assert len(in_shape) == 3
        assert len(in_y_shape) == 3
        # the target has as many channels as the higher layer; the remaining
        # input channels are attributed to the lower part.
        in_higher_c = in_y_shape[0]
        in_lower_c = in_shape[0] - in_higher_c

        return gen_local_pcn_recurrent_feature_approximator(
            in_shape_lower=[in_lower_c, in_shape[1], in_shape[2]],
            in_shape_higher=[in_higher_c, in_shape[1], in_shape[2]],
            kernel_size=kernel_size,
            act_fn=act_fn,
        )
    #
    res = train_one(arch_json_partial=gen_cnn_partial,
                    opt_config_partial=get_feature_approximation_opt_config,
                    datasets=dataset_this,
                    # note this gets saved under v1 folder...
                    # but it should not matter.
                    key=f'debug/feature_approximation/k_bl_ksize3/note{note}/kernel_size{kernel_size}/act_fn{act_fn}/batchnorm_pre{batchnorm_pre}/batchnorm_post{batchnorm_post}',
                    show_every=50,
                    model_seed=0, return_model=False)

    return res

In [11]:
# train the approximator on the loaded features with kernel_size=9;
# `note` only tags the key under which the result is stored.
handle_one_case(
    data_dict=data_returned,
    kernel_size=9,
    note='debug13',
)

# 5000 max epoch should be sufficient.
# given that this is the biggest model to train (32 out_channel) and other models take less time to train.

(0, 2)
((5120, 96, 42, 42), (5120, 32, 42, 42))
-0.023933753 0.85231274
num_param 249120
num of phase:  3
0-0, train loss 1.6959259510040283
train loss 1.6959259510040283
50-0, train loss 0.6478598713874817
train loss 0.6478598713874817
100-0, train loss 0.4719250798225403
train loss 0.4719250798225403
150-0, train loss 0.38665175437927246
train loss 0.38665175437927246
200-0, train loss 0.3336195945739746
train loss 0.3336195945739746
250-0, train loss 0.30563950538635254
train loss 0.30563950538635254
300-0, train loss 0.2934873700141907
train loss 0.2934873700141907
350-0, train loss 0.2785651385784149
train loss 0.2785651385784149
400-0, train loss 0.26933735609054565
train loss 0.26933735609054565
450-0, train loss 0.26451021432876587
train loss 0.26451021432876587
500-0, train loss 0.2551293969154358
train loss 0.2551293969154358
550-0, train loss 0.2604365646839142
train loss 0.2604365646839142
600-0, train loss 0.24625247716903687
train loss 0.24625247716903687
650-0, train los

2900-0, train loss 0.22126905620098114
train loss 0.22126905620098114
2950-0, train loss 0.2183196246623993
train loss 0.2183196246623993
3000-0, train loss 0.22487185895442963
train loss 0.22487185895442963
3050-0, train loss 0.2210100293159485
train loss 0.2210100293159485
3100-0, train loss 0.22461624443531036
train loss 0.22461624443531036
3150-0, train loss 0.22682519257068634
train loss 0.22682519257068634
3200-0, train loss 0.2230764776468277
train loss 0.2230764776468277
3250-0, train loss 0.22390270233154297
train loss 0.22390270233154297
3300-0, train loss 0.22248750925064087
train loss 0.22248750925064087


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/conda/envs/leelab/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3319, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-add7dfba14a4>", line 4, in <module>
    note='debug13',
  File "<ipython-input-10-fd7390de6e9a>", line 80, in handle_one_case
    model_seed=0, return_model=False)
  File "/my_data/thesis-yimeng-v2/thesis_v2/training_extra/feature_approximation/training.py", line 54, in train_one
    'training_extra_config': {'num_phase': num_phase},
  File "/my_data/thesis-yimeng-v2/thesis_v2/training_extra/training.py", line 133, in train_one_wrapper
    extra_params=extra_params,
  File "/my_data/thesis-yimeng-v2/thesis_v2/training_extra/training.py", line 54, in train_one_inner
    legacy_random_seed=True)
  File "/my_data/thesis-yimeng-v2/thesis_v2/training/training_aux.py", line 192, in training_wrapper
    legacy_random_seed=legacy_random_seed)
  File "/my_data/thesis

KeyboardInterrupt: 