This notebook explores how to build a feedforward approximator for the recurrent features extracted in `scripts/feature_extraction/yuanyuan_8k_a/maskcnn_polished_with_local_pcn/debug.ipynb`.

In [1]:
# common libs
import numpy as np
import h5py

from sys import path
from os.path import join


from thesis_v2 import dir_dict

from torchnetjson.builder import build_net
from thesis_v2.training.training_aux import load_training_results

In [2]:
# The feature-extraction script folder is not a package, so prepend it to `sys.path`
# (relative to the project root in `dir_dict`) to make its `certain_configs` module importable.
folder_to_check = 'scripts/feature_extraction/yuanyuan_8k_a/maskcnn_polished_with_local_pcn'
path.insert(0, join(dir_dict['root'], folder_to_check))
# NOTE: this import only resolves after the `path.insert` above has run.
from certain_configs import get_all_model_params, global_vars

In [3]:
# Show the shared configuration imported from `certain_configs`
# (the recorded output lists the feature file directory and the module-name mapping).
print(global_vars)

# utils
def get_layer_idx(friendly_name):
    """Return the position of `friendly_name` in the configured module-name list.

    The list is read from ``global_vars['augment_config']['module_names']``.
    Raises ``ValueError`` (from ``list.index``) when the name is not present.
    """
    module_names = global_vars['augment_config']['module_names']
    return module_names.index(friendly_name)

{'feature_file_dir': '/my_data/thesis-yimeng-v2/results/features/maskcnn_polished_with_local_pcn/certain_configs', 'augment_config': {'module_names': ['bottomup', 'topdown', 'final'], 'name_mapping': {'moduledict.conv1.lambda_out': 'bottomup', 'moduledict.conv1.lambda_in': 'topdown', 'moduledict.final_act': 'final'}}}


In [4]:
# All model parameter sets known to `certain_configs`; later cells index this
# by configuration name and read the 'key' entry of the selected element.
all_params_dict = get_all_model_params()

# Configuration to inspect; the same one as in
# `scripts/feature_extraction/yuanyuan_8k_a/maskcnn_polished_with_local_pcn/debug.ipynb`.
# It is also used as the stem of the feature file name (`<name>.hdf5`).
model_to_check = 's_selegacy+in_sz50+out_ch16+num_l2+k_l19+k_p3+ptavg+bn_b_actTrue+bn_a_fcFalse+actrelu+p_c5+p_bypassFalse+p_n_actFalse+p_bn_pFalse+p_actTrue+p_bnTrue+p_biasTrue+sc0.01+sm0.000005+lmse+m_se0'

In [5]:
# function to extract input 'topdown.0' as well as all 'bottomup.*' except the last one.

def fetch_data(feature_file, grp_name):
    """Read the PCN input and the intermediate bottom-up features from an HDF5 file.

    Datasets inside group `grp_name` are addressed as ``'<layer idx>.<iteration>'``,
    where the layer index comes from `get_layer_idx`.

    Parameters
    ----------
    feature_file : path of the HDF5 feature file (opened read-only).
    grp_name : name of the group inside the file to read from.

    Returns
    -------
    dict with
        'in'       : the ``topdown`` dataset at iteration 0,
        'out_list' : every ``bottomup`` dataset except the last one, in iteration order.

    Raises
    ------
    AssertionError if the group holds two or fewer ``bottomup`` datasets.
    """

    def summarize(arr):
        # shape plus basic statistics, printed for a quick sanity check.
        return (arr.shape, arr.mean(), arr.std(), arr.min(), arr.max())

    everything = slice(None)
    with h5py.File(feature_file, 'r') as f_feature:
        grp = f_feature[grp_name]
        bottomup_prefix = str(get_layer_idx('bottomup')) + '.'
        n_bottomup = sum(1 for name in grp if name.startswith(bottomup_prefix))
        assert n_bottomup > 2

        topdown_key = str(get_layer_idx('topdown')) + '.0'
        pcn_in = grp[topdown_key][everything]
        # the last bottom-up entry is deliberately left out.
        pcn_out_list = []
        for iteration in range(n_bottomup - 1):
            pcn_out_list.append(grp[f'{bottomup_prefix}{iteration}'][everything])

    print(summarize(pcn_in))
    print([summarize(arr) for arr in pcn_out_list])

    return {'in': pcn_in, 'out_list': pcn_out_list}

In [6]:
# Load the PCN input and intermediate bottom-up features of the selected model
# from the 'X_test' group of its feature file.
data_returned = fetch_data(join(global_vars['feature_file_dir'], model_to_check + '.hdf5'), 'X_test')

((1600, 16, 42, 42), 0.3592737, 0.5737621, 0.0, 12.684619)
[((1600, 16, 42, 42), 0.26013193, 0.4335558, 0.0, 9.471747), ((1600, 16, 42, 42), 0.2820749, 0.7873015, -6.068827, 17.41325), ((1600, 16, 42, 42), 0.2963904, 1.0379094, -10.003517, 23.08837), ((1600, 16, 42, 42), 0.30989194, 1.3064132, -13.968054, 28.11877), ((1600, 16, 42, 42), 0.3228603, 1.6355528, -18.504385, 30.76726), ((1600, 16, 42, 42), 0.33641696, 2.0544658, -23.724724, 32.904312)]


In [7]:
# load the model
def load_model(key):
    """Rebuild the trained network stored under `key`, on the GPU and in eval mode.

    `load_training_results` is called twice: the first call (without the model)
    only serves to obtain the architecture config, which is needed to build the
    network object passed to the second call.
    """
    # pass 1: results only, to learn the architecture.
    saved = load_training_results(key, return_model=False)
    net = build_net(saved['config_extra']['model'])
    # pass 2: hand the freshly built net to the loader and keep its 'model' entry.
    loaded = load_training_results(key, return_model=True, model=net)
    model_ = loaded['model']
    model_.cuda()
    model_.eval()
    return model_

# Instantiate the trained network of the selected configuration from its training key.
model = load_model(all_params_dict[model_to_check]['key'])

In [8]:
# The idea: given idx1 < idx2, predict out_list[idx2] - out_list[idx1] from (out_list[idx1], in).

from numpy.linalg import norm
from torch.backends import cudnn
import torch
# Ask cuDNN for deterministic algorithms and turn off its auto-tuner, so that
# the replay below can be compared against the stored features reproducibly.
cudnn.deterministic = True
cudnn.benchmark = False
def check_similarity(d1, d2):
    """Assert that two arrays agree up to a relative error of 1e-5.

    The relative error is ``norm(d1 - d2) / norm(d2)``. It is printed together
    with the largest absolute element-wise difference before being checked.

    Parameters
    ----------
    d1, d2 : numpy arrays of identical shape; `d2` is the normalisation reference.

    Raises
    ------
    AssertionError if the shapes differ or the error is not below 1e-5.
    """
    assert d1.shape == d2.shape
    diff = d1 - d2
    ref_norm = norm(d2)
    # An all-zero reference would turn the ratio into 0/0 = nan and make the
    # check fail even for identical arrays; use the absolute error in that case.
    norm_diff = norm(diff) / ref_norm if ref_norm > 0 else norm(diff)
    print(norm_diff)
    print(abs(diff).max())
    assert norm_diff < 1e-5

def debug_result(model_,in_,out1,idx_diff):
    """Replay `idx_diff` recurrent steps of the 'conv1' module and return the change.

    Each step calls ``forward_fb`` on the current output and then
    ``forward_update`` with (current output, `in_`, result of ``forward_fb``);
    both are methods of ``model_.moduledict['conv1']``.

    Parameters
    ----------
    model_ : network exposing ``moduledict['conv1']`` with the two methods above.
    in_ : numpy array passed as the second argument of ``forward_update`` at every step.
    out1 : numpy array used as the starting output.
    idx_diff : number of steps to replay; must be positive.

    Returns
    -------
    numpy array: output after `idx_diff` steps minus `out1`.
    """
    assert idx_diff > 0
    # Use the model that was passed in; the previous version silently relied on
    # a notebook-global `model` and ignored this argument.
    pcn_layer = model_.moduledict['conv1']
    # Run on the device the model lives on; fall back to CUDA (the previous
    # hard-coded behaviour) when the model exposes no parameters.
    try:
        device = next(model_.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda')
    out_now = out1
    with torch.no_grad():
        for _ in range(idx_diff):
            # Fresh tensors are built for every call, exactly as before, so the
            # module can never modify an array that is reused later.
            pred_now = pcn_layer.forward_fb(
                torch.tensor(out_now).to(device),
            ).cpu().numpy()
            out_now = pcn_layer.forward_update(
                torch.tensor(out_now).to(device),
                torch.tensor(in_).to(device),
                torch.tensor(pred_now).to(device),
            ).cpu().numpy()
    return out_now - out1

def check_result(model_, data_dict):
    """Verify stored feature differences against a replay through `model_`.

    For every ordered pair ``idx1 < idx2`` of entries in ``data_dict['out_list']``
    the stored difference ``out_list[idx2] - out_list[idx1]`` is compared, via
    `check_similarity`, with what `debug_result` produces when starting from
    ``out_list[idx1]`` and running ``idx2 - idx1`` steps with ``data_dict['in']``.
    The pair and the statistics of the stored difference are printed on the way.
    """
    outs = data_dict['out_list']
    total = len(outs)
    index_pairs = [(lo, hi) for lo in range(total) for hi in range(lo + 1, total)]

    for lo, hi in index_pairs:
        print((lo, hi))
        stored_delta = outs[hi] - outs[lo]
        print(stored_delta.mean(), stored_delta.std(), stored_delta.min(), stored_delta.max())
        replayed_delta = debug_result(model_, data_dict['in'], outs[lo], hi - lo)
        check_similarity(stored_delta, replayed_delta)

# Replay every (idx1, idx2) pair through the loaded model and compare with the
# stored features; the recorded output shows zero error for all pairs (all ok).
check_result(model, data_returned)

(0, 1)
0.02194325 0.42810473 -6.068827 8.904035
0.0
0.0
(0, 2)
0.036259294 0.7234565 -10.003517 16.093918
0.0
0.0
(0, 3)
0.04976021 1.0306181 -13.968054 21.12432
0.0
0.0
(0, 4)
0.06272887 1.3919907 -18.504385 24.259983
0.0
0.0
(0, 5)
0.076285854 1.8376727 -23.724724 27.46222
0.0
0.0
(1, 2)
0.0143161295 0.32812482 -3.93469 7.189883
0.0
0.0
(1, 3)
0.027816877 0.66755944 -7.8992267 12.7758465
0.0
0.0
(1, 4)
0.04078558 1.0581534 -12.435558 17.426577
0.0
0.0
(1, 5)
0.054342493 1.5301074 -17.655897 20.628813
0.0
0.0
(2, 3)
0.013500811 0.34966654 -3.9645367 5.948244
0.0
0.0
(2, 4)
0.026469508 0.7540298 -8.500868 10.598974
0.0
0.0
(2, 5)
0.04002639 1.2418066 -13.871584 13.929815
0.0
0.0
(3, 4)
0.012968653 0.41092587 -4.618208 4.65073
0.0
0.0
(3, 5)
0.02652559 0.90977573 -10.077094 11.556656
0.0
0.0
(4, 5)
0.013556905 0.5049501 -5.458886 7.06707
0.0
0.0


In [9]:
# now time to get a model to train it.
# simple stuff. conv + relu.
# maybe with BN.

# two kinds of models

# BN + conv + ReLU + BN
# conv + ReLU + BN
# I may want to constrain the first BN a bit,
# say, all in_ channels share the same scale and bias; same goes with out1 channels.

# some concerns: stats are different for `out_` at different iterations.
# but let's ignore it for now.
from thesis_v2.models.feature_approximation.builder import (
    gen_local_pcn_recurrent_feature_approximator
)

from thesis_v2.training_extra.feature_approximation.opt import get_feature_approximation_opt_config
from thesis_v2.training_extra.feature_approximation.training import train_one

def handle_one_case(*,
                    data_dict,
                    kernel_size,
                    note,
                    batchnorm_pre=True,
                    batchnorm_post=True,
                    act_fn='relu',
                   ):
    """Fit a feedforward approximator for two recurrent steps and return the result.

    Training pairs are built from every ``(idx1, idx1 + 2)`` pair of entries in
    ``data_dict['out_list']``:

    * input  : ``data_dict['in']`` and ``out_list[idx1]`` concatenated along axis 1,
    * target : ``out_list[idx1 + 2] - out_list[idx1]``.

    All pairs are stacked along axis 0 and handed to `train_one` as
    ``'X_train'`` / ``'y_train'``.

    Parameters (keyword-only)
    -------------------------
    data_dict : dict with keys 'in' and 'out_list', as returned by `fetch_data`.
    kernel_size : forwarded to the architecture builder and written into the key.
    note : free-form tag written into the key.
    batchnorm_pre, batchnorm_post : written into the key.
    act_fn : forwarded to the architecture builder and written into the key.

    Returns
    -------
    Whatever `train_one` returns.
    """
    out_list = data_dict['out_list']

    # Collect (input, target) arrays for outputs exactly two iterations apart.
    inputs = []
    targets = []
    for idx1 in range(len(out_list) - 2):
        idx2 = idx1 + 2
        print((idx1, idx2))
        inputs.append(np.concatenate([data_dict['in'], out_list[idx1]], axis=1))
        targets.append(out_list[idx2] - out_list[idx1])

    x_train = np.concatenate(inputs, axis=0)
    y_train = np.concatenate(targets, axis=0)

    print((x_train.shape, y_train.shape))

    def gen_cnn_partial(in_shape):
        # The stacked input is assumed to be two equally shaped halves along the
        # channel axis, so the channel count must be even.
        assert len(in_shape) == 3
        assert in_shape[0] % 2 == 0
        half_channels = in_shape[0] // 2
        # NOTE(review): batchnorm_pre / batchnorm_post are not forwarded here even
        # though they are part of the key below -- confirm whether the builder
        # accepts them.
        return gen_local_pcn_recurrent_feature_approximator(
            in_shape_lower=[half_channels, in_shape[1], in_shape[2]],
            in_shape_higher=[half_channels, in_shape[1], in_shape[2]],
            kernel_size=kernel_size,
            act_fn=act_fn,
        )

    return train_one(
        arch_json_partial=gen_cnn_partial,
        opt_config_partial=get_feature_approximation_opt_config,
        datasets={'X_train': x_train, 'y_train': y_train},
        # note this gets saved under v1 folder...
        # but it should not matter.
        key=f'debug/feature_approximation/local_pcn_recurrent_feature_approximator/note{note}/kernel_size{kernel_size}/act_fn{act_fn}/batchnorm_pre{batchnorm_pre}/batchnorm_post{batchnorm_post}',
        show_every=1000,
        model_seed=0,
        return_model=False,
    )

In [10]:
# Train one approximator on the features loaded above, with a 9x9 kernel and
# the default activation / batch-norm flags; 'debug' tags the saved key.
handle_one_case(
    data_dict=data_returned,
    kernel_size=9,
    note='debug',
)

(0, 2)
(1, 3)
(2, 4)
(3, 5)
((6400, 32, 42, 42), (6400, 16, 42, 42))
num_param 41584
0-0, train loss 1.6306195259094238
train loss 1.6306195259094238
1000-0, train loss 0.07750361412763596
train loss 0.07750361412763596
2000-0, train loss 0.05282807722687721
train loss 0.05282807722687721
3000-0, train loss 0.04851727560162544
train loss 0.04851727560162544
4000-0, train loss 0.050110407173633575
train loss 0.050110407173633575
5000-0, train loss 0.049012117087841034
train loss 0.049012117087841034
6000-0, train loss 0.047640301287174225
train loss 0.047640301287174225
7000-0, train loss 0.0482809953391552
train loss 0.0482809953391552
8000-0, train loss 0.0482926145195961
train loss 0.0482926145195961
9000-0, train loss 0.0502128005027771
train loss 0.0502128005027771
10000-0, train loss 0.0504579097032547
train loss 0.0504579097032547
11000-0, train loss 0.05106149613857269
train loss 0.05106149613857269
12000-0, train loss 0.0500941164791584
train loss 0.0500941164791584
13000-0, tr

KeyboardInterrupt: 