This notebook verifies that models can be trained in the same way, with matching resulting parameters, using either the old (legacy) code or the new code.

The reference for this notebook is mostly <https://github.com/leelabcnbc/tang_jcompneuro/blob/master/results_ipynb/debug/cnn_debug/cnn_fitting_demo.ipynb>

In [1]:
import numpy as np
from copy import deepcopy
from collections import OrderedDict
import torch
from torch.autograd import Variable
from torch import FloatTensor
from tang_jcompneuro_legacy import cnn as cnn_legacy
from tang_jcompneuro.cnn import CNN
from tang_jcompneuro.configs.cnn_arch import arch_dict
from tang_jcompneuro.configs.cnn_init import init_dict
from tang_jcompneuro.configs.cnn_opt import opt_dict
from tang_jcompneuro import training_aux
from torch.utils.data import TensorDataset
from torch.backends import cudnn
# Disable cuDNN for complete determinism: the checks further down compare the
# parameters of legacy-trained and newly-trained models with a 1e-6 tolerance,
# so both training runs need to be reproducible.
cudnn.enabled = False

In [2]:
# Configuration for the NEW code path, looked up from the project's config tables.
# '12' is presumably the 12-channel variant (the conv weights compared later
# have shape (12, 1, 9, 9)) -- TODO confirm against cnn_arch.
arch_config = arch_dict['legacy_1L']['12']
init_config = init_dict['legacy']
# Despite the `_list` suffix, this is iterated with .keys() / .items() below,
# i.e. it is a mapping from config name to optimizer config.
opt_config_list = opt_dict['legacy']
# Only a quick sanity check is needed, so train for just 5 epochs.
total_epoch = 5

In [3]:
# Display the new-style optimizer configs so they can be compared by eye with
# the legacy-format ones defined in the next cell.
opt_config_list

OrderedDict([('baseline',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 'l2': 0.0001,
                 'l2_bias': 0.0001}],
               'fc': {'l1': 0.0,
                'l1_bias': 0.0,
                'l2': 0.0001,
                'l2_bias': 0.0001},
               'loss': 'mse',
               'optimizer': {'lr': 0.1,
                'momentum': 0.9,
                'optimizer_type': 'sgd'}}),
             ('middle_decay',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 'l2': 0.001,
                 'l2_bias': 0.001}],
               'fc': {'l1': 0.0,
                'l1_bias': 0.0,
                'l2': 0.001,
                'l2_bias': 0.001},
               'loss': 'mse',
               'optimizer': {'lr': 0.1,
                'momentum': 0.9,
                'optimizer_type': 'sgd'}}),
             ('adam_longer',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 '

In [4]:
def generate_legacy_opt_config_list(num_epoch=None):
    """Build the legacy-format optimizer options, keyed by config name.

    The keys ('baseline', 'middle_decay', 'adam_longer') are meant to line up
    with the keys of the new-style ``opt_config_list``. Each value lists only
    the options set explicitly for that config; any remaining settings are
    presumably filled in with defaults by the legacy training code -- TODO
    confirm.

    Parameters
    ----------
    num_epoch : int, optional
        Number of training epochs written into every config. When omitted,
        the notebook-level ``total_epoch`` is used, which keeps existing
        zero-argument calls working unchanged.

    Returns
    -------
    collections.OrderedDict
        Mapping from config name to a freshly created options dict.
    """
    if num_epoch is None:
        # Fall back to the notebook-wide setting only when the caller did not
        # choose a value, so the function no longer *requires* the global.
        num_epoch = total_epoch

    return OrderedDict([
        ('baseline', {'num_epoch': num_epoch}),
        ('middle_decay', {'weight_decay': 0.001, 'num_epoch': num_epoch}),
        ('adam_longer', {'momentum': None, 'opt_type': 'Adam', 'lr': 0.001,
                         'num_epoch': num_epoch}),
    ])
    
# Legacy-format counterpart of `opt_config_list`; check() asserts that the two
# mappings have the same keys before comparing trained models.
opt_config_list_old = generate_legacy_opt_config_list()

In [5]:
# prepare some dummy datasets
def provide_training_dataset():
    """Create a small, fixed random regression dataset.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_, y_)``: inputs of shape ``(500, 1, 20, 20)`` (standard-normal
        draws scaled by 0.1) and targets of shape ``(500, 1)`` (uniform
        ``[0, 1)`` draws scaled by 0.01).
    """
    n_samples = 500
    # A private generator with a fixed seed yields identical data on every
    # call and leaves NumPy's global random state alone.
    generator = np.random.RandomState(seed=0)
    # The draw order (inputs first, then targets) determines the values.
    inputs = 0.1 * generator.standard_normal(size=(n_samples, 1, 20, 20))
    targets = 0.01 * generator.random_sample(size=(n_samples, 1))
    return inputs, targets


# Shared by both the legacy and the new training paths.
X, y = provide_training_dataset()

In [6]:
def train_one_old_model(X_tensor, y_tensor, opt_param, seed):
    """Train one model through the legacy code path and return it.

    Parameters
    ----------
    X_tensor, y_tensor
        Training inputs and targets; each is wrapped in a ``FloatTensor``
        and the pair is bundled into a ``TensorDataset``.
    opt_param : dict
        Legacy-format optimizer options. A deep copy is made, so the
        caller's dict is left unmodified.
    seed
        Stored under the ``'seed'`` key of the copied options.

    Returns
    -------
    The first element of the value returned by
    ``cnn_legacy.one_train_loop``.
    """
    # Inject the seed into a private copy so it never leaks back to the caller.
    legacy_options = deepcopy(opt_param)
    legacy_options['seed'] = seed

    train_set = TensorDataset(FloatTensor(X_tensor), FloatTensor(y_tensor))
    # 'baseline' is presumably the legacy architecture name -- TODO confirm.
    train_result = cnn_legacy.one_train_loop('baseline', train_set,
                                             submodel_param=None,
                                             opt_param=legacy_options,
                                             loss_every=None, verbose=True)
    return train_result[0]

def train_one_new_model(X_tensor, y_tensor, opt_param, seed):
    """Train one model through the new code path and return it.

    Parameters
    ----------
    X_tensor, y_tensor
        Training inputs and targets. They are passed as the first two entries
        of the 6-tuple given to ``training_aux.train_one_case``; the other
        four entries are ``None`` (presumably validation/test data that this
        check does not use -- TODO confirm).
    opt_param
        New-style optimizer config (one value of ``opt_config_list``).
    seed
        Seed handed to the ``CNN`` constructor.

    Returns
    -------
    The trained ``CNN`` instance (moved to the GPU via ``.cuda()``).
    """
    # Build the model from the notebook-level architecture and init configs.
    model_new = CNN(arch_config, init_config, seed=seed)
    model_new.cuda()
    # Train on the data that was actually passed in. The previous version read
    # the notebook globals `X` and `y` here and silently ignored the arguments.
    training_aux.train_one_case(model_new, (X_tensor, y_tensor, None, None, None, None),
                                opt_param, legacy=True, legacy_epoch=total_epoch,
                                shuffle_train=False)
    return model_new


In [7]:
def check():
    """Train with old and new code for every config and seed, then compare.

    For each optimizer config name (the old and new mappings must have the
    same keys) and for each seed in ``range(5)``, one legacy model and one
    new model are trained on the shared ``X`` / ``y`` data, and their
    parameters are compared with ``check_parameters``.

    Raises
    ------
    AssertionError
        If the config key sets differ, or if ``check_parameters`` finds a
        mismatch.
    """
    assert opt_config_list.keys() == opt_config_list_old.keys()
    for config_name, new_opt_param in opt_config_list.items():
        print(f'check {config_name}')
        old_opt_param = opt_config_list_old[config_name]
        for seed in range(5):
            model_old = train_one_old_model(X, y, old_opt_param, seed)
            model_new = train_one_new_model(X, y, new_opt_param, seed)

            # Snapshot both parameter sets (old first, then new) as NumPy arrays.
            params_old, params_new = (
                print_and_save_parameters(trained)
                for trained in (model_old, model_new)
            )
            check_parameters(params_new, params_old)

In [8]:
# Maps each parameter name of the NEW model (key) to the name of the matching
# parameter in the LEGACY model (value). check_parameters() looks up
# params_old[parameter_mapping[new_name]], so the direction matters.
parameter_mapping = {
    'conv.conv0.weight': 'features.0.weight',
    'conv.conv0.bias': 'features.0.bias',
    'fc.fc.weight': 'classifier.0.weight',
    'fc.fc.bias': 'classifier.0.bias',
}

def print_and_save_parameters(model):
    """Snapshot every named parameter of ``model`` as a NumPy array.

    Despite the name, nothing is printed; the function only collects values.

    Parameters
    ----------
    model
        Any object exposing ``named_parameters()`` that yields
        ``(name, parameter)`` pairs.

    Returns
    -------
    dict
        Parameter name -> copy of the parameter's data, moved to the CPU and
        converted with ``.numpy()``. The copy detaches the snapshot from any
        later changes to the model.
    """
    return {
        name: param.data.cpu().numpy().copy()
        for name, param in model.named_parameters()
    }

def check_parameters(params_new, params_old):
    """Assert that new-model and legacy-model parameters agree.

    Parameters
    ----------
    params_new : dict
        Parameter name -> array for the new model; names are the keys of
        ``parameter_mapping``.
    params_old : dict
        Parameter name -> array for the legacy model; names are the values
        of ``parameter_mapping``.

    Raises
    ------
    AssertionError
        If the number of parameters differs from ``parameter_mapping``, if a
        pair of arrays has different shapes, or if their largest absolute
        element-wise difference is not below 1e-6.
    """
    assert len(params_new) == len(params_old) == len(parameter_mapping)
    for new_name, new_value in params_new.items():
        # Translate the new-model name into the legacy model's naming scheme.
        old_value = params_old[parameter_mapping[new_name]]
        assert old_value.shape == new_value.shape
        max_abs_diff = abs(old_value - new_value).max()
        print(f'check {new_name}', new_value.shape, max_abs_diff)
        assert max_abs_diff < 1e-6

In [9]:
# Run the full old-vs-new comparison: every optimizer config, seeds 0-4.
# Any parameter mismatch of 1e-6 or more raises an AssertionError.
check()

check baseline
{'seed': 0, 'batch_size': 128, 'num_epoch': 5, 'weight_decay': 0.0001, 'lr': 0.1, 'opt_type': 'SGD', 'momentum': 0.9}
0-0, train loss 0.0001348872174276039
1-0, train loss 0.00011704946518875659
2-0, train loss 0.0001090884834411554
3-0, train loss 0.00011384298704797402
4-0, train loss 0.0001105038754758425
check conv.conv0.weight (12, 1, 9, 9) 0.0
check conv.conv0.bias (12,) 0.0
check fc.fc.weight (1, 48) 0.0
check fc.fc.bias (1,) 0.0
{'seed': 1, 'batch_size': 128, 'num_epoch': 5, 'weight_decay': 0.0001, 'lr': 0.1, 'opt_type': 'SGD', 'momentum': 0.9}
0-0, train loss 0.0001326469937339425
1-0, train loss 0.00011895146599272266
2-0, train loss 0.00010677921818569303
3-0, train loss 0.00011112236097687855
4-0, train loss 0.00011123009608127177
check conv.conv0.weight (12, 1, 9, 9) 0.0
check conv.conv0.bias (12,) 0.0
check fc.fc.weight (1, 48) 0.0
check fc.fc.bias (1,) 0.0
{'seed': 2, 'batch_size': 128, 'num_epoch': 5, 'weight_decay': 0.0001, 'lr': 0.1, 'opt_type': 'SGD', 

0-0, train loss 0.0001348872174276039
1-0, train loss 9.925181075232103e-05
2-0, train loss 9.064744517672807e-05
3-0, train loss 7.407571683870628e-05
4-0, train loss 6.48670393275097e-05
check conv.conv0.weight (12, 1, 9, 9) 7.45058e-09
check conv.conv0.bias (12,) 1.39698e-09
check fc.fc.weight (1, 48) 1.04774e-09
check fc.fc.bias (1,) 0.0
{'seed': 1, 'batch_size': 128, 'num_epoch': 5, 'weight_decay': 0.0001, 'lr': 0.001, 'opt_type': 'Adam', 'momentum': None}
0-0, train loss 0.0001326469937339425
1-0, train loss 0.00010011318954639137
2-0, train loss 8.906856237445027e-05
3-0, train loss 7.312009256565943e-05
4-0, train loss 6.622305954806507e-05
check conv.conv0.weight (12, 1, 9, 9) 1.11759e-08
check conv.conv0.bias (12,) 9.31323e-10
check fc.fc.weight (1, 48) 5.82077e-10
check fc.fc.bias (1,) 0.0
{'seed': 2, 'batch_size': 128, 'num_epoch': 5, 'weight_decay': 0.0001, 'lr': 0.001, 'opt_type': 'Adam', 'momentum': None}
0-0, train loss 0.00014481543621513993
1-0, train loss 0.000106770