A small-scale test on neuron 553 of Monkey A.

In [1]:
from torch.backends import cudnn
# cuDNN setup: keep cuDNN enabled but turn off its benchmark (autotuner) mode.
# Per the original author's note, this combination is the fastest for this notebook.
cudnn.benchmark = False
cudnn.enabled = True

In [2]:
from collections import OrderedDict
import os.path
from itertools import product

import numpy as np
import h5py

from tang_jcompneuro.cnn_exploration import one_layer_models_to_explore, init_config_to_use_fn
from tang_jcompneuro.training_aux import count_params, train_one_case
from tang_jcompneuro.cnn import CNN
from tang_jcompneuro.configs.cnn_opt import legacy_opt_generator
from tang_jcompneuro import dir_dictionary

In [3]:
# timer.
# https://stackoverflow.com/questions/5849800/tic-toc-functions-analog-in-python
import time
class Timer(object):
    """Context manager that prints the time spent inside a ``with`` block.

    Parameters
    ----------
    name : str, optional
        Label printed in square brackets before the elapsed time. When falsy
        (``None`` or an empty string) only the elapsed time is printed.

    Attributes
    ----------
    tstart : float or None
        Clock reading taken on entry, in ``time.perf_counter`` units (only
        differences between readings are meaningful). ``None`` before entry.
    elapsed : float or None
        Seconds spent inside the block; ``None`` until the block has exited.
    """

    def __init__(self, name=None):
        self.name = name
        self.tstart = None
        self.elapsed = None

    def __enter__(self):
        # perf_counter is monotonic, so the measurement cannot go negative or
        # jump if the system clock is adjusted while the block is running.
        self.tstart = time.perf_counter()
        # Return the instance so that ``with Timer() as t:`` binds the timer
        # (and therefore ``t.elapsed``) instead of None.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.elapsed = time.perf_counter() - self.tstart
        if self.name:
            print('[%s]' % self.name, end=' ')
        print('Elapsed: %s' % self.elapsed)
        # Implicitly returns None, so an exception raised inside the block
        # still propagates after the timing line has been printed.

In [4]:
# All one-layer CNN architecture configs offered for exploration, looked up by name in the next cell.
arch_config_list = one_layer_models_to_explore()

In [5]:
# Architectures to compare: the same one-layer CNN without and with batch norm.
# ('legacy_b12' is left out because it is the same as 'k9c12_nobn_k8s4max_vanilla'.)
arch_configs_to_test = ('k9c12_nobn_k8s4max_vanilla', 'k9c12_bn_k8s4max_vanilla')
# Keep only the selected architectures, in the order listed above.
arch_config_list = OrderedDict(
    (arch_name, arch_config_list[arch_name]) for arch_name in arch_configs_to_test
)
# Optimizer configs that will be crossed with the selected architectures.
opt_configs_to_test = legacy_opt_generator()

In [6]:
# Display the selected architecture configs for inspection.
arch_config_list

OrderedDict([('k9c12_nobn_k8s4max_vanilla',
              {'act_fn': 'relu',
               'conv': [{'bn': False,
                 'kernel_size': 9,
                 'out_channel': 12,
                 'padding': 0,
                 'pool': {'kernel_size': 8,
                  'padding': 0,
                  'pool_type': 'max',
                  'stride': 4},
                 'stride': 1}],
               'fc': {'dropout': None, 'factored': False},
               'linear_output': True}),
             ('k9c12_bn_k8s4max_vanilla',
              {'act_fn': 'relu',
               'conv': [{'bn': True,
                 'kernel_size': 9,
                 'out_channel': 12,
                 'padding': 0,
                 'pool': {'kernel_size': 8,
                  'padding': 0,
                  'pool_type': 'max',
                  'stride': 4},
                 'stride': 1}],
               'fc': {'dropout': None, 'factored': False},
               'linear_output': True})])

In [7]:
# Display the optimizer configs that will be tried.
opt_configs_to_test

OrderedDict([('baseline',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 'l2': 0.0001,
                 'l2_bias': 0.0001}],
               'fc': {'l1': 0.0,
                'l1_bias': 0.0,
                'l2': 0.0001,
                'l2_bias': 0.0001},
               'loss': 'mse',
               'optimizer': {'lr': 0.1,
                'momentum': 0.9,
                'optimizer_type': 'sgd'}}),
             ('middle_decay',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 'l2': 0.001,
                 'l2_bias': 0.001}],
               'fc': {'l1': 0.0,
                'l1_bias': 0.0,
                'l2': 0.001,
                'l2_bias': 0.001},
               'loss': 'mse',
               'optimizer': {'lr': 0.1,
                'momentum': 0.9,
                'optimizer_type': 'sgd'}}),
             ('adam_longer',
              {'conv': [{'l1': 0.0,
                 'l1_bias': 0.0,
                 '

In [8]:
# Display the initialization config that is passed to every CNN built in this notebook.
init_config_to_use_fn()

{'conv_init': 'kaiming_fan_out', 'fc_init': 0.0001}

In [9]:
# load dataset.
def load_dataset(neuron_idx, subset):
    """Load the 'new' and 'legacy' data splits of one neuron from the split-dataset file.

    Reads ``split_datasets.hdf5`` from ``dir_dictionary['datasets']`` and pulls
    two HDF5 groups for the given subset: ``with_val`` (stored under the key
    ``'new'``) and ``without_val`` (stored under the key ``'legacy'``).

    Parameters
    ----------
    neuron_idx : int
        Column of the response matrices to extract.
    subset : str
        Subset name interpolated into the group path ``/MkA_Shape/{subset}/...``.

    Returns
    -------
    OrderedDict
        Maps ``'new'`` and ``'legacy'`` to a 6-tuple
        ``(X_train, y_train, X_test, y_test, X_val, y_val)``. ``X_val`` and
        ``y_val`` are ``None`` when the group has no ``'val'`` entry. The ``y``
        arrays keep a trailing axis of length one.

    Raises
    ------
    AssertionError
        If the test inputs or test responses differ between the two groups.
    """
    # Slicing (instead of integer indexing) keeps the selected response column as its own axis.
    neuron_col = slice(neuron_idx, neuron_idx + 1)

    hdf5_groups = OrderedDict([
        ('new', f'/MkA_Shape/{subset}/with_val/100/0'),
        ('legacy', f'/MkA_Shape/{subset}/without_val/100/0'),
    ])

    def read_split(group, split):
        # Full stimulus array plus the single response column of one split.
        return group[f'{split}/X'][...], group[f'{split}/y'][:, neuron_col]

    splits = OrderedDict()
    hdf5_path = os.path.join(dir_dictionary['datasets'], 'split_datasets.hdf5')
    with h5py.File(hdf5_path, 'r') as h5file:
        for label, group_name in hdf5_groups.items():
            group = h5file[group_name]
            train_xy = read_split(group, 'train')
            test_xy = read_split(group, 'test')
            # A group without a validation split contributes (None, None).
            val_xy = read_split(group, 'val') if 'val' in group else (None, None)
            splits[label] = train_xy + test_xy + val_xy

    # Both groups must share the same test set (positions 2 and 3 are X_test and y_test),
    # otherwise test scores of the two training procedures would not be comparable.
    for position in (2, 3):
        assert np.array_equal(splits['new'][position], splits['legacy'][position])

    return splits

In [16]:
# Load both data splits ('new' and 'legacy') for neuron 553 from the 'all' subset.
dataset_dict = load_dataset(553, 'all')

In [17]:
# ok. let's train.
def do():
    """Train every (architecture, optimizer) pair with the legacy and the new procedure.

    For each combination of ``arch_config_list`` and ``opt_configs_to_test``
    a fresh CNN is built, moved to the GPU and trained twice: first on the
    'legacy' dataset with ``legacy=True``, then on the 'new' dataset with the
    default procedure. Each run is timed with ``Timer``, and afterwards the
    pair ``(arch_name, opt_name)`` is printed together with the second return
    value of ``train_one_case`` for both runs (presumably a test correlation;
    confirm against ``train_one_case``).

    Requires a CUDA device, since every model is moved with ``.cuda()``.
    """
    # (timer label, key into dataset_dict, extra keyword arguments for train_one_case)
    variants = (
        ('old', 'legacy', {'legacy': True}),
        ('new', 'new', {}),
    )

    for arch_item, opt_item in product(arch_config_list.items(), opt_configs_to_test.items()):
        arch_name, arch_config = arch_item
        opt_name, opt_config = opt_item

        scores = []
        for timer_label, dataset_key, extra_kwargs in variants:
            data = dataset_dict[dataset_key]
            # Author's note on batch norm: BN models don't work here, because
            # binary images can produce a batch that is all zero, which gives
            # extreme values in the next layer. Increasing bn_eps avoids that,
            # but it also prevents effective learning, so the result is the
            # same as or worse than using no BN at all.
            net = CNN(arch_config, init_config_to_use_fn(),
                      mean_response=data[1].mean(axis=0), bn_eps=1)
            net.cuda()
            with Timer(timer_label):
                _, score = train_one_case(net, data, opt_config, seed=0, **extra_kwargs)
            # Drop the model before the next one is built.
            del net
            scores.append(score)

        old_cc, new_cc = scores
        print((arch_name, opt_name), old_cc, new_cc)

In [18]:
# Run the comparison; prints timings and the legacy/new scores for each (architecture, optimizer) pair.
do()

[old] Elapsed: 18.338154077529907
val metric
 {'neg_corr': -0.61469704, 'corr': 0.61469704, 'mse': 0.0047237189}
test metric
 {'neg_corr': -0.61848414, 'corr': 0.61848414, 'mse': 0.0047682012}
val metric
 {'neg_corr': -0.63323694, 'corr': 0.63323694, 'mse': 0.0046056658}
test metric
 {'neg_corr': -0.66902661, 'corr': 0.66902661, 'mse': 0.004311264}
val metric
 {'neg_corr': -0.64174056, 'corr': 0.64174056, 'mse': 0.0046025217}
test metric
 {'neg_corr': -0.67691702, 'corr': 0.67691702, 'mse': 0.0042340797}
early stopping after epoch 3500
[new] Elapsed: 13.245948076248169
('k9c12_nobn_k8s4max_vanilla', 'baseline') 0.618553 0.687492
[old] Elapsed: 15.218517303466797
val metric
 {'neg_corr': -0.62568891, 'corr': 0.62568891, 'mse': 0.0047055758}
test metric
 {'neg_corr': -0.61073232, 'corr': 0.61073232, 'mse': 0.004919196}
val metric
 {'neg_corr': -0.63376415, 'corr': 0.63376415, 'mse': 0.0046449443}
test metric
 {'neg_corr': -0.63663596, 'corr': 0.63663596, 'mse': 0.0047323923}
val metric
 