This notebook checks that models with various setups (batch norm placement, `act_fn`) have the correct number of parameters and the correct model structure, and that they can train.

In [1]:
import numpy as np

from thesis_v2.data.prepared.yuanyuan_8k import get_data

from thesis_v2.training_extra.maskcnn_like.opt import get_maskcnn_v1_opt_config
from thesis_v2.training_extra.maskcnn_like.training import (train_one,
                                                            partial)

from thesis_v2.models.maskcnn_polished.builder import (gen_maskcnn_polished, load_modules)
import json
from itertools import product
# NOTE(review): presumably registers the project's custom modules so the
# generated architecture JSON can be instantiated -- confirm in the builder.
load_modules()

# Author's empirical note: keeping the mean response at 0.5 seems to work
# best, and using batch norm seems to hurt (reason unknown).
datasets_raw = get_data('a', 200, 50, ('042318', '043018', '051018'), scale=0.5)

# Re-key the positional result by split name. Only the inputs (X_*) are cast
# to float32; the targets (y_*) are passed through exactly as returned.
datasets_raw = dict(
    X_train=datasets_raw[0].astype(np.float32),
    y_train=datasets_raw[1],
    X_val=datasets_raw[2].astype(np.float32),
    y_val=datasets_raw[3],
    X_test=datasets_raw[4].astype(np.float32),
    y_test=datasets_raw[5],
)





def do_something(*,
                 bn_before_act,
                 bn_after_fc,
                 act_fn,
                 ):
    """Build, train and print one model configuration.

    All three arguments are keyword-only. They are forwarded unchanged to
    ``gen_maskcnn_polished`` and are also embedded in the ``key`` handed to
    ``train_one``, so each configuration gets its own key.

    Side effects: prints the generated architecture JSON, the model returned
    by ``train_one`` and the ``corr_mean`` found under
    ``stats_best -> stats -> test`` in the result. Reads the module-level
    ``datasets_raw``. Returns nothing.
    """

    def gen_cnn_partial(input_size, n):
        # Architecture factory passed to train_one; everything except
        # input_size / n and the three swept options is fixed here.
        arch_kwargs = {
            'input_size': input_size,
            'num_neuron': n,
            'out_channel': 16,  # (try, 8, 16, 32, 48)
            'kernel_size_l1': 9,  # (try 5,9,13)
            'kernel_size_l23': 3,
            'act_fn': act_fn,
            'pooling_ksize': 3,  # (try, 1,3,5,7)
            'pooling_type': 'avg',  # try (avg, max)  # looks that max works well here?
            'num_layer': 2,
            'bn_before_act': bn_before_act,
            'bn_after_fc': bn_after_fc,
        }
        arch_json = gen_maskcnn_polished(**arch_kwargs)
        # Echo the architecture so its structure can be inspected in the output.
        print(json.dumps(arch_json, indent=2))
        return arch_json

    # Optimizer configuration with the regularization settings pinned.
    opt_config_partial = partial(
        get_maskcnn_v1_opt_config,
        scale=0.01,
        smoothness=0.00005,
        group=0.0,
    )

    run_key = f'debug/maskcnn_polished_check_alternative_setups/{bn_before_act}/{bn_after_fc}/{act_fn}'

    result = train_one(
        arch_json_partial=gen_cnn_partial,
        opt_config_partial=opt_config_partial,
        datasets=datasets_raw,
        key=run_key,
        show_every=10000,
        max_epoch=100,  # not meant to be fully run.
        model_seed=0,
        return_model=True,
    )

    print(result['model'])
    best_test_stats = result['stats_best']['stats']['test']
    print(best_test_stats['corr_mean'])



In [2]:
# Sweep every combination of the two batch-norm switches and the two
# activation functions (2 x 2 x 2 = 8 runs), echoing each combination
# before it is trained so the output below can be matched to its setup.
for bn_before_act_this, bn_after_fc_this, act_fn_this in product(
        (True, False),
        (True, False),
        ('softplus', 'relu'),
):
    print(bn_before_act_this, bn_after_fc_this, act_fn_this)
    do_something(
        act_fn=act_fn_this,
        bn_after_fc=bn_after_fc_this,
        bn_before_act=bn_before_act_this,
    )
    # as a reference, my standard model has 20493 parameters.

True True softplus
{
  "module_dict": {
    "bn_input": {
      "name": "torch.nn.batchnorm2d",
      "params": {
        "num_features": 1,
        "eps": 0.001,
        "momentum": 0.1,
        "affine": true
      },
      "init": {}
    },
    "conv0": {
      "name": "torch.nn.conv2d",
      "params": {
        "in_channels": 1,
        "out_channels": 16,
        "kernel_size": 9,
        "padding": 0,
        "bias": false
      },
      "init": {
        "strategy": "normal",
        "parameters": {
          "std": 0.01
        }
      }
    },
    "bn0": {
      "name": "torch.nn.batchnorm2d",
      "params": {
        "num_features": 16,
        "eps": 0.001,
        "momentum": 0.1,
        "affine": true
      },
      "init": {}
    },
    "act0": {
      "name": "torch.nn.softplus",
      "params": {},
      "init": null
    },
    "conv1": {
      "name": "torch.nn.conv2d",
      "params": {
        "in_channels": 16,
        "out_channels": 16,
        "kernel_size": 3

recover best model after 100 epochs metric 0.8367301821708679
for grp of sz 12, lr from 0.001000 to 0.000333
val metric init {'loss': 0.8385625123977661, 'loss_no_reg': 0.8367301821708679, 'corr': None, 'corr_mean': 0.33294379711151123, 'corr_mean_neg': -0.33294379711151123, 'corr2_mean': 0.1254153698682785, 'corr2_mean_neg': -0.1254153698682785, 'acc': None}
0-0, train loss 0.8320508599281311
train loss 0.8320508599281311
val metric {'loss': 0.8380900144577026, 'loss_no_reg': 0.8362612128257751, 'corr': None, 'corr_mean': 0.33449840545654297, 'corr_mean_neg': -0.33449840545654297, 'corr2_mean': 0.12657108902931213, 'corr2_mean_neg': -0.12657108902931213, 'acc': None}
test metric {'loss': 0.8387245876448495, 'loss_no_reg': 0.8387371301651001, 'corr': None, 'corr_mean': 0.3337578773498535, 'corr_mean_neg': -0.3337578773498535, 'corr2_mean': 0.1265597939491272, 'corr2_mean_neg': -0.1265597939491272, 'acc': None}
recover best model after 100 epochs metric 0.82666015625
for grp of sz 12, l

recover best model after 100 epochs metric 0.8399950265884399
for grp of sz 11, lr from 0.000333 to 0.000111
val metric init {'loss': 0.8421354174613953, 'loss_no_reg': 0.8399950265884399, 'corr': None, 'corr_mean': 0.1894105882914383, 'corr_mean_neg': -0.1894105882914383, 'corr2_mean': 0.053965267705462366, 'corr2_mean_neg': -0.053965267705462366, 'acc': None}
0-0, train loss 0.8441876769065857
train loss 0.8441876769065857
val metric {'loss': 0.8420949697494506, 'loss_no_reg': 0.8399515151977539, 'corr': None, 'corr_mean': 0.189067386915978, 'corr_mean_neg': -0.189067386915978, 'corr2_mean': 0.05378369110861028, 'corr2_mean_neg': -0.05378369110861028, 'acc': None}
test metric {'loss': 0.8424391235624041, 'loss_no_reg': 0.8423449993133545, 'corr': None, 'corr_mean': 0.18867079890038393, 'corr_mean_neg': -0.18867079890038393, 'corr2_mean': 0.05355377561340672, 'corr2_mean_neg': -0.05355377561340672, 'acc': None}
recover best model after 100 epochs metric 0.839191734790802
JSONNet(
  (m

recover best model after 100 epochs metric 0.8322732448577881
JSONNet(
  (moduledict): ModuleDict(
    (act0): ReLU()
    (act1): ReLU()
    (bn0): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn_input): BatchNorm2d(1, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (conv0): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1), bias=False)
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (fc): FactoredLinear2D()
    (final_act): ReLU()
    (pooling): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
)
0.31663039326667786
False True softplus
{
  "module_dict": {
    "bn_input": {
      "name": "torch.nn.batchnorm2d",
      "params": {
        "num_features": 1,
        "eps": 0.001,
        "momentum": 0.1,
        "affine": true
      },
      "init": {}
    },
    "conv0": {
      "name": "torch.

recover best model after 100 epochs metric 0.8392715454101562
for grp of sz 14, lr from 0.001000 to 0.000333
val metric init {'loss': 0.8421927332878113, 'loss_no_reg': 0.8392715454101562, 'corr': None, 'corr_mean': 0.2677591145038605, 'corr_mean_neg': -0.2677591145038605, 'corr2_mean': 0.08841968327760696, 'corr2_mean_neg': -0.08841968327760696, 'acc': None}
0-0, train loss 0.8379603028297424
train loss 0.8379603028297424
val metric {'loss': 0.8417660117149353, 'loss_no_reg': 0.8388570547103882, 'corr': None, 'corr_mean': 0.27082303166389465, 'corr_mean_neg': -0.27082303166389465, 'corr2_mean': 0.08998856693506241, 'corr2_mean_neg': -0.08998856693506241, 'acc': None}
test metric {'loss': 0.8422899757112775, 'loss_no_reg': 0.841249406337738, 'corr': None, 'corr_mean': 0.2728033661842346, 'corr_mean_neg': -0.2728033661842346, 'corr2_mean': 0.09038567543029785, 'corr2_mean_neg': -0.09038567543029785, 'acc': None}
recover best model after 100 epochs metric 0.8275705575942993
for grp of sz

recover best model after 100 epochs metric 0.8356146812438965
for grp of sz 13, lr from 0.000333 to 0.000111
val metric init {'loss': 0.8384275555610656, 'loss_no_reg': 0.8356146812438965, 'corr': None, 'corr_mean': 0.2549826502799988, 'corr_mean_neg': -0.2549826502799988, 'corr2_mean': 0.07975511997938156, 'corr2_mean_neg': -0.07975511997938156, 'acc': None}
0-0, train loss 0.8404800891876221
train loss 0.8404800891876221
val metric {'loss': 0.8384282469749451, 'loss_no_reg': 0.8356153964996338, 'corr': None, 'corr_mean': 0.2550466060638428, 'corr_mean_neg': -0.2550466060638428, 'corr2_mean': 0.07979422062635422, 'corr2_mean_neg': -0.07979422062635422, 'acc': None}
test metric {'loss': 0.8385710545948574, 'loss_no_reg': 0.8380202651023865, 'corr': None, 'corr_mean': 0.2543041706085205, 'corr_mean_neg': -0.2543041706085205, 'corr2_mean': 0.07897321879863739, 'corr2_mean_neg': -0.07897321879863739, 'acc': None}
recover best model after 100 epochs metric 0.8352594971656799
JSONNet(
  (mo

recover best model after 100 epochs metric 0.8290277123451233
JSONNet(
  (moduledict): ModuleDict(
    (act0): ReLU()
    (act1): ReLU()
    (bn0): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (bn_input): BatchNorm2d(1, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (conv0): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1))
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fc): FactoredLinear2D()
    (final_act): ReLU()
    (pooling): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
)
0.34681108593940735
