In [1]:
import wandb
import torch
import torch.nn as nn
import torchvision.transforms as T
import timm

from train import FashionTrainer, config, set_seed, model_size

## Baseline

In [2]:
# Weights & Biases destination used by every wandb.init call in this notebook.
WANDB_ENTITY = "capecape"       # account / team the runs are logged under
WANDB_PROJECT = "fmnist_bench"  # project the runs are grouped in

In [3]:
# Display the hyperparameter namespace imported from `train` (defaults before any overrides below).
config

namespace(epochs=5,
          model_name='resnet10t',
          bs=512,
          device='cuda',
          seed=42,
          lr=0.001,
          use_wandb=True,
          wd=0.0)

In [4]:
# Seed the run from config.seed. Which RNGs set_seed covers is defined in train.py (not visible here).
set_seed(config.seed)

In [5]:
# Scalar normalization statistics handed to T.Normalize in the transform pipelines.
mean = 0.28
std = 0.35

In [6]:
# Image side length in pixels; used as the RandomCrop output size in the training transforms.
config.size = 28

In [7]:
# Training pipeline: augmentation (random crop back to config.size after 1px of
# padding, random horizontal flip), then tensor conversion and normalization.
# Disabled step, kept for reference (it sat after Normalize):
#   T.RandomErasing(scale=(0.02, 0.25), value="random")
train_tfms = T.Compose(
    [T.RandomCrop(config.size, padding=1), T.RandomHorizontalFlip()]
    + [T.ToTensor(), T.Normalize(mean, std)]
)

# Validation pipeline: no augmentation, only tensor conversion and normalization.
val_tfms = T.Compose([T.ToTensor(), T.Normalize(mean, std)])

# Split name -> transform pipeline, passed to FashionTrainer as `tfms`.
tfms = dict(train=train_tfms, valid=val_tfms)

## Train a custom ConvMixer

In [8]:
from timm.models.convmixer import ConvMixer

In [9]:
# Constructor arguments for timm's ConvMixer (1 input channel, 10 output classes).
model_params = {
    "act_layer": nn.Mish,
    "kernel_size": 5,
    "dim": 512,
    "depth": 6,
    "patch_size": 2,
    "in_chans": 1,
    "num_classes": 10,
}
module = ConvMixer(**model_params)

# Replace the stock classifier head with:
# dropout -> flatten -> bias-free linear (dim -> num_classes) -> BatchNorm over the outputs.
module.head = nn.Sequential(
    nn.Dropout(0.2),
    nn.Flatten(),
    nn.Linear(model_params["dim"], model_params["num_classes"], bias=False),
    nn.BatchNorm1d(model_params["num_classes"]),
)
config.model_name = "convmixer"

In [10]:
# Record the model description on `config` (which is later passed to wandb.init as the run config).
config.model_params = model_params
# model_size comes from train.py; presumably a parameter count — TODO confirm.
config.model_size = model_size(module)
# Switch for the optional LSUV initialisation cell below.
config.lsuv = False

In [11]:
# Optional LSUV initialisation; skipped unless config.lsuv is truthy.
if config.lsuv:
    # Imported inside the branch so `lsuv` is only required when this path is enabled.
    from lsuv import LSUVinit

    # Probe batch shaped like the real network input (in_chans channels,
    # config.size x config.size pixels) instead of a hard-coded 1x32x32, and
    # placed on config.device instead of a hard-coded .cuda().
    lsuv_batch = torch.rand(
        128, model_params["in_chans"], config.size, config.size
    ).to(config.device)
    module = LSUVinit(module.to(config.device), lsuv_batch)

In [12]:
# Hyperparameter overrides for the ConvMixer run.
config.epochs = 5
config.lr = 5e-2
config.bs = 256

# Wrap the module in the project's trainer, passing the batch size and the train/valid transforms.
model = FashionTrainer(module, bs=config.bs, tfms=tfms)

# NOTE(review): the saved output of this cell is
# "ValueError: too many values to unpack (expected 2)". The traceback is not
# shown, so it is unclear whether the constructor above or compile() raised it.
# The same two calls succeed later with the jeremy_resnet module. Reproduce on a
# fresh kernel and fix or remove before relying on this section.
model.compile(epochs=config.epochs, lr=config.lr, wd=config.wd)

ValueError: too many values to unpack (expected 2)

In [None]:
# Open a W&B run that records the current `config`, then train the compiled model.
run = wandb.init(
    project=WANDB_PROJECT,
    entity=WANDB_ENTITY,
    config=config,
)
model.fit()

In [None]:
# Close the W&B run opened in the previous cell.
wandb.finish()

## Train the `jeremy_resnet` model

=============================

In [16]:
# NOTE(review): wildcard import. `partial`, `init_weights` and `get_model` used in
# the next cell are not imported anywhere else in the visible notebook, so they
# presumably come from here. Prefer explicit imports once the full set of names
# this notebook needs from jeremy_resnet is confirmed.
from jeremy_resnet import *

In [26]:
# Weight initialiser with its `leaky` argument pre-bound; handed to `.apply` on the new model.
iw = partial(init_weights, leaky=0.0003)

# Constructor arguments for get_model, stored on `config` so they are sent to W&B with the run.
config.model_params = dict(act=nn.Mish, norm=nn.BatchNorm2d, nfs=(16,32,64,128,256,256))
module = get_model(**config.model_params).apply(iw)
config.model_name = "jeremy_resnet"

# Recompute the size for THIS module. Without it, config.model_size still holds the
# value computed for the ConvMixer earlier and that stale number would be sent to
# wandb.init as part of the run config.
config.model_size = model_size(module)

In [27]:
# Hyperparameter overrides for the jeremy_resnet run.
config.epochs = 5
config.lr = 5e-2
config.bs = 160

# Wrap the module in the project's trainer with this run's batch size and the shared transforms.
model = FashionTrainer(
    module,
    bs=config.bs,
    tfms=tfms,
)

# Configure training with the epoch count, learning rate and weight decay from `config`.
model.compile(
    epochs=config.epochs,
    lr=config.lr,
    wd=config.wd,
)

In [28]:
# Open a W&B run that records the current `config`, then train the compiled model.
run = wandb.init(
    project=WANDB_PROJECT,
    entity=WANDB_ENTITY,
    config=config,
)
model.fit()

epoch:   0, train_loss:      0.671, train_acc: 0.771   ||   val_loss:      0.644, val_acc: 0.824


epoch:   1, train_loss:      0.415, train_acc: 0.845   ||   val_loss:      0.415, val_acc: 0.847


epoch:   2, train_loss:      0.353, train_acc: 0.869   ||   val_loss:      0.355, val_acc: 0.872


epoch:   3, train_loss:      0.297, train_acc: 0.889   ||   val_loss:      0.294, val_acc: 0.897


epoch:   4, train_loss:      0.258, train_acc: 0.904   ||   val_loss:      0.258, val_acc: 0.904


In [29]:
# Close the W&B run opened in the previous cell; the run history/summary tables are printed below.
wandb.finish()

0,1
epoch,▁▃▅▆█
learning_rate,▁▂▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁
model_size,▁
train_acc,▁▅▆▇█
train_loss,█▆▄▆▄▄▃▃▃▃▃▃▃▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▂▂▂▂▂
val_acc,▁▃▅▇█
val_loss,█▄▃▂▁

0,1
epoch,4.0
learning_rate,0.0
model_size,2409604.0
train_acc,0.90388
train_loss,0.20969
val_acc,0.9043
val_loss,0.2576
