In [1]:
import wandb
import torch
import torch.nn as nn
import torchvision.transforms as T
import timm

from train import FashionTrainer, config, set_seed, model_size

## Baseline

In [2]:
# Weights & Biases destination: the project and entity handed to wandb.init() below.
WANDB_PROJECT, WANDB_ENTITY = "fmnist_bench", "capecape"

In [3]:
# Show the default training configuration imported from train.py, before any overrides.
config

namespace(epochs=20,
          model_name='resnet10t',
          bs=512,
          device='cuda',
          seed=42,
          lr=0.001,
          use_wandb=True,
          wd=0.0,
          tfms={'train': Compose(
                    RandomCrop(size=(28, 28), padding=1)
                    RandomHorizontalFlip(p=0.5)
                    ToTensor()
                    Normalize(mean=0.28, std=0.35)
                    RandomErasing(p=0.5, scale=(0.02, 0.25), ratio=(0.3, 3.3), value=random, inplace=False)
                ),
                'valid': Compose(
                    ToTensor()
                    Normalize(mean=0.28, std=0.35)
                )})

In [4]:
# Shorten the default 20-epoch schedule and raise the learning rate.
# NOTE(review): both fields are assigned again in the training cell further down
# (epochs = 5, lr = 7e-3) before compile() is called, so lr = 5e-2 never takes effect.
config.epochs, config.lr = 5, 5e-2

In [5]:
# Call the project's seeding helper with config.seed (42 in the config shown above)
# before the model is built.
set_seed(config.seed)

In [6]:
# Normalisation statistics used by both the train and validation transforms
# (presumably the Fashion-MNIST pixel mean / std — TODO confirm).
mean = 0.28
std = 0.35

In [7]:
# Training pipeline: random 28x28 crop with 1 pixel of padding and a random
# horizontal flip, followed by tensor conversion and normalisation.
train_tfms = T.Compose([
    T.RandomCrop(28, padding=1),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean, std),
    # RandomErasing is part of the default config.tfms but is left disabled in this notebook:
    # T.RandomErasing(scale=(0.02, 0.25), value="random"),
])

# Validation pipeline: no augmentation, only tensor conversion and normalisation.
val_tfms = T.Compose([
    T.ToTensor(),
    T.Normalize(mean, std),
])

tfms = {"train": train_tfms, "valid": val_tfms}

# Keep the config in sync with the transforms that are actually used. Without this,
# config.tfms still describes the default pipeline (including RandomErasing) and
# wandb.init(config=config) would log an augmentation setup that was never applied.
config.tfms = tfms

## Train

In [8]:
from timm.models.convmixer import ConvMixer

In [9]:
# ConvMixer hyper-parameters: single-channel input, 10 output classes.
model_params = dict(
    kernel_size=5,
    dim=512,
    depth=8,
    patch_size=2,
    in_chans=1,
    num_classes=10,
)
module = ConvMixer(**model_params)

# Swap the stock classifier for dropout -> flatten -> bias-free linear -> batch norm.
# (The linear layer has no bias, presumably because the BatchNorm1d that follows
# has its own shift term.)
embed_dim = model_params["dim"]
n_classes = model_params["num_classes"]
head_layers = [
    nn.Dropout(0.2),
    nn.Flatten(),
    nn.Linear(embed_dim, n_classes, bias=False),
    nn.BatchNorm1d(n_classes),
]
module.head = nn.Sequential(*head_layers)

# Record what was built on the config object so it is included when the config is logged.
config.model_name = "convmixer"
config.model_params = model_params
config.model_size = model_size(module)
config.lsuv = False

In [10]:
# Hyper-parameters for this run; these replace the values set earlier in the notebook
# (the default batch size was 512).
config.bs = 64
config.lr = 7e-3
config.epochs = 5

# Wrap the ConvMixer in the project trainer with the notebook's transforms, then
# pass the epoch count, learning rate and weight decay to compile().
model = FashionTrainer(module, tfms=tfms, bs=config.bs)
model.compile(wd=config.wd, lr=config.lr, epochs=config.epochs)

In [11]:
# Open a W&B run that records the current `config`, then train.
run = wandb.init(
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    config=config,
)
model.fit()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcapecape[0m. Use [1m`wandb login --relogin`[0m to force relogin


epoch:   0, train_loss:      0.552, train_acc: 0.832   ||   val_loss:      0.530, val_acc: 0.851


epoch:   1, train_loss:      0.311, train_acc: 0.891   ||   val_loss:      0.306, val_acc: 0.904


epoch:   2, train_loss:      0.250, train_acc: 0.913   ||   val_loss:      0.246, val_acc: 0.919


epoch:   3, train_loss:      0.200, train_acc: 0.930   ||   val_loss:      0.198, val_acc: 0.934


epoch:   4, train_loss:      0.157, train_acc: 0.946   ||   val_loss:      0.160, val_acc: 0.937


In [12]:
# Finish the W&B run opened for the first fit; the run history and summary are printed below.
# NOTE(review): later cells call model.fit() again and wandb.finish() a second time —
# confirm those fits are meant to log to a run that is still open at that point.
wandb.finish()

0,1
epoch,▁▃▅▆█
learning_rate,▂▂▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁
train_acc,▁▅▆▇█
train_loss,██▇▄▅▄▂▄▂▄▄▃▆▃▂▄▃▃▂▂▃▂▂▃▃▃▂▂▂▂▂▁▃▂▁▂▁▂▂▁
val_acc,▁▅▇██
val_loss,█▄▃▂▁

0,1
epoch,4.0
learning_rate,0.0
train_acc,0.94602
train_loss,0.29072
val_acc,0.9367
val_loss,0.1599


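## More epochs

Re-compile the same trainer and fit three more times, 5 epochs each, with learning rates 1e-2, 5e-3 and 1e-3.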

In [66]:
# Settings for the first follow-up fit: 5 epochs at lr = 1e-2.
config.epochs = 5
config.lr = 1e-2

# The run opened for the first fit was closed by the wandb.finish() call above, so on a
# top-to-bottom execution no run is active here. Open one (only if none is active) so
# the follow-up fits have a run to log to and the final wandb.finish() has a run to close.
# The config is passed after the overrides above so the run records the new settings.
if wandb.run is None:
    run = wandb.init(project=WANDB_PROJECT, entity=WANDB_ENTITY, config=config)

model.compile(epochs=config.epochs, lr=config.lr, wd=config.wd)

In [67]:
# Run the trainer's fit loop with the settings passed to the latest compile() call
# (5 epochs, lr = 1e-2).
model.fit()

epoch:   0, train_loss:      0.636, train_acc: 0.820   ||   val_loss:      0.633, val_acc: 0.798    
epoch:   1, train_loss:      0.372, train_acc: 0.876   ||   val_loss:      0.368, val_acc: 0.875    
epoch:   2, train_loss:      0.298, train_acc: 0.896   ||   val_loss:      0.292, val_acc: 0.911    
epoch:   3, train_loss:      0.250, train_acc: 0.913   ||   val_loss:      0.246, val_acc: 0.922    
epoch:   4, train_loss:      0.217, train_acc: 0.925   ||   val_loss:      0.216, val_acc: 0.927    
 |████████████████████████████████████████| 100.00% [5/5 01:19<00:00]

In [68]:
# Second follow-up fit: same epoch count, learning rate halved to 5e-3.
config.epochs, config.lr = 5, 5e-3

# Re-compile the existing trainer so the next fit() picks up the new settings.
model.compile(wd=config.wd, lr=config.lr, epochs=config.epochs)

In [69]:
# Run the trainer's fit loop with the settings passed to the latest compile() call
# (5 epochs, lr = 5e-3).
model.fit()

epoch:   0, train_loss:      0.255, train_acc: 0.911   ||   val_loss:      0.265, val_acc: 0.886    
epoch:   1, train_loss:      0.252, train_acc: 0.910   ||   val_loss:      0.248, val_acc: 0.918    
epoch:   2, train_loss:      0.221, train_acc: 0.922   ||   val_loss:      0.221, val_acc: 0.919    
epoch:   3, train_loss:      0.191, train_acc: 0.931   ||   val_loss:      0.191, val_acc: 0.932    
epoch:   4, train_loss:      0.173, train_acc: 0.939   ||   val_loss:      0.174, val_acc: 0.934    
 |████████████████████████████████████████| 100.00% [5/5 01:19<00:00]

In [70]:
# Third follow-up fit: 5 epochs at the smallest learning rate used in this notebook (1e-3).
config.lr = 1e-3
config.epochs = 5

# Re-compile the existing trainer so the next fit() picks up the new settings.
model.compile(
    epochs=config.epochs,
    lr=config.lr,
    wd=config.wd,
)

In [71]:
# Run the trainer's fit loop with the settings passed to the latest compile() call
# (5 epochs, lr = 1e-3).
model.fit()

epoch:   0, train_loss:      0.174, train_acc: 0.938   ||   val_loss:      0.176, val_acc: 0.934    
epoch:   1, train_loss:      0.169, train_acc: 0.940   ||   val_loss:      0.171, val_acc: 0.934    
epoch:   2, train_loss:      0.162, train_acc: 0.942   ||   val_loss:      0.164, val_acc: 0.937    
epoch:   3, train_loss:      0.153, train_acc: 0.946   ||   val_loss:      0.156, val_acc: 0.939    
epoch:   4, train_loss:      0.146, train_acc: 0.949   ||   val_loss:      0.150, val_acc: 0.938    
 |████████████████████████████████████████| 100.00% [5/5 01:19<00:00]

In [72]:
# Finish the active W&B run and print its history and summary.
# NOTE(review): execution counts jump from In [12] to In [66], and the history printed
# below contains four 5-epoch fits in a single run, so this output appears to come from
# a different kernel session than the first run above — re-run top to bottom to confirm.
wandb.finish()

0,1
epoch,▁▃▅▆█▁▃▅▆█▁▃▅▆█▁▃▅▆█
learning_rate,▁▁▁▁▁▁▁▁▁▁▃██▇▆▅▄▂▁▁▁▅▄▄▃▃▂▂▁▁▁▂▂▂▂▁▁▁▁▁
train_acc,▁▅▅▅▆▆▇▇▇█▇▇████████
train_loss,█▆▅▅▄▄▄▄▄▄▄▃▂▂▂▂▂▁▁▁▁▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▃▄▄▂▅▇▇█▆▇▇███████
val_loss,█▅▅▅▅▄▂▂▂▁▂▂▁▁▁▁▁▁▁▁

0,1
epoch,4.0
learning_rate,0.0
train_acc,0.94922
train_loss,0.1598
val_acc,0.9384
val_loss,0.15042
