## Data

In [1]:
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

In [2]:
# Directory that holds the downloaded archive and the extracted dataset files.
CIFAR10_ROOT = 'data/cifar10'

# Training partition of CIFAR-10 (fetched on first use); every image is passed
# through ToTensor() before it is handed to the model.
train_data = CIFAR10(
    CIFAR10_ROOT,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar10/cifar-10-python.tar.gz to data/cifar10


In [3]:
import torch

# Seed for the train/validation partition. Without an explicit generator,
# random_split draws from the global RNG, so a different 30% of the images would
# become the validation set on every kernel restart, making validation losses
# from different runs incomparable.
SPLIT_SEED = 0

# 70% of the training images are used for optimisation, 30% are held out.
train_split, valid_split = random_split(
    train_data,
    [0.7, 0.3],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [4]:
# Sanity check: number of samples in the training and validation subsets.
len(train_split), len(valid_split)

(35000, 15000)

In [5]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 128

# Training batches are reshuffled every epoch; the trailing partial batch is
# dropped so that every optimisation step sees exactly BATCH_SIZE samples.
train_loader = DataLoader(train_split, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# Validation must cover every held-out sample: with drop_last=True the final
# partial batch was silently discarded while the loss was still averaged over
# len(valid_split). Shuffling has no effect on an averaged evaluation metric,
# so it is turned off as well.
valid_loader = DataLoader(valid_split, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

## Pull model

In [6]:
import os

os.environ['REPO'] = 'pure-noise'
# Add username and pass.
os.environ['USER'] = ''
os.environ['PASS'] = ''

!git clone https://$USER:$PASS@github.com/seungjaeryanlee/$REPO.git

Cloning into 'pure-noise'...
remote: Enumerating objects: 59, done.[K
remote: Counting objects: 100% (59/59), done.[K
remote: Compressing objects: 100% (41/41), done.[K
remote: Total 59 (delta 21), reused 49 (delta 13), pack-reused 0[K
Unpacking objects: 100% (59/59), done.


In [7]:
# Switch the working directory to the cloned repository; the `networks` module
# imported further down is presumably resolved from here (verify against the repo).
%cd pure-noise

/content/pure-noise


## Model

In [8]:
# Model hyperparameters (passed to the WideResNet constructor and logged to wandb)
# Forwarded as the `depth` argument of WideResNet.
MODEL__WIDERESNET_DEPTH = 28
# Forwarded as the `widen_factor` argument of WideResNet.
MODEL__WIDERESNET_K = 10

In [9]:
from networks import WideResNet

# Constructor arguments: 10 output classes, with depth and widening factor
# taken from the model hyperparameters defined above.
wideresnet_kwargs = {
    "num_classes": 10,
    "depth": MODEL__WIDERESNET_DEPTH,
    "widen_factor": MODEL__WIDERESNET_K,
}
net = WideResNet(**wideresnet_kwargs)

In [10]:
def count_parameters(model):
    """Return the total number of scalar elements in the trainable parameters of `model`.

    Only parameters whose ``requires_grad`` flag is set are counted; frozen
    parameters are skipped. `model` must expose a ``parameters()`` iterator.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Number of trainable parameters in the network built above.
count_parameters(net)

36479194

In [11]:
# Move the model to the GPU. The training loop also calls .cuda() on every
# batch, so a CUDA device is required to run this notebook.
net = net.cuda()

## Wandb

In [14]:
# !pip install -r requirements.txt
!pip install wandb -qU

[K     |████████████████████████████████| 182 kB 13.6 MB/s 
[K     |████████████████████████████████| 174 kB 84.6 MB/s 
[K     |████████████████████████████████| 62 kB 1.5 MB/s 
[K     |████████████████████████████████| 173 kB 95.3 MB/s 
[K     |████████████████████████████████| 168 kB 95.3 MB/s 
[K     |████████████████████████████████| 168 kB 107.7 MB/s 
[K     |████████████████████████████████| 166 kB 92.5 MB/s 
[K     |████████████████████████████████| 166 kB 93.6 MB/s 
[K     |████████████████████████████████| 162 kB 71.4 MB/s 
[K     |████████████████████████████████| 162 kB 96.2 MB/s 
[K     |████████████████████████████████| 158 kB 90.4 MB/s 
[K     |████████████████████████████████| 157 kB 84.1 MB/s 
[K     |████████████████████████████████| 157 kB 88.4 MB/s 
[K     |████████████████████████████████| 157 kB 75.3 MB/s 
[K     |████████████████████████████████| 157 kB 89.8 MB/s 
[K     |████████████████████████████████| 157 kB 104.9 MB/s 
[K     |███████████████

In [16]:
import wandb
# Authenticate with Weights & Biases; per the captured output below, the API key
# is appended to the netrc file.
wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Optimizer

In [17]:
# Optimizer Hyperparameters (passed to optim.SGD and logged to wandb)
# Initial learning rate.
OPTIM__LR = 0.1
# SGD momentum coefficient.
OPTIM__MOMENTUM = 0.9
# Passed as SGD's `weight_decay` argument.
OPTIM__WEIGHT_DECAY = 2e-4

In [18]:
import torch.optim as optim

# SGD over every parameter of the network, configured from the optimizer
# hyperparameters defined above.
sgd_kwargs = {
    "lr": OPTIM__LR,
    "momentum": OPTIM__MOMENTUM,
    "weight_decay": OPTIM__WEIGHT_DECAY,
}
optimizer = optim.SGD(net.parameters(), **sgd_kwargs)

# With step_size=1, every call to scheduler.step() multiplies the learning rate
# by gamma (0.01). The training loop only calls step() at selected epochs, so
# this acts as a milestone-style schedule rather than a per-epoch decay.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.01)

## Prepare Training

In [19]:
# Training Hyperparameters
# Number of full passes over train_loader performed by the training loop.
N_EPOCH = 10

In [20]:
import torch.nn as nn

# Loss applied to the network outputs and labels in both the training and the
# validation phase of the loop below.
criterion = nn.CrossEntropyLoss()

## Training Loop

In [27]:
# Hyperparameters and run metadata recorded alongside the run.
run_config = {
    # Training
    "n_epoch": N_EPOCH,
    # Optimizer
    "optim__lr": OPTIM__LR,
    "optim__momentum": OPTIM__MOMENTUM,
    "optim__weight_decay": OPTIM__WEIGHT_DECAY,
    # Model
    "model__wideresnet_depth": MODEL__WIDERESNET_DEPTH,
    "model__wideresnet_k": MODEL__WIDERESNET_K,
}

# Start a run in the "pure-noise" project under an explicit display name
# (without `name`, wandb assigns a random one such as sunshine-lollypop-10).
# To log under a specific account, additionally pass entity="brianryan".
wandb.init(project="pure-noise", name="first-run")
wandb.config.update(run_config)

0,1
train_loss,▁
train_loss_batch,▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇█████████▁▁▁▁▁▁▁▁▁▁▁▂
valid_loss,▁

0,1
train_loss,1435.72693
train_loss_batch,4074878.25
valid_loss,2097.20605


In [None]:
import torch

# Counts optimisation steps across all epochs; used as the wandb x-axis.
global_step = 0
for epoch_i in range(N_EPOCH):
    # ---- Training phase ----
    net.train()
    train_loss = 0.0     # running sum of per-sample losses (plain Python float)
    n_train_seen = 0     # samples actually processed this epoch
    for inputs, labels in train_loader:
        inputs = inputs.float().cuda()
        labels = labels.cuda()

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar from the autograd graph. Accumulating the
        # tensor itself would keep a chain of graph nodes alive for the whole
        # epoch and hand a CUDA tensor to wandb.
        batch_loss = loss.item()
        wandb.log({"train_loss_batch": batch_loss}, step=global_step)

        # The criterion returns the batch mean, so weight it by the batch size
        # to obtain a per-sample sum.
        train_loss += batch_loss * len(labels)
        n_train_seen += len(labels)

        global_step += 1

    # ---- Validation phase ----
    net.eval()
    valid_loss = 0.0
    n_valid_seen = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs = inputs.float().cuda()
            labels = labels.cuda()

            outputs = net(inputs)
            valid_loss += criterion(outputs, labels).item() * len(labels)
            n_valid_seen += len(labels)

    # Average over the samples that were really seen. A loader created with
    # drop_last=True skips the final partial batch, so dividing by the size of
    # the split would under-report the mean loss. max(..., 1) guards against an
    # empty loader.
    wandb.log(
        {
            "train_loss": train_loss / max(n_train_seen, 1),
            "valid_loss": valid_loss / max(n_valid_seen, 1),
        },
        step=global_step,
    )

    # Learning-rate decay milestones: each step() multiplies the LR by the
    # scheduler's gamma.
    # NOTE(review): with N_EPOCH = 10 these epochs are never reached; the
    # schedule only takes effect if N_EPOCH is raised above 160 — confirm intent.
    if epoch_i in [160, 180]:
        scheduler.step()