# Pytorch CIFAR

In [1]:
import os
import random

import wandb

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from fastprogress import progress_bar, master_bar

In [2]:
PROJECT = "apple_m1_pro"
ENTITY = "tcapelle"
GROUP = 'pytorch'

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2 ** 32 - 1)
np.random.seed(hash("improves reproducibility") % 2 ** 32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2 ** 32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2 ** 32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
wandb.login()

run = wandb.init(project=PROJECT,
                 entity=ENTITY,
                 group=GROUP, 
                 config = {
                    "lr": 0.005,
                    "epochs": 5,
                    "batch_size": 128,
                    "loss_function": "CrossEntropyLoss",
                    "architecture": "cnn",
                    "dataset": "CIFAR-10",
                })
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtcapelle[0m (use `wandb login --relogin` to force relogin)


In [5]:
transform = transforms.Compose(
    [transforms.ToTensor(), 
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

def get_data(train=True, sample=True):
    ds = torchvision.datasets.CIFAR10(
        root="./data", train=train, download=True, transform=transform)

    if sample and train:
        ds.data = ds.data[::5]

    loader = torch.utils.data.DataLoader(
        ds, batch_size=config.batch_size*(2-train), shuffle=train, num_workers=2)
    return loader

train_loader, test_loader = get_data(True), get_data(False)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
criterion = nn.CrossEntropyLoss()

In [7]:
x, y = next(iter(train_loader))
print(f'Input shape: {x.shape}, len loader: {len(train_loader)}')

Input shape: torch.Size([128, 3, 32, 32]), len loader: 391


In [8]:
x.shape

torch.Size([128, 3, 32, 32])

In [9]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
model = nn.Sequential(
    nn.Conv2d(3, 32, 3),
    nn.ReLU(),
    nn.Conv2d(32, 32, 3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 32, 3),
    nn.ReLU(),
    nn.Conv2d(32, 32, 3),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(32, 128),
    nn.ReLU(),
    nn.Linear(128, 32),
    nn.ReLU(),
    nn.Linear(32, len(classes))
).to(device)

In [12]:
criterion(model(x.to(device)), y.to(device))

tensor(2.3079, device='cuda:0', grad_fn=<NllLossBackward>)

In [13]:
class Learner:
    "A Wrapper around model and data"
    def __init__(self, train_loader, test_loader, model, criterion):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.model = model.to(device)
        self.criterion = criterion
        self.mb = None
        self.batch_ct = 0
        self.example_ct = 0
        
        
    def one_batch_train(self, images, labels):
        "Do one batch train"
        images, labels = images.to(device), labels.to(device)

        # zero the parameter gradients
        self.optimizer.zero_grad()
        
        # Forward pass ➡
        outputs = self.model(images)
        loss = self.criterion(outputs, labels)

        # Backward pass ⬅
        loss.backward()

        # Step with optimizer
        self.optimizer.step()

        return loss
    
    def one_epoch_train(self):
        "Do one epoch train"
        self.model.train()
        for images, labels in progress_bar(self.train_loader, parent=self.mb):
            loss = self.one_batch_train(images, labels)
            self.batch_ct += 1
            self.example_ct += len(labels)
    
            # Report metrics every 25th batch
            if ((self.batch_ct + 1) % 25) == 0:
                wandb.log({"epoch": self.epoch, "loss": float(loss)})
                
            self.mb.child.comment = f'train_loss={loss.item():.3f}'
    
    
    def one_batch_test(self, images, labels):
        "Do one batch test"
        images, labels = images.to(device), labels.to(device)

        # Forward pass ➡
        outputs = self.model(images)
        loss = self.criterion(outputs, labels)
        
        _, predicted = torch.max(outputs.data, 1)
        
        correct = (predicted == labels).sum().item()
        
        return loss, correct
    
    def one_epoch_test(self):
        self.model.eval()
        
        # Run the model on some test examples
        with torch.no_grad():
            correct_total, loss_test = 0, 0
            for images, labels in progress_bar(test_loader, parent=self.mb):
                loss, correct = self.one_batch_test(images, labels)
                correct_total += correct
                wandb.log({"test_loss": float(loss)})
        
        wandb.log({"test_accuracy": correct_total / len(test_loader)})

    
    def save(self):
        # save and log last mdoel to wandb
        torch.save(self.model.state_dict(), 'model.pt')
        wandb.save('model.pt')
    
    def fit(self, epochs, lr=config.lr):
        
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.mb = master_bar(range(epochs))
        
        wandb.watch(self.model, self.criterion, log='all', log_freq=10)
                          
        for self.epoch in self.mb:
            self.one_epoch_train()
            self.one_epoch_test()
        
        self.save()

In [14]:
learn = Learner(train_loader, test_loader, model, criterion)

In [15]:
learn.fit(config.epochs)

In [16]:
wandb.finish()

VBox(children=(Label(value=' 0.15MB of 0.15MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆████████
loss,█▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▄▃▃▃▃▂▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▃
test_accuracy,▁▃▆▇█
test_loss,▇▆▆▇█▇▇▇▅▆▅▆▅▆▅▅▄▃▃▃▄▄▃▃▃▂▃▂▃▃▃▂▁▂▂▂▂▃▃▁

0,1
epoch,4.0
loss,1.31365
test_accuracy,142.0
test_loss,1.10227
