# Konvoluční sítě pro klasifikaci

Úkolem cvičení je upravit tento notebook a dosáhnout co nejlepšího možného validačního skóre na datasetu CIFAR-10. Viz nápovědu a možné směry úprav v komentářích u jednotlivých buněk. Klasifikaci obrázků pomocí konvolučních sítí v PyTorch popisuje notebook [pytorch-convnets](lectures/pytorch-convnets.ipynb). 

In [89]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm

import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

import ans

## Načtení CIFAR10

In [90]:
# Would better preprocessing or other augmentations improve the score?
# NOTE(review): the mean/std below look like the commonly used ImageNet channel
# statistics rather than values computed on CIFAR-10 -- confirm this is intended.
train_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
# Training pipeline: image -> tensor -> per-channel normalization.
train_transform = transforms.Compose([transforms.ToTensor(), train_normalize])

In [91]:
# Training split of CIFAR-10 read from ./data; download=False means nothing is
# fetched here. Every sample is passed through train_transform.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=False,
    transform=train_transform,
)
train_dataset  # bare expression so the notebook displays the dataset summary

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [92]:
# Augmentation can also be applied in test mode.
# NOTE(review): the mean/std below look like the commonly used ImageNet channel
# statistics rather than values computed on CIFAR-10 -- confirm this is intended.
valid_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
# Validation pipeline: image -> tensor -> per-channel normalization.
valid_transform = transforms.Compose([transforms.ToTensor(), valid_normalize])

In [93]:
# Validation data: the train=False split of CIFAR-10 read from ./data
# (download=False, nothing is fetched here), passed through valid_transform.
valid_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=False,
    transform=valid_transform,
)
valid_dataset  # bare expression so the notebook displays the dataset summary

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [94]:
# Would a different batch_size help?
# Only the training loader reshuffles its samples; validation keeps dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=64,
    shuffle=False,
)

## Kritérium

In [95]:
# Could a different criterion, e.g. an SVM-style loss, improve the score?
# The criterion is called as crit(scores, targets) in train_step.
crit = nn.CrossEntropyLoss()
crit

CrossEntropyLoss()

## Definice konvoluční sítě

Síť definujte následující třídou `Convnet`. Není povoleno používat modely z `torchvision.models`, ať už v předtrénované, či nepředtrénované verzi, ani jakoukoli jinou formu transfer learningu.

In [118]:
class Convnet(nn.Module):
    """Fully convolutional image classifier ending in global average pooling.

    The network is three stages of one spatial convolution followed by two
    1x1 convolutions. The first two stages end with a stride-2 pooling layer
    and dropout; the last stage produces one feature map per class, which is
    averaged over all spatial positions to give the class score.

    The layer order (and therefore every ``classifier.<index>`` parameter
    name) is fixed, so existing state dicts keep loading.

    Args:
        num_classes: number of scores produced per image. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.classifier = nn.Sequential(
            # stage 1: 5x5 conv + two 1x1 convs, then halve the resolution
            nn.Conv2d(3, 192, 5, 1, padding=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 160, 1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(160),
            nn.Conv2d(160, 96, 1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(96),
            nn.MaxPool2d(3, stride=2, padding=1),
            nn.Dropout(0.5),
            # stage 2: 5x5 conv + two 1x1 convs, then halve the resolution again
            nn.Conv2d(96, 192, 5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 192, 1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 192, 1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.AvgPool2d(3, stride=2, padding=1),
            nn.Dropout(0.5),
            # stage 3: 3x3 conv + 1x1 conv, then a 1x1 conv to one map per class
            nn.Conv2d(192, 192, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 192, 1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, num_classes, 1, stride=1, padding=0),
            # NOTE(review): this ReLU makes every returned score non-negative;
            # kept as in the original design -- consider whether it is wanted.
            nn.ReLU(inplace=True),
            # Global average pooling. For 32x32 inputs the feature map here is
            # 8x8, so this equals the former AvgPool2d(8); unlike a fixed
            # kernel it also reduces other input sizes to 1x1.
            nn.AdaptiveAvgPool2d(1),
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for images ``x``.

        ``x`` is a batch of 3-channel images, i.e. ``(batch, 3, H, W)``.
        """
        x = self.classifier(x)
        # (batch, num_classes, 1, 1) -> (batch, num_classes)
        return x.view(x.size(0), -1)

## Trénování a validace

In [119]:
# Whenever a new network is created, the collected statistics are reset as well.
model = Convnet()
stats = ans.Stats()

In [120]:
# Prefer the GPU, but fall back to the CPU so the notebook also runs on
# machines without CUDA (a hard-coded 'cuda' fails there).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

Convnet(
  (classifier): Sequential(
    (0): Conv2d(3, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(192, 160, kernel_size=(1, 1), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(160, 96, kernel_size=(1, 1), stride=(1, 1))
    (7): ReLU(inplace=True)
    (8): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (10): Dropout(p=0.5, inplace=False)
    (11): Conv2d(96, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (12): ReLU(inplace=True)
    (13): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1))
    (15): ReLU(inplace=True)
  

In [121]:
# List every parameter of the model: name, dtype, shape and element count.
for param_name, param in model.named_parameters():
    print(param_name, param.dtype, param.shape, param.numel())

classifier.0.weight torch.float32 torch.Size([192, 3, 5, 5]) 14400
classifier.0.bias torch.float32 torch.Size([192]) 192
classifier.2.weight torch.float32 torch.Size([192]) 192
classifier.2.bias torch.float32 torch.Size([192]) 192
classifier.3.weight torch.float32 torch.Size([160, 192, 1, 1]) 30720
classifier.3.bias torch.float32 torch.Size([160]) 160
classifier.5.weight torch.float32 torch.Size([160]) 160
classifier.5.bias torch.float32 torch.Size([160]) 160
classifier.6.weight torch.float32 torch.Size([96, 160, 1, 1]) 15360
classifier.6.bias torch.float32 torch.Size([96]) 96
classifier.8.weight torch.float32 torch.Size([96]) 96
classifier.8.bias torch.float32 torch.Size([96]) 96
classifier.11.weight torch.float32 torch.Size([192, 96, 5, 5]) 460800
classifier.11.bias torch.float32 torch.Size([192]) 192
classifier.13.weight torch.float32 torch.Size([192]) 192
classifier.13.bias torch.float32 torch.Size([192]) 192
classifier.14.weight torch.float32 torch.Size([192, 192, 1, 1]) 36864
cla

In [122]:
# A new network also needs a fresh optimizer bound to its parameter list.
# SGD with momentum usually achieves better results (alternative kept below).
optimizer = torch.optim.Adam(model.parameters())
#optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)

In [123]:
# nasledujici kod libovolne upravujte

def train_step(model, batch, crit, optimizer, stats):
    """Run one optimization step on a single batch and record its metrics.

    Args:
        model: network to train; it is switched to training mode here.
        batch: pair ``(inputs, targets)``; both are moved to the model's device.
        crit: callable computing the loss as ``crit(scores, targets)``.
        optimizer: optimizer whose ``zero_grad()`` and ``step()`` are called.
        stats: object receiving
            ``append_batch_stats('train', loss=..., acc=...)``.

    Returns:
        None. Loss and accuracy of the batch are reported only through ``stats``.
    """
    # training mode affects layers such as Dropout or BatchNorm2d
    model.train()

    # keep the data on the same device (cpu vs gpu) as the model parameters
    target_device = next(model.parameters()).device
    images, labels = (tensor.to(target_device) for tensor in batch)

    # drop gradients left over from previous iterations before computing new ones
    optimizer.zero_grad()

    # forward pass and loss
    logits = model(images)
    batch_loss = crit(logits, labels)

    # backward pass stores gradients in each parameter's `grad` attribute,
    # which the optimizer then uses to update the parameters
    batch_loss.backward()
    optimizer.step()

    # accuracy: fraction of samples whose top-scoring class equals the target
    predicted = logits.max(dim=1)[1]
    n_correct = (predicted == labels).sum()
    batch_acc = n_correct.float() / labels.shape[0]

    # update the running statistics
    stats.append_batch_stats('train', loss=float(batch_loss), acc=float(batch_acc))

In [124]:
# How does the number of epochs affect the result?
# The learning rate can be changed by hand (optimizer.param_groups[0]['lr'] = ...)
# or with a scheduler (https://pytorch.org/docs/master/optim.html#how-to-adjust-learning-rate).

# `import tqdm` does not by itself guarantee that the `tqdm.auto` submodule is
# loaded, so import it explicitly before using `tqdm.auto.tqdm` below.
import tqdm.auto

num_epochs = 100

# NOTE(review): MultiStepLR multiplies the learning rate by `gamma` at every
# milestone. With gamma=0.01 and 18 milestones the rate shrinks by a factor of
# 100 each time and is effectively zero for most of the 100 epochs -- confirm
# this schedule is intended (a larger gamma or fewer milestones may be meant).
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95], gamma=0.01)

for ep in range(num_epochs):
    stats.new_epoch()

    # training phase: one optimizer step per batch, progress bar shows running averages
    pb = tqdm.auto.tqdm(train_loader, desc='epoch {:02d} train'.format(ep+1))
    for inputs, targets in pb:
        train_step(model, (inputs, targets), crit, optimizer, stats)
        pb.set_postfix(
            loss='{:.3f}'.format(stats.ravg('train', 'loss')),
            acc='{:.3f}'.format(stats.ravg('train', 'acc'))
        )
    # advance the learning-rate schedule once per epoch, after the optimizer updates
    scheduler.step()

    # validation phase
    ans.validate(model, crit, valid_loader, stats)

epoch 01 train:   0%|          | 0/782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [69]:
# Plot the collected statistics per batch; presumably block_len=10 groups
# 10 consecutive batches per plotted point -- verify against ans.Stats.
stats.plot_by_batch(block_len=10)

In [70]:
# Plot the collected statistics per epoch.
stats.plot_by_epoch()

In [112]:
# Show the best epoch together with its train/valid loss and accuracy.
stats.best_results()

Epoch 05,loss,acc
train,0.511154,0.826027
valid,0.524774,0.818471


## Predikce na testovacím obrázku

In [115]:
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface as an opaque TypeError on indexing.
test_image_path = './data/happy-green-frog.jpg'
bgr_test = cv2.imread(test_image_path)
if bgr_test is None:
    raise FileNotFoundError('cannot read image: {}'.format(test_image_path))

# OpenCV loads colour images in BGR channel order; reverse the last axis to get RGB.
rgb_test = bgr_test[..., ::-1]
rgb_test.dtype, rgb_test.shape, rgb_test.min(), rgb_test.max()

(dtype('uint8'), (500, 500, 3), 0, 252)

In [117]:
# Resize the test image to 32x32 and pass it, together with the model, the
# validation transform and the dataset's class names, to ans.predict_and_show.
ans.predict_and_show(cv2.resize(rgb_test, (32, 32)), model, valid_transform, classes=train_dataset.classes)