# Konvoluční sítě pro klasifikaci

Úkolem cvičení je upravit tento notebook a dosáhnout co nejlepšího možného validačního skóre na datasetu CIFAR-10. Viz nápovědu a možné směry úprav v komentářích u jednotlivých buněk. Klasifikaci obrázků pomocí konvolučních sítí v PyTorch popisuje notebook [pytorch-convnets](lectures/pytorch-convnets.ipynb). 

In [54]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm

import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

import ans

## Načtení CIFAR10

In [55]:
# would better preprocessing or other augmentations improve the score?
# Training-time preprocessing: convert the image to a tensor, then normalize each channel.
# NOTE(review): these mean/std values look like the commonly used ImageNet statistics,
# not ones computed on CIFAR-10 — confirm this is intended.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225]),
])

In [56]:
# CIFAR-10 training split read from ./data (download=False, so nothing is fetched here);
# train_transform is applied to every image. The last line displays the dataset summary.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=train_transform)
train_dataset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [57]:
# augmentation can also be applied in test mode
# Validation-time preprocessing: the same tensor conversion and normalization as train_transform.
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225]),
])

In [58]:
# CIFAR-10 split selected by train=False, used here as the validation set; preprocessed with valid_transform
valid_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=valid_transform)

In [59]:
# would a different batch_size help?
# Training batches are reshuffled on every pass; validation batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=64, shuffle=False)

## Kritérium

In [60]:
# could a different criterion, e.g. an SVM-style loss, improve the score?
# Loss used for both training and validation; the last line displays it.
crit = nn.CrossEntropyLoss()
crit

CrossEntropyLoss()

## Definice konvoluční sítě

Síť definujte následující třídou `Convnet`. Není povoleno používat modely z `torchvision.models`, ať už v předtrénované, či nepředtrénované verzi, ani jinou formu transfer learningu.

In [133]:
class Convnet(nn.Module):
    """Small convolutional classifier with three conv blocks and three linear layers.

    Each conv block is ``Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d``; the first two
    blocks are followed by 2x2 max pooling. ``fc1`` takes 2048 flattened features, i.e.
    32 channels on an 8x8 map, which is what a 3x32x32 input produces after the two
    poolings. The network returns 10 unnormalized scores per sample.

    Dropout (p=0.5) follows the first two linear layers and is active only in training
    mode, so the output is deterministic after ``model.eval()``.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 8, 3, padding=1, bias=True)
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1, bias=True)
        self.batchnorm2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1, bias=True)
        self.batchnorm3 = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(2048, 1000)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc3 = nn.Linear(1000, 10)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: input batch; must flatten to 2048 features per sample after the conv part
                (a batch of 3x32x32 images does).

        Returns:
            Tensor with one row of 10 scores per input sample.
        """
        # conv block 1, then halve the spatial resolution
        x = self.conv1(x)
        x = F.relu(x)
        x = self.batchnorm1(x)
        x = F.max_pool2d(x, 2)

        # conv block 2, then halve the spatial resolution again
        x = self.conv2(x)
        x = F.relu(x)
        x = self.batchnorm2(x)
        x = F.max_pool2d(x, 2)

        # conv block 3 (no pooling)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.batchnorm3(x)

        # flatten everything except the batch dimension
        x = x.reshape(x.shape[0], -1)

        # F.dropout defaults to training=True, which would keep dropping activations in
        # eval mode; pass the module flag so that model.eval() really disables dropout
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.fc2(x)
        x = F.relu(x)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.fc3(x)
        return x

## Trénování a validace

In [134]:
# whenever a new network is created, the statistics are reset as well
model = Convnet()
stats = ans.Stats()

In [135]:
# Device the model is moved to; train_step later reads the device back from the model parameters.
device = 'cpu'
# last expression of the cell, so the module structure is displayed as the cell output
model.to(device)

Convnet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=2048, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=1000, bias=True)
  (fc3): Linear(in_features=1000, out_features=10, bias=True)
)

In [136]:
# List every learnable parameter with its dtype, shape and number of elements.
for name, par in model.named_parameters():
    print(name, par.dtype, par.shape, par.numel())

conv1.weight torch.float32 torch.Size([8, 3, 3, 3]) 216
conv1.bias torch.float32 torch.Size([8]) 8
batchnorm1.weight torch.float32 torch.Size([8]) 8
batchnorm1.bias torch.float32 torch.Size([8]) 8
conv2.weight torch.float32 torch.Size([16, 8, 3, 3]) 1152
conv2.bias torch.float32 torch.Size([16]) 16
batchnorm2.weight torch.float32 torch.Size([16]) 16
batchnorm2.bias torch.float32 torch.Size([16]) 16
conv3.weight torch.float32 torch.Size([32, 16, 3, 3]) 4608
conv3.bias torch.float32 torch.Size([32]) 32
batchnorm3.weight torch.float32 torch.Size([32]) 32
batchnorm3.bias torch.float32 torch.Size([32]) 32
fc1.weight torch.float32 torch.Size([1000, 2048]) 2048000
fc1.bias torch.float32 torch.Size([1000]) 1000
fc2.weight torch.float32 torch.Size([1000, 1000]) 1000000
fc2.bias torch.float32 torch.Size([1000]) 1000
fc3.weight torch.float32 torch.Size([10, 1000]) 10000
fc3.bias torch.float32 torch.Size([10]) 10


In [137]:
# with a new network, the optimizer's parameter list must be rebuilt as well
# SGD with momentum usually achieves better results
optimizer = torch.optim.Adam(model.parameters())
#optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)

In [138]:
# nasledujici kod libovolne upravujte

def train_step(model, batch, crit, optimizer, stats):
    """Run one optimization step on a single batch and record its loss and accuracy.

    Args:
        model: network to train; it is switched to training mode.
        batch: pair ``(inputs, targets)``; both are moved to the model's device.
        crit: callable ``crit(scores, targets)`` returning a scalar loss tensor.
        optimizer: optimizer holding the model parameters.
        stats: object with ``append_batch_stats(phase, loss=..., acc=...)``.

    Returns:
        None; the batch loss and accuracy are reported through ``stats``.
    """
    # training mode matters for layers such as Dropout or BatchNorm2d
    model.train()

    # move the data to wherever the model parameters live (cpu vs gpu)
    target_device = next(model.parameters()).device
    images, labels = (tensor.to(target_device) for tensor in batch)

    # forward pass and loss
    logits = model(images)
    batch_loss = crit(logits, labels)

    # drop gradients left over from previous iterations, backpropagate
    # (gradients end up in each parameter's `grad`), then update the parameters
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    # accuracy: fraction of samples whose highest-scoring class equals the target
    predicted = logits.max(dim=1)[1]
    num_correct = (predicted == labels).sum().float()
    batch_acc = num_correct / labels.shape[0]

    # record the statistics of this batch
    stats.append_batch_stats('train', loss=float(batch_loss), acc=float(batch_acc))

In [139]:
# what effect does the number of epochs have?
# try changing the learning rate gradually (optimizer.param_groups[0]['lr'] = ...)
# or with a scheduler (https://pytorch.org/docs/master/optim.html#how-to-adjust-learning-rate)

# multiply the learning rate by gamma at scheduler steps 4 and 10
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 10], gamma=0.01)

# `import tqdm` alone does not load the `tqdm.auto` submodule; import it explicitly so that
# `tqdm.auto.tqdm` below does not depend on some other package having imported it first
import tqdm.auto

for ep in range(20):
    stats.new_epoch()
    
    # training phase: one optimization step per batch, progress bar shows running averages
    pb = tqdm.auto.tqdm(train_loader, desc='epoch {:02d} train'.format(ep+1))
    for inputs, targets in pb:
        train_step(model, (inputs, targets), crit, optimizer, stats)
        pb.set_postfix(
            loss='{:.3f}'.format(stats.ravg('train', 'loss')),
            acc='{:.3f}'.format(stats.ravg('train', 'acc'))
        )
    # the scheduler is stepped once per epoch, after all training batches
    scheduler.step()
    
    # validation phase
    ans.validate(model, crit, valid_loader, stats)

epoch 01 train:   0%|          | 0/782 [00:00<?, ?it/s]

epoch 01 valid:   0%|          | 0/157 [00:00<?, ?it/s]

epoch 02 train:   0%|          | 0/782 [00:00<?, ?it/s]

epoch 02 valid:   0%|          | 0/157 [00:00<?, ?it/s]

epoch 03 train:   0%|          | 0/782 [00:00<?, ?it/s]

epoch 03 valid:   0%|          | 0/157 [00:00<?, ?it/s]

epoch 04 train:   0%|          | 0/782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [219]:
# presumably plots the recorded per-batch statistics aggregated over blocks of 10 batches — confirm against ans.Stats
stats.plot_by_batch(block_len=10)

In [220]:
# presumably plots the recorded statistics per epoch — confirm against ans.Stats
stats.plot_by_epoch()

In [260]:
# displays a summary table with the train/valid loss and accuracy of one epoch
# (presumably the best one — confirm against ans.Stats)
stats.best_results()

Epoch 04,loss,acc
train,0.872556,0.694413
valid,1.060872,0.633459


## Predikce na testovacím obrázku

In [104]:
# cv2.imread does not raise on a missing or unreadable file, it returns None;
# check explicitly so the failure is reported clearly instead of as a TypeError on indexing
rgb_test = cv2.imread('./data/happy-green-frog.jpg')
if rgb_test is None:
    raise FileNotFoundError("could not read image './data/happy-green-frog.jpg'")
# OpenCV loads colour images in BGR order; reverse the channel axis to get RGB
rgb_test = rgb_test[..., ::-1]
rgb_test.dtype, rgb_test.shape, rgb_test.min(), rgb_test.max()

(dtype('uint8'), (500, 500, 3), 0, 252)

In [105]:
# Resize the test image to 32x32 and hand it, together with the model, the validation
# transform and the dataset's class names, to the course helper that predicts and displays the result.
ans.predict_and_show(cv2.resize(rgb_test, (32, 32)), model, valid_transform, classes=train_dataset.classes)