# Konvoluční sítě pro klasifikaci

Úkolem cvičení je upravit tento notebook a dosáhnout co nejlepšího možného validačního skóre na datasetu CIFAR-10. Viz nápovědu a možné směry úprav v komentářích u jednotlivých buněk. Klasifikaci obrázků pomocí konvolučních sítí v PyTorch popisuje notebook [pytorch-convnets](lectures/pytorch-convnets.ipynb). 

In [5]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import tqdm.auto  # submodule is not loaded by `import tqdm`; the training loop uses tqdm.auto.tqdm

import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

import ans

## Načtení CIFAR10

In [6]:
# Training-time preprocessing: convert each image to a tensor, then resize it to 224x224.
# Exercise hint: would different preprocessing or other augmentations improve the score?
train_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Resize(size=(224, 224))]
)

In [7]:
# CIFAR-10 training split read from ./data; download=False, so the files must already be on disk.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=train_transform,
    download=False,
)
train_dataset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
           )

In [8]:
# Validation-time preprocessing: the same tensor conversion and 224x224 resize as for training.
# Exercise hint: augmentation can also be applied in test mode.
valid_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Resize(size=(224, 224))]
)

In [9]:
# CIFAR-10 test split (train=False), used in this notebook as the validation set.
valid_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=valid_transform,
    download=False,
)
valid_dataset

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
           )

In [10]:
# Mini-batch loaders: training samples are shuffled, validation samples keep their order.
# Exercise hint: would a different batch_size help?
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset, batch_size=64, shuffle=False
)

## Kritérium

In [11]:
# Training criterion: cross-entropy over the class scores.
# Exercise hint: could e.g. an SVM-style loss improve the score?
crit = nn.CrossEntropyLoss()
crit

CrossEntropyLoss()

## Definice konvoluční sítě

Síť definujte následující třídou `Convnet`. Není povoleno používat modely z `torchvision.models`, ať už v předtrénované, nebo nepředtrénované verzi, ani jinou formu transfer learningu.

In [21]:
class Convnet(nn.Module):
    """Plain 19-layer convolutional classifier with 10 outputs.

    The layout follows VGG-19: five stages of 3x3 convolutions (2, 2, 4, 4 and 4
    layers with 64, 128, 256, 512 and 512 channels), each convolution followed by
    a ReLU and each stage closed by a 2x2 max-pool. The feature map is then
    adaptively average-pooled to 7x7, flattened and passed through three fully
    connected layers (25088 -> 4096 -> 4096 -> 10). No dropout or normalisation
    layers are used.

    Sub-modules are registered as ``conv1``..``conv16``, ``relu1``..``relu18``,
    ``maxpool1``..``maxpool5``, ``avgpool`` and ``fc1``..``fc3``, in the order in
    which they are applied.
    """

    def __init__(self):
        super().__init__()

        # (output channels, number of 3x3 convolutions) for each of the five stages
        stage_plan = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

        conv_no = 0
        channels_in = 3
        for stage_no, (channels_out, n_convs) in enumerate(stage_plan, start=1):
            for _ in range(n_convs):
                conv_no += 1
                conv = nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1)
                self.add_module('conv{}'.format(conv_no), conv)
                self.add_module('relu{}'.format(conv_no), nn.ReLU(inplace=True))
                channels_in = channels_out
            # every stage ends by halving the spatial resolution
            self.add_module('maxpool{}'.format(stage_no), nn.MaxPool2d(kernel_size=2, stride=2))

        # fixed 7x7 output regardless of the incoming feature-map size
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # classifier head
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.relu17 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(4096, 4096)
        self.relu18 = nn.ReLU(inplace=True)
        self.fc3 = nn.Linear(4096, 10)

    def forward(self, x):
        """Return class scores of shape (N, 10) for a batch ``x`` of shape (N, 3, H, W).

        H and W must be at least 32 so that the five 2x2 poolings leave a
        non-empty feature map.
        """
        # The children were registered in exactly the order in which they are applied;
        # the only extra step is flattening the pooled features before the first FC layer.
        for layer_name, layer in self.named_children():
            if layer_name == 'fc1':
                x = torch.flatten(x, 1)
            x = layer(x)
        return x

## Trénování a validace

In [22]:
# whenever a new network is created, the statistics are reset as well
# (`stats` is the ans.Stats object that the training and plotting cells below write to and read from)
model = Convnet()
stats = ans.Stats()

In [23]:
# Prefer the GPU when one is available and fall back to the CPU otherwise.
# train_step() moves every batch to the model's device, so the training code needs no change.
# NOTE(review): ans.validate / ans.predict_and_show are assumed to handle the device the same way - confirm.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

Convnet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU(inplace=True)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU(inplace=True)
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU(inplace=True)
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU(inplace=True)
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU(inplace=True)
  (conv6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu6): ReLU(inplace=True)
  (conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu7): ReLU(inplace=True)
  (conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(

In [24]:
# List every parameter tensor of the network: name, dtype, shape and number of elements.
for name, par in model.named_parameters():
    print('{} {} {} {}'.format(name, par.dtype, par.shape, par.numel()))

conv1.weight torch.float32 torch.Size([64, 3, 3, 3]) 1728
conv1.bias torch.float32 torch.Size([64]) 64
conv2.weight torch.float32 torch.Size([64, 64, 3, 3]) 36864
conv2.bias torch.float32 torch.Size([64]) 64
conv3.weight torch.float32 torch.Size([128, 64, 3, 3]) 73728
conv3.bias torch.float32 torch.Size([128]) 128
conv4.weight torch.float32 torch.Size([128, 128, 3, 3]) 147456
conv4.bias torch.float32 torch.Size([128]) 128
conv5.weight torch.float32 torch.Size([256, 128, 3, 3]) 294912
conv5.bias torch.float32 torch.Size([256]) 256
conv6.weight torch.float32 torch.Size([256, 256, 3, 3]) 589824
conv6.bias torch.float32 torch.Size([256]) 256
conv7.weight torch.float32 torch.Size([256, 256, 3, 3]) 589824
conv7.bias torch.float32 torch.Size([256]) 256
conv8.weight torch.float32 torch.Size([256, 256, 3, 3]) 589824
conv8.bias torch.float32 torch.Size([256]) 256
conv9.weight torch.float32 torch.Size([512, 256, 3, 3]) 1179648
conv9.bias torch.float32 torch.Size([512]) 512
conv10.weight torch.flo

In [25]:
# The optimizer is built from the model's parameters, so it has to be recreated for every new network.
# Exercise hint: SGD with momentum usually achieves better results.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)

In [26]:
# feel free to modify the following code

def train_step(model, batch, crit, optimizer, stats):
    """Perform one optimisation step on a single mini-batch and record its metrics.

    Args:
        model: network to train; it is switched to training mode.
        batch: pair ``(inputs, targets)``; both are moved to the model's device.
        crit: loss function called as ``crit(scores, targets)``.
        optimizer: optimizer that updates the model's parameters.
        stats: object with ``append_batch_stats(phase, loss=..., acc=...)``;
            receives the batch loss and accuracy under the phase ``'train'``.

    Returns:
        None. The results are reported through ``stats`` only.
    """
    # training mode matters for layers such as Dropout or BatchNorm2d
    model.train()

    # keep the data on the same device (cpu vs gpu) as the model
    target_device = next(model.parameters()).device
    images, labels = batch
    images = images.to(target_device)
    labels = labels.to(target_device)

    # forward pass and loss
    logits = model(images)
    batch_loss = crit(logits, labels)

    # drop gradients left over from the previous iteration, then backpropagate;
    # the new gradients land in the `grad` attribute of each parameter
    optimizer.zero_grad()
    batch_loss.backward()

    # parameter update based on the `grad` attributes
    optimizer.step()

    # accuracy = share of samples whose highest-scoring class equals the target
    predicted = logits.max(dim=1)[1]
    n_correct = (predicted == labels).sum().float()
    batch_acc = n_correct / labels.shape[0]

    # record the metrics of this batch
    stats.append_batch_stats('train', loss=float(batch_loss), acc=float(batch_acc))

In [27]:
# Exercise hints: what influence does the number of epochs have?
# Try changing the learning rate over time (optimizer.param_groups[0]['lr'] = ...)
# or use a scheduler (https://pytorch.org/docs/master/optim.html#how-to-adjust-learning-rate).

# Weight decay is set on every parameter group, so it also covers an optimizer
# that was built with more than one group.
for param_group in optimizer.param_groups:
    param_group['weight_decay'] = 5e-4

for ep in range(2):
    stats.new_epoch()

    # training phase
    pb = tqdm.auto.tqdm(train_loader, desc='epoch {:02d} train'.format(ep+1))
    for inputs, targets in pb:
        train_step(model, (inputs, targets), crit, optimizer, stats)

        # show the current training loss/accuracy next to the progress bar
        # (`ravg` presumably returns a running average - confirm in ans.Stats)
        pb.set_postfix(
            loss='{:.3f}'.format(stats.ravg('train', 'loss')),
            acc='{:.3f}'.format(stats.ravg('train', 'acc'))
        )

    # validation phase
    ans.validate(model, crit, valid_loader, stats)

epoch 01 train:   0%|          | 0/782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Plot the recorded statistics per batch.
# NOTE(review): block_len=10 is presumably an averaging window over batches - confirm in ans.Stats.
stats.plot_by_batch(block_len=10)

In [19]:
# Plot the recorded statistics per epoch (going by the helper's name; see ans.Stats).
stats.plot_by_epoch()

In [20]:
# Show the best recorded epoch; the saved output lists loss and accuracy for 'train' and 'valid'.
stats.best_results()

Epoch 08,loss,acc
train,0.813185,0.717671
valid,0.925073,0.689291


## Predikce na testovacím obrázku

In [17]:
# Load the test picture. cv2.imread returns None instead of raising when the file cannot be
# read, so fail early with a clear message rather than with a TypeError further down.
test_image_path = './data/happy-green-frog.jpg'
bgr_test = cv2.imread(test_image_path)
if bgr_test is None:
    raise FileNotFoundError('cannot read test image: {}'.format(test_image_path))

# OpenCV loads colour images as BGR. cvtColor returns a contiguous RGB array, whereas a
# `[..., ::-1]` view has a negative stride, which torch.from_numpy does not accept.
rgb_test = cv2.cvtColor(bgr_test, cv2.COLOR_BGR2RGB)
rgb_test.dtype, rgb_test.shape, rgb_test.min(), rgb_test.max()

(dtype('uint8'), (500, 500, 3), 0, 252)

In [20]:
# Classify the test picture after shrinking it to 32x32 (the native CIFAR-10 image size).
# NOTE(review): assumes ans.predict_and_show applies `valid_transform` (tensor conversion + resize
# to 224x224) before calling the model and uses `classes` to label the prediction - confirm.
ans.predict_and_show(cv2.resize(rgb_test, (32, 32)), model, valid_transform, classes=train_dataset.classes)

None
