# Laborismertető
Az Irányítástechnika és képfeldolgozás 1 laboratórium jelen mérése a mély neurális hálók segítségével megvalósított objektumszegmentálás kérdéskörét járja végig.

Jelen fájl egyszerre szolgál mérésként és jegyzőkönyvként, a feltett kérdéseket ide válaszoljátok meg.


# Előkészítés
A következő cellák az adatok előkészítését és a szükséges kisegítő osztályokat tartalmazzák.

## Package-ek importálása

In [5]:
# imports
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image

import transforms as ext_transforms
import utils

from utils import display_batch, calc_class_weights, setup_IoU

import os
import random
from collections import OrderedDict




import data_utils

## Konfiguráció
Az `args` dict foglalja össze az összes konfigurációs (hiper)paramétert

#### Megjegyzés: ebben a cellában nem kell semmit implementálnotok ahhoz, hogy a kód lefusson. Azonban későbbi feladatok során szükség lehet a paraméterek megváltoztatására, azt itt tehetitek meg.

In [6]:
# Central configuration dict of the notebook; edit the values here when a
# later task asks for different (hyper)parameters.
args = {
        "mode": "train", # choices=['train', 'test', 'full']
        "resume" : False,
        "batch_size" : 10,
        "epochs" : 300,
        "lr" : 5e-4,
        "lr_decay" : .1,
        "lr_decay_epochs": 100,
        "weight_decay": 2e-4,
        "dataset_dir" : "data/CamVid", # must be an existing directory (asserted below)
        "height" : 360, # target height handed to transforms.Resize in load_dataset
        "width" : 480, # target width handed to transforms.Resize in load_dataset
        "weighting" : "Enet", # choices=['enet', 'mfb', 'none']; NOTE(review): value is capitalised, presumably compared case-insensitively by the consumer - confirm

        "ignore_unlabeled" : False, # The unlabeled class is not ignored
        "workers" : 4,
        "print_step" : False, # prints loss every step if turned on
        "imshow_batch" : False, # Displays batch images when loading the dataset and making predictions
        "device" : "cuda",

        "name": "ENet",
        "save_dir" : "save" # must be an existing directory (asserted below)

     }

# Fail fast if the dataset directory doesn't exist
# NOTE(review): assert statements are skipped when Python runs with -O.
assert os.path.isdir(args["dataset_dir"]), f'The directory {args["dataset_dir"]} does not exist.'
# Fail fast if the saving directory doesn't exist
assert os.path.isdir(args["save_dir"]), f'The directory {args["save_dir"]} does not exist.'

## CamVid adatbázis

Az alábbi cella a CamVid adatbázis használatához szükséges rutinokat tartalmazza.

### Feladatok:
- [ ] Az `exec_transform` függvény kiegészítése
- [ ] Horizontal random flip operátor implementálása
- [ ] A random transzformációk reprodukálhatóságának megvalósítása _(tipp: random seed)_

### Kérdések:
- [ ] Mire kell figyelni szegmentálás során megvalósított adataugmentáció esetében?
- [ ] Hány véletlenszám-generátort használunk? Miért annyit?

In [10]:
class CamVid(data.Dataset):
    """Dataset wrapper for CamVid, laid out as in
    https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid.

    Exactly one split is indexed per instance. Its file lists are stored on
    the instance as ``<mode>_data`` and ``<mode>_labels`` (for example
    ``train_data`` / ``train_labels``), where ``<mode>`` is the lower-cased
    ``mode`` argument.

    Keyword arguments:
    - root_dir (``string``): Root directory path.
    - mode (``string``): Which split to load: 'train', 'val' or 'test'
    (compared case-insensitively).
    - transform (``callable``, optional): Applied to the loaded image.
    Default: None.
    - label_transform (``callable``, optional): Applied to the loaded label.
    Default: None.
    - loader (``callable``, optional): Called as ``loader(data_path,
    label_path)`` and expected to return an ``(image, label)`` pair.
    Default: ``data_utils.pil_loader``.

    """
    # Image / annotation folder names per split, relative to ``root_dir``
    train_folder = 'train'
    train_lbl_folder = 'trainannot'

    val_folder = 'val'
    val_lbl_folder = 'valannot'

    test_folder = 'test'
    test_lbl_folder = 'testannot'

    # Only files with this extension are collected
    img_extension = '.png'

    # Default encoding for pixel value, class name, and class color
    color_encoding = OrderedDict([
        ('sky', (128, 128, 128)),
        ('building', (128, 0, 0)),
        ('pole', (192, 192, 128)),
        ('road_marking', (255, 69, 0)),
        ('road', (128, 64, 128)),
        ('pavement', (60, 40, 222)),
        ('tree', (128, 128, 0)),
        ('sign_symbol', (192, 128, 128)),
        ('fence', (64, 64, 128)),
        ('car', (64, 0, 128)),
        ('pedestrian', (64, 64, 0)),
        ('bicyclist', (0, 128, 192)),
        ('unlabeled', (0, 0, 0))
    ])

    # Split names accepted in ``mode``
    _modes = ('train', 'val', 'test')

    def __init__(self,
                 root_dir,
                 mode='train',
                 transform=None,
                 label_transform=None,
                 loader=data_utils.pil_loader):
        self.root_dir = root_dir
        self.mode = mode
        self.transform = transform
        self.label_transform = label_transform
        self.loader = loader

        split = self._split()

        # Collect the image paths first, then the annotation paths, and keep
        # them under the split-specific attribute names.
        image_dir = os.path.join(root_dir, getattr(self, split + '_folder'))
        setattr(self, split + '_data',
                data_utils.get_files(image_dir,
                                     extension_filter=self.img_extension))

        label_dir = os.path.join(root_dir,
                                 getattr(self, split + '_lbl_folder'))
        setattr(self, split + '_labels',
                data_utils.get_files(label_dir,
                                     extension_filter=self.img_extension))

    def _split(self):
        """Return the lower-cased ``mode``; raise ``RuntimeError`` if it is
        not one of the supported splits."""
        split = self.mode.lower()
        if split not in self._modes:
            raise RuntimeError("Unexpected dataset mode. "
                               "Supported modes are: train, val and test")
        return split

    def __getitem__(self, index):
        """
        Args:
        - index (``int``): index of the item in the dataset

        Returns:
        The ``(image, label)`` pair produced by ``exec_transform`` for the
        files at position ``index`` of the active split.

        """
        split = self._split()
        data_path = getattr(self, split + '_data')[index]
        label_path = getattr(self, split + '_labels')[index]

        sample = self.loader(data_path, label_path)
        img, label = sample

        return self.exec_transform(img, label)

    def __len__(self):
        """Returns the number of images in the active split."""
        return len(getattr(self, self._split() + '_data'))

    def exec_transform(self, img, label):
        """Apply ``transform`` to ``img`` and ``label_transform`` to
        ``label`` (each only when set) and return the pair.

        The seeding and the random horizontal flip are still open exercise
        items (see the todo markers).
        """
        # todo: Make a seed with numpy generator
        # todo: apply this seed to img transforms


        if self.transform is not None:
            img = self.transform(img)

        # todo: apply this seed to label transforms


        if self.label_transform is not None:
            label = self.label_transform(label)

        # todo: Random horizontal flip (with probability 0.5)


        return img, label

## Adatbázis betöltése
Az alábbi cella felel a `DataLoader`-ek létrehozásáért.

### Feladatok:
- [ ] Hozzátok létre az adatbázisokat (train, test, validation) és a hozzájuk tartozó `DataLoader` objektumokat

### Kérdések:
- [ ] Mi a különbség az egyes `DataLoader` objektumok között (ha van)? Miért?


In [17]:
def load_dataset():
    """Create the CamVid data loaders and the auxiliary class information.

    All settings are read from the module-level ``args`` dict
    (``dataset_dir``, ``save_dir``, ``height``, ``width``; ``args`` is also
    forwarded to ``display_batch`` and ``calc_class_weights``).

    Returns:
        tuple: ``(train_loader, val_loader, test_loader, class_weights,
        class_encoding)``.

    Note:
        The dataset / loader construction is still an exercise placeholder
        (see the todo block); the function cannot run until it is filled in.
    """
    print("\nLoading dataset...\n")


    print("Dataset directory:", args["dataset_dir"])
    print("Save directory:", args["save_dir"])

    # Images: resize to the configured size and convert to a tensor
    image_transform = transforms.Compose(
        [transforms.Resize((args["height"], args["width"])),
         transforms.ToTensor()])

    # Labels: nearest-neighbour resize so that no new (interpolated) label
    # values are created, then convert to a long tensor
    label_transform = transforms.Compose([
        transforms.Resize((args["height"], args["width"]), Image.NEAREST),
        ext_transforms.PILToLongTensor()
    ])

    # todo: Create datasets and dataloaders
    # Get selected dataset
    # Load the training set as tensors
    train_set = CamVid()
    train_loader = None

    # Load the validation set as tensors
    val_set = CamVid()
    val_loader = None

    # Load the test set as tensors
    test_set = CamVid()
    test_loader = None

    # todo: end

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Remove the road_marking class from the CamVid dataset as it's merged
    # with the road class. ``color_encoding`` is a class-level dict shared by
    # every CamVid instance, so the entry is already gone when this function
    # runs a second time (e.g. the notebook cell is re-executed); ``pop`` with
    # a default keeps the removal idempotent instead of raising KeyError.
    class_encoding.pop('road_marking', None)

    # Print information for debugging
    print("Number of classes to predict:", len(class_encoding))
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    display_batch(args, class_encoding, test_loader, train_loader)

    class_weights = calc_class_weights(args, class_encoding, train_loader)

    return train_loader, val_loader, test_loader, class_weights, class_encoding


## Epoch menedzsment
A `Runner` osztály a tanító/tesztelési folyamat egy epochját implementálja.

### Feladatok:
- [ ] Valósítsátok meg a tanítás előreterjesztési lépését a `train_pass` függvényben
- [ ] Valósítsátok meg a tesztelés előreterjesztési lépését a `test_pass` függvényben

### Kérdések:
- [ ] Mi a különbség a két megvalósított függvényben (ha van)? Miért?

In [12]:


class Runner:
    """Performs the training of ``model`` given a training dataset data
    loader, the optimizer, and the loss criterion.

    Keyword arguments:
    - model (``nn.Module``): the model instance to train.
    - data_loader (``Dataloader``): Provides single or multi-process
    iterators over the dataset.
    - criterion (``Optimizer``): The loss criterion.
    - metric (```Metric``): An instance specifying the metric to return.
    - device (``torch.device``): An object representing the device on which
    tensors are allocated.
    - is_train (```bool```): the model mode (True = train, False = validation OR test)
    - optim (``Optimizer``): The optimization algorithm.

    """

    def __init__(self, model, data_loader, criterion, metric, device, is_train=True, optim=None):
        self.model = model
        self.data_loader = data_loader
        self.optim = optim
        self.criterion = criterion
        self.metric = metric
        self.device = device

        self.is_train = is_train

        if self.optim is None:
            self.is_train = False

    def run_epoch(self, iteration_loss=False):
        """Runs an epoch of training.

        Keyword arguments:
        - iteration_loss (``bool``, optional): Prints loss at every step.

        Returns:
        - The epoch loss (float).

        """
        self.set_model_mode()


        epoch_loss = 0.0
        self.metric.reset()

        for step, batch_data in enumerate(self.data_loader):
            # Get the inputs and labels
            inputs = batch_data[0].to(self.device)
            labels = batch_data[1].to(self.device)

            if self.is_train:
                loss, outputs = self.train_pass(inputs, labels)
            else:
                loss, outputs = self.test_pass(inputs, labels)


            """Keep track of loss for current epoch"""
            epoch_loss += None

            # Keep track of the evaluation metric
            self.metric.add(outputs.detach(), labels.detach())

            if iteration_loss:
                print("[Step: %d] Iteration loss: %.4f" % (step, loss.item()))

        return epoch_loss / len(self.data_loader), self.metric.value()

    def set_model_mode(self):
        if self.is_train:
            self.model.train()
        else:
            self.model.eval()

    def test_pass(self, inputs, labels):
        loss, outputs = None, None

        # todo: the below two steps should be within a clause
        # todo: here something is needed that is specific to the test pass
            #Forward propagation


            #Loss computation


        return loss, outputs

    def train_pass(self, inputs, labels):
        loss, outputs = None, None
        # todo: Forward propagation


        # todo: Loss computation


        # todo: Backpropagation


        return loss, outputs

# ENet
A következő cellák tartalmazzák az ENet architektúrát.

## Konvolúciós blokk
Az ENet komponenseinek alapját képező, konvolúciót, Batch Normalization-t és aktivációt tartalmazó osztály

### Feladatok:
- [ ] Implementáljátok a konvolúciót, Batch Normalization-t és aktivációt tartalmazó osztályt

### Kérdések:
- [ ] Milyen sorrendben hívjuk meg a Batch Normalization-t és a (p)ReLU aktivációt? Van ennek jelentősége?
- [ ] Miért nem jó jelen esetben az aktivációt funkcionális alakban meghívni (`torch.nn.functional`) ? _Tipp: a különbséget csak PReLU esetében lehet tapasztalni, annak melyik tulajdonsága okozza a problémát?_

In [14]:
class ConvBlock(nn.Module):
    """Convolution + batch normalization + activation block (exercise
    skeleton).

    No layers are created yet and ``forward`` returns ``None``; both parts
    are left as todo items.
    """

    # NOTE(review): the default ``activation=nn.ReLU()`` is evaluated once,
    # when the function is defined, so every ConvBlock built without an
    # explicit activation receives the same module instance.
    def __init__(self, in_ch, out_ch, kernel, stride=1, padding=0, dilation=1, bias=True, activation=nn.ReLU()) -> None:
        super().__init__()

        # todo: Create 2d conv, batch normalization, and an activation layer

    def forward(self, x):
        """Forward pass; not implemented yet, currently returns ``None``."""
        pass
        # todo: Define the forward pass

        return None

## Bemeneti modul
A bemenetet alakítja megfelelő dimenzionalitásúvá

### Megjegyzés: ebben a cellában nincs feladatotok.

In [None]:
class InitialBlock(nn.Module):
    """The initial block is composed of two branches:
    1. a main branch which performs a regular 3x3 convolution with stride 2;
    2. an extension branch which performs 3x3 max-pooling with stride 2.

    Doing both operations in parallel and concatenating their results
    allows for efficient downsampling and expansion. The max-pooling branch
    passes all ``in_channels`` input channels through, so the convolution
    produces the remaining ``out_channels - in_channels`` feature maps (for
    an RGB input and 16 output channels: 13 + 3).

    Keyword arguments:
    - in_channels (int): the number of input channels.
    - out_channels (int): the number output channels (after concatenation).
    - bias (bool, optional): Adds a learnable bias to the convolution if
    ``True``. Default: False.
    - relu (bool, optional): When ``True`` ReLU is used as the activation
    function; otherwise, PReLU is used. Default: True.

    """

    def __init__(self, in_channels, out_channels, bias=False, relu=True):
        super().__init__()

        activation = nn.ReLU if relu else nn.PReLU

        # Main branch - the pooled input contributes ``in_channels`` maps to
        # the concatenation, so the convolution supplies the rest. (The
        # previous hard-coded ``- 3`` was only right for 3-channel inputs.)
        self.main_branch = nn.Conv2d(in_channels, out_channels - in_channels, kernel_size=3, stride=2, padding=1,
                                     bias=bias)

        # Extension branch
        self.ext_branch = nn.MaxPool2d(3, stride=2, padding=1)

        # Initialize batch normalization to be used after concatenation
        self.batch_norm = nn.BatchNorm2d(out_channels)

        # Activation (ReLU or PReLU) to apply after concatenating the branches
        self.out_activation = activation()

    def forward(self, x):
        """Downsample ``x`` by two and return ``out_channels`` feature maps."""
        main = self.main_branch(x)
        ext = self.ext_branch(x)

        # Concatenate branches along the channel dimension
        out = torch.cat((main, ext), 1)

        # Apply batch normalization
        out = self.batch_norm(out)

        return self.out_activation(out)

## RegularBottleneck
Az ENet alapvető magasszintű építőeleme.

### Feladatok:
- [ ] Implementáljatok aszimmetrikus konvolúciót (`ext_conv2`)
- [ ] Írjátok meg a `forward` függvényt

### Kérdések:
- [ ] Mi az előnye az aszimmetrikus konvolúciónak? El tudtok képzelni olyan helyzetet, amikor nem lehet megvalósítani?

In [None]:
class RegularBottleneck(nn.Module):
    """Regular bottlenecks are the main building block of ENet (exercise
    skeleton: the middle convolution and ``forward`` are still todo items).

    Main branch:
    1. Shortcut connection.

    Extension branch:
    1. 1x1 convolution which decreases the number of channels by
    ``internal_ratio``, also called a projection;
    2. regular, dilated or asymmetric convolution (``ext_conv2``, currently a
    ``None`` placeholder);
    3. 1x1 convolution which increases the number of channels back to
    ``channels``, also called an expansion;
    4. dropout as a regularizer.

    Keyword arguments:
    - channels (int): the number of input and output channels.
    - internal_ratio (int, optional): a scale factor applied to
    ``channels`` used to compute the number of
    channels after the projection. eg. given ``channels`` equal to 128 and
    internal_ratio equal to 2 the number of channels after the projection
    is 64. Must satisfy ``1 < internal_ratio <= channels``. Default: 4.
    - kernel_size (int, optional): the kernel size of the filters used in
    the convolution layer described above in item 2 of the extension
    branch. Default: 3.
    - padding (int, optional): zero-padding added to both sides of the
    input. Default: 0.
    - dilation (int, optional): spacing between kernel elements for the
    convolution described in item 2 of the extension branch. Default: 1.
    - asymmetric (bool, optional): flags if the convolution described in
    item 2 of the extension branch is asymmetric or not. Default: False.
    - dropout_prob (float, optional): probability of an element to be
    zeroed. Default: 0 (no dropout).
    - bias (bool, optional): Adds a learnable bias to the output if
    ``True``. Default: False.
    - relu (bool, optional): When ``True`` ReLU is used as the activation
    function; otherwise, PReLU is used. Default: True.

    Raises:
    - RuntimeError: if ``internal_ratio`` is out of range.

    """

    def __init__(self, channels, internal_ratio=4, kernel_size=3, padding=0, dilation=1, asymmetric=False,
                 dropout_prob=0., bias=False, relu=True):
        super().__init__()

        # Check that the internal_ratio parameter is within the accepted range.
        # NOTE: the message names the interval [1, channels], but the
        # condition also rejects internal_ratio == 1.
        if internal_ratio <= 1 or internal_ratio > channels:
            raise RuntimeError("Value out of range. Expected value in the "
                               "interval [1, {0}], got internal_scale={1}."
                               .format(channels, internal_ratio))

        internal_channels = channels // internal_ratio

        # The class (not an instance) is selected here; each layer below gets
        # its own freshly created activation module.
        if relu:
            activation = nn.ReLU
        else:
            activation = nn.PReLU

        # Main branch - shortcut connection

        # Extension branch - 1x1 convolution, followed by a regular, dilated or
        # asymmetric convolution, followed by another 1x1 convolution, and,
        # finally, a regularizer (spatial dropout). Number of channels is constant.
        stride = 1

        # 1x1 projection convolution
        self.ext_conv1 = ConvBlock(channels, internal_channels, 1, stride, 0, 1, bias, activation())


        # todo: Write the (asymmetric convolution)
        # If the convolution is asymmetric we split the main convolution in
        # two. Eg. for a 5x5 asymmetric convolution we have two convolution:
        # the first is 5x1 and the second is 1x5.
        # In the asymmetric case, padding also needs to be a tuple of two
        # (the item corresponding to "1" in the kernel is always 0)
        if asymmetric:
            self.ext_conv2 = None
        else:
            self.ext_conv2 = None

        # 1x1 expansion convolution
        self.ext_conv3 = ConvBlock(internal_channels, channels, 1, stride, 0, 1, bias, activation())

        self.ext_regul = nn.Dropout2d(p=dropout_prob)

        # Activation (ReLU or PReLU) to apply after adding the branches
        self.out_activation = activation()

    def forward(self, x):
        """Forward pass; not implemented yet, currently returns ``None``."""
        pass
        # todo: Main branch shortcut

        # todo: Extension branch (three convs + regularizer)

        # todo: Add main and extension branches

        # todo: Call the output activation

        return


## DownsamplingBottleneck
Leskálázásért felelős struktúra
### Megjegyzés: ebben a cellában nincs semmi feladatotok.

In [None]:
class DownsamplingBottleneck(nn.Module):
    """Downsampling bottlenecks further downsample the feature map size.

    Main branch:
    1. 2x2 max pooling with stride 2; indices can be returned to be used for
    unpooling later;
    2. zero-padding of the channel dimension up to ``out_channels``.

    Extension branch:
    1. 2x2 convolution with stride 2 that decreases the number of channels
    by ``internal_ratio``, also called a projection;
    2. regular 3x3 convolution;
    3. 1x1 convolution which increases the number of channels to
    ``out_channels``, also called an expansion;
    4. dropout as a regularizer.

    Keyword arguments:
    - in_channels (int): the number of input channels.
    - out_channels (int): the number of output channels.
    - internal_ratio (int, optional): a scale factor applied to
    ``in_channels`` used to compute the number of channels after the
    projection. eg. given ``in_channels`` equal to 128 and internal_ratio
    equal to 2 the number of channels after the projection is 64. Must
    satisfy ``1 < internal_ratio <= in_channels``. Default: 4.
    - return_indices (bool, optional):  if ``True``, the max pooling indices
    are returned along with the output. Useful when unpooling later.
    Default: False.
    - dropout_prob (float, optional): probability of an element to be
    zeroed. Default: 0 (no dropout).
    - bias (bool, optional): Adds a learnable bias to the output if
    ``True``. Default: False.
    - relu (bool, optional): When ``True`` ReLU is used as the activation
    function; otherwise, PReLU is used. Default: True.

    Raises:
    - RuntimeError: if ``internal_ratio`` is out of range.

    """

    def __init__(self, in_channels, out_channels, internal_ratio=4, return_indices=False, dropout_prob=0., bias=False,
                 relu=True):
        super().__init__()

        # Store parameters that are needed later
        self.return_indices = return_indices

        # Check that the internal_ratio parameter is within the accepted range
        # (the condition also rejects internal_ratio == 1)
        if internal_ratio <= 1 or internal_ratio > in_channels:
            raise RuntimeError("Value out of range. Expected value in the "
                               "interval [1, {0}], got internal_scale={1}. "
                               .format(in_channels, internal_ratio))

        internal_channels = in_channels // internal_ratio

        activation = nn.ReLU if relu else nn.PReLU

        # Main branch - max pooling followed by feature map (channels) padding
        self.main_max1 = nn.MaxPool2d(2, stride=2, return_indices=return_indices)

        # Extension branch - 2x2 convolution, followed by a regular 3x3
        # convolution, followed by a 1x1 convolution.

        # 2x2 projection convolution with stride 2
        self.ext_conv1 = ConvBlock(in_channels, internal_channels, 2, 2, 0, 1, bias, activation())

        # Convolution
        self.ext_conv2 = ConvBlock(internal_channels, internal_channels, 3, 1, 1, 1, bias, activation())

        # 1x1 expansion convolution
        self.ext_conv3 = ConvBlock(internal_channels, out_channels, 1, 1, 0, 1, bias, activation())

        self.ext_regul = nn.Dropout2d(p=dropout_prob)

        # Activation (ReLU or PReLU) to apply after adding the branches
        self.out_activation = activation()

    def forward(self, x):
        """Downsample ``x``.

        Returns:
        - A tuple ``(out, max_indices)``. ``max_indices`` holds the max
        pooling indices when ``return_indices`` is ``True`` and is ``None``
        otherwise.
        """
        # Main branch shortcut
        if self.return_indices:
            main, max_indices = self.main_max1(x)
        else:
            main = self.main_max1(x)
            # Keep the return shape uniform; previously this name was unbound
            # here and the return statement raised UnboundLocalError.
            max_indices = None

        # Extension branch
        ext = self.ext_conv1(x)
        ext = self.ext_conv2(ext)
        ext = self.ext_conv3(ext)
        ext = self.ext_regul(ext)

        # Main branch channel padding. ``new_zeros`` allocates directly with
        # the dtype and on the device of ``main``, so no CPU round trip is
        # needed and the right GPU is used in multi-device setups.
        n, ch_ext, h, w = ext.size()
        ch_main = main.size()[1]
        padding = main.new_zeros(n, ch_ext - ch_main, h, w)

        # Concatenate
        main = torch.cat((main, padding), 1)

        # Add main and extension branches
        out = main + ext

        return self.out_activation(out), max_indices

## UpsamplingBottleneck
Felskálázásért felelős struktúra
### Megjegyzés: ebben a cellában nincs semmi feladatotok.

In [None]:
class UpsamplingBottleneck(nn.Module):
    """Upsampling bottleneck: restores feature map resolution with the max
    pooling indices saved by the matching downsampling bottleneck.

    Main branch:
    1. 1x1 convolution (stride 1) mapping ``in_channels`` to
    ``out_channels``, followed by batch normalization;
    2. max unpooling (kernel size 2) driven by the stored pooling indices.

    Extension branch:
    1. 1x1 projection convolution reducing the channels by
    ``internal_ratio``;
    2. 2x2 transposed convolution with stride 2, followed by batch
    normalization and the activation;
    3. 1x1 expansion convolution producing ``out_channels`` channels;
    4. dropout as a regularizer.

    Keyword arguments:
    - in_channels (int): the number of input channels.
    - out_channels (int): the number of output channels.
    - internal_ratio (int, optional): divisor applied to ``in_channels`` to
    get the channel count after the projection, eg. 128 input channels with
    ``internal_ratio`` 2 give 64 internal channels. Must satisfy
    ``1 < internal_ratio <= in_channels``. Default: 4.
    - dropout_prob (float, optional): probability of an element to be zeroed.
    Default: 0 (no dropout).
    - bias (bool, optional): Adds a learnable bias to the output if ``True``.
    Default: False.
    - relu (bool, optional): When ``True`` ReLU is used as the activation
    function; otherwise, PReLU is used. Default: True.

    Raises:
    - RuntimeError: if ``internal_ratio`` is out of range.

    """

    def __init__(self, in_channels, out_channels, internal_ratio=4, dropout_prob=0., bias=False, relu=True):
        super().__init__()

        # Reject ratios that would not shrink the channel count or that
        # exceed the number of input channels
        if internal_ratio > in_channels or internal_ratio <= 1:
            message = ("Value out of range. Expected value in the "
                       "interval [1, {0}], got internal_scale={1}. ")
            raise RuntimeError(message.format(in_channels, internal_ratio))

        internal_channels = in_channels // internal_ratio

        # Activation class; every layer below instantiates its own module
        activation = nn.ReLU if relu else nn.PReLU

        # Main branch - 1x1 convolution with batch normalization ...
        self.main_conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias),
            nn.BatchNorm2d(out_channels),
        )

        # ... then unpooling; the stride defaults to the kernel size, just
        # like in the max pooling layers
        self.main_unpool1 = nn.MaxUnpool2d(kernel_size=2)

        # Extension branch: projection -> transposed convolution -> expansion

        # 1x1 projection convolution with stride 1
        self.ext_conv1 = ConvBlock(in_channels, internal_channels, 1, 1, 0, 1, bias, activation())

        # Transposed convolution with its own batch norm and activation
        self.ext_tconv1 = nn.ConvTranspose2d(internal_channels, internal_channels, kernel_size=2, stride=2,
                                             bias=bias)
        self.ext_tconv1_bnorm = nn.BatchNorm2d(internal_channels)
        self.ext_tconv1_activation = activation()

        # 1x1 expansion convolution
        self.ext_conv2 = ConvBlock(internal_channels, out_channels, 1, 1, 0, 1, bias, activation())

        self.ext_regul = nn.Dropout2d(p=dropout_prob)

        # Activation (ReLU or PReLU) applied to the sum of the two branches
        self.out_activation = activation()

    def forward(self, x, max_indices, output_size):
        """Upsample ``x`` to ``output_size`` using ``max_indices`` for the
        unpooling and return the activated sum of both branches."""
        # Main branch: channel mapping, then unpooling
        shortcut = self.main_unpool1(self.main_conv1(x), max_indices, output_size=output_size)

        # Extension branch
        branch = self.ext_conv1(x)
        branch = self.ext_tconv1(branch, output_size=output_size)
        branch = self.ext_tconv1_activation(self.ext_tconv1_bnorm(branch))
        branch = self.ext_regul(self.ext_conv2(branch))

        # Merge the branches and apply the output activation
        return self.out_activation(shortcut + branch)

## Az ENet architektúra
### Megjegyzés: ebben a cellában nincs semmi feladatotok.

In [None]:
class ENet(nn.Module):
    """ENet segmentation network.

    The model is an initial block, three encoder stages (1-3), two decoder
    stages (4-5) and a final transposed convolution that produces one output
    channel per class.

    Keyword arguments:
    - num_classes (int): the number of classes to segment; used as the number
    of output channels of the final transposed convolution.
    - encoder_relu (bool, optional): forwarded as ``relu`` to the initial
    block and to every block of stages 1-3. When ``True`` ReLU is used as the
    activation function there; otherwise, PReLU is used. Default: False.
    - decoder_relu (bool, optional): forwarded as ``relu`` to every block of
    stages 4-5. When ``True`` ReLU is used as the activation function there;
    otherwise, PReLU is used. Default: True.

    """

    def __init__(self, num_classes, encoder_relu=False, decoder_relu=True):
        super().__init__()

        # Local factories for the three RegularBottleneck flavours used below.
        # Sub-modules are still assigned one by one, in the original order, so
        # attribute names and registration order are unchanged.
        def regular(channels, drop, relu):
            return RegularBottleneck(channels, padding=1, dropout_prob=drop, relu=relu)

        def dilated(rate):
            return RegularBottleneck(128, dilation=rate, padding=rate, dropout_prob=0.1,
                                     relu=encoder_relu)

        def asymmetric():
            return RegularBottleneck(128, kernel_size=5, padding=2, asymmetric=True,
                                     dropout_prob=0.1, relu=encoder_relu)

        self.initial_block = InitialBlock(3, 16, relu=encoder_relu)

        # Encoder, stage 1: downsample 16 -> 64 channels, then four regular blocks
        self.downsample1_0 = DownsamplingBottleneck(
            16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)
        self.regular1_1 = regular(64, 0.01, encoder_relu)
        self.regular1_2 = regular(64, 0.01, encoder_relu)
        self.regular1_3 = regular(64, 0.01, encoder_relu)
        self.regular1_4 = regular(64, 0.01, encoder_relu)

        # Encoder, stage 2: downsample 64 -> 128 channels, then a mix of
        # regular, dilated and asymmetric blocks
        self.downsample2_0 = DownsamplingBottleneck(
            64, 128, return_indices=True, dropout_prob=0.1, relu=encoder_relu)
        self.regular2_1 = regular(128, 0.1, encoder_relu)
        self.dilated2_2 = dilated(2)
        self.asymmetric2_3 = asymmetric()
        self.dilated2_4 = dilated(4)
        self.regular2_5 = regular(128, 0.1, encoder_relu)
        self.dilated2_6 = dilated(8)
        self.asymmetric2_7 = asymmetric()
        self.dilated2_8 = dilated(16)

        # Encoder, stage 3: same block sequence as stage 2, without downsampling
        self.regular3_0 = regular(128, 0.1, encoder_relu)
        self.dilated3_1 = dilated(2)
        self.asymmetric3_2 = asymmetric()
        self.dilated3_3 = dilated(4)
        self.regular3_4 = regular(128, 0.1, encoder_relu)
        self.dilated3_5 = dilated(8)
        self.asymmetric3_6 = asymmetric()
        self.dilated3_7 = dilated(16)

        # Decoder, stage 4: upsample 128 -> 64 channels
        self.upsample4_0 = UpsamplingBottleneck(128, 64, dropout_prob=0.1, relu=decoder_relu)
        self.regular4_1 = regular(64, 0.1, decoder_relu)
        self.regular4_2 = regular(64, 0.1, decoder_relu)

        # Decoder, stage 5: upsample 64 -> 16 channels, then map to class channels
        self.upsample5_0 = UpsamplingBottleneck(64, 16, dropout_prob=0.1, relu=decoder_relu)
        self.regular5_1 = regular(16, 0.1, decoder_relu)
        self.transposed_conv = nn.ConvTranspose2d(
            16, num_classes, kernel_size=3, stride=2, padding=1, bias=False)

    def forward(self, x):
        """Run the full encoder-decoder on ``x`` and return the class channel maps."""
        # Sizes recorded on the way down are reused as upsampling targets
        image_size = x.size()
        x = self.initial_block(x)

        # Encoder, stage 1
        size_before_down1 = x.size()
        x, pool_idx1 = self.downsample1_0(x)
        for layer in (self.regular1_1, self.regular1_2,
                      self.regular1_3, self.regular1_4):
            x = layer(x)

        # Encoder, stage 2
        size_before_down2 = x.size()
        x, pool_idx2 = self.downsample2_0(x)
        for layer in (self.regular2_1, self.dilated2_2, self.asymmetric2_3,
                      self.dilated2_4, self.regular2_5, self.dilated2_6,
                      self.asymmetric2_7, self.dilated2_8):
            x = layer(x)

        # Encoder, stage 3
        for layer in (self.regular3_0, self.dilated3_1, self.asymmetric3_2,
                      self.dilated3_3, self.regular3_4, self.dilated3_5,
                      self.asymmetric3_6, self.dilated3_7):
            x = layer(x)

        # Decoder, stage 4: undo the second downsampling
        x = self.upsample4_0(x, pool_idx2, output_size=size_before_down2)
        x = self.regular4_2(self.regular4_1(x))

        # Decoder, stage 5: undo the first downsampling, then restore the
        # size of the original input
        x = self.upsample5_0(x, pool_idx1, output_size=size_before_down1)
        x = self.regular5_1(x)
        return self.transposed_conv(x, output_size=image_size)


# Kiértékelés

## Tanító szkript

A háló tanításáért felel

### Feladatok:
- [ ] hozzátok létre a modellt, valamint
- [ ] az optimalizációs eljárást,
- [ ] a költségfüggvényt,
- [ ] az `lr_scheduler`-t
- [ ] és a `Runner` objektumokat (tanításhoz és validációhoz)

### Kérdések:
- [ ] hogyan lehet az inhomogén osztályeloszlást figyelembe venni a költségfüggvényben? _(Tipp: nézzétek meg a költségfüggvény paramétereit)_


In [None]:
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train the network for the configured number of epochs and return it.

    This is the exercise skeleton: every object introduced by a ``todo``
    comment below is still a ``None`` placeholder, so the function cannot run
    to completion until those are replaced with real objects.

    All hyperparameters are read from the notebook-level ``args`` dict.

    Keyword arguments:
    - train_loader: not referenced by the skeleton yet; presumably meant for
    the training ``Runner`` (see the todo).
    - val_loader: not referenced by the skeleton yet; presumably meant for the
    validation ``Runner`` (see the todo).
    - class_weights: not referenced by the skeleton yet; presumably meant for
    the weighted criterion (see the todo).
    - class_encoding: mapping whose keys are the class names; its length gives
    the number of classes and its keys label the per-class IoU printout.

    Returns the model.

    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # todo: Create network and deploy to device
    # Initialize ENet
    model = None
    # Check if the network architecture is correct
    print(model)

    # todo: Create criterion with weights
    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = None

    # todo: Create ADAM optimizer with weight decay
    optimizer = None

    # todo: Create learning rate decay scheduler (StepLR)
    lr_updater = None

    metric = setup_IoU(args, class_encoding)

    # Optionally resume from a checkpoint; otherwise start from epoch 0 with
    # no best score recorded yet
    if args["resume"]:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args["save_dir"], args["name"])
        print(f"Resuming from model: Start epoch = {start_epoch} | Best mean IoU = {best_miou:.4f}")
    else:
        start_epoch = 0
        best_miou = 0


    # todo: Create Runner objects
    # NOTE(review): the local name ``train`` shadows this function inside its
    # own body; harmless here because the function never calls itself.
    print()
    train = None
    val = None

    for epoch in range(start_epoch, args["epochs"]):
        print(f">>>> [Epoch: {epoch:d}] Training")

        epoch_loss, (iou, miou) = train.run_epoch(args["print_step"])

        print(f">>>> [Epoch: {epoch:d}] Avg. loss: {epoch_loss:.4f} | Mean IoU: {miou:.4f}")

        # Validate on every 10th epoch and always on the final epoch
        if (epoch + 1) % 10 == 0 or epoch + 1 == args["epochs"]:
            print(f">>>> [Epoch: {epoch:d}] Validation")

            loss, (iou, miou) = val.run_epoch(args["print_step"])

            print(f">>>> [Epoch: {epoch:d}] Avg. loss: {loss:.4f} | Mean IoU: {miou:.4f}")

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args["epochs"] or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print(f"{key}: {class_iou:.4f}")

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou, args)

        # Advance the learning-rate schedule once per epoch
        lr_updater.step()

    return model

## Tesztszkript
A tesztelést végzi

### Megjegyzés: ebben a cellában nincs feladatotok.

In [None]:
def test(model, test_loader, class_weights, class_encoding):
    """Evaluate ``model`` on the test set and print the loss and IoU figures.

    Reads the notebook-level ``args`` dict and ``device``. Nothing is
    returned; all results are printed.

    Keyword arguments:
    - model: the network to evaluate.
    - test_loader: iterable of batches handed to ``Runner``. When
    ``args["imshow_batch"]`` is set, its first batch must unpack into two
    items, the first being the images.
    - class_weights: passed as ``weight`` to ``nn.CrossEntropyLoss``.
    - class_encoding: mapping whose keys are the class names; they are paired
    positionally with the per-class IoU values for the printout.

    """
    print("\nTesting...\n")

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    metric = setup_IoU(args, class_encoding)

    # Test the trained model on the test set. The runner gets its own name so
    # it does not shadow this function.
    runner = Runner(model, test_loader, criterion, metric, device, is_train=False)

    print(">>>> Running test dataset")

    loss, (iou, miou) = runner.run_epoch(args["print_step"])

    print(f">>>> Avg. loss: {loss:.4f} | Mean IoU: {miou:.4f}")

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print(f"{key}: {class_iou:.4f}")

    # Show a batch of samples and labels
    if args["imshow_batch"]:
        print("A batch of predictions from the test set...")
        # Use the built-in next(): Python 3 iterators expose __next__, and a
        # ``.next()`` method is not guaranteed to exist on the loader iterator.
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)


def predict(model, images, class_encoding):
    """Run ``model`` on ``images`` and display them next to the coloured predictions.

    Uses the notebook-level ``device``. The model is switched to eval mode and
    is left in that mode afterwards.

    Keyword arguments:
    - model: the network used for inference.
    - images: batch of input images; moved to ``device`` before inference.
    - class_encoding: passed to ``LongTensorToRGBPIL`` to turn class indices
    into colours.

    """
    batch = images.to(device)

    # Inference only: eval mode, no gradient tracking
    model.eval()
    with torch.no_grad():
        scores = model(batch)

    # The output has one channel per class along dim 1; keep only the index
    # of the largest value at each position
    class_ids = scores.data.max(1)[1]

    # Class indices -> RGB PIL image -> tensor, applied to every batch item
    to_rgb_tensor = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor(),
    ])
    colored = utils.batch_transform(class_ids.cpu(), to_rgb_tensor)
    utils.imshow_batch(batch.data.cpu(), colored)

## Kiértékelési folyamat
Ebben a cellában hívjuk meg a kiértékeléshez szükséges függvényeket

### Megjegyzés: ebben a cellában nincs feladatotok.

In [18]:
# Target device taken from the configuration
device = torch.device(args["device"])

train_loader, val_loader, test_loader, w_class, class_encoding = load_dataset()

# Normalise the configured mode once; it selects training, testing or both
run_mode = args["mode"].lower()

if run_mode in ("train", "full"):
    model = train(train_loader, val_loader, w_class, class_encoding)

if run_mode in ("test", "full"):
    if run_mode == "test":
        # Nothing was trained in this run, so build a fresh ENet to receive
        # the checkpoint weights
        num_classes = len(class_encoding)
        model = ENet(num_classes).to(device)

    # load_checkpoint takes an optimizer as well, so create one just to be
    # able to retrieve the model
    optimizer = optim.Adam(model.parameters())

    # Replace the model with the previously saved state from the checkpoint
    model = utils.load_checkpoint(model, optimizer, args["save_dir"], args["name"])[0]

    if run_mode == "test":
        print(model)

    test(model, test_loader, w_class, class_encoding)




Loading dataset...

Dataset directory: data/CamVid
Save directory: save


TypeError: __init__() missing 1 required positional argument: 'root_dir'