![image](./task1.png)

- [src-1](https://habr.com/ru/companies/skillfactory/articles/565232/)
- [src-2](https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D1%91%D1%80%D1%82%D0%BE%D1%87%D0%BD%D0%B0%D1%8F_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D0%B0%D1%8F_%D1%81%D0%B5%D1%82%D1%8C)

In [9]:
import io
import numpy as np
import matplotlib.pyplot as plt
import requests
import torch

from copy import deepcopy
from lime import lime_image
from matplotlib import gridspec
from matplotlib.image import imread
from mpl_toolkits import mplot3d
from PIL import Image
from skimage.segmentation import mark_boundaries
from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
from urllib.request import urlopen


In [10]:
import warnings
warnings.filterwarnings("ignore")


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [12]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    model.train()
    model.zero_grad()

    output = model(x_batch.to(device))

    loss = loss_function(output, y_batch.to(device))
    loss.backward()

    optimizer.step()
    return loss.cpu().item()


In [13]:
def train_epoch(train_generator, model, loss_function, optimizer, callback = None):
    epoch_loss = 0
    total = 0
    for it, (batch_of_x, batch_of_y) in enumerate(train_generator):
        batch_loss = train_on_batch(model, batch_of_x.to(device), batch_of_y.to(device), optimizer, loss_function)

        if callback is not None:
            callback(model, batch_loss)

        epoch_loss += batch_loss*len(batch_of_x)
        total += len(batch_of_x)

    return epoch_loss/total


In [14]:
def trainer(count_of_epoch,
            batch_size,
            dataset,
            model,
            loss_function,
            optimizer,
            lr = 0.001,
            callback = None):

    optima = optimizer(model.parameters(), lr=lr)

    iterations = tqdm(range(count_of_epoch), desc='epoch')
    iterations.set_postfix({'train epoch loss': np.nan})
    for it in iterations:
        batch_generator = tqdm(
            torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True),
            leave=False, total=len(dataset)//batch_size+(len(dataset)%batch_size> 0))

        epoch_loss = train_epoch(train_generator=batch_generator,
                    model=model,
                    loss_function=loss_function,
                    optimizer=optima,
                    callback=callback)

        iterations.set_postfix({'train epoch loss': epoch_loss})


In [82]:
# create parametrizable agile class to speed up investigations
class CNNParametrizable(torch.nn.Module):
    __DEFAULT_IS_POOLING__ : bool = True
    __DEFAULT_BATCHNORM__  : bool = False

    __DEFAULT_DROPOUT__ : float = -1.0

    __DEFAULT_CONV_LAYERS__   : int = 2
    __DEFAULT_LINEAR_LAYERS__ : int = 3

    __DEFAULT_CONV_SIZE__    : list[int] = [5]*__DEFAULT_CONV_LAYERS__
    __DEFAULT_POOLING_SIZE__ : list[int] = [2]*__DEFAULT_CONV_LAYERS__
    __DEFAULT_CONV_CHANNELS_ : list[int] = [1, 6, 16]

    conv_sz:    list[int] = __DEFAULT_CONV_SIZE__
    conv_chnls: list[int] = __DEFAULT_CONV_CHANNELS_

    is_pooling:   bool = __DEFAULT_IS_POOLING__
    pool_sz: list[int] = __DEFAULT_POOLING_SIZE__

    add_batchnorm : bool = __DEFAULT_BATCHNORM__
    dropout : float = __DEFAULT_DROPOUT__

    # number of each type layers
    num_conv: int = __DEFAULT_CONV_LAYERS__
    num_lin:  int = __DEFAULT_LINEAR_LAYERS__

    # creates layers based on provided params
    class SequentialBuilder:
        model: torch.nn.Module

        # stores output size after convolution
        output_conv_sz: int
        # stores output size from previous linear layer
        prev_linear_sz: int

        curr_conv_i: int
        curr_lin_i: int

        # input MNIST x*y sizes
        SIZE_X: int = 28
        SIZE_Y: int = 28

        def __init__(self, model: torch.nn.Module):
            self.model = model

            self.output_conv_sz, self.prev_linear_sz = (self.__calc_output_conv(),) * 2

            self.curr_conv_i = 0
            self.curr_lin_i = 0

        def add_layer_conv(self):
            input_channels  = self.model.conv_chnls[self.curr_conv_i]
            output_channels = self.model.conv_chnls[self.curr_conv_i + 1]
            kernel_sz = self.model.conv_sz[self.curr_conv_i]

            self.model.layers.add_module(f"conv{self.curr_conv_i}", torch.nn.Conv2d(input_channels, output_channels, kernel_sz))
            if self.model.add_batchnorm:
                self.model.layers.add_module(f"bnorm{self.curr_conv_i}", torch.nn.BatchNorm2d(output_channels))
            self.model.layers.add_module(f"conv-relu{self.curr_conv_i}", torch.nn.ReLU())

            # add polling
            if (self.model.is_pooling):
                kernel_pool_sz = self.model.pool_sz[self.curr_conv_i]
                self.model.layers.add_module(f"pool{self.curr_conv_i}", torch.nn.MaxPool2d(kernel_pool_sz))

            self.curr_conv_i += 1

        def add_flatten(self):
            self.model.layers.add_module("flatten", torch.nn.Flatten())

        def add_layer_lin(self):
            def output_linear_proportional(num_lin) -> int:
                return int((1 - self.curr_lin_i / num_lin) * 0.8 * self.output_conv_sz)
            # we are adding last linear layer
            if self.curr_lin_i == self.model.num_lin - 1:
                output_linear_sz = 10
            else:
                output_linear_sz = max(output_linear_proportional(self.model.num_lin), 10)

            self.model.layers.add_module(f"linear{self.curr_lin_i}", torch.nn.Linear(self.prev_linear_sz, output_linear_sz))
            if (self.curr_lin_i != self.model.num_lin - 1):
                self.add_dropout(self.curr_lin_i)
                self.model.layers.add_module(f"lin-relu{self.curr_lin_i}", torch.nn.ReLU())

            self.prev_linear_sz = output_linear_sz
            self.curr_lin_i += 1

        def add_dropout(self, idx):
            if self.model.dropout > 0:
                self.model.layers.add_module(f"dropout{idx}", torch.nn.Dropout(self.model.dropout))

        def __calc_output_conv(self) -> int:
            spatialX = self.SIZE_X
            spatialY = self.SIZE_Y

            if self.model.is_pooling:
                for conv_sz, pool_sz in zip(self.model.conv_sz, self.model.pool_sz):
                    spatialX -= (conv_sz - 1)
                    spatialY -= (conv_sz - 1)
                    spatialX //= pool_sz
                    spatialY //= pool_sz
            else:
                for conv_sz in self.model.conv_sz:
                    spatialX -= (conv_sz - 1)
                    spatialY -= (conv_sz - 1)

            return int(spatialX * spatialY * self.model.conv_chnls[-1])

    @property
    def device(self):
        for p in self.parameters():
            return p.device

    def __init__(self, **params):
        super(CNNParametrizable, self).__init__()
        # check all params presented are valid
        for name in params.keys():
            self.__getattribute__(name)

        # setting values on init
        for name, value in params.items():
            if value is not None:
                self.__setattr__(name, value)

        is_comp, msg = self.is_compatible()
        if not is_comp:
            raise RuntimeError(msg)

        self.layers = torch.nn.Sequential()
        builder = self.SequentialBuilder(self)
        # build parametrized CNN
        for _ in range(self.num_conv):
            builder.add_layer_conv()
        builder.add_flatten()
        for _ in range(self.num_lin):
            builder.add_layer_lin()

    # checks if entered parametrs are compatible with each other
    def is_compatible(self) -> tuple[bool, str]:
        if self.num_conv <= 0:
            return False, "num_conv <= 0"
        if self.num_lin <= 0:
            return False, "num_lin <= 0"
        if len(self.conv_sz) != self.num_conv:
            return False, f"len(conv_sz) == {len(self.conv_sz)} != {self.num_conv} == num of conv layers"
        if len(self.conv_chnls) - 1 != self.num_conv:
            return False, f"len(conv_chnls) == {len(self.conv_chnls)} != {self.num_conv} == num of conv layers"
        if self.conv_chnls[0] != 1:
            return False, "first input channel must be 1 sized"
        if self.is_pooling and len(self.pool_sz) != self.num_conv:
            return False, f"len(pool_sz) == {len(self.pool_sz)} != {self.num_conv} == num of conv layers"
        if self.dropout > 1:
            return False, f"dropout = {self.dropout} > 1"

        return True, "Compatable!"

    def forward(self, input):
        return self.layers(input)


In [84]:
my_model = CNNParametrizable(
    num_lin=4,
    is_pooling=True,
    pool_sz=[2, 2, 2],
    num_conv=3,
    conv_chnls=[1,6,16,20],
    conv_sz=[5, 5, 3],
    add_batchnorm=True,
    dropout=0.3)
my_model.to(device)


CNNParametrizable(
  (layers): Sequential(
    (conv0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (bnorm0): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv-relu0): ReLU()
    (pool0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv1): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (bnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv-relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(16, 20, kernel_size=(3, 3), stride=(1, 1))
    (bnorm2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv-relu2): ReLU()
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear0): Linear(in_features=20, out_features=16, bias=True)
    (dropout0): Dropout(p=0.3, inplac

In [85]:
MNIST_train = datasets.MNIST('./mnist', train=True, download=True, transform=transforms.ToTensor())
MNIST_test = datasets.MNIST('./mnist', train=False, download=True, transform=transforms.ToTensor())


In [86]:
class callback():
    def __init__(self, writer, dataset, loss_function, delimeter = 100, batch_size=64):
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function
        self.batch_size = batch_size

        self.dataset = dataset

    def forward(self, model, loss):
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)

        if self.step % self.delimeter == 0:

            self.writer.add_graph(model, self.dataset[0][0].view(1,1,28,28).to(model.device))

            batch_generator = torch.utils.data.DataLoader(dataset = self.dataset,
                                                          batch_size=self.batch_size)

            pred = []
            real = []
            test_loss = 0
            for it, (x_batch, y_batch) in enumerate(batch_generator):
                x_batch = x_batch.to(model.device)
                y_batch = y_batch.to(model.device)

                output = model(x_batch)

                test_loss += self.loss_function(output, y_batch).cpu().item()*len(x_batch)

                pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
                real.extend(y_batch.cpu().numpy().tolist())

            test_loss /= len(self.dataset)

            self.writer.add_scalar('LOSS/test', test_loss, self.step)

            self.writer.add_text('REPORT/test', str(classification_report(real, pred)), self.step)
    def __call__(self, model, loss):
        return self.forward(model, loss)


In [87]:
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam


In [88]:
writer = SummaryWriter(log_dir='tensorboard')
call = callback(writer, MNIST_test, loss_function, delimeter=10)


In [91]:
trainer(count_of_epoch=5,
        batch_size=64,
        dataset=MNIST_train,
        model=my_model,
        loss_function=loss_function,
        optimizer=optimizer,
        lr=0.001,
        callback=call)


epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [None]:
%load_ext tensorboard


In [None]:
%tensorboard --logdir tensorboard/ --port 6009


Reusing TensorBoard on port 6008 (pid 10447), started 0:00:06 ago. (Use '!kill 10447' to kill it.)

In [92]:
batch_generator = torch.utils.data.DataLoader(dataset = MNIST_test,
                                              batch_size=64)

pred = []
real = []
test_loss = 0
for it, (x_batch, y_batch) in enumerate(batch_generator):
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    output = my_model(x_batch)

    test_loss += loss_function(output, y_batch).cpu().item()*len(x_batch)

    pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
    real.extend(y_batch.cpu().numpy().tolist())

test_loss /= len(MNIST_test)

print('loss: {}'.format(test_loss))


loss: 0.6460663062095642
