# **Отладка нейронных сетей**

TL;DR: нужно найти и исправить всё что не так.

Правила игры:
- Нельзя добавлять параметров в сеть---нельзя менять параметры количество/параметры свёрточных и линейных слоёв.
- Читать примеры обучения сетки на пайторче/отдельные куски на стековерфлоу против правил.
- Можно читать документацию к функциям :)
- Количество эпох увеличивать нельзя. *25 эпох хватит всем*.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/puhsu/dl-hse/blob/master/week01-intro/looking_for_bugs.ipynb)

<img src="https://raw.githubusercontent.com/senya-ashukha/senya-ashukha.github.io/master/projects/teaching/dasha.jpg" style="width: 50%;">

# Будем работать с датасетом Fashion-MNIST

Подробнее о датасете [https://github.com/zalandoresearch/fashion-mnist](https://github.com/zalandoresearch/fashion-mnist)

<img src="https://raw.githubusercontent.com/senya-ashukha/senya-ashukha.github.io/master/projects/teaching/fmnist.png" style="width: 80%;">

<!-- <img src="https://miro.medium.com/max/2312/1*jXssb_WjoYZgepOCfdQfJA.png" style="width:10%"> -->


In [None]:
import os
import sys
import random
from collections import OrderedDict
from time import gmtime, strftime

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from tabulate import tabulate
from pandas import DataFrame

In [None]:
# bug free, I swear

class Logger:
    def __init__(self, name='name', fmt=None):
        self.handler = True
        self.scalar_metrics = OrderedDict()
        self.fmt = fmt if fmt else dict()

        base = './logs'
        if not os.path.exists(base): os.mkdir(base)

        time = gmtime()
        hash = ''.join([chr(random.randint(97, 122)) for _ in range(5)])
        fname = '-'.join(sys.argv[0].split('/')[-3:])
        self.path = '%s/%s-%s-%s-%s' % (base, fname, name, strftime('%m-%d-%H-%M', time), hash)

        self.logs = self.path + '.csv'
        self.output = self.path + '.out'
        self.checkpoint = self.path + '.cpt'

    def print(self, *args):
        str_to_write = ' '.join(map(str, args))
        with open(self.output, 'a') as f:
            f.write(str_to_write + '\n')
            f.flush()

        print(str_to_write)
        sys.stdout.flush()

    def add_scalar(self, t, key, value):
        if key not in self.scalar_metrics:
            self.scalar_metrics[key] = []
        self.scalar_metrics[key] += [(t, value)]

    def add_dict(self, t, d):
        for key, value in d.iteritems():
            self.add_scalar(t, key, value)

    def add(self, t, **args):
        for key, value in args.items():
            self.add_scalar(t, key, value)

    def iter_info(self, order=None):
        names = list(self.scalar_metrics.keys())
        if order:
            names = order
        values = [self.scalar_metrics[name][-1][1] for name in names]
        t = int(np.max([self.scalar_metrics[name][-1][0] for name in names]))
        fmt = ['%s'] + [self.fmt[name] if name in self.fmt else '.1f' for name in names]

        if self.handler:
            self.handler = False
            self.print(tabulate([[t] + values], ['epoch'] + names, floatfmt=fmt))
        else:
            self.print(tabulate([[t] + values], ['epoch'] + names, tablefmt='plain', floatfmt=fmt).split('\n')[1])

    def save(self, silent=False):
        result = None
        for key in self.scalar_metrics.keys():
            if result is None:
                result = DataFrame(self.scalar_metrics[key], columns=['t', key]).set_index('t')
            else:
                df = DataFrame(self.scalar_metrics[key], columns=['t', key]).set_index('t')
                result = result.join(df, how='outer')
        result.to_csv(self.logs)
        if not silent:
            self.print('The log/output/model have been saved to: ' + self.path + ' + .csv/.out/.cpt')


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout1 = nn.Dropout2d(0.9)
        self.dropout2 = nn.Dropout2d(0.9)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm1d(128)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        x = F.relu(x)
        return x

In [None]:
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        torch.nn.init.constant_(m.weight.data, 0)
        torch.nn.init.constant_(m.bias.data, 0)

In [None]:
def train(args, model, device, train_loader, optimizer):
    global logger, epoch
    mean_loss = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        mean_loss += len(data)/len(train_loader.dataset) * loss.item()
        with torch.no_grad():
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    logger.add_scalar(epoch, 'train_loss',  mean_loss)
    logger.add_scalar(epoch, 'train_acc',  100. * correct / len(train_loader.dataset))

In [None]:
def test(model, device, test_loader):
    global logger, epoch
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    logger.add_scalar(epoch, 'test_loss', test_loss)
    logger.add_scalar(epoch, 'test_acc',  100. * correct / len(test_loader.dataset))

In [None]:
from types import SimpleNamespace

args = {
  'batch_size': 128,
  'test_batch_size': 1000,
  'epochs': 25,
  'lr': 1e-3,
  'gamma': 0.1,
  'no_cuda': False,
  'log_interval': 2,
}
args = SimpleNamespace(**args)
print('args:', args)

In [None]:
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

In [None]:
transform=transforms.Compose([
    transforms.RandomResizedCrop(28),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.FashionMNIST(
    '../data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    num_workers=5,
    batch_size=args.batch_size)

test_dataset = datasets.FashionMNIST(
    '../data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(
    train_dataset,
    num_workers=5,
    batch_size=args.test_batch_size)


In [None]:
fmt = {'test_loss': '.3f', 'test_acc': '.3f', 'train_loss': '.3f', 'lr': '1.1e'}
logger = Logger('sparse_vd', fmt=fmt)

model = Net().to(device).train()
model = model.apply(weights_init)
optimizer = optim.SGD(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

for epoch in range(1, args.epochs + 1):
    logger.add_scalar(epoch, 'lr', scheduler.get_last_lr()[0])
    train(args, model, device, train_loader, optimizer)
    test(model, device, test_loader)
    scheduler.step()
    if epoch % args.log_interval == 0 or epoch==1:
      logger.iter_info()

In [None]:
# Plot train and test loss
train_loss = np.array(logger.scalar_metrics['train_loss'])[:,1]
plt.plot(train_loss, 'o-', label='train_loss')

test_loss = np.array(logger.scalar_metrics['test_loss'])[:,1]
plt.plot(test_loss, 'o-', label='test_loss')

plt.plot([0.21]*len(train_loss), '-', label='you win (for test)', c='r')

plt.grid(True)
plt.ylabel('Loss')
plt.xlabel('# epoch')
plt.legend()

In [None]:
# Plot train and test accuracy
train_acc = np.array(logger.scalar_metrics['train_acc'])[:,1]
plt.plot(train_acc, 'o-', label='train_acc')

test_acc = np.array(logger.scalar_metrics['test_acc'])[:,1]
plt.plot(test_acc, 'o-', label='test_acc')

plt.plot([92]*len(train_acc), '-', label='you win (for test)', c='r')

plt.grid(True)
plt.ylabel('Accuracy (%)')
plt.xlabel('# epoch')
plt.legend()

In [None]:
# Plot learning rate
train_loss = np.array(logger.scalar_metrics['lr'])[:,1]
plt.plot(train_loss, 'o-', label='lr')

plt.grid(True)
plt.ylabel('Accuracy (%)')
plt.xlabel('# epoch')
plt.legend()