<a href="https://colab.research.google.com/github/nerobite/neural_networks/blob/main/%D0%9C%D0%BD%D0%BE%D0%B3%D0%BE%D1%81%D0%BB%D0%BE%D0%B9%D0%BD%D0%B0%D1%8F_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D0%B0%D1%8F_%D1%81%D0%B5%D1%82%D1%8C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Домашнее задание по теме «Многослойная нейронная сеть»
Цель задания: научиться на простейшем уровне подбирать архитектуру полносвязной нейронной сети для получения качества решения задачи не ниже заданного.

**Задание**
Постройте модель на основе полносвязных слоёв для классификации Fashion MNIST из библиотеки torchvision ([datasets](https://pytorch.org/vision/stable/datasets.html)).
Получите качество на тестовой выборке не ниже 88%

**Инструкция по выполнению задания**

1. Скачайте тренировочную и тестовою часть датасета Fashion MNIST

2. Постройте модель, выбрав стартовую архитектуру

3. Обучите модель и сверьте качество на тестовой части с заданным порогом

4. Изменяйте архитектуру модели пока качество на тестовой части не будет выше порога. Вариации архитектуры можно реализовать через изменение количества слоёв, количества нейронов в слоях и использование регуляризации. Можно использовать различные оптимизаторы.

In [3]:
import torchvision as tv
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import time

In [5]:
BATCH_SIZE=256

In [6]:
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [7]:
train_dataset[0][0].shape

torch.Size([1, 28, 28])

In [13]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.BatchNorm1d(512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.BatchNorm1d(256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.BatchNorm1d(128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 32),
    torch.nn.BatchNorm1d(32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 10)
    )

In [14]:
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=256, out_features=128, bias=True)
  (8): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU()
  (10): Linear(in_features=128, out_features=32, bias=True)
  (11): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU()
  (13): Linear(in_features=32, out_features=10, bias=True)
)

In [18]:
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters(), lr=.01)
num_epochs = 10

In [16]:
def train_model():
    for ep in range(num_epochs):
        train_iters, train_passed  = 0, 0
        train_loss, train_acc = 0., 0.
        start=time.time()

        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed  = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        for X, y in test:
            y_pred = model(X)
            l = loss(y_pred, y)
            test_loss += l.item()
            test_acc += (y_pred.argmax(dim=1) == y).sum().item()
            test_iters += 1
            test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )

In [19]:
train_model()

ep: 0, taked: 13.890, train_loss: 0.24269348631434617, train_acc: 0.933, test_loss: 0.128302570595406, test_acc: 0.9589
ep: 1, taked: 13.922, train_loss: 0.09325601137144135, train_acc: 0.9714833333333334, test_loss: 0.10948093366459943, test_acc: 0.9668
ep: 2, taked: 14.121, train_loss: 0.06097834970247238, train_acc: 0.9816, test_loss: 0.09567625726340338, test_acc: 0.9715
ep: 3, taked: 14.312, train_loss: 0.044978943484974034, train_acc: 0.9855666666666667, test_loss: 0.08701998553733574, test_acc: 0.9736
ep: 4, taked: 13.459, train_loss: 0.033566884965972696, train_acc: 0.9892, test_loss: 0.0921761783582042, test_acc: 0.9759
ep: 5, taked: 13.371, train_loss: 0.03133266921789247, train_acc: 0.9898166666666667, test_loss: 0.10590796461910941, test_acc: 0.9718
ep: 6, taked: 13.508, train_loss: 0.02288605476471972, train_acc: 0.99255, test_loss: 0.09749155449712817, test_acc: 0.9754
ep: 7, taked: 13.435, train_loss: 0.021425275565892896, train_acc: 0.9928833333333333, test_loss: 0.1006

In [None]:
#Видим, что на 10 эпохе train_loss немного подрос, возможно модель начала переобучаться, соответственно дальнейшее обучение не нужно, необходимое качество модели получено.

In [None]:
# в качестве эксперимента изменим параметры

In [21]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.BatchNorm1d(256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 32),
    torch.nn.BatchNorm1d(32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 10)
    )

In [22]:
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=256, out_features=128, bias=True)
  (8): Dropout(p=0.5, inplace=False)
  (9): ReLU()
  (10): Linear(in_features=128, out_features=32, bias=True)
  (11): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU()
  (13): Linear(in_features=32, out_features=10, bias=True)
)

In [23]:
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.RMSprop(model.parameters(), lr=.01)
num_epochs = 10

In [24]:
train_model()

ep: 0, taked: 13.621, train_loss: 0.4497471792742293, train_acc: 0.8561, test_loss: 0.22001185107510537, test_acc: 0.9349
ep: 1, taked: 13.478, train_loss: 0.20830419463046054, train_acc: 0.9418333333333333, test_loss: 0.1307021677494049, test_acc: 0.9604
ep: 2, taked: 13.688, train_loss: 0.16618107346461175, train_acc: 0.9528166666666666, test_loss: 0.10534885316737927, test_acc: 0.969
ep: 3, taked: 13.524, train_loss: 0.1431672879514542, train_acc: 0.9593166666666667, test_loss: 0.09300981083943043, test_acc: 0.9718
ep: 4, taked: 13.808, train_loss: 0.1264190141151243, train_acc: 0.9655, test_loss: 0.08632637001865077, test_acc: 0.9751
ep: 5, taked: 14.149, train_loss: 0.11381868225462893, train_acc: 0.9674666666666667, test_loss: 0.07417325814167271, test_acc: 0.9787
ep: 6, taked: 14.176, train_loss: 0.104632625697141, train_acc: 0.9698666666666667, test_loss: 0.07513518250489142, test_acc: 0.9786
ep: 7, taked: 13.738, train_loss: 0.09821211260129163, train_acc: 0.9716166666666667, 

In [None]:
#Добавление Dropout не много, по повысило качество модели и избавило ее от переобучения

In [None]:
#Попробуем ту же модель, но теперь с оператором Adam

In [25]:
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters(), lr=.01)
num_epochs = 10

In [26]:
train_model()

ep: 0, taked: 14.251, train_loss: 0.08445948730123803, train_acc: 0.9758166666666667, test_loss: 0.06441805298090912, test_acc: 0.9822
ep: 1, taked: 13.935, train_loss: 0.07914308038480738, train_acc: 0.9771166666666666, test_loss: 0.06611715400795219, test_acc: 0.9804
ep: 2, taked: 13.885, train_loss: 0.0718383460246185, train_acc: 0.97955, test_loss: 0.06743319488232373, test_acc: 0.9816
ep: 3, taked: 13.896, train_loss: 0.06962267379177378, train_acc: 0.9799833333333333, test_loss: 0.06423537344307988, test_acc: 0.9819
ep: 4, taked: 13.989, train_loss: 0.06714783220135785, train_acc: 0.9807, test_loss: 0.06321509596491523, test_acc: 0.9818
ep: 5, taked: 14.404, train_loss: 0.06176252698010587, train_acc: 0.9819166666666667, test_loss: 0.05962656399824482, test_acc: 0.9822
ep: 6, taked: 14.412, train_loss: 0.0578387515307305, train_acc: 0.9830166666666666, test_loss: 0.057609591788423134, test_acc: 0.983
ep: 7, taked: 14.255, train_loss: 0.058743448612934096, train_acc: 0.98331666666

In [None]:
#Комбинация Dropout и BatchNorm1d, при изспользовании Adam побеждают в данном занятии)))