Постройте модель на основе полносвязных слоёв для классификации датасета Fashion MNIST из библиотеки torchvision.
Получите точность (accuracy) на тестовой выборке не ниже 88%.

In [23]:
!pip install torchvision



In [36]:
import numpy as np
import torch
import torchvision as tv
import time

Список датасетов, доступных в torchvision: https://pytorch.org/vision/stable/datasets.html

In [25]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 256

# Download (if not already present in the current directory) the train and test
# splits of Fashion MNIST; every image is converted to a tensor by ToTensor().
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)

# Reshuffle the training samples at the start of every epoch so that the
# mini-batches differ between epochs; a fixed order gives the optimizer the
# same batch sequence every time. The test loader keeps its deterministic
# order because it is only used for evaluation.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [26]:
# Display the mapping from class names to integer label indices.
train_dataset.class_to_idx

{'T-shirt/top': 0,
 'Trouser': 1,
 'Pullover': 2,
 'Dress': 3,
 'Coat': 4,
 'Sandal': 5,
 'Shirt': 6,
 'Sneaker': 7,
 'Bag': 8,
 'Ankle boot': 9}

In [31]:
# Display the shape of the raw training image tensor.
train_dataset.data.shape

torch.Size([60000, 28, 28])

### Базовая модель (SGD)

In [57]:
# Baseline classifier: flatten each image into a 784-element vector, pass it
# through one hidden layer of 256 ReLU units, and produce 10 output scores
# (one per class).
model = torch.nn.Sequential(
    torch.nn.Flatten(start_dim=1, end_dim=-1),
    torch.nn.Linear(in_features=784, out_features=256, bias=True),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10, bias=True),
)

In [58]:
# Display the layer structure of the model as the cell output.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [59]:
# Training configuration; train_model() picks these up as module-level names.
num_epochs = 10  # number of full passes over the training loader
loss = torch.nn.CrossEntropyLoss()  # applied directly to the model outputs
trainer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

In [60]:
def train_model():
    """Train the global ``model`` and print train/test metrics after every epoch.

    The function takes no arguments and returns nothing. It reads these
    module-level names, which must be bound before the call: ``model``,
    ``loss``, ``trainer`` (the optimizer), ``train`` and ``test`` (iterables
    of ``(X, y)`` batches) and ``num_epochs``.

    Reported per epoch:
      * ``taked``   -- wall-clock seconds for the training pass plus the
        evaluation pass;
      * ``*_loss``  -- mean of the per-batch loss values;
      * ``*_acc``   -- correctly classified samples divided by samples seen.

    Training accuracy is accumulated on the fly, from predictions made before
    each optimizer step, so it is a running estimate over the epoch.
    """
    for ep in range(num_epochs):
        start = time.time()

        # ---- training pass ----
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        # ---- evaluation pass ----
        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # No backward pass happens here, so disable autograd: this avoids
        # building a computation graph for every test batch.
        with torch.no_grad():
            for X, y in test:
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {:.3f}, train_acc: {:.3f}, test_loss: {:.3f}, test_acc: {:.3f}"
              .format(ep, time.time() - start,
                      train_loss / train_iters,
                      train_acc / train_passed,
                      test_loss / test_iters,
                      test_acc / test_passed)
             )

In [61]:
# Train the current global model with the current global optimizer and loss.
train_model()

ep: 0, taked: 7.077, train_loss: 1.737, train_acc: 0.581, test_loss: 1.281, test_acc: 0.646
ep: 1, taked: 7.184, train_loss: 1.073, train_acc: 0.673, test_loss: 0.944, test_acc: 0.682
ep: 2, taked: 6.856, train_loss: 0.863, train_acc: 0.711, test_loss: 0.816, test_acc: 0.716
ep: 3, taked: 6.836, train_loss: 0.767, train_acc: 0.744, test_loss: 0.745, test_acc: 0.743
ep: 4, taked: 7.085, train_loss: 0.708, train_acc: 0.764, test_loss: 0.697, test_acc: 0.759
ep: 5, taked: 7.057, train_loss: 0.665, train_acc: 0.779, test_loss: 0.660, test_acc: 0.773
ep: 6, taked: 6.786, train_loss: 0.633, train_acc: 0.791, test_loss: 0.632, test_acc: 0.783
ep: 7, taked: 7.019, train_loss: 0.607, train_acc: 0.800, test_loss: 0.610, test_acc: 0.789
ep: 8, taked: 6.928, train_loss: 0.586, train_acc: 0.806, test_loss: 0.592, test_acc: 0.796
ep: 9, taked: 7.316, train_loss: 0.569, train_acc: 0.811, test_loss: 0.577, test_acc: 0.801


### Модель (Adam)

In [49]:
# Fresh, re-initialised copy of the single-hidden-layer network
# (784 -> 256 -> 10) for the next experiment.
model = torch.nn.Sequential(*[
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
])

In [50]:
# train_model() reads the module-level `trainer`, so it has to be rebound to an
# Adam optimizer over the new model's parameters before training starts.
trainer = torch.optim.Adam(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 6.810, train_loss: 0.539, train_acc: 0.808, test_loss: 0.446, test_acc: 0.838
ep: 1, taked: 6.707, train_loss: 0.380, train_acc: 0.862, test_loss: 0.417, test_acc: 0.848
ep: 2, taked: 6.752, train_loss: 0.348, train_acc: 0.873, test_loss: 0.390, test_acc: 0.860
ep: 3, taked: 8.012, train_loss: 0.326, train_acc: 0.881, test_loss: 0.405, test_acc: 0.862
ep: 4, taked: 7.633, train_loss: 0.314, train_acc: 0.885, test_loss: 0.406, test_acc: 0.861
ep: 5, taked: 7.103, train_loss: 0.303, train_acc: 0.888, test_loss: 0.403, test_acc: 0.861
ep: 6, taked: 7.117, train_loss: 0.295, train_acc: 0.891, test_loss: 0.408, test_acc: 0.864
ep: 7, taked: 7.356, train_loss: 0.285, train_acc: 0.895, test_loss: 0.387, test_acc: 0.868
ep: 8, taked: 7.188, train_loss: 0.278, train_acc: 0.895, test_loss: 0.385, test_acc: 0.871
ep: 9, taked: 7.193, train_loss: 0.276, train_acc: 0.899, test_loss: 0.380, test_acc: 0.871


### Модель (Adam) + дополнительные слои

In [62]:
# Deeper fully connected network: three hidden ReLU layers
# (784 -> 512 -> 256 -> 128) followed by the 10-way output layer.
model = torch.nn.Sequential(
    torch.nn.Flatten(start_dim=1, end_dim=-1),
    # hidden layer 1
    torch.nn.Linear(in_features=784, out_features=512),
    torch.nn.ReLU(),
    # hidden layer 2
    torch.nn.Linear(in_features=512, out_features=256),
    torch.nn.ReLU(),
    # hidden layer 3
    torch.nn.Linear(in_features=256, out_features=128),
    torch.nn.ReLU(),
    # output scores, one per class
    torch.nn.Linear(in_features=128, out_features=10),
)

In [63]:
# Rebind the module-level optimizer (read by train_model()) to the parameters
# of the newly created model, then train.
trainer = torch.optim.Adam(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 8.452, train_loss: 0.613, train_acc: 0.774, test_loss: 0.433, test_acc: 0.839
ep: 1, taked: 8.766, train_loss: 0.394, train_acc: 0.856, test_loss: 0.397, test_acc: 0.854
ep: 2, taked: 8.972, train_loss: 0.363, train_acc: 0.865, test_loss: 0.398, test_acc: 0.856
ep: 3, taked: 11.410, train_loss: 0.350, train_acc: 0.871, test_loss: 0.415, test_acc: 0.856
ep: 4, taked: 10.099, train_loss: 0.325, train_acc: 0.879, test_loss: 0.409, test_acc: 0.865
ep: 5, taked: 10.046, train_loss: 0.319, train_acc: 0.883, test_loss: 0.397, test_acc: 0.866
ep: 6, taked: 10.548, train_loss: 0.302, train_acc: 0.888, test_loss: 0.412, test_acc: 0.859
ep: 7, taked: 10.698, train_loss: 0.296, train_acc: 0.891, test_loss: 0.408, test_acc: 0.863
ep: 8, taked: 10.068, train_loss: 0.288, train_acc: 0.893, test_loss: 0.396, test_acc: 0.870
ep: 9, taked: 11.480, train_loss: 0.286, train_acc: 0.894, test_loss: 0.366, test_acc: 0.874


### Модель (Adam) + дополнительные слои + batch normalization

In [64]:
# Same three-hidden-layer network, with a BatchNorm1d layer placed after each
# hidden ReLU activation.
model = torch.nn.Sequential(
    torch.nn.Flatten(start_dim=1, end_dim=-1),
    # hidden block 1: Linear -> ReLU -> BatchNorm
    torch.nn.Linear(in_features=784, out_features=512),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=512),
    # hidden block 2
    torch.nn.Linear(in_features=512, out_features=256),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=256),
    # hidden block 3
    torch.nn.Linear(in_features=256, out_features=128),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=128),
    # output scores, one per class
    torch.nn.Linear(in_features=128, out_features=10),
)

In [65]:
# Rebind the module-level optimizer (read by train_model()) to the parameters
# of the batch-normalised model, then train.
trainer = torch.optim.Adam(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 8.828, train_loss: 0.474, train_acc: 0.826, test_loss: 0.420, test_acc: 0.844
ep: 1, taked: 8.502, train_loss: 0.370, train_acc: 0.864, test_loss: 0.559, test_acc: 0.848
ep: 2, taked: 8.735, train_loss: 0.338, train_acc: 0.875, test_loss: 0.502, test_acc: 0.826
ep: 3, taked: 9.053, train_loss: 0.316, train_acc: 0.883, test_loss: 0.396, test_acc: 0.857
ep: 4, taked: 8.817, train_loss: 0.296, train_acc: 0.891, test_loss: 0.391, test_acc: 0.856
ep: 5, taked: 9.243, train_loss: 0.284, train_acc: 0.896, test_loss: 0.395, test_acc: 0.857
ep: 6, taked: 8.885, train_loss: 0.269, train_acc: 0.901, test_loss: 0.379, test_acc: 0.860
ep: 7, taked: 8.772, train_loss: 0.260, train_acc: 0.903, test_loss: 0.352, test_acc: 0.871
ep: 8, taked: 9.076, train_loss: 0.246, train_acc: 0.908, test_loss: 0.368, test_acc: 0.872
ep: 9, taked: 9.215, train_loss: 0.234, train_acc: 0.913, test_loss: 0.358, test_acc: 0.876


### Модель (RMSprop) + дополнительные слои + batch normalization

In [70]:
# Re-initialised copy of the Linear -> ReLU -> BatchNorm architecture, to be
# trained from scratch with a different optimizer.
model = torch.nn.Sequential(*[
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=512),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=512),
    torch.nn.Linear(in_features=512, out_features=256),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=256),
    torch.nn.Linear(in_features=256, out_features=128),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(num_features=128),
    torch.nn.Linear(in_features=128, out_features=10),
])

In [71]:
# Rebind the module-level optimizer (read by train_model()) to RMSprop over
# the new model's parameters, then train.
trainer = torch.optim.RMSprop(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 8.607, train_loss: 0.567, train_acc: 0.797, test_loss: 0.897, test_acc: 0.732
ep: 1, taked: 8.571, train_loss: 0.386, train_acc: 0.856, test_loss: 0.780, test_acc: 0.741
ep: 2, taked: 8.370, train_loss: 0.347, train_acc: 0.871, test_loss: 0.738, test_acc: 0.766
ep: 3, taked: 8.647, train_loss: 0.322, train_acc: 0.881, test_loss: 0.546, test_acc: 0.802
ep: 4, taked: 8.391, train_loss: 0.295, train_acc: 0.890, test_loss: 0.526, test_acc: 0.806
ep: 5, taked: 8.371, train_loss: 0.282, train_acc: 0.895, test_loss: 0.435, test_acc: 0.841
ep: 6, taked: 8.477, train_loss: 0.263, train_acc: 0.902, test_loss: 0.534, test_acc: 0.819
ep: 7, taked: 8.175, train_loss: 0.252, train_acc: 0.906, test_loss: 0.510, test_acc: 0.836
ep: 8, taked: 8.466, train_loss: 0.238, train_acc: 0.911, test_loss: 0.427, test_acc: 0.876
ep: 9, taked: 8.146, train_loss: 0.229, train_acc: 0.914, test_loss: 0.447, test_acc: 0.857


### Модель (Adam) + дополнительные слои + batch normalization + dropout

In [76]:
# Three hidden blocks of Linear -> BatchNorm -> ReLU; the first two blocks are
# followed by Dropout(p=0.2). The last Linear layer yields the 10 class scores.
model = torch.nn.Sequential(
    torch.nn.Flatten(start_dim=1, end_dim=-1),
    # hidden block 1
    torch.nn.Linear(in_features=784, out_features=512),
    torch.nn.BatchNorm1d(num_features=512),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.2),
    # hidden block 2
    torch.nn.Linear(in_features=512, out_features=256),
    torch.nn.BatchNorm1d(num_features=256),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.2),
    # hidden block 3 (no dropout)
    torch.nn.Linear(in_features=256, out_features=128),
    torch.nn.BatchNorm1d(num_features=128),
    torch.nn.ReLU(),
    # output scores, one per class
    torch.nn.Linear(in_features=128, out_features=10),
)

In [77]:
# Rebind the module-level optimizer (read by train_model()) to the parameters
# of the dropout model, then train.
trainer = torch.optim.Adam(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 8.792, train_loss: 0.495, train_acc: 0.819, test_loss: 0.404, test_acc: 0.850
ep: 1, taked: 8.606, train_loss: 0.372, train_acc: 0.864, test_loss: 0.382, test_acc: 0.855
ep: 2, taked: 8.598, train_loss: 0.334, train_acc: 0.877, test_loss: 0.368, test_acc: 0.860
ep: 3, taked: 9.073, train_loss: 0.308, train_acc: 0.886, test_loss: 0.353, test_acc: 0.869
ep: 4, taked: 8.651, train_loss: 0.290, train_acc: 0.893, test_loss: 0.326, test_acc: 0.880
ep: 5, taked: 8.811, train_loss: 0.275, train_acc: 0.897, test_loss: 0.334, test_acc: 0.876
ep: 6, taked: 8.773, train_loss: 0.259, train_acc: 0.904, test_loss: 0.324, test_acc: 0.883
ep: 7, taked: 8.900, train_loss: 0.248, train_acc: 0.908, test_loss: 0.324, test_acc: 0.885
ep: 8, taked: 9.130, train_loss: 0.235, train_acc: 0.911, test_loss: 0.319, test_acc: 0.886
ep: 9, taked: 8.644, train_loss: 0.226, train_acc: 0.915, test_loss: 0.321, test_acc: 0.888
