#### Import torch and set train parameters

In [40]:
import torch
# Hyperparameters shared by the cells below.
n_epochs = 3
batch_size_train = batch_size_test = 64
learning_rate = 1e-3

# Seed PyTorch's global RNG so that runs from a fresh kernel are repeatable.
random_seed = 1
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7f2da80f1790>

#### Check that PyTorch works, then load the MNIST train and test splits

In [54]:
import numpy as np
import matplotlib.image as mpimg
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
# Quick sanity check that PyTorch can create a tensor.
x = torch.rand(5, 3)
print(x)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

from torchvision.transforms import Compose, ToTensor, Normalize
# Pinned host memory only helps when batches are copied to a GPU, so enable it
# based on actual CUDA availability instead of a hard-coded flag.
use_cuda = torch.cuda.is_available()
kwargs = {'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by both splits: convert images to tensors,
# then normalise with the mean/std values passed to Normalize below.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size_train, shuffle=True, num_workers=8, **kwargs)

# Evaluation does not benefit from shuffling; a fixed order keeps the reported
# test loss and the example batch reproducible between runs.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size_test, shuffle=False, num_workers=8, **kwargs)
%matplotlib inline

tensor([[1.4054e-01, 8.9186e-04, 8.9436e-02],
        [4.7699e-01, 5.9074e-02, 9.7135e-01],
        [7.1100e-01, 1.4182e-01, 5.1684e-02],
        [3.7895e-01, 3.3987e-01, 4.1600e-01],
        [9.0070e-01, 8.8610e-01, 6.5652e-01]])
cpu


#### Check the input size of single dataset entry

In [43]:
# Pull a single batch from the test loader to inspect its tensor shape.
examples = enumerate(test_loader)
first_batch = next(examples)
batch_idx = first_batch[0]
example_data, example_targets = first_batch[1]
example_data.shape

torch.Size([64, 1, 28, 28])

#### Defining a model 
#### The model has two hidden layers (512 and 256 units) that use ReLU and SELU activations, respectively. Because this is a 10-class classification task (digits 0–9), the output layer has 10 units, one score per class.

In [44]:
class MLPModel(nn.Module):
    """Fully connected classifier: ``initial_size`` -> 512 -> 256 -> 10 outputs."""

    def __init__(self, initial_size):
        """Build the layer stack.

        Args:
            initial_size: number of features per sample after flattening.
        """
        super().__init__()
        self.initial_size = initial_size
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the network is applied.
        layers = [
            nn.Linear(initial_size, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.SELU(),
            nn.Linear(256, 10),
        ]
        self.adv = nn.Sequential(*layers)

    def forward(self, xb):
        """Flatten each sample of ``xb`` to ``initial_size`` features and apply the layers."""
        batch = xb.shape[0]
        flat = xb.view(batch, self.initial_size)
        return self.adv(flat)

In [55]:
# Each MNIST image is 28x28 pixels, flattened by the model to 784 features.
model = MLPModel(28 * 28).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [56]:
def train(model, train_dataloader, optimizer, n_epochs, loss_fn):
    """Train ``model`` for ``n_epochs`` passes over ``train_dataloader``.

    Args:
        model: ``nn.Module`` to optimise; put into train mode at the start of
            every epoch.
        train_dataloader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.
        n_epochs: number of full passes over ``train_dataloader``.
        loss_fn: callable ``loss_fn(output, labels)`` returning a scalar loss
            tensor.

    Returns:
        List with one entry per epoch: the mean of that epoch's batch losses.
    """
    # Send batches to wherever the model's parameters live rather than relying
    # on a notebook-global ``device``; a parameterless model falls back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    for epoch in range(n_epochs):
        model.train()

        losses = []
        for images, labels in train_dataloader:
            images = images.to(target_device)
            # Labels must sit on the same device as the model output,
            # otherwise the loss computation fails when running on a GPU.
            labels = labels.to(target_device)

            optimizer.zero_grad()
            output = model(images)
            loss = loss_fn(output, labels)
            loss.backward()
            optimizer.step()

            losses.append(loss.item())

        train_losses.append(np.mean(np.array(losses)))
        print('train losses: {}'.format(train_losses[-1]))

    return train_losses

In [57]:
def test(model, test_dataloader, optimizer, n_epochs, loss_fn):
    # We will monitor loss functions as the training progresses
    test_losses = []
    train_accuracies = []
    losses = []
    model.eval()
    print('Evaluation')
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(test_dataloader):
            images = images.to(device)
            labels = labels.to(labels)
            output = model(images)
            loss = loss_fn(output, labels)
            losses.append(loss.item())
            predicted_labels = output
            predicted_labels = predicted_labels.type(torch.FloatTensor)

        test_losses.append(np.mean(np.array(losses)))
        print('test loss: {}'.format(test_losses[len(test_losses) - 1]))

    return test_losses

In [48]:
# Override the epoch count from the configuration cell for this run.
n_epochs = 20
train(model, train_loader, optimizer, n_epochs=n_epochs, loss_fn=loss_fn)

train losses: 0.1942174430174042
train losses: 0.08878137598604535
train losses: 0.06238011767599247
train losses: 0.04830474668204276
train losses: 0.04046255635665551
train losses: 0.03826640228798458
train losses: 0.026784991400677766
train losses: 0.028742385730306224
train losses: 0.02306360038898901
train losses: 0.023797197716195446
train losses: 0.02333222370920405
train losses: 0.021249855755329894
train losses: 0.018543206905918336
train losses: 0.017245778706726996
train losses: 0.017793867744997874
train losses: 0.01809706180684094
train losses: 0.014583869938498367
train losses: 0.01355844690029555
train losses: 0.0167640475576112
train losses: 0.01632936537138689


[0.1942174430174042,
 0.08878137598604535,
 0.06238011767599247,
 0.04830474668204276,
 0.04046255635665551,
 0.03826640228798458,
 0.026784991400677766,
 0.028742385730306224,
 0.02306360038898901,
 0.023797197716195446,
 0.02333222370920405,
 0.021249855755329894,
 0.018543206905918336,
 0.017245778706726996,
 0.017793867744997874,
 0.01809706180684094,
 0.014583869938498367,
 0.01355844690029555,
 0.0167640475576112,
 0.01632936537138689]

In [49]:
# Evaluate the trained model on the held-out test split.
test_loss = test(model, test_loader, optimizer, n_epochs=n_epochs, loss_fn=loss_fn)

Evaluation
test loss: 0.14274695036327764


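## Comparing the final train loss (about 0.016) with the test loss (about 0.143), we can conclude that the model is overfitting, i.e. it learned the train set well but fails to give similar results on the test set. The best way to avoid this problem is to improve the model, for example by adding regularization (dropout, weight decay) or by training for fewer epochs.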