## Training and testing a fully connected NN with one hidden layer

In [None]:
# Mount Google Drive inside the Colab runtime at /gdrive.
from google.colab import drive
drive.mount('/gdrive')
# Change the working directory into the mounted drive (IPython magics),
# so the relative paths used further down resolve against 'My Drive'.
%cd /gdrive
%cd 'My Drive'

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
from torch.utils.data.sampler import SubsetRandomSampler, Sampler
from torch.optim.lr_scheduler import StepLR

from torchvision import transforms

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

In [0]:
def count_mean_std(data_folder):
    """Compute the per-channel mean and standard deviation of an image folder.

    Every image under ``data_folder`` is loaded through ``ImageFolder`` with a
    ``ToTensor`` transform, and the statistics are taken over all pixels of
    all images (images with more pixels therefore weigh more).

    Args:
        data_folder: root directory passed to ``dset.ImageFolder``.

    Returns:
        A ``(mean_value, std_value)`` tuple of float32 tensors with one entry
        per channel.

    Raises:
        ValueError: if the dataset yields no pixels at all.
    """
    data = dset.ImageFolder(
        root=data_folder,
        transform=transforms.ToTensor())

    pixel_count = 0
    channel_sum = None
    channel_sq_sum = None
    for features, _ in data:
        # A dataset sample is a single (C, H, W) image; add a leading batch
        # axis so that (N, C, H, W) batches are handled by the same code.
        if features.dim() == 3:
            features = features.unsqueeze(0)
        # Accumulate in float64 to limit rounding error over many pixels.
        features = features.double()

        batch_sum = features.sum(dim=(0, 2, 3))
        batch_sq_sum = (features ** 2).sum(dim=(0, 2, 3))
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum += batch_sum
            channel_sq_sum += batch_sq_sum
        pixel_count += features.shape[0] * features.shape[2] * features.shape[3]

    if pixel_count == 0:
        raise ValueError('no image data found in %r' % (data_folder,))

    mean_value = channel_sum / pixel_count
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative values
    # produced by floating-point rounding.
    variance = channel_sq_sum / pixel_count - mean_value ** 2
    std_value = variance.clamp(min=0).sqrt()

    return (mean_value.float(), std_value.float())

def load_dataset(data_folder, batch_size,
                 mean_value=(0.6007, 0.5609, 0.6516),
                 std_value=(0.0017, 0.0018, 0.0015),
                 test_split=0.2, val_split=0.2):
    """Load an image folder and return train / validation / test DataLoaders.

    The images are converted to tensors and normalized per channel, then the
    dataset indices are shuffled with ``np.random.shuffle`` and split into
    three disjoint subsets, each served by a ``SubsetRandomSampler``.

    Args:
        data_folder: root directory passed to ``dset.ImageFolder``.
        batch_size: batch size used for all three loaders.
        mean_value: per-channel mean for ``transforms.Normalize``. Pass
            ``None`` to compute it from the data with ``count_mean_std``.
        std_value: per-channel std for ``transforms.Normalize``. Pass
            ``None`` to compute it from the data with ``count_mean_std``.
        test_split: fraction of the whole dataset reserved for testing.
        val_split: fraction of the remaining (non-test) samples reserved
            for validation.

    Returns:
        A ``(train_loader, val_loader, test_loader)`` tuple.

    Raises:
        ValueError: if a split fraction is outside ``[0, 1)``.
    """
    if not (0 <= test_split < 1 and 0 <= val_split < 1):
        raise ValueError('test_split and val_split must be in [0, 1)')

    # NOTE(review): the default std values look implausibly small for images
    # scaled by ToTensor; they may be worth recomputing -- verify.
    if mean_value is None or std_value is None:
        # Only fill in the statistic(s) the caller did not supply.
        computed_mean, computed_std = count_mean_std(data_folder)
        if mean_value is None:
            mean_value = computed_mean
        if std_value is None:
            std_value = computed_std

    data = dset.ImageFolder(
        root=data_folder,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=mean_value, std=std_value)
        ])
    )

    total = len(data.imgs)
    test_split_ind = int(np.floor(test_split * total))
    val_split_ind = test_split_ind + int(np.floor(val_split * (total - test_split_ind)))
    indices = list(range(total))
    np.random.shuffle(indices)

    # Layout of the shuffled indices: [test | validation | train].
    test_indices = indices[:test_split_ind]
    val_indices = indices[test_split_ind:val_split_ind]
    train_indices = indices[val_split_ind:]

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    test_sampler = SubsetRandomSampler(test_indices)

    train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, sampler=val_sampler)
    test_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, sampler=test_sampler)

    return train_loader, val_loader, test_loader

# Build the train / validation / test loaders from ./data/ using batches of 64.
batch_size = 64
train_loader, val_loader, test_loader = load_dataset(
    data_folder='./data/',
    batch_size=batch_size,
)

In [0]:
class Flattener(nn.Module):
    """Helper module that flattens each sample of a batch into a vector.

    The first dimension is kept as is and all remaining dimensions are
    merged into a single one.
    """
    def forward(self, x):
        # Keep the leading dimension; let view() infer the merged size.
        leading_dim, *_rest = x.shape
        flat_shape = (leading_dim, -1)
        return x.view(flat_shape)

In [0]:
# Number of values in one flattened input image (3 x 616 x 820).
im_resolution = 3 * 616 * 820

# Fully connected network: flatten -> 50 hidden units (ReLU) -> 5 outputs.
nn_model = nn.Sequential(
    Flattener(),
    nn.Linear(in_features=im_resolution, out_features=50),
    nn.ReLU(inplace=True),
    nn.Linear(in_features=50, out_features=5),
)
nn_model.type(torch.FloatTensor)

# The training objective is the cross-entropy between the ground-truth labels
# and the network predictions, minimized with the Adam optimizer.
loss = nn.CrossEntropyLoss().type(torch.FloatTensor)
optimizer = optim.Adam(
    nn_model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=1e-3,
    amsgrad=False,
)

In [0]:
def train_model(model, train_loader, val_loader, loss, optimizer, num_epochs,
                scheduler=None):
    """Train ``model`` for ``num_epochs`` epochs and track its progress.

    After every epoch the average training loss, the training accuracy and
    the validation accuracy (via ``compute_accuracy``) are recorded and
    printed, and the learning-rate scheduler is stepped once.

    Args:
        model: the network to train.
        train_loader: iterable of ``(x, y)`` training batches.
        val_loader: iterable of ``(x, y)`` validation batches.
        loss: callable returning the loss tensor for ``(prediction, y)``.
        optimizer: optimizer updating the parameters of ``model``.
        num_epochs: number of passes over ``train_loader``.
        scheduler: learning-rate scheduler stepped once per epoch. When
            ``None``, a constant-rate ``StepLR`` (``gamma=1.0``) is created
            for the ``optimizer`` that was passed in.

    Returns:
        ``(loss_history, train_history, val_history)``: three lists of
        floats with one entry per epoch.

    Raises:
        ValueError: if ``train_loader`` yields no samples during an epoch.
    """
    if scheduler is None:
        # Built per call and bound to the optimizer argument; a default
        # evaluated at definition time would be shared between calls and
        # tied to whatever global optimizer existed at that moment.
        scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

    loss_history = []
    train_history = []
    val_history = []
    for epoch in range(num_epochs):
        model.train() # Enter train mode
        print('current epoch is ', epoch)
        loss_accum = 0.0
        correct_samples = 0
        total_samples = 0
        num_batches = 0
        for i_step, (x, y) in enumerate(train_loader):
            print('current step is ', i_step)
            prediction = model(x)
            loss_value = loss(prediction, y)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            # The index of the largest output is the predicted class label.
            indices = torch.argmax(prediction, 1)
            correct_samples += int(torch.sum(indices == y).item())
            total_samples += y.shape[0]

            # .item() stores a plain float, so the autograd graph of each
            # batch is not kept alive for the rest of the epoch.
            loss_accum += loss_value.item()
            num_batches += 1

        if num_batches == 0 or total_samples == 0:
            raise ValueError('train_loader yielded no samples')

        ave_loss = loss_accum / num_batches
        train_accuracy = float(correct_samples) / total_samples
        val_accuracy = compute_accuracy(model, val_loader)

        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)
        scheduler.step()

        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))

    return loss_history, train_history, val_history
        
def compute_accuracy(model, loader):
    """
    Computes accuracy on the dataset wrapped in a loader

    The model is switched to evaluation mode (and left in it), and inference
    runs with gradient tracking disabled.

    Args:
        model: the network to evaluate.
        loader: iterable of ``(x, y)`` batches.

    Returns: accuracy as a float value between 0 and 1
             (0.0 when the loader yields no samples)
    """
    model.eval() # Evaluation mode
    correct_samples = 0
    total_samples = 0
    # No gradients are needed for evaluation; skipping them avoids building
    # an autograd graph for every batch.
    with torch.no_grad():
        for i, (x, y) in enumerate(loader):
            print('current val step is ', i)
            prediction = model(x)
            # The index of the largest output is the predicted class label.
            indices = torch.argmax(prediction, 1)
            correct_samples += int(torch.sum(indices == y).item())
            total_samples += y.shape[0]

    if total_samples == 0:
        # Nothing to evaluate: report zero instead of dividing by zero.
        return 0.0

    val_accuracy = float(correct_samples) / total_samples

    return val_accuracy

# Two consecutive training runs on the same model and optimizer:
# 3 epochs first, then 5 more.
loss_history, train_history, val_history = train_model(
    nn_model, train_loader, val_loader, loss, optimizer, num_epochs=3)
loss_history_, train_history_, val_history_ = train_model(
    nn_model, train_loader, val_loader, loss, optimizer, num_epochs=5)

In [11]:
# Evaluate the trained network on the held-out test loader.
test_accuracy = compute_accuracy(model=nn_model, loader=test_loader)
print('Test accuracy: ', test_accuracy)

current val step is  0
current val step is  1
current val step is  2
current val step is  3
current val step is  4
current val step is  5
current val step is  6
current val step is  7
current val step is  8
current val step is  9
current val step is  10
current val step is  11
current val step is  12
current val step is  13
current val step is  14
current val step is  15
current val step is  16
current val step is  17
current val step is  18
current val step is  19
current val step is  20
current val step is  21
current val step is  22
current val step is  23
Test accuracy:  0.23733333333333334


In [0]:
# Join the per-epoch histories of the two training runs into single lists.
# NOTE(review): this rebinds `loss`, which earlier held the CrossEntropyLoss
# criterion; re-running training after this cell would pass a list instead.
loss = [*loss_history, *loss_history_]
train_ac = [*train_history, *train_history_]
val_ac = [*val_history, *val_history_]

In [15]:
# Pair the per-epoch training and validation accuracies. This must be
# defined here so the cell does not depend on leftover notebook state.
zipped = list(zip(train_ac, val_ac))
for i in zipped:
    print('Training:', round(i[0], 2), 'Validation:', round(i[1], 2))

Training: 0.23 Validation: 0.26
Training: 0.27 Validation: 0.22
Training: 0.3 Validation: 0.24
Training: 0.33 Validation: 0.27
Training: 0.35 Validation: 0.25
Training: 0.35 Validation: 0.27
Training: 0.38 Validation: 0.22
Training: 0.43 Validation: 0.24


### Conclusion

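Although a fully connected network can, in principle, learn new features and non-linear dependencies, this model does not achieve a good result: training accuracy rises from 0.23 to 0.43 over the eight epochs, while validation accuracy stays between 0.22 and 0.27 and test accuracy is about 0.24, which is close to random guessing for five classes. The main reason is the absence of a sufficient number of hidden layers, which oversimplifies the way the images are classified and leads to overfitting.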