# Feed Forward Neural Network on Fashion MNIST
---
Don't forget to use **https://pytorch.org/docs/stable/**

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

---

## Prepare Fashion MNIST dataset
We want to preprocess training data, specifically to have flatten shape `(28, 28) -> 784` in `torch.Tensor` format.

In [2]:
import torch
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor, Compose
from torch.utils.data import DataLoader

In [3]:
class ReshapeTransform:
    def __init__(self, new_size=(-1,)):
        self.new_size = new_size

    def __call__(self, sample):
        return torch.reshape(sample, self.new_size)

In [4]:
transformations = Compose([ToTensor(), ReshapeTransform()])

In [5]:
train_dataset = FashionMNIST('./dataset_fashion_mnist/', download=True, train=True, 
                             transform=transformations, 
                             target_transform=None)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

valid_dataset = FashionMNIST('./dataset_fashion_mnist/', download=True, train=False, 
                             transform=transformations, 
                             target_transform=None)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=64, shuffle=False)

In [6]:
train_dataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0000, 0.0000,
         0.0510, 0.2863, 0.0000, 0.0000, 0.0039, 0.0157, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0039, 0.0

In [7]:
next(iter(train_loader))

[tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0039, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]),
 tensor([8, 6, 4, 0, 4, 6, 1, 5, 6, 8, 9, 7, 6, 1, 3, 7, 4, 0, 5, 2, 2, 1, 3, 1,
         6, 3, 1, 7, 0, 9, 3, 6, 2, 7, 3, 8, 2, 1, 6, 9, 0, 7, 1, 8, 7, 6, 8, 9,
         7, 8, 4, 8, 8, 7, 0, 9, 8, 7, 4, 3, 1, 2, 0, 7])]

---

## Define neural network
In case we use `torch.nn` modules, we don't need to register tensor with `torch.nn.Parameter`.   

**Important:** Don't forget to setup `.eval()` or `.train()` modes for model to enforce proper behaviour of certain layers as `torch.nn.Dropout` or `torch.nn.BatchNorm1d`.

In [8]:
from torch.nn import Module
from torch.nn import ReLU, Tanh, Dropout, Softmax, Linear, BatchNorm1d
from torch.nn import MSELoss, CrossEntropyLoss
from torch.optim import Adam, SGD
from torch.nn.init import xavier_uniform_, normal_

In [10]:
class FeedForwardNeuralNet(torch.nn.Module):
    def __init__(self):
        super(FeedForwardNeuralNet, self).__init__()
        
        self.layer_1 = Linear(784, 10)
        ##########################
        # TODO: Add extra layer. #
        ##########################
        
        ##########################################################
        # TODO: Prepare batch norlmalization and dropout module. #
        ##########################################################
        
    def forward(self, input_batch):
        prediction = self.layer_1(input_batch)
        ###################################################
        # TODO: Stack activation -> bn -> dropout layers. #
        ###################################################
        
        return torch.softmax(prediction, dim=1)
        ############################################################################################
        # TODO: Numeric optimization                                                               #
        #       Switch torch.softmax -> torch.log_softmax during training. Softmax leave for eval. #                               #
        #       Use torch.nn.NLLLoss as loss (https://pytorch.org/docs/stable/nn.html#nllloss).    #
        #       Why is it cool?                                                                    #
        ############################################################################################

In [11]:
feed_forward_neural_net = FeedForwardNeuralNet()

In [12]:
feed_forward_neural_net

FeedForwardNeuralNet(
  (layer_1): Linear(in_features=784, out_features=10, bias=True)
)

In [13]:
feed_forward_neural_net.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[-0.0081,  0.0105, -0.0290,  ..., -0.0237,  0.0292, -0.0175],
                      [ 0.0160, -0.0325,  0.0044,  ..., -0.0258,  0.0048, -0.0236],
                      [-0.0237, -0.0255,  0.0117,  ..., -0.0131, -0.0323,  0.0272],
                      ...,
                      [ 0.0313, -0.0009, -0.0300,  ..., -0.0131,  0.0142,  0.0161],
                      [ 0.0317, -0.0242, -0.0018,  ...,  0.0006, -0.0300,  0.0206],
                      [-0.0352,  0.0151,  0.0114,  ..., -0.0308,  0.0076, -0.0180]])),
             ('layer_1.bias',
              tensor([-0.0123,  0.0281,  0.0023,  0.0261, -0.0079, -0.0201,  0.0105, -0.0291,
                       0.0155,  0.0296]))])

In [14]:
images, labels = next(iter(valid_loader))

In [15]:
feed_forward_neural_net.eval()
predictions = feed_forward_neural_net(images)
feed_forward_neural_net.train()
predictions[:4]

tensor([[0.0839, 0.0843, 0.0906, 0.0855, 0.1039, 0.0904, 0.1080, 0.1026, 0.1421,
         0.1087],
        [0.0748, 0.0436, 0.0613, 0.0650, 0.1724, 0.0734, 0.1426, 0.1141, 0.1494,
         0.1035],
        [0.0738, 0.0909, 0.0878, 0.0776, 0.1408, 0.0923, 0.1037, 0.0935, 0.1419,
         0.0977],
        [0.0803, 0.0889, 0.1074, 0.0698, 0.1224, 0.0830, 0.0904, 0.0911, 0.1420,
         0.1246]], grad_fn=<SliceBackward>)

---

## Define optimizers and loss function

In [17]:
#######################################
# TODO: Try NLLLoss with log_softmax. #
#######################################
loss_fce = CrossEntropyLoss()
loss_fce

CrossEntropyLoss()

In [18]:
loss_fce(predictions, labels)

tensor(2.2973, grad_fn=<NllLossBackward>)

In [19]:
###############################
# TODO: Adjust learning rate. #
###############################
optimizer = SGD(feed_forward_neural_net.parameters(), lr=0.5)

###################################
# TODO: Switch optimizer to Adam. #
###################################
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.5
    momentum: 0
    nesterov: False
    weight_decay: 0
)

---

## Training of neural net

In [20]:
import numpy as np
import matplotlib.pyplot as plt

In [21]:
def get_valid_acc_and_loss(model, loss_fce, valid_loader):
    accuracy = 0
    loss = 0
    was_training = model.training
    
    model.eval()
    for images, labels in valid_loader:
        predictions = model(images)
        accuracy += (predictions.argmax(dim=1) == labels).type(torch.FloatTensor).mean().item() 
        loss += loss_fce(predictions, labels).item()
    model.train(mode=was_training)
    return accuracy / len(valid_loader) * 100, loss / len(valid_loader)

In [22]:
get_valid_acc_and_loss(feed_forward_neural_net, loss_fce, valid_loader)

(9.86265923566879, 2.3006389976307084)

In [23]:
from collections import deque

# Initial params setup.
epochs = 2
report_period = 100
batch_iteration = 0

# Storing of some data.
train_leak_loss = deque(maxlen=report_period)
train_loss_history = []
valid_loss_history = []
valid_acc_history = []

In [26]:
for epoch in range(epochs):
    # Setup net to train mode and go through one epoch.
    feed_forward_neural_net.train()
    for images, labels in train_loader:
        batch_iteration += 1
        
        ##################
        # Training Phase #
        ##################
        optimizer.zero_grad()
        predictions = feed_forward_neural_net.forward(images)
        loss = loss_fce(predictions, labels)
        loss.backward()
        optimizer.step()
        
        
        ####################
        # Validation Phase #
        ####################
        train_leak_loss.append(loss.item())
        if batch_iteration % report_period == 0:
            feed_forward_neural_net.eval()
            
            # We don't want to collect info for gradients from here.
            with torch.no_grad():
                valid_accuracy, valid_loss = get_valid_acc_and_loss(feed_forward_neural_net, loss_fce, valid_loader)
                
            print(f"Epoch: {epoch+1}/{epochs}.. ",
                  f"Train Loss: {round(np.mean(train_leak_loss), 2)}.. ",
                  f"Valid Loss: {round(valid_loss, 2)}.. ",
                  f"Valid Acc: {round(valid_accuracy, 2)}%")
            
            train_loss_history.append(np.mean(train_leak_loss))
            valid_loss_history.append(valid_loss)
            valid_acc_history.append(valid_accuracy)
                   
            feed_forward_neural_net.train()

KeyboardInterrupt: 

In [None]:
fig = plt.figure(figsize=(10, 10))
ax = plt.gca()
ax.set_xlabel('Iteration')
ax.set_ylabel('Cross Entropy')
plt.plot(train_loss_history, label='Train loss')
plt.plot(valid_loss_history, label='Valid loss')
plt.legend(frameon=False)

In [None]:
fig = plt.figure(figsize=(10, 10))
plt.plot(valid_acc_history, label='Valid acc')
ax = plt.gca()
ax.set_xlabel('Iteration')
ax.set_ylabel('Acc(%)')
plt.legend(frameon=False)

---

## Results evaluation

In [None]:
feed_forward_neural_net.eval()

### View single images and predictions

In [None]:
from image_processing_workshop.visual import plot_classify, plot_image

In [None]:
plot_classify(input_tensor=valid_dataset[12][0], 
              model=feed_forward_neural_net, image_shape=[28,28])

### Load reuslts to pandas df

In [None]:
from image_processing_workshop.eval import get_results_df
from image_processing_workshop.visual import plot_df_examples

In [None]:
df = get_results_df(feed_forward_neural_net, valid_loader)
df.head(10)

In [None]:
fig = plt.figure(figsize=(10, 10))
ax = plt.gca()
ax.set_xlabel('Prediction Score')
df[df.label_class_name=='Dress'].label_class_score.hist(ax=ax)

In [None]:
plot_df_examples(df.iloc[:25], image_shape=[28, 28])

### Precision

In [None]:
from image_processing_workshop.eval import get_precision

In [None]:
get_precision(df, 'Dress')

### Recall

In [None]:
from image_processing_workshop.eval import get_recall

In [None]:
get_recall(df, 'Dress')

### Overall Recall and Precision

In [None]:
from image_processing_workshop.eval import get_rec_prec

In [None]:
get_rec_prec(df)

### Accuracy

In [None]:
from image_processing_workshop.eval import get_accuracy

In [None]:
get_accuracy(df)

### False Positives

In [None]:
from image_processing_workshop.eval import get_false_positives

In [None]:
fp = get_false_positives(df, label_class_name='Shirt')

In [None]:
plot_df_examples(fp, image_shape=[28, 28])

In [None]:
fp = get_false_positives(df, label_class_name='Shirt', predicted_class_name='Pullover')

In [None]:
plot_df_examples(fp, image_shape=[28, 28])

### Confusion Matrix

In [None]:
from image_processing_workshop.visual import plot_coocurance_matrix

In [None]:
plot_coocurance_matrix(df, use_log=False)