In [None]:
!mkdir dataset
!mkdir models
!ls

# Data study and problem study.

In practice, the collection, annotation, and study of the data account for a large share of the work involved in developing a model.

We will start with a very simple dataset.

## MNIST

So basically we describe our task as follows:

### Task

Create a model capable of recognizing handwritten digits.

### Data

[MNIST](http://yann.lecun.com/exdb/mnist/)

Since this dataset has been around for so long and has been studied so thoroughly, we do not have to carry out every one of the steps that are normally necessary:

1. Search for missing or incomplete examples.
2. Look for outliers.
3. Study whether the relevant statistical hypotheses are fulfilled.
4. Transform the examples into a data representation adequate for our model.
5. Create an efficient way to feed the data to the model.



In [None]:
""" All the work is going to be relativeley simple thanks to a well stablish
tool for ML, DL, pytorch
https://pytorch.org/
"""

## Used libraries ###

import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch import nn, optim

## ################ ##

## MNIST DATA SET                                    ##
## torch provides all the tools needed to use MNIST  ##
## torch uses a special data structure called tensor ##


# Preprocessing applied to every image as it is read from the dataset.
transform = transforms.Compose([
    transforms.ToTensor(),  # image -> float tensor with values in [0, 1]
    transforms.Normalize(   # computes (x - mean) / std per channel
        (0.5,),  # value subtracted from the single grey channel
        (0.5,),  # value divided out; together they map [0, 1] onto [-1, 1]
    ),
])

# Training split (train=True); downloaded into dataset/ on first use.
train = datasets.MNIST('dataset/',
                       download=True,
                       train=True,
                       transform=transform)

# Held-out split (train=False), used here for validation.
val = datasets.MNIST('dataset/',
                     download=True,
                     train=False,
                     transform=transform)

## A mechanism to make the loading of the images to memory more efficient. ##
train_data_loader = torch.utils.data.DataLoader(
    train,
    batch_size=256,  # limited by the available memory
    shuffle=True,    # draw a new sample order every epoch; SGD should not
                     # see the batches in the same fixed order each time
)

# Evaluation does not depend on the sample order, so no shuffling here.
val_data_loader = torch.utils.data.DataLoader(
    val,
    batch_size=256,
)

In [None]:
""" To see what is in the dataset we can check some images. """

# Pull a single batch out of the training loader to inspect it.
dataiter = iter(train_data_loader)
# Use the builtin next(): DataLoader iterators no longer expose a .next() method.
x, t = next(dataiter)

print(f"Shape of the tensor object: {x.shape}")
print(f"Shape of the target vector: {t.shape}")

# squeeze() drops the size-1 axes so imshow receives a 2-D array.
plt.imshow(x[0].numpy().squeeze(), cmap='gray')

print(f"Image Related to:{t[0]}")
# The fully connected model consumes each image as one flat vector.
print(f"Flatten {torch.flatten(x, start_dim=1).shape}")

# Model building

## Model design.

- This is an open question.
- For the best results, use popular architectures, i.e. those that are state of the art (SOTA) in related tasks.

### FC

First we are going to use a fully connected (FC) network with several layers and ReLU as the activation function.

In [None]:
""" We define a network with 3 layers each one with N/2 neurons """
""" We use a hot one vector for our t."""

class FCNet(nn.Module):
    """Fully connected classifier: in_shape -> 128 -> 64 -> 10.

    The two hidden layers use ReLU. The 10 output scores go through a softmax,
    so every row of the result is a probability distribution over the classes.

    Args:
        in_shape: number of features per example after flattening.
    """

    def __init__(self, in_shape):
        super().__init__()
        self.fc1 = nn.Linear(in_shape, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return class probabilities of shape (batch, 10).

        Args:
            x: batch of inputs; every axis after the first is flattened.
        """
        x = torch.flatten(x, start_dim=1)
        y = F.relu(self.fc1(x))
        y = F.relu(self.fc2(y))
        # No ReLU on the output layer: clamping the scores at zero before the
        # softmax makes every negative score indistinguishable and blocks its
        # gradient.
        y = self.fc3(y)
        # Normalise over the class axis explicitly; leaving `dim` out relies on
        # a deprecated implicit choice and emits a warning.
        return F.softmax(y, dim=1)

In [None]:
net = FCNet(784)

# Mean squared error between the network output and the one-hot target.
# The reduction has to produce a scalar: loss.backward() and loss.item()
# both fail on the per-element tensor that reduction='none' returns.
mse = nn.MSELoss(reduction='mean')

# Mini-batch SGD procedure
optimizer = optim.SGD(net.parameters(), lr=0.003, momentum=0.9)
start_time = time()
number_of_epochs = 4
for epoch in range(number_of_epochs):
    current_loss = 0.0
    for x, t in train_data_loader:
        optimizer.zero_grad()
        y = net(x)
        # Fix the width to 10: otherwise one_hot infers it from the largest
        # label in the batch, and a batch without a 9 gets the wrong shape.
        t = F.one_hot(t, num_classes=10)
        loss = mse(y, t.float())
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    # Each loss is already a per-batch mean, so average over the batch count.
    print(f"epoch: {epoch} \t loss: {current_loss/len(train_data_loader)}")

end_time = time()
ellapsed_time = end_time - start_time
# range(number_of_epochs) runs exactly number_of_epochs epochs, not one more.
print(f"Total time for {number_of_epochs} epochs: {ellapsed_time}")

In [None]:
# Use the GPU when one is available; fall back to the CPU so the cell still
# runs on a machine without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The network trained in the previous cell keeps training here; re-create it
# with FCNet(784) first to start from fresh weights.
net = net.to(device)

mse = nn.MSELoss(reduction='mean')

optimizer = optim.SGD(net.parameters(), lr=0.003, momentum=0.9)
start_time = time()

number_of_epochs = 64
for epoch in range(number_of_epochs):
    current_loss = 0.0
    for x, t in train_data_loader:
        # Inputs and targets must live on the same device as the model.
        x = x.to(device)
        t = t.to(device)
        optimizer.zero_grad()
        y = net(x)
        # Fix the width to 10: otherwise one_hot infers it from the largest
        # label in the batch, and a batch without a 9 gets the wrong shape.
        t = F.one_hot(t, num_classes=10)
        loss = mse(y, t.float())
        loss.backward()
        optimizer.step()
        current_loss += loss.item()
    # Report the mean per-batch loss rather than a sum that grows with the
    # number of batches.
    print(f"epoch: {epoch} \t loss: {current_loss/len(train_data_loader)}")

end_time = time()
ellapsed_time = end_time - start_time
# range(number_of_epochs) runs exactly number_of_epochs epochs, not one more.
print(f"Total time for {number_of_epochs} epochs: {ellapsed_time}")

# Model Evaluation

Once the loss seems to have reached an acceptable value (there are several points to consider when judging this), we evaluate the model.

## Accuracy

One of the simplest ways to evaluate our model is the accuracy.

In [None]:
# Count the validation images whose most probable class equals the label.
corrects = 0
with torch.no_grad():  # inference only, no gradients needed
    for x, t in val_data_loader:
        x = x.to(device)
        t = t.to(device)
        y = net(x)
        # torch.max over dim 1 returns (values, indices); the index is the class.
        _, predicted = torch.max(y, 1)
        # .item() turns the count into a Python int, so the total prints as a
        # number and the accuracy below is an ordinary float division.
        corrects += (predicted == t).sum().item()

print(f"Total corrects {corrects} of {len(val)}, acc = {corrects/len(val)}")

In [None]:
# Save the model for later use: bring it back to the CPU, then serialise
# the whole module object to disk.
net = net.to(torch.device('cpu'))
torch.save(obj=net, f='fcmodel_trained.mdl')

In [None]:
# You can upload a trained model.
# google.colab is only importable inside a Colab runtime.
from google.colab import files
uploaded = files.upload()  # the imported module is `files`, not `file`

# To reload the model
# NOTE: torch.load unpickles the file, which can execute arbitrary code, so
# only load checkpoints you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True and refuse
# a fully pickled module; pass weights_only=False there. Confirm against the
# installed version.
net2 = torch.load('fcmodel_trained.mdl')
# The model was saved from the CPU; move it to where the batches are sent.
net2 = net2.to(device)

# Evaluate the reloaded model (net2), not the one still in memory.
corrects = 0
with torch.no_grad():  # inference only, no gradients needed
    for x, t in val_data_loader:
        x = x.to(device)
        t = t.to(device)
        y = net2(x)
        _, predicted = torch.max(y, 1)
        # .item() keeps the running total a plain Python int.
        corrects += (predicted == t).sum().item()

print(f"Total corrects {corrects} of {len(val)}, acc = {corrects/len(val)}")

# Readings

- The first great breakthrough for CNNs: [AlexNet](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf).
- A more efficient algorithm to train neural networks: [Adam](https://arxiv.org/abs/1412.6980).
- A breakthrough in CNN architectures: [ResNet](https://arxiv.org/abs/1512.03385).