# Training Procedure

In [1]:
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision
import torch
print(torch.__version__)
print(torch.cuda.is_available())

0.4.0
True


In [2]:
!rm -r ./mnist

!wget https://www.dropbox.com/s/5yre1ofqco5titj/mnist.zip?dl=0 -O mnist.zip
!unzip -q mnist.zip

!ls

--2018-07-29 16:02:34--  https://www.dropbox.com/s/5yre1ofqco5titj/mnist.zip?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.1, 2620:100:6021:1::a27d:4101
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/5yre1ofqco5titj/mnist.zip [following]
--2018-07-29 16:02:35--  https://www.dropbox.com/s/raw/5yre1ofqco5titj/mnist.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc6ed5c57c7a251ec2f61eb6ee29.dl.dropboxusercontent.com/cd/0/inline/AMnvIVxk3xVuVA-faUjluTOdLHmgA7aiwk3TgL1BljZlTGuGpBR2a7UCiXn9CTtlRsC-RYdGw81K3lYNpYs6dfhV9acE3abZYlKQjKgLp2ivgbsWX2lRt7C6hHSCad3lcydF85CGDPxUeqJzst7CUGrnVRD4WO8pCfzuaMHuK0xO5a0eXoBaJCoKg1zqAY3pznZBtEcMLcPHA100UZNv3YoA/file [following]
--2018-07-29 16:02:35--  https://uc6ed5c57c7a251ec2f61eb6ee29.dl.dropboxusercontent.com/cd/0/inline/AMnvIVxk3xVuVA-faUjluTOdLHmgA7

## Setting data loader

Creating data loader for reading images from the training and test sets.

In [3]:
import os
import numpy as np
import torch

import PIL

from torch.utils.data import DataLoader
from torch.utils import data
from skimage import io

# Constants.
num_classes = 10 # Number of target classes.
root = 'mnist/' # Base directory of the data set; CustomDataset joins it with the fold name.

# Dataset that lists the image files of one fold directory and serves (image, label) pairs.
class CustomDataset(data.Dataset):
    """Dataset over the image files stored in ``os.path.join(root, fold)``.

    Each sample is returned as ``(img, lab)`` where ``img`` is a float32 tensor
    with a leading channel axis (``1 x H x W``), min-max scaled to ``[0, 1]``,
    and ``lab`` is an ``int`` parsed from the last character of the file name
    (extension excluded).

    Args:
        fold: Name of the sub-directory of the module-level ``root`` to read
            (the notebook uses ``'train'`` and ``'test'``).
        normalization: Stored on the instance; it is not used by this class,
            every image is min-max scaled regardless of its value.

    Raises:
        RuntimeError: If the fold directory contains no files.
    """

    def __init__(self, fold, normalization = 'default'):

        super(CustomDataset, self).__init__()

        # Initializing variables.
        self.fold = fold
        self.normalization = normalization

        # Creating list of paths.
        self.imgs = self.make_dataset()

        # Check for consistency in list.
        if len(self.imgs) == 0:
            raise RuntimeError('Found 0 images, please check the data set')

    def make_dataset(self):
        """Return the paths of all regular files in the fold directory, sorted by name."""

        # Joining input paths.
        img_path = os.path.join(root, self.fold)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        file_names = sorted(f for f in os.listdir(img_path)
                            if os.path.isfile(os.path.join(img_path, f)))

        return [os.path.join(img_path, f) for f in file_names]

    def __getitem__(self, index):
        """Load sample ``index`` and return it as ``(image tensor, int label)``."""

        # Reading items from list.
        img_path = self.imgs[index]

        # Reading images.
        img = io.imread(img_path)

        # The label is the last character of the file name before its extension
        # (e.g. '..._7.png' -> 7). Using the stem instead of a fixed offset from
        # the end of the path keeps this correct for extensions of any length.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        lab = int(stem[-1])

        # Removing unwanted channels. For the case of RGB images.
        if len(img.shape) > 2:
            img = img[:, :, 0] # Leaving only red (index = 0) channel from RGB.

        # Casting images to the appropriate dtypes.
        img = img.astype(np.float32)

        # Min-max normalization to [0, 1]. A constant image has zero range, which
        # would divide by zero and fill the sample with NaNs; map it to all zeros.
        mn = img.min()
        mx = img.max()
        if mx > mn:
            img = (img - mn) / (mx - mn)
        else:
            img = np.zeros_like(img)

        # Adding channel dimension.
        img = np.expand_dims(img, axis=0)

        # Turning to tensors.
        img = torch.from_numpy(img)

        # Returning image and label to iterator.
        return img, lab

    def __len__(self):
        """Number of image files found in the fold directory."""

        return len(self.imgs)

# Data loaders: the training loader reshuffles its samples, the test loader keeps a fixed order.
batch_size = 100
num_workers = 1 # Number of worker subprocesses used by each DataLoader.

train_set = CustomDataset('train')
train_loader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

test_set = CustomDataset('test')
test_loader = DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

# Report how many samples each split contains.
print('Size of training set: {} samples'.format(len(train_set)))
print('Size of test set: {} samples'.format(len(test_set)))


Size of training set: 60000 samples
Size of test set: 10000 samples


## Setting architecture

Creating a simple Multilayer Perceptron (MLP) composed of Fully Connected layers with ReLU activations and Dropout regularization.

Links úteis:

Função view() que dá reshape no tensor para adequá-lo ao tamanho da entrada das camadas. https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view

Pacote torch.nn que contém as implementações das camadas. https://pytorch.org/docs/stable/nn.html

In [11]:
import torch.nn as nn

# Customized Network.
class CustomNetwork(nn.Module):
    """Multilayer perceptron: three fully connected layers with ReLU and Dropout in between.

    The input batch is flattened to ``(BATCH_SIZE, in_channels * 28 * 28)`` and
    mapped 784 -> 256 -> 128 -> ``num_classes`` (for ``in_channels = 1``).

    Args:
        in_channels: Number of channels of the 28 x 28 input images.
        num_classes: Size of the output layer.
    """

    def __init__(self, in_channels, num_classes=10):

        super(CustomNetwork, self).__init__()

        # Output widths of the three fully connected layers.
        self.n_outputs_1 = 256
        self.n_outputs_2 = 128
        self.n_outputs_3 = num_classes

        # Number of features once an image is flattened.
        flat_features = in_channels * 28 * 28

        # First hidden block: Linear -> ReLU -> Dropout.
        self.layer1 = nn.Linear(flat_features, self.n_outputs_1)
        self.ativ1 = nn.ReLU(inplace=True)
        self.drop1 = nn.Dropout()

        # Second hidden block: Linear -> ReLU -> Dropout.
        self.layer2 = nn.Linear(self.n_outputs_1, self.n_outputs_2)
        self.ativ2 = nn.ReLU(inplace=True)
        self.drop2 = nn.Dropout()

        # Output layer producing one score per class.
        self.layer3 = nn.Linear(self.n_outputs_2, self.n_outputs_3)

        self.initialize_weights()

    def initialize_weights(self):
        """Re-initialize every sub-module according to its layer type."""

        for module in self.modules():

            if isinstance(module, nn.Linear):
                # Small zero-mean Gaussian weights, zero bias.
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

            elif isinstance(module, nn.BatchNorm2d):
                # Unit scale, zero shift.
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

            elif isinstance(module, nn.Conv2d):
                # Kaiming-normal weights for ReLU, zero bias when a bias exists.
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the output of the last layer for the batch ``x``.

        ``x`` is flattened to ``(BATCH_SIZE, -1)`` before the first layer, so in
        the notebook a ``(100, 1, 28, 28)`` batch yields a ``(100, 10)`` output.
        """

        # Collapse every dimension after the batch dimension into one feature vector.
        flat = x.view(x.size(0), -1)

        hidden1 = self.drop1(self.ativ1(self.layer1(flat)))
        hidden2 = self.drop2(self.ativ2(self.layer2(hidden1)))

        return self.layer3(hidden2)
        
# Instancing Network.
in_channels = 1 # The input images only contain 1 channel.
num_classes = 10 # MNIST has 10 classes.

# The network is built on the CPU and moved to the GPU only when one is
# available, so this cell also runs on a CPU-only runtime instead of raising.
model = CustomNetwork(in_channels, num_classes)
if torch.cuda.is_available():
    model = model.cuda() # GPU casting.

print(model)

CustomNetwork(
  (layer1): Linear(in_features=784, out_features=256, bias=True)
  (ativ1): ReLU(inplace)
  (drop1): Dropout(p=0.5)
  (layer2): Linear(in_features=256, out_features=128, bias=True)
  (ativ2): ReLU(inplace)
  (drop2): Dropout(p=0.5)
  (layer3): Linear(in_features=128, out_features=10, bias=True)
)


## Setting optimizer

PyTorch offers several optimizers, from traditional SGD to more complex per-parameter adaptive ones (e.g. Adam, Adagrad, RMSprop...). All of them are located in the package `torch.optim`.

In [0]:
import torch.optim as optim

# Hyper-parameters of the optimizer.
lr = 1e-3 # Learning rate.
l2_normalization = 1e-3 # L2 penalty strength, applied through Adam's weight decay.

# Adam over all model parameters.
opt = optim.Adam(
    params=model.parameters(),
    lr=lr,
    betas=(0.5, 0.999),
    weight_decay=l2_normalization,
)

## Setting loss criterion

We use `CrossEntropyLoss`, as this is a classification task.

In [0]:
# Setting a classification loss.
# The training loop calls it as criterion(output, labs), with network outputs of
# size (BATCH_SIZE, N_CLASSES) and a label vector of size (BATCH_SIZE).
# criterion = nn.CrossEntropyLoss() # CPU version.
criterion = nn.CrossEntropyLoss().cuda() # GPU casting.

## Training/Testing

Iterating over epochs and batches.

In [0]:
from matplotlib import pyplot as plt
from torch.autograd import Variable

%matplotlib inline

epochs = 20 # Run network for 20 epochs.

training_metrics = list() # Per-batch training accuracies (%), appended across all epochs.
test_metrics = list() # Per-batch test accuracies (%), appended across all epochs.

# Batches are moved to whichever device the model's parameters live on, so the
# same loop works for a GPU model and for a CPU model.
device = next(model.parameters()).device

# Iterating over epochs.
for ep in range(epochs):

    print('##############################################')
    print('Starting epoch ' + str(ep + 1) + '/' + str(epochs) + '...')

    #####################################################################
    # Training Procedure. ###############################################
    #####################################################################

    print('    Training...')

    # Setting model to training mode (enables Dropout).
    model.train()

    total_correct = 0

    # Iterating over training batches. The loop variable is named 'batch' so it
    # does not shadow the torch.utils 'data' module imported by the data loader cell.
    for it, batch in enumerate(train_loader):

        # Obtaining images and labels for batch.
        inps, labs = batch

        # Moving the batch to the model's device. The inputs do not need
        # gradients: only the model parameters are optimized.
        inps = inps.to(device)
        labs = labs.to(device)

        # Zeroing optimizer.
        opt.zero_grad()

        # Forwarding inputs through DNN.
        output = model(inps)

        # Computing loss according to network prediction for batch and targets.
        # The Cross Entropy loss receives an output of size (BATCH_SIZE, N_CLASSES) and a label vector of size (BATCH_SIZE).
        loss = criterion(output, labs)

        # Backpropagating loss.
        loss.backward() # All backward pass is computed from this line automatically by package torch.autograd.

        # Taking optimization step.
        opt.step()

        # Computing prediction for batch: index of the highest score per sample.
        pred = output.max(1, keepdim=True)[1]

        # Computing accuracy for batch.
        correct = pred.eq(labs.view_as(pred)).sum().item()
        accuracy = (100.0 * float(correct) / inps.size(0))

        # Appending accuracy in list of accuracies for all batches.
        training_metrics.append(accuracy)

        # Updating total counter.
        total_correct += correct

    # Computing accuracy for whole epoch.
    accuracy = 100.0 * float(total_correct) / len(train_set)

    print('        Accuracy for training epoch [' + str(ep + 1) + ' / ' + str(epochs) + ']: ' + str(accuracy))

    #####################################################################
    # Testing Procedure.  ###############################################
    #####################################################################

    print('    Testing...')

    # Setting model to evaluation mode (disables Dropout).
    model.eval()

    total_correct = 0

    # No parameter is updated while testing, so autograd is switched off to
    # avoid building (and keeping in memory) a graph for every test batch.
    with torch.no_grad():

        # Iterating over test batches.
        for it, batch in enumerate(test_loader):

            # Obtaining images and labels for batch.
            inps, labs = batch

            # Moving the batch to the model's device.
            inps = inps.to(device)
            labs = labs.to(device)

            # Forwarding through DNN.
            output = model(inps)

            # Computing prediction for batch: index of the highest score per sample.
            pred = output.max(1, keepdim=True)[1]

            # Computing accuracy for batch.
            correct = pred.eq(labs.view_as(pred)).sum().item()
            accuracy = (100.0 * float(correct) / inps.size(0))

            # Appending accuracy in list of accuracies for all batches.
            test_metrics.append(accuracy)

            # Updating total counter.
            total_correct += correct

    # Computing accuracy for whole epoch.
    accuracy = 100.0 * float(total_correct) / len(test_set)

    print('        Accuracy for test epoch [' + str(ep + 1) + ' / ' + str(epochs) + ']: ' + str(accuracy))

# Transforming list into ndarray for plotting.
training_array = np.asarray(training_metrics, dtype=np.float32)
test_array = np.asarray(test_metrics, dtype=np.float32)

# Plotting per-batch accuracy: training on the left, test on the right, sharing the y axis.
fig, ax = plt.subplots(1, 2, figsize = (16, 8), sharex = False, sharey = True)

training_plt = ax[0].plot(training_array)
ax[0].set_xlabel('Training')
ax[0].set_ylabel('Batch accuracy (%)')
ax[0].set_ylim([0, 100])

test_plt = ax[1].plot(test_array)
ax[1].set_xlabel('Test')
ax[1].set_ylim([0, 100])

plt.show()

##############################################
Starting epoch 1/20...
    Training...
        Accuracy for training epoch [1 / 20]: 82.875
    Testing...
        Accuracy for test epoch [1 / 20]: 92.71
##############################################
Starting epoch 2/20...
    Training...
        Accuracy for training epoch [2 / 20]: 92.25666666666666
    Testing...
        Accuracy for test epoch [2 / 20]: 95.42
##############################################
Starting epoch 3/20...
    Training...
        Accuracy for training epoch [3 / 20]: 93.635
    Testing...
        Accuracy for test epoch [3 / 20]: 95.76
##############################################
Starting epoch 4/20...
    Training...
        Accuracy for training epoch [4 / 20]: 94.325
    Testing...
        Accuracy for test epoch [4 / 20]: 96.54
##############################################
Starting epoch 5/20...
    Training...
        Accuracy for training epoch [5 / 20]: 94.74166666666666
    Testing...
        Accuracy