# Redes Neurais - Projeto Final da Disciplina

## Equipe:
- Isabel Francine Mendes - ifm2@cesar.school
- Paulo César Siécola - pcs3@cesar.school

## Importação das bibliotecas

In [1]:
# http://pytorch.org/
from os.path import exists

import torch

In [2]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import copy
import plotly.express as px

## Criação da rede

In [3]:
class Net(nn.Module):
    """Fully connected classifier with layer sizes 784 -> 256 -> 128 -> 10.

    ``forward`` flattens each sample of the batch, applies ReLU after the
    first two linear layers and returns ``log_softmax`` over dim 1.
    """

    def __init__(self):
        super().__init__()
        # The attribute names end up as keys of state_dict(), and the
        # creation order determines the order of the random initialisation.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the per-class log-probabilities for the batch ``x``."""
        # Keep the batch dimension and collapse everything else.
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)


# Instantiate once so the notebook cell displays the architecture.
modelFC = Net()
modelFC

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)

## Treinamento

### Criando o objeto de treinamento

In [4]:
def train(log_interval, dry_run, model, device, train_loader, optimizer, epoch):
    """Run one training epoch and return ``(train_loss, train_accuracy)``.

    Args:
        log_interval: Print a progress line every ``log_interval`` batches.
        dry_run: When true, stop right after the first logged batch.
        model: Module to optimise; its output is fed to ``F.nll_loss``.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches supporting
            ``len()`` and exposing a ``dataset`` attribute (both are used
            only for the progress message).
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Epoch number, used only in the progress message.

    Returns:
        A tuple ``(train_loss, train_accuracy)``: the mean NLL loss per
        sample and the percentage of correctly classified samples, both
        taken over the batches processed in this call. They are measured
        while the weights are still being updated, so they are a running
        estimate rather than an evaluation of the final weights.
        ``(0.0, 0.0)`` is returned when no sample was processed.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        # data -> X, target -> Y (the labels).
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        batch_size = len(data)
        batch_loss = loss.item()
        # nll_loss averages over the batch; weight it by the batch size so a
        # smaller final batch does not distort the epoch mean.
        loss_sum += batch_loss * batch_size
        correct += output.argmax(dim=1).eq(target).sum().item()
        seen += batch_size

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), batch_loss))
            if dry_run:
                break

    if seen == 0:
        # Empty loader: nothing to average.
        return 0.0, 0.0

    train_loss = loss_sum / seen
    train_accuracy = 100. * correct / seen
    return train_loss, train_accuracy

In [5]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    predictions_list = []
    labels_list = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability

            labels_list.append(target)
            predictions_list.append(pred)

            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        test_accuracy))

    return test_loss, test_accuracy, labels_list, predictions_list

## Avaliação

In [6]:
# End-to-end run: build the data loaders, train for a fixed number of epochs,
# keep a copy of the best model seen so far and save its weights.
use_cuda = torch.cuda.is_available()
torch.manual_seed(1111)

device = torch.device("cuda" if use_cuda else "cpu")
# Only used for tests on the development machine (Apple M1 Max):
# device = torch.device("mps")

# Shuffle the training data on every device, not only when CUDA is available.
# The evaluation loader keeps the DataLoader default (no shuffling), since
# the order of the batches does not affect the aggregated metrics.
train_kwargs = {'batch_size': 64, 'shuffle': True}
test_kwargs = {'batch_size': 1000}
if use_cuda:
    cuda_kwargs = {'num_workers': 10,
                   'pin_memory': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

# NOTE(review): 0.1307 / 0.3081 look like the mean/std usually quoted for
# MNIST; FashionMNIST statistics may differ - confirm before changing, since
# a saved model depends on the normalisation it was trained with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

dataset_train = datasets.FashionMNIST('../data', train=True, download=True,
                                      transform=transform)
dataset_test = datasets.FashionMNIST('../data', train=False, download=True,
                                     transform=transform)

train_loader = torch.utils.data.DataLoader(dataset_train, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset_test, **test_kwargs)

model = Net().to(device)

optimizer = optim.Adadelta(model.parameters(), lr=0.7)

epochs = 14

# Multiply the learning rate by 0.7 after every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

# Start from "worse than anything" so the first finite result is accepted.
best_loss = float('inf')
best_accuracy = 0

# Per-epoch history, plotted further down in the notebook.
test_losses = []
train_losses = []

test_accuracies = []
train_accuracies = []

for epoch in range(1, epochs + 1):
    train_loss, train_accuracy = train(100, False, model, device, train_loader, optimizer, epoch)
    test_loss, test_accuracy, labels_list, predictions_list = test(model, device, test_loader)

    test_losses.append(test_loss)
    train_losses.append(train_loss)
    test_accuracies.append(test_accuracy)
    train_accuracies.append(train_accuracy)

    # A new best must be at least as good on BOTH criteria.
    # NOTE(review): the model is selected on the test split; a separate
    # validation split would give an unbiased final estimate.
    if test_loss <= best_loss and test_accuracy >= best_accuracy:
        best_loss = test_loss
        best_accuracy = test_accuracy
        # Snapshot the weights; `model` keeps training in later epochs.
        best_model = copy.deepcopy(model)
    scheduler.step()

print(f'Salvando o modelo com a menor perda de {best_loss} e a melhor acurácia de {round(best_accuracy, 1)}%')
# File name kept as-is so other cells / consumers that load it keep working.
torch.save(best_model.state_dict(), "mnist_cnn.pt")

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 11405371.34it/s]


Extracting ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 208651.62it/s]


Extracting ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:06<00:00, 685314.63it/s] 


Extracting ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 4532383.92it/s]


Extracting ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw


Test set: Average loss: 0.4109, Accuracy: 8466/10000 (85%)


Test set: Average loss: 0.4078, Accuracy: 8551/10000 (86%)


Test set: Average loss: 0.3860, Accuracy: 8681/10000 (87%)


Test set: Average loss: 0.3256, Accuracy: 8814/10000 (88%)


Test set: Average loss: 0.3395, Accuracy: 8795/10000 (88%)


Test set: Average loss: 0.3291, Accuracy: 8893/10000 (89%)


Test set: Average loss: 0.3170, Accuracy: 8860/10000 (89%)


Test set: Average loss: 0.3139, Accuracy: 8922/10000 (89%)


Test set: Average loss: 0.3153, Accuracy: 8915/10000 (89%)


Test set: Average loss: 0.3137, Accuracy: 8906/10000 (89%)


Test set: Average loss: 0.3137, Accuracy: 8921/10000 (89%)


Test set: Average loss: 0.3139, Accuracy: 8924/10000 (89%)


Test set: Average loss: 0.3132, Accuracy: 8923/10000 (89%)


Test set: Average loss: 0.3142, Accuracy: 8921/10000 (89%)

Salvando o modelo com a menor perda de 0.31320794372558

## Salvando a melhor acurácia com a menor perda

In [7]:
# Report the selected model and write its weights to disk.
best_weights = best_model.state_dict()
print(f'Salvando o modelo com a menor perda de {best_loss} e a melhor acurácia de {round(best_accuracy, 1)}%')
torch.save(best_weights, "mnist_cnn.pt")

Salvando o modelo com a menor perda de 0.3132079437255859 e a melhor acurácia de 89.2%


## Gerando a matriz de confusão e o relatório de classificação

In [8]:
# Re-run the evaluation with the selected model so that the labels and
# predictions used by the following cells come from that model.
best_eval = test(best_model, device, test_loader)
test_loss, test_accuracy, labels_list, predictions_list = best_eval


Test set: Average loss: 0.3132, Accuracy: 8923/10000 (89%)



In [9]:
from itertools import chain

# Turn the per-batch tensors into flat Python lists with one int per sample.
# Each batch is reshaped to 1-D first: the predictions are stored with a
# trailing dimension of size 1 (argmax(..., keepdim=True) in test()), which
# would otherwise leave every prediction wrapped in its own list.
predictions_l = list(chain.from_iterable(
    batch.reshape(-1).tolist() for batch in predictions_list))
labels_l = list(chain.from_iterable(
    batch.reshape(-1).tolist() for batch in labels_list))

- Matriz de confusão:

In [15]:
# pandas is imported here because this cell uses `pd`; without it the
# notebook only works when the cells are executed out of order.
import pandas as pd
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix

# confusion_matrix receives the true labels first and the predictions second.
pd.DataFrame(confusion_matrix(labels_l, predictions_l))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,828,0,16,23,2,0,122,0,9,0
1,3,974,0,18,3,0,2,0,0,0
2,14,1,800,12,105,0,67,0,1,0
3,21,8,9,893,39,0,25,0,5,0
4,1,1,68,30,849,0,51,0,0,0
5,0,0,0,1,0,955,0,21,1,22
6,89,1,72,29,68,0,733,0,8,0
7,0,0,0,0,0,13,0,964,0,23
8,6,0,1,3,7,3,8,5,967,0
9,0,0,0,0,0,6,1,33,0,960


In [11]:
# Text report built from the true labels and the predictions.
report = metrics.classification_report(labels_l, predictions_l)
print("Classification report for CNN :\n%s\n" % report)

Classification report for CNN :
              precision    recall  f1-score   support

           0       0.86      0.83      0.84      1000
           1       0.99      0.97      0.98      1000
           2       0.83      0.80      0.81      1000
           3       0.89      0.89      0.89      1000
           4       0.79      0.85      0.82      1000
           5       0.98      0.95      0.97      1000
           6       0.73      0.73      0.73      1000
           7       0.94      0.96      0.95      1000
           8       0.98      0.97      0.97      1000
           9       0.96      0.96      0.96      1000

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000




## Gráficos de perda e acurácia para treino e teste utilizando o melhor modelo encontrado

In [12]:
import pandas as pd

# One row per epoch; the column names become the trace names in the plots.
loss_columns = {'test_loss': test_losses, 'train_loss': train_losses}
accuracy_columns = {'test_accuracy': test_accuracies, 'train_accuracy': train_accuracies}

df_loss = pd.DataFrame(loss_columns)
df_accuracy = pd.DataFrame(accuracy_columns)

In [13]:
# Line chart with one trace per column of df_loss.
fig = px.line(df_loss, width=800, height=600)

# Markers on every point plus a custom hover text.
fig.update_traces(
    mode="markers+lines",
    hovertemplate='<br>Perda: %{y} <br>Época: %{x}',
)

# Titles and legend configured in a single call.
fig.update_layout(
    title='Perda de treino e teste através das épocas',
    xaxis_title="Época",
    yaxis_title="Perda",
    showlegend=True,
    legend_title_text='Dados',
)

fig.show()

In [14]:
# Line chart with one trace per column of df_accuracy.
fig = px.line(df_accuracy, width=800, height=600)

# Markers on every point plus a custom hover text.
fig.update_traces(
    mode="markers+lines",
    hovertemplate='<br>Acurácia: %{y} <br>Época: %{x}',
)

# Titles and legend configured in a single call.
fig.update_layout(
    title='Acurácia de treino e teste através das épocas',
    xaxis_title="Época",
    yaxis_title="Acurácia",
    showlegend=True,
    legend_title_text='Dados',
)

fig.show()

## Conclusão:
- Com a rede utilizada, pode-se perceber, pelos gráficos de perda e acurácia, que o modelo encontrado na época 10 seria o ideal, pois as perdas de treino e teste estão muito próximas, com uma acurácia próxima da máxima encontrada.
- Com a rede utilizada, após a época 10, pode-se perceber pelo gráfico de perda de teste e de treinamento que houve underfitting, possivelmente por ter sido utilizada uma rede pouco complexa e sem convolução, visto que a acurácia ficou próxima de 90%. Foi difícil fazer ajustes no modelo da rede, no learning rate e no cálculo da perda para melhorar a acurácia com a rede utilizada.