### Importer les librairies de machine learning

In [85]:
import numpy as np
import pandas as pd

import poutyne as pt
from poutyne import Model
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

from sklearn.model_selection import train_test_split

### Ajuster les hyperparamètres

In [86]:
# --- Reproducibility ---
# Seed the global RNGs so that a "Restart & Run All" reproduces the same
# splits, weight initialisation and batch order. scikit-learn's
# train_test_split draws from NumPy's global RNG when no random_state is
# given, and torch.manual_seed seeds PyTorch on all devices.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# --- Data split ---
test_ratio = 0.2   # fraction of all rows held out for testing
valid_ratio = 0.2  # fraction of the remaining (non-test) rows used for validation

# --- Network ---
hidden_size = 512  # width of the first hidden layers (later layers are narrower)
output_size = 8    # number of output units of the final linear layer
dropout = 0.0      # dropout probability applied before each hidden layer

# --- Optimisation ---
n_epochs = 50
batch_size = 32
learning_rate = 0.1
min_lr = 0.00001   # lower bound passed to the learning-rate scheduler
momentum = 0.9

# --- Learning-rate scheduler (reduce on plateau) ---
gamma = 0.1        # multiplicative factor applied to the learning rate
patience = 1       # epochs without improvement tolerated before reducing

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Charger le dataframe traité, le séparer en datasets et créer les DataLoaders

In [87]:
# Load the preprocessed table from data/dataframe.csv as a pandas dataframe
df = pd.read_csv('data/dataframe.csv')

# Number of input features: every column except the ID and the label
input_size = len(df.columns) - 2

# Convert the whole table into a single tensor (one row per sample)
dataset = torch.tensor(df.values)

# Hold out the test rows first
train_data, test_data = train_test_split(dataset, test_size=test_ratio)

# Then carve the validation rows out of what is left; valid_ratio is therefore
# relative to the non-test rows, not to the full dataset
train_data, val_data = train_test_split(train_data, test_size=valid_ratio)


def _split_features_labels(rows):
    """Split a 2-D tensor of rows into (features, labels).

    The first column is dropped, the last column is returned as integer
    class labels, and everything in between is returned as float features.
    """
    return rows[:, 1:-1].float(), rows[:, -1].long()


X_train, y_train = _split_features_labels(train_data)
X_val, y_val = _split_features_labels(val_data)
X_test, y_test = _split_features_labels(test_data)

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

# Only the training batches are shuffled: shuffling does not change the
# validation/test metrics and only makes their batch order non-deterministic.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Créer l'algorithme FC

In [88]:
class FCNetwork(nn.Module):
    """Fully connected classifier with five hidden stages and a linear head.

    Each hidden stage is Dropout -> Linear -> BatchNorm1d -> ReLU. For a base
    width ``h`` the hidden widths are ``h, h, h // 2, h // 4, h // 8``.
    The forward pass returns log-probabilities (``log_softmax`` over dim 1),
    so the matching criterion is ``nn.NLLLoss``; ``nn.CrossEntropyLoss``
    would apply ``log_softmax`` a second time.

    Args:
        input_size: number of input features per sample.
        output_size: number of output classes.
        hidden_dim: base hidden width ``h``. When ``None`` (default) the
            notebook-level ``hidden_size`` is used.
        dropout_p: dropout probability of every Dropout layer. When ``None``
            (default) the notebook-level ``dropout`` is used.
    """

    def __init__(self, input_size, output_size, hidden_dim=None, dropout_p=None):
        super().__init__()

        # Fall back to the notebook-level hyperparameters so that the
        # two-argument call FCNetwork(input_size, output_size) keeps working.
        if hidden_dim is None:
            hidden_dim = hidden_size
        if dropout_p is None:
            dropout_p = dropout

        # Attribute names are kept stable so state_dict keys do not change.
        self.dropout1 = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(input_size, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)

        hidden_dim_2 = hidden_dim
        self.dropout2 = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim_2)
        self.bn2 = nn.BatchNorm1d(hidden_dim_2)

        hidden_dim_3 = hidden_dim // 2
        self.dropout3 = nn.Dropout(dropout_p)
        self.fc3 = nn.Linear(hidden_dim_2, hidden_dim_3)
        self.bn3 = nn.BatchNorm1d(hidden_dim_3)

        hidden_dim_4 = hidden_dim // 4
        self.dropout4 = nn.Dropout(dropout_p)
        self.fc4 = nn.Linear(hidden_dim_3, hidden_dim_4)
        self.bn4 = nn.BatchNorm1d(hidden_dim_4)

        hidden_dim_5 = hidden_dim // 8
        self.dropout5 = nn.Dropout(dropout_p)
        self.fc5 = nn.Linear(hidden_dim_4, hidden_dim_5)
        self.bn5 = nn.BatchNorm1d(hidden_dim_5)

        # Classification head: no dropout, batch norm or ReLU after it.
        self.fc6 = nn.Linear(hidden_dim_5, output_size)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of feature rows."""
        x = self.dropout1(x)
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout2(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout3(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout4(x)
        x = F.relu(self.bn4(self.fc4(x)))
        x = self.dropout5(x)
        x = F.relu(self.bn5(self.fc5(x)))
        x = self.fc6(x)
        # Normalise over the class dimension
        y = F.log_softmax(x, dim=1)
        return y

### Créer le modèle Poutyne

In [89]:
# Build the network from the feature count and the configured number of classes
network = FCNetwork(input_size, output_size)

# FCNetwork.forward already returns log-probabilities (log_softmax), so the
# matching criterion is NLLLoss. CrossEntropyLoss would apply log_softmax a
# second time; the loss value is the same, the extra pass is just redundant.
criterion = nn.NLLLoss()

# SGD with momentum over all network parameters
optimizer = torch.optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

# Multiply the learning rate by `gamma` when validation accuracy stops improving
# for more than `patience` epochs, never going below `min_lr`
scheduler = pt.ReduceLROnPlateau(monitor="val_acc", mode='max', patience=patience, factor=gamma, min_lr=min_lr, verbose=True)

# Poutyne wrapper handling the training/evaluation loops on `device`,
# reporting accuracy as a batch metric
model = Model(network, optimizer, criterion, device=device, batch_metrics=["accuracy"])

### Entraîner le modèle sur les données d'entraînement

In [90]:
# Train on the training loader, validating on the validation loader after each
# epoch; the scheduler callback adjusts the learning rate during training.
history = model.fit_generator(
    train_loader,
    val_loader,
    epochs=n_epochs,
    callbacks=[scheduler],
)

# Release unused cached GPU memory held by PyTorch's CUDA allocator
torch.cuda.empty_cache()

[35mEpoch: [36m 1/50 [35mTrain steps: [36m1500 [35mVal steps: [36m375 [32m7.55s [35mloss:[94m 1.529484[35m acc:[94m 44.647248[35m val_loss:[94m 1.363269[35m val_acc:[94m 50.012501[0m
[35mEpoch: [36m 2/50 [35mTrain steps: [36m1500 [35mVal steps: [36m375 [32m6.78s [35mloss:[94m 1.331536[35m acc:[94m 51.793974[35m val_loss:[94m 1.189443[35m val_acc:[94m 56.013001[0m
[35mEpoch: [36m 3/50 [35mTrain steps: [36m1500 [35mVal steps: [36m375 [32m6.87s [35mloss:[94m 1.211239[35m acc:[94m 55.763220[35m val_loss:[94m 1.152754[35m val_acc:[94m 57.896491[0m
[35mEpoch: [36m 4/50 [35mTrain steps: [36m1500 [35mVal steps: [36m375 [32m6.83s [35mloss:[94m 1.137223[35m acc:[94m 58.719840[35m val_loss:[94m 1.059133[35m val_acc:[94m 60.580048[0m
[35mEpoch: [36m 5/50 [35mTrain steps: [36m1500 [35mVal steps: [36m375 [32m6.89s [35mloss:[94m 1.080654[35m acc:[94m 60.609660[35m val_loss:[94m 1.038719[35m val_acc:[94m 61.221768[0m
[35m

### Évaluer le modèle sur les données de test

In [91]:
# Evaluate the trained model on the held-out test loader; the result unpacks
# into the test loss followed by the accuracy batch metric.
test_metrics = model.evaluate_generator(test_loader)
loss, acc = test_metrics

[35mTest steps: [36m469 [32m0.86s [35mtest_loss:[94m 0.677585[35m test_acc:[94m 79.191946[0m                                                 
