# Description: example
## framework: pytorch

In [41]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

# Selection

In [42]:
# settings
source = './mnist.csv'
split_train, split_test, split_validation = 60, 20, 20
label_colname = 'label'

In [43]:
data = pd.read_csv(source, encoding='utf_8', parse_dates=True)

In [44]:
data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
X,y = data.drop([label_colname], axis = 1), data[label_colname]

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = split_train / 100, test_size = split_test / 100, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, train_size=(100 - split_validation) / 100)

# Preprocessing

In [47]:
# TODO

# Transformation

In [48]:
X_train = X_train / 255.
X_test = X_test / 255.

In [49]:
import torch
# converting data (to pytorch tensor)
input_data = torch.FloatTensor(X_train.select_dtypes(include=['int', 'float']).values)
target = torch.FloatTensor(y_train.values).view(-1,1)

# Data mining

In [50]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [51]:
class Network(nn.Module):
    
    def __init__(self, nbIn, nbOut):
        super(Network, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(nbIn, int(nbIn/2)),
            nn.Tanh(),
            nn.Linear(int(nbIn/2), nbOut),
            nn.Softmax(),
        )
        
    def forward(self, x):
        return self.network(x)

In [52]:
nbIn = input_data.shape[1]
nbOut = 1

neuralNetwork = Network(nbIn,nbOut)
neuralNetwork

Network(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=392, bias=True)
    (1): Tanh()
    (2): Linear(in_features=392, out_features=1, bias=True)
    (3): Softmax(dim=None)
  )
)

#### Loss/optimizers catalog

In [53]:
supported_loss_functions = {
    'mse': nn.MSELoss,
    'cross_entropy': nn.CrossEntropyLoss,
}
supported_optimizers = {
    'adam': torch.optim.Adam,
    'sgd': torch.optim.SGD,
}

In [54]:
selected_loss_function = 'mse'
selected_optimizer = 'sgd'
learning_rate = 0.003
nbEpochs = 100
batch_size = 128

In [55]:
criterion = supported_loss_functions[selected_loss_function]()
optimizer = supported_optimizers[selected_optimizer](neuralNetwork.parameters(), lr=learning_rate)

In [None]:
dataset = TensorDataset(input_data, target)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

items = []
# launching iterations
for epoch in range(nbEpochs):
    for id_batch, (batch_X_train, batch_y_train) in enumerate(dataloader):
        optimizer.zero_grad()
        output = neuralNetwork(batch_X_train)
        loss = criterion(output, batch_y_train) # calcul de la fonction cout (moindres carrés)
        loss.backward() # calcul du gradient (sens = droite vers gauche)
        optimizer.step() # modification des parametres du réseau de neurones
        print(id_batch, loss.item())
        items.append(loss.item())

  input = module(input)


0 17.265625
1 21.6796875
2 19.25
3 19.1796875
4 20.21875
5 19.484375
6 20.8828125
7 17.7109375
8 22.7734375
9 22.453125
10 22.8828125
11 19.1328125
12 20.46875
13 20.515625
14 18.171875
15 21.3515625
16 22.1875
17 19.59375
18 22.0390625
19 19.71875
20 18.9921875
21 21.9765625
22 16.2421875
23 21.7890625
24 22.6875
25 19.59375
26 17.203125
27 19.6484375
28 20.78125
29 19.4375
30 19.0078125
31 19.6171875
32 22.046875
33 22.28125
34 22.75
35 17.765625
36 19.3671875
37 20.078125
38 18.6640625
39 20.4921875
40 20.0703125
41 17.3125
42 20.6640625
43 15.7734375
44 23.4765625
45 18.890625
46 19.25
47 20.9296875
48 20.8359375
49 20.625
50 19.5078125
51 21.828125
52 23.8359375
53 22.609375
54 20.3828125
55 22.5703125
56 21.71875
57 20.484375
58 16.8125
59 18.96875
60 19.8515625
61 19.7109375
62 19.2109375
63 19.5234375
64 18.3359375
65 22.9375
66 21.671875
67 18.09375
68 22.8828125
69 17.875
70 21.171875
71 24.703125
72 19.0859375
73 20.625
74 20.3203125
75 21.53125
76 17.5078125
77 20.53125
78 

# Validation

In [None]:
fig, ax = plt.subplots()
x = np.arange(len(items))
ax.plot(x, items)
ax.set(xlabel='number of epochs', ylabel='loss', title='Evolution')
plt.show()

In [None]:
ax = plt.gca()
plt.plot(np.arange(y_train.values[:50].size), y_train.values[:50], '-', label='True data', color='b')
plt.plot(np.arange(output.detach().numpy()[:50].size), output.detach().numpy()[:50], '--', label='Predictions', color='r')
plt.gcf().autofmt_xdate()
plt.show()

# Knowledge

The conclusion