# Poutyne tutorial

This notebook demonstrates how to use Poutyne on a simple dataset.

In [None]:
# Autoreload re-imports the imported Python files before each cell is executed,
# so that edits made to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Matplotlib inline renders the Matplotlib figures inside the notebook,
# directly below the cell that created them.
%matplotlib inline

In [None]:
# import all packages
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
import torch
import matplotlib.pyplot as plt
from torchsummary import summary
from poutyne import Model, SKLearnMetrics
from sklearn.metrics import r2_score, median_absolute_error
from utils import metric_flatten, get_poutyne_callbacks, saferm



In [None]:
# Select the first GPU of the machine when CUDA is available,
# and fall back to the CPU when no GPU is present.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device("cuda:%d" % cuda_device)
else:
    device = torch.device("cpu")

## 1. Generation of a simple dataset

In [None]:
def func(xx, yy):
    """Evaluate the target function 2 * sin(10 * (xx - 0.7) + 5 * (yy - 0.3)**2).

    The two arguments are combined element-wise with arithmetic operators and
    ``np.sin``, so scalars as well as NumPy arrays can be passed.
    """
    phase = 10 * (xx - 0.7) + 5 * (yy - 0.3) ** 2
    return 2 * np.sin(phase)


In [None]:
# Sample both axes of the unit square with a step of 0.02.
x = np.arange(0, 1, 0.02)
y = x.copy()
# Dense coordinate grid on which the target function is evaluated.
xx, yy = np.meshgrid(x, y, sparse=False)
z = func(xx, yy)
# Filled contour plot of the target function, with its color scale.
h = plt.contourf(x, y, z)
plt.colorbar()
plt.axis('scaled')


In [None]:
## dataset creation
def sample_generator_regression(n, rng=None):
    """Draw ``n`` random 2-D points in [0, 1) and evaluate ``func`` on them.

    Args:
        n: Number of samples to generate.
        rng: Optional ``np.random.Generator`` used to draw the points. When it
            is omitted, NumPy's global random state is used, exactly as before
            this parameter existed.

    Returns:
        A tuple ``(inputs, targets)`` of PyTorch tensors of shapes ``[n, 2]``
        and ``[n, 1]``.
    """
    # Random sampling of the input points, stored as float32.
    if rng is None:
        x = np.random.rand(n, 2).astype(np.float32)
    else:
        x = rng.random((n, 2), dtype=np.float32)
    # Compute the output.
    # We expand the dimension to have the size [n x 1], which will be the output shape of the NN.
    y = np.expand_dims(func(x[:, 0], x[:, 1]), axis=1)
    # Conversion to PyTorch tensors.
    return torch.tensor(x), torch.tensor(y)


# A fixed seed makes the three datasets identical on every fresh run of the notebook.
DATA_SEED = 0
data_rng = np.random.default_rng(DATA_SEED)

train_data = sample_generator_regression(1024, rng=data_rng)
valid_data = sample_generator_regression(64, rng=data_rng)
test_data = sample_generator_regression(128, rng=data_rng)


In [None]:
# Wrap each (inputs, targets) pair into a torch dataset, which is the form
# expected by the dataset-based training and evaluation calls used below.
train_dataset, valid_dataset, test_dataset = (
    TensorDataset(*split) for split in (train_data, valid_data, test_data)
)

## 2. Definition of the model

In [None]:
# Definition of a NN with 4 Linear layers
class LinearNet(nn.Module):
    """Fully connected network made of three leaky-ReLU hidden layers and a linear output layer.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        n_hidden: Width of each of the three hidden layers.
    """

    def __init__(self, input_size = 2, output_size = 1, n_hidden=64):
        super().__init__()
        # Layers are created in order so that the parameter names
        # (linear1 ... linear4) and their initialization order are fixed.
        self.linear1 = nn.Linear(input_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, n_hidden)
        self.linear4 = nn.Linear(n_hidden, output_size)

    def forward(self, x):
        """Apply the three activated hidden layers, then the output layer without activation."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = F.leaky_relu(layer(hidden))
        return self.linear4(hidden)

network = LinearNet(input_size=2, output_size=1, n_hidden=64)

In [None]:
# The package `torchsummary` displays a layer-by-layer summary of the model.
# `summary` builds its dummy input on "cuda" by default; on a GPU machine this
# fails while the network's weights are still on the CPU. We therefore pass the
# device type on which the network's parameters currently live.
summary(network, (2,), device=next(network.parameters()).device.type)

## 3. Train the model

In [None]:
# Define a Poutyne model: the network with the Adam optimizer, the MSE loss,
# L1 as batch metric and two scikit-learn scores (R2 and median absolute error)
# as epoch metrics, all placed on the selected device.
model = Model(
    network,
    'adam',
    'mse',
    batch_metrics=["l1"],
    epoch_metrics=[
        SKLearnMetrics(metric_flatten(score))
        for score in (r2_score, median_absolute_error)
    ],
    device=device,
)

In [None]:
# Name of this run, handed to the helper that prepares the training callbacks.
experiment_name = "test"
# NOTE(review): `get_poutyne_callbacks` comes from the local `utils` module; judging by the
# names it is unpacked into, it returns the callback list together with the summary and
# checkpoint directories of the experiment -- confirm against utils.py.
callbacks, summary_dir, checkpoint_dir = get_poutyne_callbacks(experiment_name)

In [None]:
# Optimization parameters forwarded to `fit_dataset`.
optimization_kwargs = {"batch_size": 8, "epochs": 15}

# Optional: uncomment the lines below to delete the folders for summaries and
# checkpoints and recreate them empty before training.
# saferm(summary_dir)
# saferm(checkpoint_dir)
# summary_dir.mkdir(parents=True, exist_ok=True)
# checkpoint_dir.mkdir(parents=True, exist_ok=True)

# Train the model and keep the per-epoch history.
# NOTE(review): the original note states that the model with the best validation score is
# loaded at the end of training; presumably this is done by the callbacks -- confirm in utils.py.
history = model.fit_dataset(
    train_dataset,
    valid_dataset=valid_dataset,
    callbacks=callbacks,
    **optimization_kwargs,
)

## 4. Evaluate the model

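The function `fit_dataset` returns a list containing one dictionary per epoch, with entries such as the epoch number, the training loss and the validation loss. We can plot the convergence curves directly from it.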

In [None]:
# Per-epoch values extracted from the training history.
e = [int(record['epoch']) for record in history]
train_loss = [record['loss'] for record in history]
val_loss = [record['val_loss'] for record in history]

# Convergence curves of the training and validation losses.
plt.plot(e, train_loss, "-x", label="Training")
plt.plot(e, val_loss, "-x", label="Validation")
plt.title("MSE")
plt.xlabel("Epochs")
plt.legend()

### Evaluate the model on the test set

We can compute the score on the test set.


In [None]:
# Run the trained model over the test dataset. As the last expression of the cell,
# whatever `evaluate_dataset` returns is displayed as the cell output.
model.evaluate_dataset(test_dataset)

We can also perform predictions on a grid of points and look at the results.

In [None]:
# Regular grid over the unit square (step 0.02) and the ground truth on it.
x = np.arange(0, 1, 0.02)
y = x.copy()
xx, yy = np.meshgrid(x, y, sparse=False)
Z1 = func(xx, yy)

# One row per grid point with the columns (x, y), stored as float32.
grid_points = np.stack([xx.ravel(), yy.ravel()], axis=1)
X = torch.tensor(grid_points.astype(np.float32))
# Predict on every grid point and fold the predictions back to the grid shape.
Z2 = model.predict(X).reshape(xx.shape)

In [None]:
# Both panels share the color scale of the ground truth so they can be compared directly.
vmin = np.min(Z1)
vmax = np.max(Z1)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for ax, values, title in zip(axes, (Z1, Z2), ("Ground truth", "Neural network")):
    # Row 0 of the grids corresponds to y = 0 (they come from `np.meshgrid(x, y)`), so the
    # image origin must be at the bottom; with the default origin='upper' the picture is
    # drawn upside down with respect to the y-axis ticks given by `extent`.
    im = ax.imshow(values, vmin=vmin, vmax=vmax, extent=[0, 1, 0, 1], origin='lower')
    fig.colorbar(im, ax=ax)
    ax.axis('scaled')
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_title(title)