# Exercise 1

Your goal is to model, with a neural network, a function of the form
$$ z = f \left( (x - a)^2 + (y - b)^2 \right), $$ 
so you are searching for a mapping $\mathbb{R}^2 \rightarrow \mathbb{R}$. You don't know the constants $a, b$ or the function $f$, but you have a collection of data points $(x,y,z)$. 

1. Start by implementing a simple NN with linear layers. How hard is it to fit the data? 

2. Find a way to encode the data characteristics in the model. Is it still hard to fit the data? You can start by assuming that you know a and b.

3. Study the effect of the sample size. Decrease the amount of data in the training set. What do you observe?

4. What about the case where you don't know a and b?


In [None]:
# The autoreload extension reloads the imported Python files automatically, so edits to them are always picked up.
%load_ext autoreload
%autoreload 2
# `%matplotlib inline` makes Matplotlib plots appear inline in the notebook.
%matplotlib inline

In [None]:
# import all packages
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
import torch
import matplotlib.pyplot as plt
from torchsummary import summary
from poutyne import Model, SKLearnMetrics
from sklearn.metrics import r2_score, median_absolute_error
from utils import metric_flatten, get_poutyne_callbacks, saferm



In [None]:
# Select the first GPU of the machine, or fall back to the CPU when no GPU is available.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device(f"cuda:{cuda_device}")
else:
    device = torch.device("cpu")

## 1. Dataset generation

In [None]:
# Ground-truth generator of the data; pretend it is unknown when building the models.
a = 0.2  # (a, b): centre of the radial pattern, treated as known in part of the exercise
b = 0.8


def func(xx, yy):
    """Return 2 * cos(3 * r2), where r2 is the squared distance from (xx, yy) to (a, b)."""
    squared_radius = (xx - a) ** 2 + (yy - b) ** 2
    return 2 * np.cos(3 * squared_radius)


In [None]:
# Visualise the target function on a regular grid covering [-lim, lim) in both directions.
lim = 1.5
x = np.arange(-lim, lim, 0.03)
y = np.arange(-lim, lim, 0.03)
# Dense 2-D coordinate arrays holding every (x, y) combination of the grid
xx, yy = np.meshgrid(x, y, sparse=False)
z = func(xx,yy)
h = plt.contourf(x, y, z)
plt.colorbar()
# The trailing ';' keeps the notebook from printing the value returned by the last call
plt.axis('scaled');


In [None]:
## dataset creation
def sample_generator_regression(n):
    """Draw n random 2-D points and evaluate the target function on them.

    The points come from np.random.rand and are rescaled to [-1.5, 1.5) in
    both coordinates (alternative left by the original author:
    np.random.laplace(size=[n, 2])).

    Returns a pair of PyTorch tensors (inputs, targets) of shapes [n, 2] and
    [n, 1]; the extra target dimension matches the output shape of the NN.
    """
    points = np.random.rand(n, 2).astype(np.float32)
    points = points * 3 - 1.5
    # Add a trailing axis so that the targets have shape [n, 1]
    targets = func(points[:, 0], points[:, 1])[:, np.newaxis]
    # Conversion to PyTorch tensors
    return torch.tensor(points), torch.tensor(targets)

# Number of training samples
n_train = 1024

# Each call draws a fresh random sample: 1024 training, 64 validation and 128 test points
train_data = sample_generator_regression(n_train)
valid_data = sample_generator_regression(64)
test_data = sample_generator_regression(128)


In [None]:
# Wrap each (inputs, targets) pair of tensors into a torch TensorDataset,
# the form in which the data are later handed to fit_dataset / evaluate_dataset.
train_dataset = TensorDataset(*train_data)
valid_dataset = TensorDataset(*valid_data)
test_dataset = TensorDataset(*test_data)

## 2. Linear layer NN.

In [None]:
# Fully connected network: 3 hidden Linear layers with leaky ReLU, then 1 Linear output layer
class LinearNet(nn.Module):
    """Plain multilayer perceptron mapping input_size features to output_size values."""

    def __init__(self, input_size=2, output_size=1, n_hidden=64):
        super().__init__()
        # The attribute names linear1..linear4 are the keys of the state dict
        self.linear1 = nn.Linear(input_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, n_hidden)
        self.linear4 = nn.Linear(n_hidden, output_size)

    def forward(self, x):
        """Apply the three hidden layers (each followed by leaky ReLU), then the output layer."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = F.leaky_relu(layer(hidden))
        # No activation on the output: the network performs a regression
        return self.linear4(hidden)

network = LinearNet(input_size = 2, output_size = 1, n_hidden=64)
# The package `torchsummary` displays a summary of the model.
# `summary` creates its dummy input on the GPU by default when CUDA is available,
# whereas the freshly created network is still on the CPU; the device is therefore
# given explicitly to avoid a device mismatch on GPU machines.
summary(network, (2,), device="cpu")

Train the model

In [None]:
# Poutyne model wrapping `network`: Adam optimizer, MSE loss, L1 as batch metric,
# and two scikit-learn scores (R2, median absolute error) as epoch metrics.
model = Model(
    network,
    'adam',
    'mse',
    batch_metrics=["l1"],
    epoch_metrics=[
        SKLearnMetrics(metric_flatten(r2_score)),
        SKLearnMetrics(metric_flatten(median_absolute_error)),
    ],
    device=device,
)

# Optimization parameters
optimization_kwargs = {"batch_size": 8, "epochs": 15}

# Train the model; `history` is later read as one record per epoch.
# NOTE(review): no checkpoint/restore callback is passed to fit_dataset, so the
# network presumably keeps its last-epoch weights rather than those of the best
# validation epoch - confirm against the poutyne documentation.
history = model.fit_dataset(train_dataset, valid_dataset=valid_dataset, **optimization_kwargs)

Evaluate the model

In [None]:
# Learning curves: training and validation loss of every epoch recorded in `history`
e = [int(record['epoch']) for record in history]
train_loss = [record['loss'] for record in history]
val_loss = [record['val_loss'] for record in history]

plt.plot(e, train_loss, "-x", label="Training")
plt.plot(e, val_loss, "-x", label="Validation")
plt.title("MSE")
plt.xlabel("Epochs")
# The trailing ';' keeps the notebook from printing the value returned by the last call
plt.legend();

We can compute the score on the test set.


In [None]:
# Evaluate the trained model on the held-out test set; the returned value is shown as the
# cell output (presumably the loss followed by the metrics - confirm against the poutyne docs).
model.evaluate_dataset(test_dataset)

We can also perform predictions on a grid of points and look at the results.

In [None]:
# Compare ground truth and prediction on a regular grid. With lim = 3 the grid is
# wider than the [-1.5, 1.5) range the samples were drawn from, so the outer part
# of the picture shows how the network behaves outside the sampled region.
lim = 3
x = np.arange(-lim, lim, 0.05)
y = np.arange(-lim, lim, 0.05)
xx, yy = np.meshgrid(x, y, sparse=False)
Z1 = func(xx,yy)

# One row per grid point with columns (x, y), cast to float32 like the sampled inputs
X = torch.tensor(np.array([xx.reshape(-1), yy.reshape(-1)]).T.astype(np.float32))
Z2 = model.predict(X).reshape(xx.shape)

# Shared colour scale, taken from the ground truth, for both panels
vmin = np.min(Z1)
vmax = np.max(Z1)
plt.figure(figsize=(10, 5))
plt.subplot(1,2,1)
# Row 0 of the grid corresponds to y = -lim, so the image origin must be at the
# bottom: with the default origin ('upper') imshow draws row 0 at the top and the
# picture is flipped vertically with respect to the axis labels set by `extent`.
plt.imshow(Z1, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Ground truth")

plt.subplot(1,2,2)
plt.imshow(Z2, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Neural network");

## 2. Architecture adapted to the dataset

In [None]:
# Network that first reduces (x, y) to the squared distance to (a, b), then applies 4 Linear layers
class TransformedLinearNet(nn.Module):
    """MLP acting on r = (x - a)^2 + (y - b)^2, with a and b read from the notebook globals."""

    def __init__(self, n_hidden=64):
        super().__init__()
        # Centre of the pattern, copied from the module-level a and b (plain attributes, not trained)
        self.a, self.b = a, b
        # The layers take the single feature r as input
        self.linear1 = nn.Linear(1, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, n_hidden)
        self.linear4 = nn.Linear(n_hidden, 1)

    def forward(self, x):
        """Compute r from the first column and the remaining column(s) of x, then apply the MLP."""
        first, rest = x[:, :1], x[:, 1:]
        r = (first - self.a) ** 2 + (rest - self.b) ** 2
        hidden = r
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = F.leaky_relu(layer(hidden))
        return self.linear4(hidden)

network2 = TransformedLinearNet( n_hidden=256)
# `summary` creates its dummy input on the GPU by default when CUDA is available,
# whereas the freshly created network is still on the CPU; the device is therefore
# given explicitly to avoid a device mismatch on GPU machines.
summary(network2, (2,), device="cpu")

In [None]:
# Poutyne model wrapping `network2`: Adam optimizer, MSE loss, L1 as batch metric,
# and two scikit-learn scores (R2, median absolute error) as epoch metrics.
model2 = Model(
    network2,
    'adam',
    'mse',
    batch_metrics=["l1"],
    epoch_metrics=[
        SKLearnMetrics(metric_flatten(r2_score)),
        SKLearnMetrics(metric_flatten(median_absolute_error)),
    ],
    device=device,
)

# Optimization parameters
optimization_kwargs = {"batch_size": 8, "epochs": 15}

# Train the model.
# NOTE(review): no checkpoint/restore callback is passed to fit_dataset, so the
# network presumably keeps its last-epoch weights rather than those of the best
# validation epoch - confirm against the poutyne documentation.
history = model2.fit_dataset(train_dataset, valid_dataset=valid_dataset, **optimization_kwargs)

In [None]:
# Evaluate the second model on the held-out test set; the returned value is shown as the
# cell output (presumably the loss followed by the metrics - confirm against the poutyne docs).
model2.evaluate_dataset(test_dataset)

In [None]:
# Compare ground truth and prediction of model2 on a regular grid over [-lim, lim).
lim = 1.5
x = np.arange(-lim, lim, 0.02)
y = np.arange(-lim, lim, 0.02)
xx, yy = np.meshgrid(x, y, sparse=False)
Z1 = func(xx,yy)

# One row per grid point with columns (x, y), cast to float32 like the sampled inputs
X = torch.tensor(np.array([xx.reshape(-1), yy.reshape(-1)]).T.astype(np.float32))
Z2 = model2.predict(X).reshape(xx.shape)

# Shared colour scale, taken from the ground truth, for both panels
vmin = np.min(Z1)
vmax = np.max(Z1)
plt.figure(figsize=(10, 5))
plt.subplot(1,2,1)
# Row 0 of the grid corresponds to y = -lim, so the image origin must be at the
# bottom: with the default origin ('upper') imshow draws row 0 at the top and the
# picture is flipped vertically with respect to the axis labels set by `extent`.
plt.imshow(Z1, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Ground truth")

plt.subplot(1,2,2)
plt.imshow(Z2, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Neural network");

## 3. Learning a and b.

In [None]:
# Radial network whose centre (a, b) is learned together with the weights of its 4 Linear layers
class TransformedLinearNet2(nn.Module):
    """MLP applied to r = (x - a)^2 + (y - b)^2, where a and b are trainable.

    Args:
        n_hidden: width of the three hidden Linear layers.
        a_init: initial value of the trainable centre coordinate a.
        b_init: initial value of the trainable centre coordinate b.
    """

    def __init__(self, n_hidden=64, a_init=0.5, b_init=0.5):
        super(TransformedLinearNet2, self).__init__()

        # Stored as nn.Parameter (shape [1]) so that they belong to the module's
        # parameters; float() guarantees a floating-point tensor, which is
        # required for a tensor that needs gradients.
        self.a = nn.Parameter(torch.tensor([float(a_init)]), requires_grad=True)
        self.b = nn.Parameter(torch.tensor([float(b_init)]), requires_grad=True)

        # The layers take the single feature r as input
        self.linear1 = nn.Linear(1, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, n_hidden)
        self.linear4 = nn.Linear(n_hidden, 1)

    def forward(self, x):
        """Return the prediction for a batch x whose two columns are the x and y coordinates."""
        # Squared distance to the current centre; the slices keep a trailing dimension of size 1
        r = (x[:, :1] - self.a) ** 2 + (x[:, 1:] - self.b) ** 2
        x1 = F.leaky_relu(self.linear1(r))
        x2 = F.leaky_relu(self.linear2(x1))
        x3 = F.leaky_relu(self.linear3(x2))
        # No activation on the output: the network performs a regression
        return self.linear4(x3)

network3 = TransformedLinearNet2( n_hidden=256)
# `summary` creates its dummy input on the GPU by default when CUDA is available,
# whereas the freshly created network is still on the CPU; the device is therefore
# given explicitly to avoid a device mismatch on GPU machines.
summary(network3, (2,), device="cpu")


In [None]:
# Poutyne model wrapping `network3`: Adam optimizer, MSE loss, L1 as batch metric,
# and two scikit-learn scores (R2, median absolute error) as epoch metrics.
model3 = Model(
    network3,
    'adam',
    'mse',
    batch_metrics=["l1"],
    epoch_metrics=[
        SKLearnMetrics(metric_flatten(r2_score)),
        SKLearnMetrics(metric_flatten(median_absolute_error)),
    ],
    device=device,
)

# Optimization parameters
optimization_kwargs = {"batch_size": 8, "epochs": 15}

# Train the model, then evaluate it on the test set (the result is the cell output).
# NOTE(review): no checkpoint/restore callback is passed to fit_dataset, so the
# network presumably keeps its last-epoch weights rather than those of the best
# validation epoch - confirm against the poutyne documentation.
history = model3.fit_dataset(train_dataset, valid_dataset=valid_dataset, **optimization_kwargs)
model3.evaluate_dataset(test_dataset)

In [None]:
# Show the centre parameters held by the trained network
# (for comparison, the data were generated with a = 0.2 and b = 0.8).
model3.network.a, model3.network.b

In [None]:

# Compare ground truth and prediction of model3 on a regular grid over [-lim, lim).
lim = 1.5
x = np.arange(-lim, lim, 0.02)
y = np.arange(-lim, lim, 0.02)
xx, yy = np.meshgrid(x, y, sparse=False)
Z1 = func(xx,yy)

# One row per grid point with columns (x, y), cast to float32 like the sampled inputs
X = torch.tensor(np.array([xx.reshape(-1), yy.reshape(-1)]).T.astype(np.float32))
Z2 = model3.predict(X).reshape(xx.shape)

# Shared colour scale, taken from the ground truth, for both panels
vmin = np.min(Z1)
vmax = np.max(Z1)
plt.figure(figsize=(10, 5))
plt.subplot(1,2,1)
# Row 0 of the grid corresponds to y = -lim, so the image origin must be at the
# bottom: with the default origin ('upper') imshow draws row 0 at the top and the
# picture is flipped vertically with respect to the axis labels set by `extent`.
plt.imshow(Z1, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Ground truth")

plt.subplot(1,2,2)
plt.imshow(Z2, vmin=vmin, vmax=vmax, extent=[-lim,lim,-lim,lim], origin='lower')
plt.colorbar()
plt.axis('scaled')
plt.title("Neural network");