# Une introduction à `PyTorch` et `autograd`

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme() 
from numpy.random import default_rng
rng = default_rng()

# Feedforward Neural Network (perceptron multicouche)

In [None]:
from matplotlib.lines import Line2D
from matplotlib.patches import Circle, Ellipse

def draw_neural_net(layer_sizes, colors, ratio, delta):
    """Build the matplotlib artists of a fully connected network diagram.

    The diagram is laid out in the unit square with a margin ``delta`` on
    every side: layers are spread from left to right and the nodes of each
    layer are centred vertically.

    Parameters
    ----------
    layer_sizes : sequence of int
        Number of nodes in each layer, from input to output.
    colors : sequence
        Fill colour of each layer. It is paired with ``layer_sizes`` by
        ``zip``, so nodes are only drawn for layers that have a colour.
    ratio : float
        Factor applied to the node height (``height = width * ratio``) to
        compensate for non-square axes.
    delta : float
        Margin kept around the drawing.

    Returns
    -------
    list
        One ``Ellipse`` per node, followed by one ``Line2D`` per pair of
        nodes belonging to consecutive layers. Empty if ``layer_sizes`` is
        empty.
    """
    n_layers = len(layer_sizes)
    if n_layers == 0:
        return []

    left, right, bottom, top = delta, 1 - delta, delta, 1 - delta
    # max(..., 1) guards the divisions for all-empty layers and for a single layer.
    v_spacing = (top - bottom) / float(max(max(layer_sizes), 1))
    h_spacing = (right - left) / float(max(n_layers - 1, 1))
    diameter = v_spacing / 4.

    def layer_top(layer_size):
        # Ordinate of the first (highest) node of a vertically centred layer.
        return v_spacing*(layer_size - 1)/2. + (top + bottom)/2.

    patches = []
    # Nodes
    for n, (layer_size, c) in enumerate(zip(layer_sizes, colors)):
        top_n = layer_top(layer_size)
        for m in range(layer_size):
            # facecolor/edgecolor instead of color=...: `color` would override
            # the edge colour and the black outline would never be drawn.
            circle = Ellipse((n*h_spacing + left, top_n - m*v_spacing),
                             width=diameter, height=diameter*ratio,
                             facecolor=c, edgecolor='k', zorder=1)
            patches.append(circle)
    # Edges
    for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        top_a = layer_top(layer_size_a)
        top_b = layer_top(layer_size_b)
        # Each edge starts/ends on the node border, hence the +/- diameter/2.
        x_start = n*h_spacing + left + diameter/2
        x_end = (n + 1)*h_spacing + left - diameter/2
        for m in range(layer_size_a):
            for o in range(layer_size_b):
                line = Line2D([x_start, x_end],
                              [top_a - m*v_spacing, top_b - o*v_spacing],
                              color='grey', lw=1, alpha=0.5, zorder=0)
                patches.append(line)
    return patches


In [None]:
fig, ax = plt.subplots(figsize=(8,4))
# Width/height ratio of the figure in pixels, used to keep the nodes round.
size = fig.get_size_inches()*fig.dpi
ratio = size[0] / size[1]
delta = 0.03
patches = draw_neural_net([3, 8, 16, 1], ['lightgrey', 'C1', 'C1', 'C0'], ratio, delta)
for p in patches: 
    ax.add_artist(p)
ax.annotate(r"Input $\mathbf{R}^3$", xy=(delta, 0.), xycoords="data", 
            va="center", ha="center")
# The two hidden layers drawn above have 8 and 16 units respectively.
ax.annotate(r"Two hidden layers $\mathbf{R}^8$ and $\mathbf{R}^{16}$", xy=(0.5, 0.), xycoords="data", 
            va="center", ha="center")
ax.annotate(r"Output layer $\mathbf{R}$", xy=(1-delta, 0.), xycoords="data", 
            va="center", ha="center")
ax.axis('off')
ax.set_title(r"Feedforward neural network from $\mathbf{R}^3$ to $\mathbf{R}$ ($L=3$)")
plt.show()

## Module `torch.nn` et classe `nn.Module` 

**Lire tous les tutoriels:** https://pytorch.org/tutorials/index.html

In [None]:
import torch
from torch import nn

In [None]:
print(dir(nn))

In [None]:
class NeuralNetwork(nn.Module):
    """Multilayer perceptron with layer widths 3 -> 8 -> 16 -> 1 and ReLU activations."""

    def __init__(self):
        super().__init__()
        # Layers are listed in evaluation order; positions 0, 2 and 4 are the
        # linear maps, positions 1 and 3 the activations.
        layers = [
            nn.Linear(3, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the result."""
        return self.linear_relu_stack(x)

In [None]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): in the cells visible here, neither the model nor the tensors
# are moved to `device` — confirm whether that is intended.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

In [None]:
from inspect import getsource
print(getsource(nn.Linear))

In [None]:
import torch.nn.functional as F 
print(type(F.linear))
F.linear

## Création d'un objet: une fonction paramétrique

In [None]:
Phi = NeuralNetwork()
print(Phi)

In [None]:
x = torch.tensor([0.2, 0.3, 0.4])
Phi(x)  # called like a function from $\R^3$ to $\R$

In [None]:
n = 10 
xx = x.repeat(n, 1)
print(xx.shape)
xx

In [None]:
Phi(xx)  # batched call: an input of shape (n, 3) gives an output of shape (n, 1)

## Accès aux paramètres

In [None]:
# Print the name and the shape of every parameter of the model.
for name, param in Phi.named_parameters():
    print(name, "\t", param.size())
# alternative: for name, param in Phi.state_dict().items():

In [None]:
Phi.get_parameter("linear_relu_stack.0.weight")

In [None]:
def init_weights(m):
    """Re-initialise a linear layer in place; other module types are left untouched.

    For an ``nn.Linear`` module the weight is drawn with Xavier/Glorot uniform
    initialisation and the bias, when the layer has one, is set to 0.01.
    Meant to be passed to ``Module.apply``.

    Parameters
    ----------
    m : nn.Module
        The module to (possibly) initialise.
    """
    if isinstance(m, nn.Linear):
        print("Parameters initialization of", m)
        torch.nn.init.xavier_uniform_(m.weight)
        # A layer built with bias=False has m.bias set to None.
        if m.bias is not None:
            # nn.init functions run without autograd tracking, so there is
            # no need to go through the `.data` attribute.
            torch.nn.init.constant_(m.bias, 0.01)

Phi.apply(init_weights)

for name, param in Phi.named_parameters():
    print(name, "\t", param)

In [None]:
Phi(x)

# Outil `autograd`

- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

In [None]:
from math import pi
# 1000 evenly spaced abscissas in [0, 1] and the quadratic to recover,
# with coefficients (0.1, -0.3, 0.5).
xx = torch.linspace(0, 1, 1000)
yy = 0.1 - 0.3*xx + 0.5*xx**2  

In [None]:
# Observations: the true values plus Gaussian noise of standard deviation 0.005.
# NOTE(review): no torch seed is set in the cells visible here, so the noise
# changes from one run to the next.
yy_with_noise = yy + 0.005 * torch.randn_like(yy)

In [None]:
fig, ax = plt.subplots()
ax.plot(xx, yy, label="true function")
ax.scatter(xx, yy_with_noise, label="data", color='C1', alpha=0.2)
ax.legend() 

In [None]:
# Design matrix with one row per point and columns (1, x, x^2).
xx_ = xx[:, None]  # add a trailing axis so the columns can be concatenated
bases = torch.cat([torch.ones_like(xx_), xx_, xx_**2], dim=1)

In [None]:
bases

In [None]:
bases.shape

In [None]:
# Mean squared error of the quadratic model at theta = 0, and its gradient.
theta = torch.tensor([0., 0., 0.], requires_grad=True)
loss = ((yy - bases @ theta)**2).mean()
loss.backward()   # reverse-mode ("backward") automatic differentiation computes the gradient
theta.grad        # the gradient is read from the "grad" field of the parameter tensor

In [None]:
loss

In [None]:
# Warning: this only illustrates the syntax! It is not a stochastic gradient:
# every step uses the gradient of the loss over the whole data set.

theta = torch.tensor([0., 0., 0.], requires_grad=True)
for n in range(100000):
    # loss = ((yy - bases @ theta)**2).mean() # noise-free targets
    loss = ((yy_with_noise - bases @ theta)**2).mean() # noisy targets
    loss.backward()
    with torch.no_grad():  # the in-place update must not be recorded by autograd
        theta -= 1/(n+1) * theta.grad
        # backward() adds to .grad instead of overwriting it: reset it so the
        # next step uses the current gradient, not the sum of all past ones.
        theta.grad.zero_()
    if (n % 10000 == 0):
        print(n, loss.item(), theta)

## Une fonctionnalité utile moins connue

https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html#optional-reading-tensor-gradients-and-jacobian-products

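On peut calculer la dérivée d'une fonction en plusieurs points simultanément : il suffit de voir la fonction, appliquée point par point, comme une application de $\mathbf{R}^d$ dans $\mathbf{R}^d$. La fonction `backward` calcule alors un produit vecteur-jacobien $v^\top J$ ; comme le jacobien $J$ est ici diagonal, le choix $v = (1, \dots, 1)$ donne exactement la dérivée en chaque point. Il faut pour cela passer le vecteur $v$ en argument lors de l'appel de `backward`.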

In [None]:
xx = torch.linspace(0, 2*pi, 1000, requires_grad=True)
yy = torch.sin(xx)
# yy is not a scalar: backward is given a tensor of ones with the same shape as yy.
yy.backward(torch.ones_like(yy), retain_graph=True)
with torch.no_grad():
    dyy = xx.grad  # derivative of sin at each point of xx

In [None]:
fig, ax = plt.subplots()
with torch.no_grad():
    # yy holds sin(xx); dyy holds its derivative obtained by autograd, i.e. cos(xx).
    ax.plot(xx, yy, label="sinus")
    ax.plot(xx, dyy, label="cosinus via autograd")
ax.legend()
plt.show()

In [None]:
inp = torch.eye(4, 5, requires_grad=True)
out = (inp+1).pow(2).t()  # elementwise (inp + 1)^2, then transposed
# out is not a scalar: backward is given a tensor of ones with the same shape as out.
out.backward(torch.ones_like(out), retain_graph=True)

In [None]:
inp

In [None]:
out

In [None]:
torch.ones_like(out)