# Package import 

In [1]:
# Before starting Python, activate the course virtual environment in the shell:
# source /users/Enseignants/piwowarski/venv/amal/3.7/bin/activate
# N.B.: to launch TensorBoard, run: tensorboard --logdir=path/to/log-directory

import torch
from torch.autograd import Function
from torch.autograd import gradcheck
from datamaestro import prepare_dataset 
from torch.utils.tensorboard import SummaryWriter
import ipdb

# Functions 

In [2]:
class Context:
    """Bare-bones substitute for an autograd context.

    It only remembers whatever was handed to ``save_for_backward`` so that a
    hand-called ``backward`` can read it back through ``saved_tensors``.
    """

    def __init__(self):
        # Nothing is stored until save_for_backward is called.
        self._saved_tensors = tuple()

    def save_for_backward(self, *tensors):
        """Store the given positional arguments, replacing any earlier ones."""
        self._saved_tensors = tensors

    def _get_saved_tensors(self):
        return self._saved_tensors

    # Read-only view on the values most recently passed to save_for_backward.
    saved_tensors = property(_get_saved_tensors)


class linear(Function):
    """Element-wise affine map ``w*x + b`` with a hand-written backward pass.

    Every function provides:
      * a ``forward`` method computing the output for the given variables and
        parameters;
      * a ``backward`` method returning the gradient with respect to each of
        those variables/parameters.

    NOTE(review): ``forward`` multiplies ``w`` and ``x`` element-wise (with
    broadcasting); it does not compute a matrix product. Confirm that this is
    the intended model.
    """
    @staticmethod
    # A static method can be called without referring to an instance.
    def forward(ctx,x,w,b):
        """Return ``w*x + b`` and save the three inputs on ``ctx``.

        Args:
            ctx: object exposing ``save_for_backward``.
            x, w, b: mutually broadcastable operands.
        """
        ctx.save_for_backward(x,w,b)
        return w*x+b

    @staticmethod
    def backward(ctx,grad_output):
        """Return the gradients of the loss w.r.t. ``x``, ``w`` and ``b``.

        Args:
            ctx: the context that was passed to ``forward``.
            grad_output: gradient coming from the stage above. It may be a
                tensor broadcastable to the forward output, or a Python
                number, which is treated as that constant repeated over the
                whole output.

        Returns:
            ``(dL_dx, dL_dw, dL_db)``, each with the same shape as the
            corresponding forward input.
        """
        x,w,b = ctx.saved_tensors
        if not torch.is_tensor(grad_output):
            grad_output = torch.as_tensor(grad_output, dtype=x.dtype, device=x.device)
        # Shape of the forward output, i.e. the broadcast of the three inputs.
        out_shape = torch.broadcast_tensors(x, w, b)[0].shape
        grad_output = grad_output.expand(out_shape)
        # An input that was broadcast in forward contributed to several output
        # elements, so its gradient is the sum over the broadcast dimensions.
        # Without this reduction the gradients do not match the parameter
        # shapes and a manual update ``w - lr*dL_dw`` changes the shape of w.
        dL_dx=(grad_output*w).sum_to_size(x.shape)
        dL_dw=(grad_output*x).sum_to_size(w.shape)
        dL_db=grad_output.sum_to_size(b.shape)
        return dL_dx,dL_dw,dL_db
    
class MSE(Function):
    """Mean squared error ``mean((y - yhat)**2)`` with a hand-written backward.

    NOTE(review): ``backward`` returns a single tensor, the gradient with
    respect to the FIRST argument of ``forward`` only. That is what the manual
    ``Context``-based calls in this notebook consume, but ``Function.apply``
    expects one gradient per forward input, so this class is not usable
    through ``apply`` as written.
    """
    @staticmethod
    def forward(ctx,y,yhat):
        """Save both operands on ``ctx`` and return their mean squared difference."""
        ctx.save_for_backward(y,yhat)
        return torch.mean((y-yhat)**2)

    @staticmethod
    def backward(ctx,grad_output=1):
        """Return the gradient of the loss w.r.t. the first ``forward`` argument.

        Args:
            ctx: the context that was passed to ``forward``.
            grad_output: gradient of the stage above (1 when the loss is the
                final quantity).
        """
        y,yhat=ctx.saved_tensors
        diff=y-yhat
        # forward averages over every element of the (broadcast) difference,
        # so the derivative carries a 1/N factor.
        return 2*diff*grad_output/diff.numel()

# Toy inputs for a manual smoke test of the two functions above.
x = torch.randn(10,5,requires_grad=True,dtype=torch.float64)
# y is a plain Python int; below it is used both as the incoming gradient of
# linear.backward and as the first argument of MSE.forward.
y=1

w = torch.randn(5,requires_grad=True,dtype=torch.float64)
b=torch.zeros(1,requires_grad=True, dtype=torch.float64)

## How to use the function by hand: call forward/backward with a Context
f = linear()
ctx = Context()
output = f.forward(ctx,x,w,b)
f_grad = f.backward(ctx,y)

## How to test the gradient:
## 1. Use the apply method inherited from Function to obtain a callable
## 2. Pass that callable and its inputs to torch.autograd.gradcheck
f_check = f.apply

print("Test of gradient for the linear function: ",torch.autograd.gradcheck(f_check,(x,w,b)))
# Expected output: True

## Test of the MSE function
yhat=output
mse=MSE()
# The same ctx is reused here, so the tensors saved by linear.forward are replaced.
loss=mse.forward(ctx,y,yhat)
print("the loss is: ",loss)
mse_grad=mse.backward(ctx)


Test of gradient for the linear function:  True
the loss is:  tensor(1.5230, dtype=torch.float64, grad_fn=<MeanBackward0>)


# Training 

## SGD descent

In [3]:
def SGD_descent(x,y,learning_rate,epochs=100):
    """Fit the ``linear`` function with stochastic gradient descent.

    At each step one row of ``x`` and the matching row of ``y`` are drawn
    uniformly at random, the loss and its gradient are computed by hand with
    ``linear`` / ``MSE`` and fresh ``Context`` objects, and ``w`` and ``b``
    are moved against the gradient. The loss of every step is logged to
    TensorBoard under the tag ``'SGD_Loss'``.

    Args:
        x: 2-D tensor of inputs, one sample per row.
        y: 2-D tensor of targets with as many rows as ``x``.
        learning_rate: step size of the parameter update.
        epochs: number of update steps (one random sample per step).

    Returns:
        None. The fitted parameters are not returned; only the loss curve is
        recorded.
    """
    # Gradients are computed by hand, so the parameters do not need autograd.
    w = torch.randn(x.shape[1],dtype=torch.double)
    b = torch.zeros(1,dtype=torch.double)
    writer = SummaryWriter()
    print("SGD descent")
    try:
        # no_grad keeps PyTorch from chaining an ever-growing autograd graph
        # through the successive updates (x itself may require grad).
        with torch.no_grad():
            for i in range(epochs):
                # Sample selection: one row of the sample selected randomly
                index=torch.randint(0,len(x),size=(1,))
                x_i=x[index,:].to(torch.double)
                y_i=y[index,:].to(torch.double)
                # Context initialization
                ctx_f=Context()
                ctx_mse=Context()
                # Gradient calculation; forward/backward are static methods,
                # so they are called on the classes directly.
                output=linear.forward(ctx_f,x_i,w,b)
                loss=MSE.forward(ctx_mse,output,y_i)
                mse_grad=MSE.backward(ctx_mse)
                _,df_dw,df_db=linear.backward(ctx_f,mse_grad)
                # Parameters update
                w=w-learning_rate*df_dw
                b=b-learning_rate*df_db
                # Add loss to writer
                if i%10==0:
                    print("Epoch %d"%i)
                writer.add_scalar('SGD_Loss', loss.item(), i)
    finally:
        # Flush pending events and release the event file even on error.
        writer.close()
    return None

## Batch descent 

In [4]:
def Batch_descent(x,y,learning_rate,epochs=100):
    """Fit the ``linear`` function with full-batch gradient descent.

    For every epoch the loss and its hand-computed gradient are evaluated on
    each row of ``x`` / ``y``; the gradients are averaged over all rows and a
    single parameter update is applied. The mean loss of the epoch is logged
    to TensorBoard under the tag ``'Batch_Loss'``.

    Args:
        x: 2-D tensor of inputs, one sample per row.
        y: 2-D tensor of targets with as many rows as ``x``.
        learning_rate: step size of the parameter update.
        epochs: number of passes over the whole dataset.

    Returns:
        None. The fitted parameters are not returned; only the loss curve is
        recorded.

    Raises:
        ValueError: if ``x`` has no rows.
    """
    n_samples=len(x)
    if n_samples==0:
        raise ValueError("x must contain at least one sample")
    # Gradients are computed by hand, so the parameters do not need autograd.
    w = torch.randn(x.shape[1],dtype=torch.double)
    b = torch.zeros(1,dtype=torch.double)
    writer = SummaryWriter()
    print("Batch descent")
    try:
        # no_grad keeps PyTorch from chaining an ever-growing autograd graph
        # through the successive updates (x itself may require grad).
        with torch.no_grad():
            for i in range(epochs):
                # Accumulators: every sample must contribute to the update,
                # not only the last one visited.
                grad_w=0.0
                grad_b=0.0
                loss_sum=0.0
                # Sample selection: the whole batch is considered
                for j in range(n_samples):
                    x_j=x[j,:].to(torch.double)
                    y_j=y[j,:].to(torch.double)
                    # Context initialization
                    ctx_f=Context()
                    ctx_mse=Context()
                    # Gradient calculation
                    output=linear.forward(ctx_f,x_j,w,b)
                    loss=MSE.forward(ctx_mse,output,y_j)
                    mse_grad=MSE.backward(ctx_mse)
                    _,df_dw,df_db=linear.backward(ctx_f,mse_grad)
                    grad_w=grad_w+df_dw
                    grad_b=grad_b+df_db
                    loss_sum+=loss.item()
                # Parameters update with the batch-averaged gradient
                w=w-learning_rate*grad_w/n_samples
                b=b-learning_rate*grad_b/n_samples
                # Add loss to writer
                if i%10==0:
                    print("Epoch %d"%i)
                writer.add_scalar('Batch_Loss', loss_sum/n_samples, i)
    finally:
        # Flush pending events and release the event file even on error.
        writer.close()
    return None

## Mini-batch 

In [5]:
def MiniBatch_descent(x,y,learning_rate,epochs=100,batch_size=1):
    """Fit the ``linear`` function with mini-batch gradient descent.

    For every step ``batch_size`` row indices are drawn uniformly at random
    (with replacement, via ``torch.randint``); the hand-computed gradients of
    those rows are averaged and a single parameter update is applied. The mean
    loss of the mini-batch is logged to TensorBoard under the tag
    ``'Minibatch_Loss'``.

    Args:
        x: 2-D tensor of inputs, one sample per row.
        y: 2-D tensor of targets with as many rows as ``x``.
        learning_rate: step size of the parameter update.
        epochs: number of update steps (one mini-batch per step).
        batch_size: number of rows drawn for each step.

    Returns:
        None. The fitted parameters are not returned; only the loss curve is
        recorded.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size<1:
        raise ValueError("batch_size must be at least 1")
    # Same parameter shapes as SGD_descent / Batch_descent. Gradients are
    # computed by hand, so the parameters do not need autograd.
    w = torch.randn(x.shape[1],dtype=torch.double)
    b = torch.zeros(1,dtype=torch.double)
    writer = SummaryWriter()
    print("MiniBatch descent")
    try:
        # no_grad keeps PyTorch from chaining an ever-growing autograd graph
        # through the successive updates (x itself may require grad).
        with torch.no_grad():
            for i in range(epochs):
                # Sample selection: batch_size rows of the sample selected randomly
                index=torch.randint(0,len(x),size=(batch_size,))
                # Accumulators: every drawn sample must contribute to the
                # update, not only the last one visited.
                grad_w=0.0
                grad_b=0.0
                loss_sum=0.0
                for j in index.tolist():
                    x_j=x[j,:].to(torch.double)
                    y_j=y[j,:].to(torch.double)
                    # Context initialization
                    ctx_f=Context()
                    ctx_mse=Context()
                    # Gradient calculation
                    output=linear.forward(ctx_f,x_j,w,b)
                    loss=MSE.forward(ctx_mse,output,y_j)
                    mse_grad=MSE.backward(ctx_mse)
                    _,df_dw,df_db=linear.backward(ctx_f,mse_grad)
                    grad_w=grad_w+df_dw
                    grad_b=grad_b+df_db
                    loss_sum+=loss.item()
                # Parameters update with the mini-batch-averaged gradient
                w=w-learning_rate*grad_w/batch_size
                b=b-learning_rate*grad_b/batch_size
                # Add loss to writer
                if i%10==0:
                    print("Epoch %d"%i)
                writer.add_scalar('Minibatch_Loss', loss_sum/batch_size, i)
    finally:
        # Flush pending events and release the event file even on error.
        writer.close()
    return None

## Tests 

In [6]:
# Small random regression problem used to try out the three trainers.
n=10
x=torch.randn(n,5,requires_grad=True,dtype=torch.double)
y=torch.randn(n,1,dtype=torch.double)

# Hyper-parameters for the synthetic run; the mini-batch is half of the n rows.
epochs=100
learning_rate=0.01
batch_size=int(n/2)

# The calls below are disabled; uncomment one to train on the synthetic data.
# SGD_descent(x,y,learning_rate,epochs)
# Batch_descent(x,y,learning_rate,epochs)
# MiniBatch_descent(x,y,learning_rate,epochs,batch_size)


# Boston dataset 

In [11]:
## Download the Boston dataset
ds=prepare_dataset("edu.uci.boston")
fields, data =ds.files.data() 

print("fields:",fields)
print("dataset",data)

# Every column but the last is used as input; the last column is the target,
# reshaped to a column vector with one row per sample.
x=torch.DoubleTensor(data[:,:-1])
y=torch.DoubleTensor(data[:,-1]).reshape(len(x),1)

def standardize(z):
    """Centre and scale every column of ``z`` to zero mean and unit std.

    Statistics are taken over the rows (``dim=0``), i.e. per column. Reducing
    over ``dim=1`` instead would normalise each row on its own and, for a
    single-column tensor, divide by the undefined standard deviation of one
    element, turning the whole tensor into NaN.

    Args:
        z: 2-D tensor with one sample per row.

    Returns:
        A tensor of the same shape as ``z``. Columns whose standard deviation
        is zero are only centred.
    """
    mu = z.mean(dim=0, keepdim=True)
    std = z.std(dim=0, keepdim=True)
    # A constant column has zero std; divide by 1 there to avoid 0/0.
    std = torch.where(std == 0, torch.ones_like(std), std)
    return (z - mu) / std

# Replace inputs and targets by their standardized versions.
x, y = standardize(x), standardize(y)

fields: ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
dataset [[6.3200e-03 1.8000e+01 2.3100e+00 ... 3.9690e+02 4.9800e+00 2.4000e+01]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 3.9690e+02 9.1400e+00 2.1600e+01]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 3.9283e+02 4.0300e+00 3.4700e+01]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 3.9690e+02 5.6400e+00 2.3900e+01]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 3.9345e+02 6.4800e+00 2.2000e+01]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 3.9690e+02 7.8800e+00 1.1900e+01]]


In [12]:
# Hyper-parameters for the Boston run.
epochs=100
learning_rate=0.0001
# Size the mini-batch from the dataset that is actually loaded (a tenth of its
# rows). The earlier `n` still holds the size of the synthetic test set, so
# int(n/10) collapsed the mini-batch to a single sample.
batch_size=max(1, len(x)//10)

SGD_descent(x,y,learning_rate,epochs)
Batch_descent(x,y,learning_rate,epochs)
MiniBatch_descent(x,y,learning_rate,epochs,batch_size)

SGD descent
Epoch 0
Epoch 10
Epoch 20
Epoch 30
Epoch 40
Epoch 50
Epoch 60
Epoch 70
Epoch 80
Epoch 90
Batch descent
Epoch 0
Epoch 10
Epoch 20
Epoch 30
Epoch 40
Epoch 50
Epoch 60
Epoch 70
Epoch 80
Epoch 90
MiniBatch descent
Epoch 0
Epoch 10
Epoch 20
Epoch 30
Epoch 40
Epoch 50
Epoch 60
Epoch 70
Epoch 80
Epoch 90
