# Imports 

## Packages 

In [1]:
# Ne pas oublier d'executer dans le shell avant de lancer python :
# source /users/Enseignants/piwowarski/venv/amal/3.7/bin/activate
# N.B: commande pour lancer tensorboard: tensorboard --logdir=path/to/log-directory

import torch
from torch.autograd import Function
from torch.autograd import gradcheck
from datamaestro import prepare_dataset 
from torch.utils.tensorboard import SummaryWriter
from torch.optim import adadelta
import ipdb
import matplotlib.pyplot as plt

## TP1 classes 

In [2]:
class Context:
    """Very simplified context object"""
    def __init__(self):
        self._saved_tensors = ()
    def save_for_backward(self, *args):
        self._saved_tensors = args
    @property
    def saved_tensors(self):
        return self._saved_tensors


class linear(Function):
##Toute fonction a:
## une méthode forward pour calculer l'image de variables et paramètres donnés
## une méthode backward pour renvoyer son gradient par rapport à ses variables/paramètres
    @staticmethod
    #a static method can be called without referring to an object.     
    def forward(ctx,x,w,b):
        ctx.save_for_backward(x,w,b)
        return w*x+b

    @staticmethod
    def backward(ctx,grad_output):
        ## prend comme argument le gradient de l'étage d'au dessus
        x,w,b = ctx.saved_tensors
        dL_dx=grad_output*w
        dL_dw=grad_output*x
        dL_db=grad_output
        return dL_dx,dL_dw,dL_db
    
class MSE(Function):
    @staticmethod
    def forward(ctx,y,yhat):
        ctx.save_for_backward(y,yhat)
        return torch.mean((y-yhat)**2)
    @staticmethod
    def backward(ctx,grad_output=1):
        y,yhat=ctx.saved_tensors
        return 2*(y-yhat)*grad_output


## Data import 

In [3]:
ds=prepare_dataset("edu.uci.boston")
fields, data =ds.files.data()
n = data.shape[0]

def standardize(z):
    m=z.mean(dim=0,keepdim=True)
    s=z.std(dim=0,keepdim=True)
    return (z-m)/s

x=standardize(torch.tensor(data[:,:-1],dtype=torch.float32))
y=standardize(torch.tensor(data[:,-1],dtype=torch.float32))

# Différentiation automatique 

In [4]:
def linReg(x,y,learning_rate=0.001,epochs=100):    
    w=torch.randn(x.shape[1],requires_grad=True,dtype=torch.double)
    b=torch.randn(1,requires_grad=True,dtype=torch.double)
    writer=SummaryWriter()
    for i in range(epochs):
        for j in range(len(x)):
            x_j=x[j,:]
            y_j=y[j,:]
            y_hat=torch.dot(w,x_j)+b
            loss=(y_j-y_hat)**2
            loss.backward()
            ipdb.set_trace()
            w=w-learning_rate*w.grad
            #met à jour w,b en fonction d'eux même (impossible?)             
            b=b-learning_rate*b.grad
            w.grad.data.zero_()
            b.grad.data.zero_()
        writer.add_scalar('Loss LinReg',loss,i)
    return w,b

In [5]:

# w,b=linReg(x,y)

## Optimiseur 

#### SGD

In [6]:
learning_rate=1e-2
nb_epochs=1000

def f(x,w,b):
    return torch.mv(x,w)+b

def MSE(y,y_hat):
    return torch.mean(torch.pow((y-y_hat),2))


w=torch.nn.Parameter(torch.randn(x.shape[1]))
b=torch.nn.Parameter(torch.randn(1))

optim=torch.optim.SGD(params=[w,b],lr=learning_rate)
# Configuration de l'optimiseur: paramètres et critère d'arrêt
optim.zero_grad()
# Réinitialisation du gradient

writer=SummaryWriter()
for i in range(nb_epochs):
    index=torch.randint(0,len(x),size=(1,))
    #Forward     
    loss=MSE(f(x[index,:],w,b),y[index])
    writer.add_scalar("SGD loss",loss,i)
    #loss_history.append(loss)
    #Backward
    loss.backward()
    #Mise à jour des paramètres à chaque epoch (mod 1)
    if i % 1==0:
        optim.step() #met à jour les paramètres [w,b]
        optim.zero_grad() #réinitialise le gradient
    if i%100==0:
        print("Epoch",i)
        
# plt.plot(loss_history)
# plt.show()

Epoch 0
Epoch 100
Epoch 200
Epoch 300
Epoch 400
Epoch 500
Epoch 600
Epoch 700
Epoch 800
Epoch 900


#### Batch

In [7]:
nb_epochs=100

def f(x,w,b):
#     ipdb.set_trace()
    return torch.dot(x,w)+b

def MSE(y,y_hat):
    return torch.mean(torch.pow((y-y_hat),2))

w=torch.nn.Parameter(torch.randn(x.shape[1]))
b=torch.nn.Parameter(torch.randn(1))

optim=adadelta.Adadelta(params=[w,b])
# Configuration de l'optimiseur
optim.zero_grad()
# Réinitialisation du gradient

writer=SummaryWriter()
for i in range(nb_epochs):
    for j in range(len(x)):
        #Forward     
        loss=MSE(f(x[j,:],w,b),y[j])
        #Backward
        loss.backward()
    writer.add_scalar("Batch loss",loss,i)
    if i % 1==0:
        optim.step() #met à jour les paramètres [w,b]
        optim.zero_grad() #réinitialise le gradient
    if i%10==0:
        print("Epoch ",i)


Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
Epoch  50
Epoch  60
Epoch  70
Epoch  80
Epoch  90


#### Mini-Batch 

In [8]:
nb_epochs=100
batch_size=10

def f(x,w,b):
#     ipdb.set_trace()
    return torch.dot(x,w)+b

def MSE(y,y_hat):
    return torch.mean(torch.pow((y-y_hat),2))

w=torch.nn.Parameter(torch.randn(x.shape[1]))
b=torch.nn.Parameter(torch.randn(1))

optim=adadelta.Adadelta(params=[w,b])
# Configuration de l'optimiseur
optim.zero_grad()
# Réinitialisation du gradient

writer=SummaryWriter()
for i in range(nb_epochs):
    index=torch.randint(0,len(x),size=(batch_size,))
    #Forward     
    loss=MSE(f(x[j,:],w,b),y[j])
    #Backward
    loss.backward()
    writer.add_scalar("MiniBatch loss",loss,i)
    if i % 1==0:
        optim.step() #met à jour les paramètres [w,b]
        optim.zero_grad() #réinitialise le gradient
    if i%10==0:
        print("Epoch ",i)


Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
Epoch  50
Epoch  60
Epoch  70
Epoch  80
Epoch  90


## Module 

### 2-layers NN (without container) (SGD algorithm method)

In [9]:
nb_epochs=1000
learning_rate=1e-3

f1=torch.nn.Linear(x.shape[1],1)
tanh=torch.nn.Tanh()
f2=torch.nn.Linear(1,1)
mse=torch.nn.MSELoss()

# Define parameters to optimize
w1=f1.weight
b1=f1.bias
w2=f2.weight
b2=f2.bias

# Set optimizer
optim=torch.optim.SGD(params=[w1,b1,w2,b2],lr=learning_rate)
optim.zero_grad()

writer=SummaryWriter()
for i in range(nb_epochs):
    index=torch.randint(0,len(x),size=(1,))
    y1=f1(x[index,:])
    z=tanh(y1)
    y2=f2(z)
    loss=mse(y[index],y2)
    loss.backward()
    writer.add_scalar("SGD loss",loss.item(),i)
    optim.step()
    f1.weight=w1
    f1.bias=b1
    f2.weight=w2
    f2.bias=b2
    optim.zero_grad()
    if i %100==0:
        print("Epoch ",i)
    


Epoch  0


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch  100
Epoch  200
Epoch  300
Epoch  400
Epoch  500
Epoch  600
Epoch  700
Epoch  800
Epoch  900


### Container (Mini batch used)

In [10]:
nb_epochs=1000
batch_size=10
learning_rate=1e-2

f1=torch.nn.Linear(x.shape[1],1)
tanh=torch.nn.Tanh()
f2=torch.nn.Linear(1,1)
mse=torch.nn.MSELoss()

neural_network=torch.nn.Sequential(f1,tanh,f2)

optim=torch.optim.SGD(params=list(neural_network.parameters()),lr=learning_rate)
for i in range(nb_epochs):
    index=torch.randint(0,len(x),size=(batch_size,))
    loss=mse(neural_network(x[index,:]),y[index].reshape(len(index),1))
    loss.backward()
    writer.add_scalar("MiniBatch loss",loss.item(),i)
    if i%10==0:
        print("Epoch ",i)
    

Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
Epoch  50
Epoch  60
Epoch  70
Epoch  80
Epoch  90
Epoch  100
Epoch  110
Epoch  120
Epoch  130
Epoch  140
Epoch  150
Epoch  160
Epoch  170
Epoch  180
Epoch  190
Epoch  200
Epoch  210
Epoch  220
Epoch  230
Epoch  240
Epoch  250
Epoch  260
Epoch  270
Epoch  280
Epoch  290
Epoch  300
Epoch  310
Epoch  320
Epoch  330
Epoch  340
Epoch  350
Epoch  360
Epoch  370
Epoch  380
Epoch  390
Epoch  400
Epoch  410
Epoch  420
Epoch  430
Epoch  440
Epoch  450
Epoch  460
Epoch  470
Epoch  480
Epoch  490
Epoch  500
Epoch  510
Epoch  520
Epoch  530
Epoch  540
Epoch  550
Epoch  560
Epoch  570
Epoch  580
Epoch  590
Epoch  600
Epoch  610
Epoch  620
Epoch  630
Epoch  640
Epoch  650
Epoch  660
Epoch  670
Epoch  680
Epoch  690
Epoch  700
Epoch  710
Epoch  720
Epoch  730
Epoch  740
Epoch  750
Epoch  760
Epoch  770
Epoch  780
Epoch  790
Epoch  800
Epoch  810
Epoch  820
Epoch  830
Epoch  840
Epoch  850
Epoch  860
Epoch  870
Epoch  880
Epoch  890
Epoch  900
Epoch  910