CLASS

In [1]:
# Ne pas oublier d'executer dans le shell avant de lancer python :
# source /users/Enseignants/piwowarski/venv/amal/3.7/bin/activate
import torch
from torch.autograd import Function
from torch.autograd import gradcheck
from datamaestro import prepare_dataset 
import numpy as np

class Context:
    """Very simplified context object"""
    def __init__(self):
        self._saved_tensors = ()
    def save_for_backward(self, *args):
        self._saved_tensors = args
    @property
    def saved_tensors(self):
        return self._saved_tensors

class Linear(Function):
    @staticmethod
    def forward(ctx,x,w,b):
        ctx.save_for_backward(x,w,b)
        return torch.mm(x,torch.t(w))+b

    @staticmethod
    def backward(ctx,grad_output):
        x,w,b = ctx.saved_tensors
        dL_x = grad_output*w
        dL_w = grad_output*x
        dL_b = grad_output
        return dL_x,dL_w,dL_b
    
class MSE(Function):
    @staticmethod
    def forward(ctx_mse,y,yhat):
        ctx_mse.save_for_backward(y,yhat)
        tmp = yhat-y
        return torch.mul(tmp,tmp)
    
    @staticmethod
    def backward(ctx_mse):
        y,yhat = ctx_mse.saved_tensors
        return 2*(yhat-y)


VERIF BACKPROPAGATION

In [2]:
# Pour utiliser la fonction 
linear1 = Linear()
ctx1 = Context()
x = torch.randn(10,5,requires_grad=True,dtype=torch.float64)
w = torch.randn(1,5,requires_grad=True,dtype=torch.float64)
b = torch.randn(1,1,requires_grad=True,dtype=torch.float64)
output = linear1.forward(ctx1,x,w,b)

grad_output = linear1.backward(ctx1,1)
print(grad_output)
## Pour tester le gradient 
check = linear1.apply
print("Check backpropagation : ",torch.autograd.gradcheck(check,(x,w,b)))


(tensor([[-0.9181, -1.0957,  0.4695, -1.0051,  1.3708]], dtype=torch.float64,
       grad_fn=<MulBackward0>), tensor([[-0.4293, -0.3487, -0.3684, -0.3479,  0.2376],
        [-0.4214, -0.8073,  0.8854, -0.5931,  0.6962],
        [-1.3283,  2.3737,  1.5293,  0.2929,  1.5791],
        [-1.1369,  0.7733, -0.1657, -0.7762, -0.9779],
        [ 0.4702,  0.9757,  1.1605,  1.1399, -1.0780],
        [ 0.6794,  0.4269, -0.5760, -1.2367,  0.3724],
        [ 0.7432, -0.1651, -0.4219, -0.6452,  0.6890],
        [-0.1045,  0.9341, -0.7687, -0.1168,  2.2778],
        [ 0.5454, -0.0345,  0.7703,  1.8453, -0.0635],
        [ 0.4412,  1.4555,  0.7170, -1.5096, -0.3525]], dtype=torch.float64,
       grad_fn=<MulBackward0>), 1)
Check backpropagation :  True


In [3]:
# Pour telecharger le dataset Boston
ds=prepare_dataset("edu.uci.boston")
fields, data =ds.files.data() 

FUNCTIONS NEEDED

In [4]:
def normalize(X):
    mean = X.mean((0))
    std = X.std((0))
    return (X-mean)/std,mean,std

def loss(X,Y,w,b):
    return torch.mean((torch.mm(X,torch.t(w))+b-Y)**2)

def simple_split(X,Y,p):
    end = int(p*X.shape[0])
    X_train,Y_train = X[0:end,:],Y[0:end,:]
    X_test,Y_test = X[end:,:],Y[end:,:]
    return X_train,Y_train,X_test,Y_test

STOCHASTIC GRADIENT DESCENT 

In [5]:
def gradient_descent(layer,cost,X_train,Y_train,X_test,Y_test,nb_desc,eta,w,b):
    train_cost,test_cost = [],[]
    N = X_train.shape[0]
    if w is None:
        w = torch.randn(c,d,requires_grad=True,dtype=torch.float64)
    if b is None:
        b = torch.randn(1,1,requires_grad=True,dtype=torch.float64)
    for i in range(nb_desc):
        indice = torch.randint(0,N,size=(1,1))
        x,y = X_train[indice][0],Y_train[indice][0]
        #forward
        yhat = layer.forward(ctx1,x,w,b)
        mse = cost.forward(ctx_mse,y,yhat)
        #keep cost
        train_cost.append(loss(X_train,Y_train,w,b))
        test_cost.append(loss(X_test,Y_test,w,b))
        #backpropagation
        grad_output = cost.backward(ctx_mse)
        dl_x,dl_w,dl_b = layer.backward(ctx1,grad_output)
        #update
        w = w - eta*dl_w
        b = b - eta*dl_b
    return train_cost,test_cost,w,b

INITIALIZATION

In [6]:
#class
layer = Linear()
ctx1 = Context()
mse = MSE()
ctx_mse = Context()

#hyperparameters
p = 0.7 #for split
nb_desc = 15000
eta = 0.00001

#prepare data
X,Y = data[:,0:-1],data[:,-1]
X,Y = torch.from_numpy(X),torch.from_numpy(Y.reshape((Y.shape[0],1)))
X,meanX,stdX = normalize(X)
Y,meanY,stdY = normalize(Y)
d,c = X.shape[1],Y.shape[1]
X_train,Y_train,X_test,Y_test = simple_split(X,Y,p)

In [7]:
#reinitialize parameters
w = torch.randn(c,d,requires_grad=True,dtype=torch.float64)
b = torch.randn(c,1,requires_grad=True,dtype=torch.float64)
train_cost,test_cost = [],[]

DESCENT 

In [8]:
(tmp_train,tmp_test,w,b) = gradient_descent(layer,mse,X_train,Y_train,X_test,Y_test,nb_desc,eta,w,b)
train_cost += tmp_train
test_cost += tmp_test
print("Train cost : {}\nTest cost : {}".format(train_cost[-1],test_cost[-1]))

Train cost : 8.217651050338526
Test cost : 11.932371706039605


TRAIN/TEST CURVE

In [9]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
for i in range(len(train_cost)):
    writer.add_scalar('Train_cost', train_cost[i].item(), i)
    writer.add_scalar('Test_cost', test_cost[i].item(), i)
writer.close()