In [1]:
import torch
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import numpy as np

In [2]:
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
df = pd.read_csv('housing.csv', header=None, delimiter='\s+', names=column_names)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [3]:
df.dtypes

CRIM       float64
ZN         float64
INDUS      float64
CHAS         int64
NOX        float64
RM         float64
AGE        float64
DIS        float64
RAD          int64
TAX        float64
PTRATIO    float64
B          float64
LSTAT      float64
MEDV       float64
dtype: object

In [4]:
df.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64

**Normalization**

In [5]:
normalized_df=(df-df.min())/(df.max()-df.min())

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X = normalized_df.drop(columns=['MEDV']).values
Y = normalized_df['MEDV'].values
X = torch.from_numpy(X)
Y = torch.from_numpy(Y)
X = X.type(torch.float32)
Y = Y.type(torch.float32).unsqueeze(dim=-1)


In [8]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size= 0.2, random_state=1)

In [9]:
nb_features = X_train.shape[1]
out_dim = Y.shape[1]

## Q1

In [10]:
def mse(yhat, y):
    return (y-yhat).pow(2).sum()

def forward(X, W, B):
    return torch.mm(X, W.T) + B

## SGD

In [11]:
W = torch.rand((out_dim, nb_features), requires_grad=True)
B = torch.rand(out_dim, requires_grad=True)
NB_EPOCHS = 50
epsilon = 1e-3
writter = SummaryWriter()
for epoch in range(NB_EPOCHS):
    for _ in range(len(X_train)):
        random_inx = np.random.randint(0, len(X_train))
        x_sgd = X_train[random_inx : random_inx+1]
        y_sgd = Y_train[random_inx : random_inx+1]
        y_hat_sgd = forward(x_sgd, W, B)
        loss_sgd = mse(y_hat_sgd, y_sgd)
        loss_sgd.backward()
        with torch.no_grad():
            W -= epsilon*W.grad
            B -= epsilon*B.grad
            W.grad = None
            B.grad = None

    loss_train = mse(forward(X_train, W, B), Y_train)
    loss_val = mse(forward(X_val, W, B), Y_val)

    writter.add_scalar('Loss/train', loss_train, epoch)
    writter.add_scalar('Loss/val', loss_val, epoch)
    # Sortie directe
    print(f"Itérations {epoch}: loss-train {loss_train}   lostt-test {loss_val}")

Itérations 0: loss-train 70.35777282714844   lostt-test 15.377771377563477
Itérations 1: loss-train 51.730594635009766   lostt-test 11.235861778259277
Itérations 2: loss-train 41.24653244018555   lostt-test 8.663492202758789
Itérations 3: loss-train 35.75853729248047   lostt-test 7.327631950378418
Itérations 4: loss-train 31.385557174682617   lostt-test 6.372774124145508
Itérations 5: loss-train 28.123674392700195   lostt-test 5.89354133605957
Itérations 6: loss-train 24.872304916381836   lostt-test 5.027865409851074
Itérations 7: loss-train 23.543476104736328   lostt-test 4.8279032707214355
Itérations 8: loss-train 21.722986221313477   lostt-test 4.3436760902404785
Itérations 9: loss-train 20.509841918945312   lostt-test 4.144069671630859
Itérations 10: loss-train 20.051599502563477   lostt-test 4.191751956939697
Itérations 11: loss-train 18.595417022705078   lostt-test 3.730201005935669
Itérations 12: loss-train 17.74350357055664   lostt-test 3.542036533355713
Itérations 13: loss-tra

## mini-batch

In [12]:
W = torch.rand((out_dim, nb_features), requires_grad=True)
B = torch.rand(out_dim, requires_grad=True)
NB_EPOCHS = 50
batch_size = 128
epsilon = 1e-3
writter = SummaryWriter()
for epoch in range(NB_EPOCHS):
    for i in range(len(X_train)//batch_size):
        x_batch = X_train[i : i+batch_size]
        y_batch = Y_train[i : i+batch_size]
        y_hat_batch = forward(x_batch, W, B)
        loss_batch = mse(y_hat_batch, y_batch)
        loss_batch.backward()
        with torch.no_grad():
            W -= epsilon*W.grad
            B -= epsilon*B.grad
            W.grad = None
            B.grad = None

    loss_train = mse(forward(X_train, W, B), Y_train)
    loss_val = mse(forward(X_val, W, B), Y_val)

    writter.add_scalar('Loss/train', loss_train, epoch)
    writter.add_scalar('Loss/val', loss_val, epoch)
    # Sortie directe
    print(f"Itérations {epoch}: loss-train {loss_train}   lostt-test {loss_val}")

Itérations 0: loss-train 122.96549224853516   lostt-test 29.495174407958984
Itérations 1: loss-train 87.59003448486328   lostt-test 20.966278076171875
Itérations 2: loss-train 65.94181060791016   lostt-test 15.746397018432617
Itérations 3: loss-train 52.47607421875   lostt-test 12.504424095153809
Itérations 4: loss-train 43.91236877441406   lostt-test 10.450152397155762
Itérations 5: loss-train 38.30202102661133   lostt-test 9.113073348999023
Itérations 6: loss-train 34.48576736450195   lostt-test 8.212508201599121
Itérations 7: loss-train 31.772357940673828   lostt-test 7.580570697784424
Itérations 8: loss-train 29.748218536376953   lostt-test 7.116462707519531
Itérations 9: loss-train 28.164670944213867   lostt-test 6.759342670440674
Itérations 10: loss-train 26.871082305908203   lostt-test 6.472214698791504
Itérations 11: loss-train 25.775272369384766   lostt-test 6.232352256774902
Itérations 12: loss-train 24.819976806640625   lostt-test 6.025599956512451
Itérations 13: loss-train 

Nous constatons sur tensorboard et les résultats que la descente de gradient stochastique converge plus rapide que mini-batch.

## Q2

In [13]:
class monNN(torch.nn.Module):
    def __init__(self, input_dim, out_dim, hidden_dim) -> None:
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(in_features=input_dim, out_features=hidden_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=out_dim)
        )
        
    def forward(self, x):
        return self.net(x)

In [14]:
NB_EPOCHS = 50

model = monNN(input_dim=nb_features, out_dim=out_dim, hidden_dim=nb_features)
loss_fn = torch.nn.MSELoss()
optim = torch.optim.SGD(params=model.parameters(), lr=1e-3)

writter = SummaryWriter()
for epoch in range(NB_EPOCHS):
    yhat = model.forward(X_train)
    loss = loss_fn(yhat, Y_train)
    loss.backward()
    optim.step()
    optim.zero_grad()
    
    writter.add_scalar('Loss/train', loss, epoch)
    # evaluation
    yhat_eval = model.forward(X_val)
    loss_eval = loss_fn(yhat_eval,Y_val)
    writter.add_scalar('Loss/test', loss_eval, epoch)
    # Sortie directe
    print(f"Itérations {epoch}: loss-train {loss}   lostt-test {loss_eval}")

Itérations 0: loss-train 0.5396705865859985   lostt-test 0.5422253608703613
Itérations 1: loss-train 0.5325914025306702   lostt-test 0.5351741909980774
Itérations 2: loss-train 0.5256171822547913   lostt-test 0.5282277464866638
Itérations 3: loss-train 0.5187463164329529   lostt-test 0.5213843584060669
Itérations 4: loss-train 0.5119768977165222   lostt-test 0.5146423578262329
Itérations 5: loss-train 0.5053076148033142   lostt-test 0.5080002546310425
Itérations 6: loss-train 0.4987366795539856   lostt-test 0.5014562606811523
Itérations 7: loss-train 0.49226248264312744   lostt-test 0.4950089454650879
Itérations 8: loss-train 0.4858837425708771   lostt-test 0.48865681886672974
Itérations 9: loss-train 0.47959867119789124   lostt-test 0.48239827156066895
Itérations 10: loss-train 0.4734059274196625   lostt-test 0.47623181343078613
Itérations 11: loss-train 0.4673040807247162   lostt-test 0.47015610337257385
Itérations 12: loss-train 0.4612915813922882   lostt-test 0.4641696810722351
Ité

In [15]:
%tensorboard 

UsageError: Line magic function `%tensorboard` not found.
