In [146]:
from torch.utils.data import Dataset, DataLoader
import torch 
import torch.nn as nn
from torch.optim import SGD
import pandas as pd 
import numpy as np
from sklearn.metrics import r2_score

In [147]:
df_x = pd.read_csv("/Users/marcosalvalaggio/code/lazygrad/test/data/x.csv", sep=";")
df_x.shape

(414, 6)

In [148]:
df_x.head()

Unnamed: 0,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude
0,-0.823683,1.255628,-0.792495,2.007407,1.12543,0.448762
1,-0.823683,0.157086,-0.616612,1.667503,0.912444,0.401139
2,1.541151,-0.387791,-0.414015,0.307885,1.48686,0.688183
3,1.246435,-0.387791,-0.414015,0.307885,1.48686,0.688183
4,-1.121951,-1.117223,-0.549997,0.307885,0.834188,0.592937


In [149]:
xs = df_x.to_numpy()
print(xs.shape)
print(type(xs))

(414, 6)
<class 'numpy.ndarray'>


In [150]:
df_y = pd.read_csv("/Users/marcosalvalaggio/code/lazygrad/test/data/y.csv", sep=";")
y = df_y.to_numpy().squeeze()
print(y.shape)
print(type(y))

(414,)
<class 'numpy.ndarray'>


In [151]:
x_train = torch.tensor(xs).float()
y_train = torch.tensor(y).float()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# pass the objects into the device selected
x_train = x_train.to(device)
y_train = y_train.to(device)
print(x_train.shape)
print(type(x_train))
print(y_train.shape)
print(type(y_train))

torch.Size([414, 6])
<class 'torch.Tensor'>
torch.Size([414])
<class 'torch.Tensor'>


In [152]:
class MyDataset(Dataset):
    def __init__(self,x,y):
        self.x = x.clone().detach().requires_grad_(True)
        self.y = y.clone().detach().requires_grad_(True)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, ix):
        return self.x[ix], self.y[ix]
# create a MyDataset object 
ds = MyDataset(x_train, y_train)

In [153]:
batch_size = 32
dl = DataLoader(ds, batch_size=batch_size, shuffle=True)
# for d in dl: 
#     print(d)

In [154]:
class FFNN(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(6,16)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(16,1)
        
    def forward(self, x):
        x = self.input_to_hidden_layer(x)
        x = self.hidden_layer_activation(x)
        x = self.hidden_to_output_layer(x)
        return x

model = FFNN().to(device)

In [155]:
# test on a single observation before the training phase 
model(x_train[0])

tensor([0.5556], grad_fn=<AddBackward0>)

In [156]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.4079,  0.3970,  0.3447,  0.3953, -0.1863, -0.2149],
        [ 0.2333,  0.1282, -0.3173,  0.0432, -0.2437, -0.1616],
        [-0.0039, -0.3560, -0.2801,  0.2377, -0.3861,  0.0946],
        [-0.0898, -0.3485, -0.1534,  0.2909, -0.3358, -0.0444],
        [ 0.0915,  0.2905,  0.1560,  0.0029,  0.2739,  0.2538],
        [ 0.1270,  0.2603,  0.3003,  0.0648,  0.2491,  0.3452],
        [-0.3170, -0.2898,  0.3876,  0.4036,  0.2787, -0.2107],
        [ 0.0140,  0.0647, -0.1175,  0.3941, -0.1588,  0.3291],
        [ 0.3859, -0.3205,  0.1762, -0.3065,  0.2470,  0.3209],
        [-0.3062, -0.2113, -0.3739,  0.1416, -0.4033, -0.3064],
        [ 0.0331, -0.3719,  0.3719,  0.2618, -0.0363,  0.1981],
        [-0.2202, -0.0986,  0.2350,  0.0638,  0.0998, -0.3080],
        [ 0.1519, -0.4040,  0.3979, -0.2688, -0.1959, -0.1863],
        [ 0.0133,  0.3794,  0.3701, -0.1442,  0.1990,  0.2392],
        [ 0.3979, -0.0029,  0.2865,  0.1847,  0.2432, -0.1437],
        [-0.3800, 

In [157]:
loss_func = nn.MSELoss()
opt = SGD(model.parameters(), lr = 0.001)
epochs = 50

In [158]:
loss_history = []
print('='*50)
for _ in range(epochs):
    numBatch = np.round(x_train.shape[0]/batch_size)
    batch = 1
    for data in dl:
        x, y = data
        opt.zero_grad()
        predicted = model(x)
        loss_value = loss_func(predicted, y.unsqueeze(1))
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value)
        print(f'epoch: {_+1}/{int(epochs)} | batch: {batch}/{int(numBatch)} | loss: {loss_value:.2f}')
        batch += 1
print('='*50)

epoch: 1/50 | batch: 1/13 | loss: 1603.51
epoch: 1/50 | batch: 2/13 | loss: 1788.71
epoch: 1/50 | batch: 3/13 | loss: 1560.08
epoch: 1/50 | batch: 4/13 | loss: 1452.57
epoch: 1/50 | batch: 5/13 | loss: 1397.21
epoch: 1/50 | batch: 6/13 | loss: 1594.43
epoch: 1/50 | batch: 7/13 | loss: 1730.82
epoch: 1/50 | batch: 8/13 | loss: 1525.70
epoch: 1/50 | batch: 9/13 | loss: 1367.21
epoch: 1/50 | batch: 10/13 | loss: 1511.87
epoch: 1/50 | batch: 11/13 | loss: 1244.56
epoch: 1/50 | batch: 12/13 | loss: 1486.57
epoch: 1/50 | batch: 13/13 | loss: 1389.65
epoch: 2/50 | batch: 1/13 | loss: 1375.18
epoch: 2/50 | batch: 2/13 | loss: 1281.45
epoch: 2/50 | batch: 3/13 | loss: 1098.01
epoch: 2/50 | batch: 4/13 | loss: 1239.14
epoch: 2/50 | batch: 5/13 | loss: 1099.27
epoch: 2/50 | batch: 6/13 | loss: 1174.15
epoch: 2/50 | batch: 7/13 | loss: 931.16
epoch: 2/50 | batch: 8/13 | loss: 1388.82
epoch: 2/50 | batch: 9/13 | loss: 1088.76
epoch: 2/50 | batch: 10/13 | loss: 941.96
epoch: 2/50 | batch: 11/13 | lo

epoch: 20/50 | batch: 1/13 | loss: 83.73
epoch: 20/50 | batch: 2/13 | loss: 26.43
epoch: 20/50 | batch: 3/13 | loss: 48.28
epoch: 20/50 | batch: 4/13 | loss: 30.82
epoch: 20/50 | batch: 5/13 | loss: 27.53
epoch: 20/50 | batch: 6/13 | loss: 204.64
epoch: 20/50 | batch: 7/13 | loss: 44.58
epoch: 20/50 | batch: 8/13 | loss: 76.95
epoch: 20/50 | batch: 9/13 | loss: 56.34
epoch: 20/50 | batch: 10/13 | loss: 52.06
epoch: 20/50 | batch: 11/13 | loss: 74.59
epoch: 20/50 | batch: 12/13 | loss: 36.40
epoch: 20/50 | batch: 13/13 | loss: 42.84
epoch: 21/50 | batch: 1/13 | loss: 25.03
epoch: 21/50 | batch: 2/13 | loss: 45.26
epoch: 21/50 | batch: 3/13 | loss: 36.34
epoch: 21/50 | batch: 4/13 | loss: 205.23
epoch: 21/50 | batch: 5/13 | loss: 92.99
epoch: 21/50 | batch: 6/13 | loss: 73.92
epoch: 21/50 | batch: 7/13 | loss: 34.13
epoch: 21/50 | batch: 8/13 | loss: 116.57
epoch: 21/50 | batch: 9/13 | loss: 25.21
epoch: 21/50 | batch: 10/13 | loss: 38.13
epoch: 21/50 | batch: 11/13 | loss: 54.83
epoch: 

epoch: 35/50 | batch: 9/13 | loss: 52.97
epoch: 35/50 | batch: 10/13 | loss: 54.05
epoch: 35/50 | batch: 11/13 | loss: 45.18
epoch: 35/50 | batch: 12/13 | loss: 28.51
epoch: 35/50 | batch: 13/13 | loss: 54.66
epoch: 36/50 | batch: 1/13 | loss: 59.57
epoch: 36/50 | batch: 2/13 | loss: 22.93
epoch: 36/50 | batch: 3/13 | loss: 68.26
epoch: 36/50 | batch: 4/13 | loss: 39.17
epoch: 36/50 | batch: 5/13 | loss: 32.36
epoch: 36/50 | batch: 6/13 | loss: 50.94
epoch: 36/50 | batch: 7/13 | loss: 36.67
epoch: 36/50 | batch: 8/13 | loss: 35.99
epoch: 36/50 | batch: 9/13 | loss: 50.16
epoch: 36/50 | batch: 10/13 | loss: 54.02
epoch: 36/50 | batch: 11/13 | loss: 19.57
epoch: 36/50 | batch: 12/13 | loss: 57.61
epoch: 36/50 | batch: 13/13 | loss: 257.66
epoch: 37/50 | batch: 1/13 | loss: 84.89
epoch: 37/50 | batch: 2/13 | loss: 185.35
epoch: 37/50 | batch: 3/13 | loss: 32.01
epoch: 37/50 | batch: 4/13 | loss: 24.76
epoch: 37/50 | batch: 5/13 | loss: 29.99
epoch: 37/50 | batch: 6/13 | loss: 47.04
epoch:

In [159]:
idx = 0
print(model(x_train[idx]))
print(y_train[idx])

tensor([47.0625], grad_fn=<AddBackward0>)
tensor(37.9000)


In [160]:
output = model(x_train).detach().numpy().squeeze()
print(type(output))
print(output.shape)
target = y_train.detach().numpy()
print(type(target))
print(target.shape)

<class 'numpy.ndarray'>
(414,)
<class 'numpy.ndarray'>
(414,)


In [161]:
results = list(zip(target, output))
results

[(37.9, 47.062515),
 (42.2, 44.978695),
 (47.3, 50.542786),
 (54.8, 49.732452),
 (43.1, 48.425217),
 (32.1, 27.957642),
 (40.3, 37.594604),
 (46.7, 46.76162),
 (18.8, 19.051046),
 (22.1, 25.862442),
 (41.4, 36.364967),
 (58.1, 54.94413),
 (39.3, 38.308365),
 (23.8, 20.542212),
 (34.3, 43.170784),
 (50.5, 44.131157),
 (70.1, 54.800606),
 (37.4, 37.22494),
 (42.3, 44.23482),
 (47.7, 50.811253),
 (29.3, 29.589682),
 (51.6, 50.727062),
 (24.6, 24.299232),
 (47.9, 48.221596),
 (38.8, 37.647675),
 (27.0, 27.350067),
 (56.2, 49.646465),
 (33.6, 41.151127),
 (47.0, 40.871883),
 (57.1, 47.196945),
 (22.1, 19.829758),
 (25.0, 36.994495),
 (34.2, 33.56148),
 (49.3, 45.927437),
 (55.1, 47.10122),
 (27.3, 31.284042),
 (22.9, 24.730268),
 (25.3, 26.47555),
 (47.7, 45.626266),
 (46.2, 46.558235),
 (15.9, 17.970406),
 (18.2, 21.097668),
 (34.7, 36.30705),
 (34.1, 41.67021),
 (53.9, 52.01875),
 (38.3, 39.677795),
 (42.0, 43.02918),
 (61.5, 40.30201),
 (13.4, 19.166008),
 (13.2, 14.636532),
 (44.2, 39.8

In [162]:
r2 = r2_score(target, output)
r2

0.6881874737483678