In [102]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [103]:
class housePriceModel(nn.Module):
    def __init__(self, n_features):
        super(housePriceModel, self).__init__()

        self.network = nn.Sequential(
            nn.Linear(n_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.network(x)

In [104]:
# Load and create data matrix and target vector
df = pd.read_pickle("cleaned_df.pkl")

In [105]:
df

Unnamed: 0,price,condominium_fees,rooms,m2,bathrooms,elevator,is_luxury,floor,energy_class,year_of_construction,...,Cancello elettrico,Esposizione doppia,Terrazza,Cantina,VideoCitofono,Impianto di allarme,Giardino privato,Caminetto,Giardino comune,Arredato
30018,413000.0,168.0,2.0,72.0,2.0,0.0,0.0,1.0,1.0,1964.0,...,1,0,0,0,1,0,0,0,0,0
30019,1062000.0,382.0,4.0,164.0,4.0,0.0,0.0,3.0,1.0,1962.0,...,1,0,0,0,1,0,0,0,0,0
30020,696000.0,254.0,3.0,109.0,3.0,0.0,0.0,2.0,1.0,1964.0,...,1,0,0,0,1,0,0,0,0,0
30021,502000.0,200.0,3.0,86.0,3.0,0.0,0.0,2.0,5.0,1978.0,...,0,0,0,0,1,1,0,0,0,0
30022,610000.0,50.0,3.0,126.0,3.0,0.0,0.0,2.0,1.0,1970.0,...,1,1,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147562,189000.0,150.0,2.0,60.0,2.0,1.0,0.0,2.0,6.0,1960.0,...,1,0,0,1,0,0,0,0,1,0
147568,680000.0,500.0,5.0,160.0,5.0,1.0,0.0,3.0,5.0,1970.0,...,1,1,0,1,0,0,0,0,1,0
147569,417000.0,60.0,3.0,70.0,3.0,0.0,0.0,0.0,4.0,1950.0,...,1,1,0,0,0,0,0,0,0,0
147571,1050000.0,625.0,5.0,163.0,5.0,1.0,1.0,6.0,5.0,1970.0,...,1,0,0,1,1,1,0,0,0,1


In [106]:
X = df.drop(columns=["price"], axis=1).to_numpy(dtype="float32")

y = df["price"].to_numpy(dtype="float32")

In [107]:
# Train, validation and test splits
X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.30, shuffle=True)
X_val, X_test, y_val, y_test = train_test_split(X_tmp, y_tmp, test_size=0.10, shuffle=True)

# Check shapes
print(f"X_train, y_train (70%): {X_train.shape}, {y_train.shape}")
print(f"X_val, y_val (20%): {X_val.shape}, {y_val.shape}")
print(f"X_test, y_test (10%): {X_test.shape}, {y_test.shape}")

X_train, y_train (70%): (4393, 24), (4393,)
X_val, y_val (20%): (1694, 24), (1694,)
X_test, y_test (10%): (189, 24), (189,)


In [109]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_val   = torch.tensor(X_val, dtype=torch.float32)
y_val   = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

X_test  = torch.tensor(X_test, dtype=torch.float32)
y_test  = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


In [110]:
from torch.utils.data import TensorDataset

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

In [111]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  # shuffle only train
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Initialize the model
n_features = X.shape[1]

model = housePriceModel(n_features)
model

housePriceModel(
  (network): Sequential(
    (0): Linear(in_features=24, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [113]:
import torch.optim as optim

loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [117]:
# Training loop
n_epochs = 120
for epoch in range(n_epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            val_loss += loss.item()
    
    # Logging
    print(f"Epoch {epoch+1}/{n_epochs} | Train Loss: {train_loss/len(train_loader):.4f} | Val Loss: {val_loss/len(val_loader):.4f}")


Epoch 1/120 | Train Loss: 1512422934913.8550 | Val Loss: 103598750430.1887
Epoch 2/120 | Train Loss: 1511861375005.6812 | Val Loss: 102911678695.8491
Epoch 3/120 | Train Loss: 1512044342257.1594 | Val Loss: 105309166456.7547
Epoch 4/120 | Train Loss: 1512732303434.2029 | Val Loss: 103338910546.1132
Epoch 5/120 | Train Loss: 1511946555659.1304 | Val Loss: 102843641160.4528
Epoch 6/120 | Train Loss: 1512046947728.6956 | Val Loss: 103138952404.5283
Epoch 7/120 | Train Loss: 1513176729540.6377 | Val Loss: 103244607797.1321
Epoch 8/120 | Train Loss: 1513175354813.2173 | Val Loss: 102791579068.3774
Epoch 9/120 | Train Loss: 1511679414361.0435 | Val Loss: 104298741219.0189
Epoch 10/120 | Train Loss: 1511904138507.1304 | Val Loss: 103256101173.1321
Epoch 11/120 | Train Loss: 1512804754595.2463 | Val Loss: 103082008035.0189
Epoch 12/120 | Train Loss: 1511790047603.0144 | Val Loss: 103047037391.6981
Epoch 13/120 | Train Loss: 1512290577497.0435 | Val Loss: 102810937614.4906
Epoch 14/120 | Train 

In [122]:
model.eval()
test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        print(f"y_pred: {y_pred[0]} -> y_batch: {y_batch[0]}")
        loss = loss_fn(y_pred, y_batch)
        test_loss += loss.item()

# print(f"Test loss: {test_loss/len(test_loader):.4f}")

y_pred: tensor([395525.5938]) -> y_batch: tensor([510000.])
y_pred: tensor([2174591.2500]) -> y_batch: tensor([3580000.])
y_pred: tensor([974509.8125]) -> y_batch: tensor([1550000.])
y_pred: tensor([634743.9375]) -> y_batch: tensor([675000.])
y_pred: tensor([799397.6250]) -> y_batch: tensor([965000.])
y_pred: tensor([820249.0625]) -> y_batch: tensor([590000.])
