# Import libraries

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd

In [2]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Load data

In [3]:
data = pd.read_csv("../data/complex_data.csv", sep="\t", index_col=0)

In [4]:
data.describe()

Unnamed: 0,Log Price,Log Area,Log Bedrooms,Log WC,Parking,Infrastructure,Facade,Log Floors
count,4764.0,4764.0,4764.0,4764.0,4764.0,4764.0,4764.0,4764.0
mean,-0.001738,-4e-06,0.000266,-0.000957,0.003535,0.014805,-0.001661,0.005422
std,0.999911,0.995137,0.998461,0.999261,1.000343,0.993868,0.999346,0.998988
min,-2.492482,-5.430634,-3.161024,-2.674523,-0.933208,-1.494289,-0.643026,-2.025287
25%,-0.618843,-0.668842,-0.351505,-1.123405,-0.933208,-1.494289,-0.643026,-0.698957
50%,-0.045966,-0.086254,0.384194,0.427712,-0.933208,0.669215,-0.643026,0.076897
75%,0.574668,0.584781,0.384194,0.427712,1.071573,0.669215,1.555148,0.627374
max,2.624948,6.825509,6.573883,5.988411,1.071573,0.669215,1.555148,6.898158


# Define model

The architecture is defined as:
- FFN (Fully connected network): 100 neurons
- FFN: 100 neurons
- FFN: 1 neuron

All with ReLU activation function

In [5]:
model = torch.nn.Sequential()
model.add_module('W0', nn.Linear(data.shape[1] - 1, 100))
model.add_module("relu0", nn.ReLU())
model.add_module('W1', nn.Linear(100, 100))
model.add_module("relu1", nn.ReLU())
model.add_module('W2', nn.Linear(100, 1))

# Define the Early Stopping

In [6]:
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, model_path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.model_path = model_path

    def __call__(self, val_loss, model, model_path):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.model_path)
        self.val_loss_min = val_loss

# Define DataLoader 

In [7]:
class CustomDataset(Dataset):
    def __init__(self, dataframe:pd.DataFrame, labels:pd.DataFrame) -> None:
        self.data = torch.tensor(dataframe.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.float32)

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx) -> tuple:
        return self.data[idx], self.labels[idx]

In [8]:
dataset = CustomDataset(dataframe=data.drop(columns=["Log Price"]), labels=data[["Log Price"]])
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop

In [9]:
early_stopping = EarlyStopping(patience=10, verbose=True)
num_epochs = 100
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [10]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}')

    early_stopping(running_loss, model, model_path='./model.pth')
    if early_stopping.early_stop:
        print("Early stopping")
        break

print("Done training")

Epoch 1, Loss: 0.6462971418095915
Validation loss decreased (inf --> 96.298274).  Saving model ...
Epoch 2, Loss: 0.5881686012616893
Validation loss decreased (96.298274 --> 87.637122).  Saving model ...
Epoch 3, Loss: 0.5817289246408731
Validation loss decreased (87.637122 --> 86.677610).  Saving model ...
Epoch 4, Loss: 0.571840761191893
Validation loss decreased (86.677610 --> 85.204273).  Saving model ...
Epoch 5, Loss: 0.5625836435180382
Validation loss decreased (85.204273 --> 83.824963).  Saving model ...
Epoch 6, Loss: 0.5565740720137654
Validation loss decreased (83.824963 --> 82.929537).  Saving model ...
Epoch 7, Loss: 0.5546482805437689
Validation loss decreased (82.929537 --> 82.642594).  Saving model ...
Epoch 8, Loss: 0.5479324324019004
Validation loss decreased (82.642594 --> 81.641932).  Saving model ...
Epoch 9, Loss: 0.5470537901324714
Validation loss decreased (81.641932 --> 81.511015).  Saving model ...
Epoch 10, Loss: 0.5401504417793863
Validation loss decreased (

KeyboardInterrupt: 

# Evaluate the model

In [None]:
model.load_state_dict(torch.load('./model.pth'))
model.eval()

test_data = pd.read_csv("../data/complex_data.csv", sep="\t", index_col=0)
test_dataset = CustomDataset(dataframe=test_data.drop(columns=["Log Price"]), labels=test_data[["Log Price"]])
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

test_loss = 0.0
for inputs, labels in test_dataloader:
    inputs, labels = inputs.to(device), labels.to(device)

    outputs = model(inputs)
    loss = loss_fn(outputs, labels)

    test_loss += loss.item()

print(f'Test loss: {test_loss/len(test_dataloader)}')