In [32]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import yaml
import os
import shutil
import json
import time
from datetime import datetime

from math import sqrt


In [2]:
# Read the raw CSV from the working directory into a DataFrame.
# NOTE(review): the visible cells below re-read this same file through load_airbnb()
# and never reference `df` again — confirm whether this cell is still needed.
df = pd.read_csv('num_tabular_data.csv')

In [3]:

def load_airbnb(csv_file_path):
    """Reads a CSV and splits it into a feature matrix and a nightly-price label vector.

    Parameters:
        csv_file_path: path to a CSV file that contains a 'Price_Night' column

    Outputs:
        (features, labels): tuple of float32 NumPy arrays. ``features`` holds every
        column except 'Price_Night', with values that cannot be parsed as numbers
        replaced by NaN; ``labels`` holds the 'Price_Night' column.
    """
    table = pd.read_csv(csv_file_path)

    # Everything except the target column is a feature; unparseable cells become NaN.
    numeric_features = (
        table
        .drop(columns=['Price_Night'])
        .apply(pd.to_numeric, errors='coerce')
    )

    return (
        numeric_features.values.astype(np.float32),
        table["Price_Night"].values.astype(np.float32),
    )

# (features, labels) tuple of float32 arrays returned by load_airbnb().
# NOTE(review): `ds` is not referenced by any other visible cell — confirm it is still needed.
ds = load_airbnb('num_tabular_data.csv')


In [5]:
class AirbnbNightlyPriceDataset(Dataset):
    """Map-style dataset pairing each listing's numeric features with its nightly price.

    The arrays are loaded once in ``__init__`` and kept as NumPy arrays; individual
    samples are converted to tensors on access.

    Parameters:
        csv_file_path: path of the CSV passed to ``load_airbnb``. Defaults to
            'num_tabular_data.csv', the file this notebook has always read.
    """

    def __init__(self, csv_file_path='num_tabular_data.csv'):
        super().__init__()
        # load_airbnb returns (features, labels) as float32 NumPy arrays.
        self.X, self.y = load_airbnb(csv_file_path)
        # Data and labels have to be of equal length
        assert len(self.X) == len(self.y), "features and labels differ in length"

    def __getitem__(self, index):
        """Returns the ``(features, label)`` pair at ``index`` as torch tensors."""
        return (torch.tensor(self.X[index]), torch.tensor(self.y[index]))

    def __len__(self):
        """Returns the number of samples."""
        return len(self.X)


# Build the dataset used by every split below; constructing it reads
# 'num_tabular_data.csv' from the current working directory.
dataset = AirbnbNightlyPriceDataset()

In [6]:
def split_data(dataset, seed=None):
    """Randomly partitions a dataset into training, validation and testing subsets.

    70% of the samples (rounded down) go to training. The remaining samples are
    split in half: validation receives the rounded-down half and testing the rest,
    i.e. roughly 15% of the whole dataset each.

    Parameters:
        dataset: any object supporting ``len()`` and integer indexing
        seed: optional int. When given, the split is drawn from a dedicated
            ``torch.Generator`` seeded with this value, so the same samples land in
            the same subset on every run. When omitted, torch's global random
            state is used and the split differs between runs.

    Outputs:
        train_dataset, validation_dataset, test_dataset: ``torch.utils.data.Subset``
        views over ``dataset``
    """
    split_kwargs = {}
    if seed is not None:
        # One generator shared by both splits keeps the whole partition reproducible.
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)

    total = len(dataset)
    train_size = int(total * 0.7)
    train_dataset, holdout = random_split(
        dataset, [train_size, total - train_size], **split_kwargs
    )

    # Halve the 30% hold-out; an odd remainder gives the extra sample to testing.
    validation_size = int(len(holdout) * 0.5)
    validation_dataset, test_dataset = random_split(
        holdout, [validation_size, len(holdout) - validation_size], **split_kwargs
    )

    print(f"    Training: {len(train_dataset)}")
    print(f"    Validation: {len(validation_dataset)}")
    print(f"    Testing: {len(test_dataset)}")

    return train_dataset, validation_dataset, test_dataset


# Split the dataset, then wrap each split in a DataLoader that yields batches of 4 samples.
# shuffle=True asks the loader to reorder the samples on each pass (useful for training).
# num_workers=0 loads batches in the main process; a value > 0 would use worker subprocesses.
# !!! IF YOU GET AN ERROR DURING LOADING, SET num_workers TO 0 !!!

train_dataset, validation_dataset, test_dataset = split_data(dataset)

# Training loader: shuffled.
train_loader = DataLoader(dataset = train_dataset, batch_size=4, shuffle=True, num_workers=0)


# Validation loader.
# NOTE(review): shuffle=True here, unlike the test loader — confirm that shuffling
# the validation split is intended.
validation_loader = DataLoader(dataset = validation_dataset, batch_size =4, shuffle=True, num_workers=0)


# Test loader: fixed order.
test_loader = DataLoader(dataset = test_dataset, batch_size = 4, shuffle=False, num_workers=0)

# Debug helper (disabled): pull one batch from the training loader and print it.
# dataiter = iter(train_loader)
# data = next(dataiter)
# features, labels = data
# print(features, labels)

    Training: 581
    Validation: 124
    Testing: 125


In [30]:
def train(model, dataloader, nn_config, dataset_type=None):
    """Fits ``model`` on ``dataloader`` with an MSE objective and saves its weights.

    Every batch that completes a forward pass contributes one optimiser step, one
    'Loss' scalar in TensorBoard, and its samples to the returned metrics. A batch
    whose forward pass raises is reported on stdout and skipped.

    Parameters:
        model: torch.nn.Module; expected to return one prediction per sample with
            shape ``(batch, 1)`` so that it lines up with the reshaped labels
        dataloader: iterable of ``(inputs, labels)`` batches with 1-D ``labels``
        nn_config: dict with the keys 'epochs', 'optimiser' ('SGD' or 'Adam') and
            'learning_rate'
        dataset_type: optional name of the sub-folder of
            './models/regression/neural_networks' that receives the weights; when
            omitted the weights are written to that folder directly

    Outputs:
        metrics_dic: dict of plain Python floats
            'training_time'     - wall-clock seconds spent in the training loop
            'inference_latency' - mean forward-pass seconds per sample
            'RMSE_loss'         - root mean squared error over every sample seen
            'R_squared'         - coefficient of determination over every sample seen
        'inference_latency', 'RMSE_loss' and 'R_squared' are NaN when no batch
        completed; 'R_squared' is also NaN when all labels seen are identical.

    Raises:
        ValueError: if nn_config['optimiser'] is neither 'SGD' nor 'Adam'
    """
    num_epochs = int(nn_config['epochs'])
    if nn_config['optimiser'] == 'SGD':
        optimiser_class = torch.optim.SGD
    elif nn_config['optimiser'] == 'Adam':
        optimiser_class = torch.optim.Adam
    else:
        raise ValueError("Invalid optimiser specified in nn_config.")
    # Cast like 'epochs' above: YAML may deliver values such as "1e-4" as strings.
    optimiser = optimiser_class(model.parameters(), lr=float(nn_config['learning_rate']))

    writer = SummaryWriter()
    batch_idx = 0

    # Running sums over every sample that made it through the model. Metrics are
    # derived from these once at the end, so small or single-sample batches cannot
    # distort (or NaN-poison) the result the way averaging per-batch scores does.
    samples_seen = 0
    forward_seconds = 0.0
    squared_error_sum = 0.0
    label_sum = 0.0
    label_squared_sum = 0.0

    training_start_time = time.time()
    try:
        for _ in range(num_epochs):
            for i, (inputs, labels) in enumerate(dataloader):
                labels = labels.unsqueeze(1)  # (batch,) -> (batch, 1) to match the model output
                # Forward pass; best-effort: report and skip a batch the model rejects.
                try:
                    forward_start = time.perf_counter()
                    output = model(inputs)
                    forward_seconds += time.perf_counter() - forward_start
                except Exception as e:
                    print(f"Error occurred on row {i}: {e}")
                    continue
                loss = F.mse_loss(output, labels)

                # Backward pass
                loss.backward()        # back propagation
                optimiser.step()       # update weights
                optimiser.zero_grad()  # reset gradients
                writer.add_scalar('Loss', loss.item(), batch_idx)
                batch_idx += 1

                # Accumulate in float64, detached from the autograd graph.
                residuals = (output.detach() - labels).double()
                targets = labels.detach().double()
                squared_error_sum += residuals.pow(2).sum().item()
                label_sum += targets.sum().item()
                label_squared_sum += targets.pow(2).sum().item()
                samples_seen += labels.shape[0]
    finally:
        # Flush pending events and release the event file even if training fails.
        writer.close()
    training_duration = time.time() - training_start_time

    undefined = float('nan')
    if samples_seen:
        rmse_loss = sqrt(squared_error_sum / samples_seen)
        inference_latency = forward_seconds / samples_seen
        # SS_tot = sum(y^2) - (sum(y))^2 / n ;  R^2 = 1 - SS_res / SS_tot
        total_sum_of_squares = label_squared_sum - label_sum ** 2 / samples_seen
        if total_sum_of_squares > 0:
            r2 = 1.0 - squared_error_sum / total_sum_of_squares
        else:
            r2 = undefined
    else:
        rmse_loss = inference_latency = r2 = undefined

    # creating the metric dictionary
    metrics_dic = {}
    metrics_dic['training_time'] = training_duration
    metrics_dic['inference_latency'] = inference_latency
    metrics_dic["RMSE_loss"] = rmse_loss
    metrics_dic["R_squared"] = r2

    # NOTE(review): the timestamp contains ':' which is not a valid file-name
    # character on Windows — the format is kept so existing saved models still match.
    model_name = datetime.now().strftime("%d-%m-%Y, %H:%M:%S")

    model_dir = './models/regression/neural_networks'
    if dataset_type is not None:
        model_dir = f'{model_dir}/{dataset_type}'
    # Create the folder the file is actually written to, not just its parent.
    os.makedirs(model_dir, exist_ok=True)
    model_path = f'{model_dir}/{model_name}'
    torch.save(model.state_dict(), model_path)

    return metrics_dic

# NOTE(review): save_model, hyperparameters, metrics and folder are not defined in any
# visible cell, so this call raises NameError on a fresh kernel unless they are
# defined elsewhere — confirm the intended arguments or remove the call.
save_model(model, hyperparameters, metrics, folder)
    
            # Break out of loop after first epoch





In [28]:
def get_nn_config(yaml_file):
    """Parses a YAML file of neural-network hyperparameters.

    Parameters:
        yaml_file: path to the .yaml file containing the hyperparameters

    Outputs:
        nn_config: the parsed document as returned by ``yaml.safe_load``
            (a dict when the file's top level is a mapping)
    """
    with open(yaml_file, 'r') as config_stream:
        return yaml.safe_load(config_stream)


In [31]:
class LinearRegression(nn.Module):
    """Linear regression model made of a single fully connected layer.

    Parameters:
        input_dim: number of input features per sample
        output_dim: number of values predicted per sample
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only layer of the model.
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Applies the linear layer to ``x`` and returns the result."""
        prediction = self.lin(x)
        return prediction



# Linear model with 9 input features and a single output.
# NOTE(review): NeuralNet's first layer in this notebook takes 11 inputs — confirm
# how many feature columns load_airbnb() actually yields.
model = LinearRegression(9, 1)
# num_epochs, criterion and optimiser are only referenced by the later cell that
# calls train() on validation_loader; the train() defined in this notebook reads
# its epochs, optimiser and learning rate from nn_config.
num_epochs = 2
criterion = nn.MSELoss()
learning_rate = 0.0001
# NOTE(review): n_iters is not referenced by any visible cell.
n_iters = 100
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
# 3) Train the model
nn_config = get_nn_config('nn_config.yaml')
# NOTE(review): train() is declared with a dataset_type parameter that this call
# does not pass — confirm the intended value.
train(model, train_loader,  nn_config)


Epoch [1/2], Step [1/146], Loss: 10019.1582
Epoch [1/2], Step [2/146], Loss: 130470.2578
Epoch [1/2], Step [3/146], Loss: 7809.3730
Epoch [1/2], Step [4/146], Loss: 200217.8125
Epoch [1/2], Step [5/146], Loss: 73128.3281
Epoch [1/2], Step [6/146], Loss: 34212.2070
Epoch [1/2], Step [7/146], Loss: 81356.9062
Epoch [1/2], Step [8/146], Loss: 39971.5078
Epoch [1/2], Step [9/146], Loss: 50307.1211
Epoch [1/2], Step [10/146], Loss: 13927.5771
Epoch [1/2], Step [11/146], Loss: 30701.9980
Epoch [1/2], Step [12/146], Loss: 41420.2695
Epoch [1/2], Step [13/146], Loss: 27714.0293
Epoch [1/2], Step [14/146], Loss: 17964.0078
Epoch [1/2], Step [15/146], Loss: 14301.1758
Epoch [1/2], Step [16/146], Loss: 170127.2812
Epoch [1/2], Step [17/146], Loss: 11756.0430
Epoch [1/2], Step [18/146], Loss: 15207.2031
Epoch [1/2], Step [19/146], Loss: 25959.1230
Epoch [1/2], Step [20/146], Loss: 29439.6426
Epoch [1/2], Step [21/146], Loss: 38778.4414
Epoch [1/2], Step [22/146], Loss: 69953.5938
Epoch [1/2], Step

In [None]:
class NeuralNet(torch.nn.Module):
    """Fully connected regression network with two hidden layers.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    Both hidden layers share the same width.

    Parameters:
        nn_config: dict providing 'hidden_layer_width' (units per hidden layer)
            and 'dropout' (probability passed to torch.nn.Dropout)
        input_dim: number of input features per sample; defaults to 11, the width
            this model was originally hard-coded to
        output_dim: number of values predicted per sample; defaults to 1
    """

    def __init__(self, nn_config, input_dim=11, output_dim=1):
        super().__init__()
        # define layers
        self.hidden_layer_width = nn_config['hidden_layer_width']
        self.dropout = nn_config['dropout']

        # Layer order (and therefore the state_dict keys layers.0 / layers.3 /
        # layers.6) is unchanged so previously saved weights still load.
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(input_dim, self.hidden_layer_width),  # uses the same width in all layers of the model
            torch.nn.ReLU(),
            torch.nn.Dropout(self.dropout),
            torch.nn.Linear(self.hidden_layer_width, self.hidden_layer_width),
            torch.nn.ReLU(),
            torch.nn.Dropout(self.dropout),
            torch.nn.Linear(self.hidden_layer_width, output_dim),
        )

    def forward(self, X):
        """Runs ``X`` through the layer stack and returns the predictions."""
        return self.layers(X)

In [125]:
# Runs train() on the validation loader.
# NOTE(review): this call passes (model, loader, num_epochs, optimiser, criterion),
# which does not match the train(model, dataloader, nn_config, dataset_type)
# signature defined in this notebook — it looks like it targets an earlier version.
# train() also performs optimiser steps, so this would fit the model on the
# validation split rather than only evaluate it — confirm the intent.
train(model, validation_loader, num_epochs, optimiser, criterion)