In [49]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import yaml



In [60]:
# Read the numeric tabular data into a DataFrame for inspection.
# NOTE(review): `df` is not referenced by any later cell visible here; the
# loading helper re-reads the same file by path.
df = pd.read_csv('num_tabular_data.csv')

In [82]:

def load_airbnb(csv_file_path, label="Price_Night"):
    """Load a tabular CSV file and split it into features and labels.

    Parameters:
        csv_file_path: path to the CSV file to read.
        label: name of the column used as the regression target. Defaults to
            "Price_Night", so existing single-argument calls behave as before.

    Outputs:
        (features, labels): tuple of float32 NumPy arrays. `features` has one
        row per CSV row and one column per non-label column; `labels` holds
        the values of the `label` column.

    Raises:
        KeyError: if `label` is not a column of the CSV file.
    """
    df = pd.read_csv(csv_file_path)
    if label not in df.columns:
        raise KeyError(
            f"label column {label!r} not found in {csv_file_path!r}; "
            f"available columns: {list(df.columns)}"
        )

    features = df.drop(columns=[label])
    # errors='coerce' turns any cell that cannot be parsed as a number into NaN
    # instead of raising, so non-numeric columns reach the caller as NaN values.
    features = features.apply(pd.to_numeric, errors='coerce')
    features = features.values.astype(np.float32)
    labels = df[label].values.astype(np.float32)
    return features, labels

# Load the (features, labels) pair from the CSV at module level.
# NOTE(review): `ds` is not used by the cells visible here; the Dataset class
# defined in the next cell calls load_airbnb on the same file itself.
ds = load_airbnb('num_tabular_data.csv')


In [83]:
class AirbnbNightlyPriceDataset(Dataset):
    """Torch Dataset over the (features, labels) arrays returned by `load_airbnb`.

    Attributes:
        X: feature array as returned by `load_airbnb`, one row per example.
        y: label array as returned by `load_airbnb`, one entry per example.
    """

    def __init__(self, csv_file_path='num_tabular_data.csv'):
        """Load the features and labels from a CSV file.

        Parameters:
            csv_file_path: path passed to `load_airbnb`. Defaults to
                'num_tabular_data.csv', the file the notebook originally
                hard-coded, so `AirbnbNightlyPriceDataset()` is unchanged.
        """
        super().__init__()
        # The arrays are kept as returned; conversion to tensors happens per
        # item in __getitem__.
        self.X, self.y = load_airbnb(csv_file_path)
        assert len(self.X) == len(self.y) # Data and labels have to be of equal length

    def __getitem__(self, index):
        """Return the `(features, label)` pair at `index` as torch tensors."""
        return (torch.tensor(self.X[index]), torch.tensor(self.y[index]))

    def __len__(self):
        """Return the number of examples."""
        return len(self.X)


# Create the dataset; the constructor reads 'num_tabular_data.csv' through
# load_airbnb, so this cell performs file I/O.
dataset = AirbnbNightlyPriceDataset()

In [108]:
def split_data(dataset, train_fraction=0.7, seed=None):
    """Randomly split a dataset into training, validation and testing subsets.

    The training subset receives `int(len(dataset) * train_fraction)` items.
    The remainder is split in half between validation and testing (the test
    subset gets the extra item when the remainder is odd). With the default
    fraction this is roughly 70% / 15% / 15%.

    Parameters:
        dataset: any object accepted by `torch.utils.data.random_split`.
        train_fraction: share of the data used for training, between 0 and 1.
        seed: optional integer. When given, a dedicated `torch.Generator`
            seeded with it drives both splits, so the same seed always yields
            the same membership. When None, torch's global RNG is used, as in
            the original implementation.

    Outputs:
        (train_dataset, validation_dataset, test_dataset)

    Raises:
        ValueError: if `train_fraction` is outside [0, 1].
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError(f"train_fraction must be between 0 and 1, got {train_fraction}")

    # Only pass `generator` when seeding was requested, so the unseeded path
    # keeps using random_split's default generator.
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    n_train = int(len(dataset) * train_fraction)
    train_dataset, holdout = random_split(
        dataset, [n_train, len(dataset) - n_train], **split_kwargs
    )

    # Split the held-out part in half: validation and test.
    n_validation = int(len(holdout) * 0.5)
    validation_dataset, test_dataset = random_split(
        holdout, [n_validation, len(holdout) - n_validation], **split_kwargs
    )

    print(f"    Training: {len(train_dataset)}")
    print(f"    Validation: {len(validation_dataset)}")
    print(f"    Testing: {len(test_dataset)}")

    return train_dataset, validation_dataset, test_dataset


# Wrap each split in a DataLoader
# shuffle: reshuffle the data every epoch, useful for training only
# num_workers: faster loading with multiple subprocesses
# !!! IF YOU GET AN ERROR DURING LOADING, SET num_workers TO 0 !!!

# Seed torch's global RNG before the random split so that Restart & Run All
# reproduces the same train/validation/test membership and shuffle order.
SEED = 42
BATCH_SIZE = 4
torch.manual_seed(SEED)

train_dataset, validation_dataset, test_dataset = split_data(dataset)

# train loader
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# validation loader: evaluation does not benefit from shuffling, so keep a
# fixed order like the test loader
validation_loader = DataLoader(dataset=validation_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# test loader
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Optional sanity check: inspect one training batch
# features, labels = next(iter(train_loader))
# print(features, labels)

    Training: 581
    Validation: 124
    Testing: 125


In [126]:
def train(model, dataloader, num_epochs, optimiser, criterion, writer=None):
    """Train `model` on `dataloader` and log the per-batch loss to TensorBoard.

    Parameters:
        model: callable module mapping a batch of inputs to predictions.
        dataloader: iterable of `(inputs, labels)` batches that supports `len()`.
        num_epochs: number of passes over `dataloader`.
        optimiser: torch optimiser holding the model's parameters.
        criterion: loss function called as `criterion(output, labels)`.
        writer: optional object with an `add_scalar(tag, value, step)` method.
            When None, a `SummaryWriter()` is created for this call and closed
            before returning; a writer passed in by the caller is left open.

    Outputs:
        None. The loss of every batch is written under the tag 'Loss' and
        printed.

    Notes:
        A batch whose forward pass raises is reported and skipped.
    """
    n_total_steps = len(dataloader)
    owns_writer = writer is None
    if owns_writer:
        writer = SummaryWriter()
    batch_idx = 0
    try:
        for epoch in range(num_epochs):
            for i, (inputs, labels) in enumerate(dataloader):
                # Forward pass and loss
                try:
                    output = model(inputs)
                except Exception as e:
                    print(f"Error occurred on row {i}: {e}")
                    continue

                # A model with one output unit yields shape (batch, 1) while the
                # labels arrive as (batch,). Without this reshape the loss would
                # broadcast the pair to (batch, batch) and average over every
                # prediction/label combination.
                if labels.shape != output.shape and labels.numel() == output.numel():
                    labels = labels.reshape(output.shape)

                loss = criterion(output, labels)

                # Backward pass
                optimiser.zero_grad() # clear gradients left over from any earlier step
                loss.backward() # back propagation
                optimiser.step() # update weights
                writer.add_scalar('Loss', loss.item(), batch_idx)
                batch_idx += 1
                print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
    finally:
        # Flush and release the event file even if training is interrupted.
        if owns_writer:
            writer.close()

class LinearRegression(nn.Module):
    """Linear regression model made of a single fully connected layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping `input_dim` features to `output_dim` outputs."""
        super().__init__()
        # The only layer of the model
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return the result."""
        prediction = self.lin(x)
        return prediction



# 1) Hyperparameters
learning_rate = 0.0001
num_epochs = 2
n_iters = 100  # not referenced by the cells visible here

# 2) Model (9 inputs -> 1 output), mean-squared-error loss and Adam optimiser
model = LinearRegression(9, 1)
criterion = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 3) Fit the model on the training split
train(model, train_loader, num_epochs, optimiser, criterion)


In [125]:
# Evaluate on the validation split. Calling train() here would update the
# weights on validation data and leak it into the model, so only the loss is
# measured, with gradients disabled.
_was_training = model.training
model.eval()
_val_loss_sum = 0.0
_val_count = 0
with torch.no_grad():
    for inputs, labels in validation_loader:
        output = model(inputs)
        # Match the label shape to the model output so the loss does not
        # broadcast (batch, 1) against (batch,).
        loss = criterion(output, labels.reshape(output.shape))
        _val_loss_sum += loss.item() * len(inputs)
        _val_count += len(inputs)
# Restore whichever mode the model was in before evaluation.
model.train(_was_training)
print(f'Validation loss: {_val_loss_sum / max(_val_count, 1):.4f}')

In [None]:
def get_nn_config(yaml_file):
    """Read a YAML file and return its parsed content.

    Parameters:
        yaml_file: path to the .yaml file containing the hyperparameters

    Outputs:
        The value produced by `yaml.safe_load` for the file; a dict of
        hyperparameters when the file's top level is a mapping.
    """
    with open(yaml_file, 'r') as config_stream:
        return yaml.safe_load(config_stream)