# Import Packages

In [49]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, Dataset

import torchinfo
from torch_snippets import Report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Select the compute device: prefer CUDA, then Apple's MPS backend, else CPU.
# `torch.has_mps` is deprecated and only reports whether PyTorch was *built*
# with MPS; `torch.backends.mps.is_available()` checks that it can be used.
# The getattr guard keeps this working on PyTorch builds without the
# `torch.backends.mps` module.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = "mps"

# Load Datasets

In [2]:
# Read the three feature tables and the label table, in that order.
general, room, categorical, labels = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../datasets/general.csv",
        "../datasets/room.csv",
        "../datasets/categorical.csv",
        "../datasets/label.csv",
    )
)

## Merge Features

In [22]:
# Place the three feature tables side by side (column-wise), aligned on the row index.
input_features = pd.concat((general, room, categorical), axis="columns")
input_features.head()

Unnamed: 0,Postcode,Sale or Let,Price Qualifier,DESC Council Tax Band,RTD3316_condition1 - Condition Description,# of Enquiry or viewings,# of Apps/Offers,bedroom number,kitchen number,living number,...,Double Glazing,Eco-Friendly,Electric,Gas,Gas Central,Night Storage,Oil,Solar,Solar Water,Under Floor
0,1595,Sale,4,1,0,32,12,2,1,1,...,1,0,1,0,0,1,0,0,0,0
1,389,Sale,2,3,0,14,4,3,1,0,...,1,0,0,0,1,0,0,0,0,0
2,2185,Sale,2,4,0,10,2,3,1,1,...,1,0,0,0,1,0,0,0,0,1
3,196,Sale,7,1,0,9,1,3,1,1,...,0,0,0,0,1,0,0,0,0,0
4,998,Sale,6,1,0,1,2,3,1,0,...,0,0,0,0,1,0,0,0,0,0


## Split Sale and Rental

In [33]:
def _select_listing(features, targets, listing_type):
    """Return the rows of one listing type together with their label rows.

    Args:
        features: DataFrame containing a "Sale or Let" column.
        targets: DataFrame of labels sharing the same row index as `features`.
        listing_type: Value of "Sale or Let" to keep (e.g. "Sale" or "Rental").

    Returns:
        (subset, subset_targets): the matching feature rows without the
        "Sale or Let" column, and the label rows with the same index.
    """
    subset = features.loc[features["Sale or Let"] == listing_type]
    subset = subset.drop(columns=["Sale or Let"])
    # Look labels up by index *label* (.loc). The filtered index holds labels,
    # not positions, so .iloc is only correct on a default RangeIndex.
    return subset, targets.loc[subset.index]


sale_features, sale_labels = _select_listing(input_features, labels, "Sale")
rental_features, rental_labels = _select_listing(input_features, labels, "Rental")

# Create a Dataset Class for Collecting Data

In [34]:
class PropertyDataset(Dataset):
    """Torch dataset pairing each feature row with its label row.

    Args:
        features: DataFrame of model inputs, one row per sample.
        labels: DataFrame of targets with the same number of rows; it must
            have a ``Completed`` column (used to fit ``complete_encoder``).

    Attributes:
        features, labels: the DataFrames passed in.
        complete_encoder: LabelEncoder fitted on ``labels.Completed``. It is
            not applied in ``__getitem__``.
    """

    def __init__(self, features, labels):
        assert len(features) == len(labels)
        self.features = features
        self.labels = labels

        self.complete_encoder = LabelEncoder()
        self.complete_encoder.fit(self.labels.Completed)

        # Convert to NumPy once here. Doing it inside __getitem__ rebuilt the
        # whole array for every sample, making one pass over the data O(N^2).
        self._feature_rows = self.features.to_numpy()
        self._label_rows = self.labels.to_numpy()

    def __getitem__(self, item):
        """Return ``(features, labels)`` for `item` as float tensors on `device`."""
        features = self._feature_rows[item]
        labels = self._label_rows[item]

        return torch.tensor(features).float().to(device), torch.tensor(labels).float().to(device)

    def __len__(self):
        """Number of samples (rows of the feature table)."""
        return len(self.features)

In [36]:
# Wrap the sale subset in a dataset so a single sample can be inspected below.
temp = PropertyDataset(sale_features, sale_labels)

# Build the model

In [27]:
# Width of one feature vector: temp[0] is (features, labels), so temp[0][0]
# is the feature tensor of the first sample.
in_features = len(temp[0][0])
in_features

51

In [44]:
class ProbabilityAndPrice(nn.Module):
    """Shared MLP trunk feeding two single-unit heads.

    The trunk maps ``in_features`` inputs through Linear+ReLU layers of width
    128, 128, 512 and 512. The ``probability`` head ends in a Sigmoid and the
    ``price`` head ends in a ReLU.

    Args:
        in_features: Number of input columns per sample.
    """

    def __init__(self, in_features):
        super().__init__()
        self.in_features = in_features

        # Consecutive pairs of widths describe each Linear layer of the trunk.
        widths = (in_features, 128, 128, 512, 512)
        trunk_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            trunk_layers.append(nn.Linear(fan_in, fan_out))
            trunk_layers.append(nn.ReLU())
        self.hidden = nn.Sequential(*trunk_layers)

        self.probability = nn.Sequential(nn.Linear(512, 1), nn.Sigmoid())
        self.price = nn.Sequential(nn.Linear(512, 1), nn.ReLU())

    def forward(self, x):
        """Return ``(probability, price)`` computed from the shared trunk output."""
        shared = self.hidden(x)
        return self.probability(shared), self.price(shared)

In [45]:
# PropertyDataset.__getitem__ moves every sample to `device`, so the model's
# parameters must be on the same device or the forward pass fails on CUDA/MPS.
model = ProbabilityAndPrice(in_features).to(device)

# One criterion per model output, in the order forward() returns them:
# (probability, price).
probability_loss = nn.MSELoss()
price_loss = nn.MSELoss()
loss_fn = probability_loss, price_loss

In [46]:
# Per-layer summary of the model for an input of shape (16, in_features).
torchinfo.summary(model, input_size=(16, in_features))

Layer (type:depth-idx)                   Output Shape              Param #
ProbabilityAndPrice                      [16, 1]                   --
├─Sequential: 1-1                        [16, 512]                 --
│    └─Linear: 2-1                       [16, 128]                 6,656
│    └─ReLU: 2-2                         [16, 128]                 --
│    └─Linear: 2-3                       [16, 128]                 16,512
│    └─ReLU: 2-4                         [16, 128]                 --
│    └─Linear: 2-5                       [16, 512]                 66,048
│    └─ReLU: 2-6                         [16, 512]                 --
│    └─Linear: 2-7                       [16, 512]                 262,656
│    └─ReLU: 2-8                         [16, 512]                 --
├─Sequential: 1-2                        [16, 1]                   --
│    └─Linear: 2-9                       [16, 1]                   513
│    └─Sigmoid: 2-10                     [16, 1]                   -

# Define Train and Validate Steps

In [48]:
def _compute_loss(yhat, label, loss_fn):
    """Combine model output(s) and labels into a single scalar loss.

    Args:
        yhat: Model output; a tensor, or a tuple/list of tensors (one per head).
        label: Target tensor.
        loss_fn: A single callable ``loss_fn(yhat, label)``, or a tuple/list
            holding one criterion per model output.

    Returns:
        The loss tensor. For several criteria it is the unweighted sum of the
        per-head losses.

    Raises:
        ValueError: If the number of criteria and model outputs differ, or
            no criterion is given.
    """
    if callable(loss_fn):
        return loss_fn(yhat, label)

    outputs = yhat if isinstance(yhat, (tuple, list)) else (yhat,)
    if len(loss_fn) == 0 or len(loss_fn) != len(outputs):
        raise ValueError(
            f"expected one loss per model output, got {len(loss_fn)} losses "
            f"for {len(outputs)} outputs"
        )

    # NOTE(review): assumes the last dimension of `label` is ordered like the
    # model outputs, and that each head emits a single column; confirm against
    # the column order of the label table.
    total = None
    for column, (criterion, output) in enumerate(zip(loss_fn, outputs)):
        # Slice (not index) so the target keeps a trailing dimension of size 1
        # and matches the head's output shape without broadcasting.
        target = label[..., column:column + 1]
        head_loss = criterion(output, target)
        total = head_loss if total is None else total + head_loss
    return total


def train_batch(data, model, loss_fn, optimizer):
    """Run one optimisation step on a single batch.

    Args:
        data: ``(feature, label)`` pair of tensors.
        model: Module to train; put into train mode here.
        loss_fn: A single criterion, or a tuple/list with one criterion per
            model output (the tuple form previously raised TypeError).
        optimizer: Optimizer over the model's parameters.

    Returns:
        The batch loss as a Python float.
    """
    model.train()

    feature, label = data
    yhat = model(feature)

    loss = _compute_loss(yhat, label, loss_fn)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()


@torch.no_grad()
def validate_batch(data, model, loss_fn):
    """Compute the loss on a single batch without tracking gradients.

    Args:
        data: ``(feature, label)`` pair of tensors.
        model: Module to evaluate; put into eval mode here.
        loss_fn: A single criterion, or a tuple/list with one criterion per
            model output.

    Returns:
        The batch loss as a Python float.
    """
    model.eval()

    feature, label = data
    yhat = model(feature)

    loss = _compute_loss(yhat, label, loss_fn)
    return loss.item()

# Train the model for sale data

In [37]:
# Hold out 10% of the sale rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    sale_features,
    sale_labels,
    test_size=0.1,
    random_state=1,
)