In [1463]:
# Import libraries
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from joblib import dump
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [1464]:
# Set seeds for reproducibility
# One shared seed, applied up front, so that "Restart Kernel -> Run All" repeats the same run.
# NOTE: CUDA kernels can still introduce small run-to-run differences; this pins the RNG streams only.
SEED = 0
torch.manual_seed(SEED)  # PyTorch default generators (e.g. weight initialisation)
np.random.seed(SEED)  # NumPy's legacy global RNG; scikit-learn helpers called without random_state are documented to draw from it

In [1465]:
# Pick the compute device: CUDA when PyTorch reports an available GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [1466]:
# Read the preprocessed training table from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer="datasets/train_dataset.csv")


In [1467]:
# Separate the target column ("Price") from the remaining feature columns
y = data["Price"].values
X = data.drop(columns="Price").values


In [1468]:
# Hold out 20% of the rows for evaluation; the rest is used for training.
# random_state is left unset, so the shuffle is not pinned by this call.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)


In [1469]:
# Standardise the features: the scaler is fitted on the training split only,
# then applied to both splits and persisted to disk.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
dump(scaler, 'scaler.joblib')

['scaler.joblib']

In [1470]:
# Turn the four NumPy arrays into float32 tensors created directly on the selected device
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32, device=device)
    for array in (X_train, y_train, X_test, y_test)
)


In [1471]:
# Hyperparameters
num_epochs = 500  # number of passes of the training loop
learning_rate = 1e-2  # step size handed to the optimiser
input_size = X_train.shape[1]  # second dimension of the feature matrix = width of the input layer

In [1472]:
# Define the neural network model
class HousePriceModel(nn.Module):
    """Fully connected regressor that maps a feature vector to a single value.

    Layer widths: input_size -> 1024 -> 128 -> 128 -> 128 -> 1. A ReLU follows
    every layer except the last one, whose output is returned unactivated.
    """

    def __init__(self, input_size):
        """Create the five linear layers.

        Args:
            input_size: Number of features in each input row.
        """
        super().__init__()
        # Attribute names fc1..fc5 determine the state_dict keys; keep them stable.
        self.fc1 = nn.Linear(in_features=input_size, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=128)
        self.fc5 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        """Run the network on x and return a tensor whose last dimension is 1."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        return self.fc5(hidden)
    

In [1473]:
# Build the network for the current feature count, then move its parameters to the selected device
model = HousePriceModel(input_size=input_size)
model = model.to(device)


In [1474]:
# Loss: mean absolute error (L1). Optimiser: AdamW over all model parameters.
criterion = nn.L1Loss(reduction="mean")
optimizer = optim.AdamW(params=model.parameters(), lr=learning_rate)


In [1475]:
# Training loop: one full-batch gradient step per epoch, followed by an evaluation pass.
# The targets are reshaped once into column vectors so they line up with the model's (N, 1) output.
train_targets = y_train_tensor.view(-1, 1)
val_targets = y_test_tensor.view(-1, 1)

for epoch in range(num_epochs):
    # --- optimisation step on the whole training set ---
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    train_loss = criterion(outputs, train_targets)
    train_loss.backward()
    optimizer.step()

    # --- evaluation pass without gradient tracking ---
    # y_pred is overwritten every epoch, so after the loop it holds the final epoch's predictions.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor)
        val_loss = criterion(y_pred, val_targets)

    # Only every 100th epoch is reported
    epoch_number = epoch + 1
    if epoch_number % 100 != 0:
        continue
    print(f'Epoch [{epoch_number}/{num_epochs}], Training Loss: {train_loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')



Epoch [100/500], Training Loss: 126484.2109, Validation Loss: 137071.1094
Epoch [200/500], Training Loss: 119434.1406, Validation Loss: 131440.9688
Epoch [300/500], Training Loss: 116391.5547, Validation Loss: 128946.9688
Epoch [400/500], Training Loss: 113228.0938, Validation Loss: 126999.9297
Epoch [500/500], Training Loss: 109219.4922, Validation Loss: 125144.8047


In [1476]:
# Move tensors back to CPU (NumPy conversion requires host memory)
y_pred, y_test_tensor = y_pred.cpu(), y_test_tensor.cpu()


In [1477]:
# Check model performance
# Convert each tensor to NumPy once; .detach().cpu() is harmless for tensors that are
# already detached and on the CPU, so this cell does not depend on an earlier CPU move.
y_true_np = y_test_tensor.detach().cpu().numpy()
y_pred_np = y_pred.detach().cpu().numpy()

mae = metrics.mean_absolute_error(y_true_np, y_pred_np)
mse = metrics.mean_squared_error(y_true_np, y_pred_np)
# RMSE is derived from the MSE rather than via mean_squared_error(..., squared=False):
# that keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = np.sqrt(mse)
var_score = metrics.explained_variance_score(y_true_np, y_pred_np)

print(f'MAE: {mae:.4f}')
print(f'MSE: {mse:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'VarScore: {var_score:.4f}')

MAE: 125144.8047
MSE: 47679283200.0000
RMSE: 218355.8594
VarScore: 0.7322


In [1478]:
# Save the model
model_path = "models/house_price_model.pth"
# torch.save does not create missing folders; make sure the target directory exists
# so a fresh checkout does not fail here after the whole training run.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
# Only the parameters (state_dict) are stored, not the module object itself.
torch.save(model.state_dict(), model_path)
print("Model saved successfully at:", model_path)


Model saved successfully at: models/house_price_model.pth


In [1479]:
# Report where the model lives, judged by the device of its first parameter tensor
device = next(model.parameters()).device

print("Model is trained on GPU." if device.type == 'cuda' else "Model is trained on CPU.")


Model is trained on GPU.
