In [1]:
# Import libraries
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import metrics
from sklearn.model_selection import train_test_split


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [3]:
# Read the preprocessed training table from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer="datasets/train_dataset.csv")


In [4]:
# Separate the "Price" target column from the remaining feature columns
y = data["Price"].values
X = data.drop(columns=["Price"]).values


In [5]:
# Split data into training and testing sets (80% train / 20% test).
# A fixed random_state keeps the same rows in each split on every run, so the
# reported metrics are reproducible after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [6]:
# Build float32 tensors on the selected device for each of the four splits
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32, device=device)
    for split in (X_train, y_train, X_test, y_test)
)


In [7]:
# Define the neural network model
class HousePriceModel(nn.Module):
    """Fully connected regression network with four ReLU hidden layers.

    The network maps a batch of feature vectors to one scalar output per
    sample (used in this notebook as the predicted price).

    Args:
        input_size: Number of features in each input sample. This sets the
            input width of the first layer.
        hidden_size: Width of each hidden layer. Defaults to 17, the value
            the layers were previously hard-coded to, so a model built with
            ``input_size=17`` has the same parameter shapes as before.
    """

    def __init__(self, input_size, hidden_size=17):
        super().__init__()
        # Layer attribute names are kept as fc1..fc5 so state_dict keys stay stable.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        # No activation on the output layer: this is an unbounded regression output.
        x = self.fc5(x)
        return x
    

In [8]:
# Instantiate the model
# Seed PyTorch's RNG first so the initial layer weights are the same on every
# run; without this the training curve and final metrics differ run to run.
torch.manual_seed(42)
# The number of feature columns determines the network's input width.
input_size = X_train.shape[1]
model = HousePriceModel(input_size).to(device)


In [9]:
# Adam (library-default hyperparameters) minimises a mean-squared-error objective
optimizer = optim.Adam(params=model.parameters())
criterion = nn.MSELoss()


In [10]:
# Train the model (full-batch gradient descent: every epoch uses the whole training set)
num_epochs = 10000

# The evaluation cell calls model.eval(); switch back to training mode explicitly
# so re-running this cell after evaluation does not train in eval mode.
model.train()

# The target column vector does not change between epochs, so reshape it once.
# It is reshaped to (N, 1) to match the model output and avoid broadcasting in the loss.
train_targets = y_train_tensor.view(-1, 1)

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, train_targets)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the training loss every 100 epochs
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [100/10000], Loss: 335257075712.0000
Epoch [200/10000], Loss: 157988651008.0000
Epoch [300/10000], Loss: 74021191680.0000
Epoch [400/10000], Loss: 70897893376.0000
Epoch [500/10000], Loss: 68692643840.0000
Epoch [600/10000], Loss: 66827210752.0000
Epoch [700/10000], Loss: 65269497856.0000
Epoch [800/10000], Loss: 64102797312.0000
Epoch [900/10000], Loss: 63303528448.0000
Epoch [1000/10000], Loss: 62780981248.0000
Epoch [1100/10000], Loss: 62450888704.0000
Epoch [1200/10000], Loss: 62234677248.0000
Epoch [1300/10000], Loss: 62067769344.0000
Epoch [1400/10000], Loss: 61905997824.0000
Epoch [1500/10000], Loss: 61686804480.0000
Epoch [1600/10000], Loss: 61388201984.0000
Epoch [1700/10000], Loss: 61026447360.0000
Epoch [1800/10000], Loss: 60529348608.0000
Epoch [1900/10000], Loss: 59930664960.0000
Epoch [2000/10000], Loss: 59187048448.0000
Epoch [2100/10000], Loss: 58293411840.0000
Epoch [2200/10000], Loss: 57271734272.0000
Epoch [2300/10000], Loss: 56182091776.0000
Epoch [2400/10000]

In [11]:
# Score the held-out test split: eval mode on, gradient tracking off
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_pred, y_test_tensor.view(-1, 1))
print(f'Test Loss: {test_loss.item():.4f}')


Test Loss: 36958650368.0000


In [12]:
# Bring predictions and targets back to the CPU (they are converted with .numpy() below)
y_pred, y_test_tensor = y_pred.cpu(), y_test_tensor.cpu()


In [13]:
# Check model performance
# Convert each tensor to NumPy once instead of once per metric.
y_true_np = y_test_tensor.numpy()
y_pred_np = y_pred.numpy()

mse = metrics.mean_squared_error(y_true_np, y_pred_np)

print(f'MAE: {metrics.mean_absolute_error(y_true_np, y_pred_np):.4f}')
print(f'MSE: {mse:.4f}')
# RMSE is derived from the MSE: the `squared=False` argument of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
print(f'RMSE: {np.sqrt(mse):.4f}')
print(f'VarScore: {metrics.explained_variance_score(y_true_np, y_pred_np):.4f}')

MAE: 129263.2734
MSE: 36958650368.0000
RMSE: 192246.3281
VarScore: 0.6800


In [14]:
# Flatten the predicted prices to 1-D and round them to whole integers
predictions = y_pred.numpy().reshape(-1).round().astype(int)
print(predictions)


[399125 553616 543528 ... 659376 258223 635726]


In [15]:
# Save the model
model_path = "models/house_price_model.pth"
# torch.save does not create missing directories, so make sure the target
# folder exists; otherwise a fresh checkout fails here after the full training run.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
# Only the learned parameters (state_dict) are stored, not the model class itself.
torch.save(model.state_dict(), model_path)
print("Model saved successfully at:", model_path)


Model saved successfully at: models/house_price_model.pth


In [16]:
# Report which device currently holds the model's parameters
device = next(model.parameters()).device

print("Model is trained on GPU." if device.type == 'cuda' else "Model is trained on CPU.")


Model is trained on GPU.
