In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Seed PyTorch's global random generator so that weight initialisation, dropout
# masks and DataLoader shuffling are repeatable from one run to the next.
torch.manual_seed(42)

<torch._C.Generator at 0x7ba816a74bf0>

In [None]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
  device="cuda"
else:
  device='cpu'
print(device)

cpu


In [None]:
# Read the real-estate valuation workbook into a DataFrame.
# NOTE(review): the path is an absolute "/content/..." location - confirm the file exists there before running.
data=pd.read_excel(io="/content/Real estate valuation data set.xlsx")
# Show the first five rows as a quick sanity check of the columns.
data.head(n=5)

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.916667,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.916667,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583333,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833333,5.0,390.5684,5,24.97937,121.54245,43.1


In [None]:
# Count the missing values in every column.
data.isna().sum()

Unnamed: 0,0
No,0
X1 transaction date,0
X2 house age,0
X3 distance to the nearest MRT station,0
X4 number of convenience stores,0
X5 latitude,0
X6 longitude,0
Y house price of unit area,0


In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 8 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   No                                      414 non-null    int64  
 1   X1 transaction date                     414 non-null    float64
 2   X2 house age                            414 non-null    float64
 3   X3 distance to the nearest MRT station  414 non-null    float64
 4   X4 number of convenience stores         414 non-null    int64  
 5   X5 latitude                             414 non-null    float64
 6   X6 longitude                            414 non-null    float64
 7   Y house price of unit area              414 non-null    float64
dtypes: float64(6), int64(2)
memory usage: 26.0 KB


In [None]:
# Name of the regression target column.
target_col='Y house price of unit area'
# 'No' is only a running row number (1, 2, 3, ... in the preview), not a property
# of the house, so it is removed together with the target instead of being fed to
# the network as a feature.
X=data.drop(columns=[target_col,'No'])
y=data[target_col]

In [None]:
# Hold out 20% of the rows for testing; random_state makes the split repeatable.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

# The raw columns live on very different scales (transaction year ~2013, MRT
# distance in the hundreds, longitude ~121), which makes gradient-based training
# of the network unstable. Standardise every feature to zero mean / unit variance.
# The scaler is fitted on the training split only so that no test-set statistics
# leak into training; the results are kept as DataFrames with the original
# column names and row index.
scaler=StandardScaler()
X_train=pd.DataFrame(scaler.fit_transform(X_train),columns=X_train.columns,index=X_train.index)
X_test=pd.DataFrame(scaler.transform(X_test),columns=X_test.columns,index=X_test.index)

In [None]:
#creating custom class
class customDataset(Dataset):
  """In-memory dataset that pairs each feature row with its target value.

  Args:
    features: table of inputs with one row per sample. May be a pandas
      DataFrame, a NumPy array or a nested list.
    labels: one target value per sample. May be a pandas Series, a NumPy
      array or a list.

  Raises:
    ValueError: if ``features`` and ``labels`` hold a different number of samples.
  """
  def __init__(self,features,labels):
    # np.asarray handles pandas objects as well as plain arrays/lists, so the
    # dataset also works on data that is no longer a DataFrame (for example the
    # array returned by a scaler). Both tensors are stored as float32.
    self.features=torch.tensor(np.asarray(features),dtype=torch.float32)
    self.labels=torch.tensor(np.asarray(labels),dtype=torch.float32)
    # Fail early instead of raising an IndexError somewhere inside a DataLoader.
    if len(self.features)!=len(self.labels):
      raise ValueError(
          f"features and labels must have the same length, got {len(self.features)} and {len(self.labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, index):
    """Return the ``(features, label)`` tensors stored at ``index``."""
    return self.features[index],self.labels[index]

In [None]:
# Wrap the training and the test split in customDataset objects so that the
# DataLoaders can draw (features, label) pairs from them.
train_dataset=customDataset(features=X_train,labels=y_train)
test_dataset=customDataset(features=X_test,labels=y_test)

In [None]:
#creating training and testing data loader
# Pinned (page-locked) host memory only speeds up host-to-GPU copies. On a
# CPU-only runtime pin_memory=True brings no benefit and makes DataLoader emit
# a warning, so it is enabled only when the selected device is CUDA.
use_pinned_memory=(device=="cuda")
# Training batches are reshuffled every epoch; the test order stays fixed.
train_loader=DataLoader(train_dataset,batch_size=64,shuffle=True,pin_memory=use_pinned_memory)
test_loader=DataLoader(test_dataset,batch_size=64,shuffle=False,pin_memory=use_pinned_memory)

In [None]:
# Number of mini-batches the training loader yields per epoch.
len(train_loader)

6

In [None]:
# Number of mini-batches the test loader yields.
len(test_loader)

2

In [None]:
#creating a simple nn model
class MyNN(nn.Module):
  """Fully connected regression network.

  Every hidden layer is ``Linear -> ReLU -> Dropout``; the final layer is a
  single linear unit, so the output has shape ``(batch, 1)``.

  Args:
    num_features: number of input features per sample.
    hidden_sizes: widths of the hidden layers, in order. The default
      ``(128, 64)`` gives the 128 -> 64 -> 1 architecture.
    dropout: dropout probability applied after every hidden activation.
  """
  def __init__(self,num_features,hidden_sizes=(128,64),dropout=0.2):
    super().__init__()
    layers=[]
    in_features=num_features
    # Stack one Linear/ReLU/Dropout group per requested hidden width. Layers are
    # created in forward order, so the default configuration keeps the
    # positions 0..6 inside the Sequential (and therefore the state_dict keys).
    for width in hidden_sizes:
      layers+=[nn.Linear(in_features,width),nn.ReLU(),nn.Dropout(dropout)]
      in_features=width
    # Single output unit for the regression target.
    layers.append(nn.Linear(in_features,1))
    self.model=nn.Sequential(*layers)

  def forward(self,x):
    """Map a ``(batch, num_features)`` tensor to ``(batch, 1)`` predictions."""
    return self.model(x)

In [None]:
# Training hyper-parameters: number of passes over the training loader and the
# step size handed to the optimiser.
epochs=50
learning_rate=1e-2

In [None]:
# Instantiate the network with one input unit per feature column and place it
# on the selected device.
model=MyNN(num_features=X_train.shape[1]).to(device)
# Mean-squared-error objective for the regression target.
criterion=nn.MSELoss()
# Adam optimiser over all trainable parameters of the model.
optimizer=optim.Adam(params=model.parameters(),lr=learning_rate)

In [None]:
#training loop
# Make sure dropout is active, even when this cell is re-run after the model
# has been switched to evaluation mode further down in the notebook.
model.train()
for epoch in range(epochs):
  # Sum of per-sample losses over the epoch (not of per-batch means), so the
  # smaller final batch is weighted correctly in the reported average.
  total_epoch_loss=0.0

  for batch_features,batch_labels in train_loader:
    batch_features=batch_features.to(device)
    batch_labels=batch_labels.to(device)
    #forward pass
    outputs=model(batch_features)
    #calculate loss
    # The model returns shape (batch, 1) while the labels arrive as (batch,).
    # Passed to MSELoss as-is they broadcast to (batch, batch) and the loss
    # compares every prediction with every label. Give the labels the same
    # shape as the outputs so the comparison is element-wise.
    loss=criterion(outputs,batch_labels.reshape(outputs.shape))
    #back pass
    optimizer.zero_grad()
    loss.backward()

    #update weights
    optimizer.step()
    total_epoch_loss=total_epoch_loss+loss.item()*batch_labels.size(0)
  epoch_loss=total_epoch_loss/len(train_loader.dataset)
  print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

In [None]:
# Switch the model to evaluation mode (this turns the Dropout layers off)
# before it is scored on the test set.
model.eval()

In [None]:
from sklearn.metrics import r2_score

# Evaluation mode turns dropout off; set it here as well so that the metrics
# do not depend on which cells were executed before this one.
model.eval()

total_loss = 0.0
all_preds = []
all_labels = []
# Disable gradient computation (faster + less memory usage)
with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # move the batch to the selected device
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device).reshape(-1)
        # The model returns (batch, 1); flatten to (batch,) so that predictions
        # and labels are compared element-wise. With mismatched shapes MSELoss
        # broadcasts to (batch, batch) and reports a wrong value.
        outputs = model(batch_features).reshape(-1)

        # criterion returns the batch mean; weight it by the batch size so the
        # smaller final batch does not distort the overall average
        loss = criterion(outputs, batch_labels)
        total_loss += loss.item() * batch_labels.size(0)

        # collect predictions and labels for R²
        all_preds.append(outputs.cpu())
        all_labels.append(batch_labels.cpu())

# Concatenate all the batches into full prediction and label arrays
all_preds = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

# mean squared error over all test samples
test_mse = total_loss / len(all_labels)

# compute R² score
r2 = r2_score(all_labels, all_preds)

print(f"Test Loss (MSE): {test_mse:.4f}")
print(f"R² Score: {r2:.4f}")


In [None]:
from sklearn.metrics import r2_score

# Stand-alone R² check on the test set.
# Evaluation mode turns dropout off so the predictions are deterministic.
model.eval()

# Collect all predictions and labels
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # The model lives on `device`, so the inputs must be moved there too;
        # otherwise the forward pass fails when a GPU is used.
        outputs = model(batch_features.to(device))
        # Bring the results back to the CPU before converting to NumPy and
        # flatten (batch, 1) predictions to one value per sample.
        all_preds.extend(outputs.cpu().numpy().ravel())
        all_labels.extend(batch_labels.cpu().numpy().ravel())

# Compute R² score
r2 = r2_score(all_labels, all_preds)
print(f"R² Score: {r2:.4f}")
