In [1]:
# Mount Google Drive inside the Colab runtime so the data, scaler and
# checkpoint files referenced below are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler


In [3]:
# Folder holding the processed data and saved artefacts. The trailing "/" is
# required because file paths below are built by plain string concatenation.
BASE_PATH = "/content/drive/MyDrive/IIT_Roorkee_Project/data/"

# Read the pre-processed test split and show its shape plus the first rows
# as a quick sanity check.
test_df = pd.read_csv(BASE_PATH + "test_processed.csv")
print(test_df.shape)
test_df.head()


(5404, 14)


Unnamed: 0,id,bedrooms,bathrooms,sqft_living,sqft_lot,floors,condition,grade,waterfront,view,sqft_living15,sqft_lot15,lat,long
0,2591820310,4.0,2.25,2070.0,8893.0,2.0,4.0,8.0,0.0,0.0,2390.0,7700.0,47.4388,-122.162
1,7974200820,5.0,3.0,2900.0,6730.0,1.0,5.0,8.0,0.0,0.0,2370.0,6283.0,47.6784,-122.285
2,7701450110,4.0,2.5,3770.0,10893.0,2.0,3.0,11.0,0.0,2.0,3710.0,9685.0,47.5646,-122.129
3,9522300010,3.0,3.5,4560.0,14608.0,2.0,3.0,12.0,0.0,2.0,4050.0,14226.0,47.6995,-122.228
4,9510861140,3.0,2.5,2550.0,5376.0,2.0,3.0,9.0,0.0,0.0,2250.0,4050.0,47.6647,-122.083


In [4]:
# Separate the identifier column from the model inputs: `id` is kept aside
# for the submission file and every remaining column becomes a feature.
test_ids = test_df['id']          # keep for submission
X_test = test_df.drop(columns=['id'])


In [5]:
# NOTE(review): this import would conventionally live in the imports cell at
# the top of the notebook.
import joblib
# joblib.load unpickles the file, which can execute arbitrary code; only
# load scaler files from a trusted source.
scaler = joblib.load(BASE_PATH + "tabular_scaler.pkl")

# Apply the saved scaling to the test features.
# NOTE(review): presumably the scaler was fitted on the training features in
# this same column order -- TODO confirm against the training notebook.
X_test_scaled = scaler.transform(X_test)


In [6]:
class TabularMLP(nn.Module):
    """Fully connected regressor with two hidden stages and a scalar head.

    Each hidden stage is Linear -> ReLU -> BatchNorm1d -> Dropout, with widths
    128 and 64 and dropout probabilities 0.3 and 0.2 respectively. A final
    Linear layer maps the 64 hidden units to a single output per row.

    Args:
        d: Number of input features expected by the first Linear layer.
    """

    def __init__(self, d):
        super().__init__()

        # (hidden width, dropout probability) for each hidden stage, in order.
        hidden_spec = ((128, 0.3), (64, 0.2))

        layers = []
        fan_in = d
        for width, drop_p in hidden_spec:
            layers.append(nn.Linear(fan_in, width))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(width))
            layers.append(nn.Dropout(drop_p))
            fan_in = width
        layers.append(nn.Linear(fan_in, 1))

        # The attribute name and the layer positions define the state-dict
        # keys ("net.0.weight", "net.2.running_mean", ...), so both must stay
        # as they are for saved checkpoints to load.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return one value per row.

        Args:
            x: Float tensor whose feature dimension equals `d`
                (presumably shaped (batch, d) -- verify against callers).

        Returns:
            The output of the final Linear layer, with last dimension 1.
        """
        return self.net(x)


In [7]:
# Run on the GPU when the runtime has one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The input width is taken from the scaled feature matrix so the first layer
# matches the number of feature columns.
model = TabularMLP(X_test_scaled.shape[1]).to(device)

# The checkpoint is passed straight to load_state_dict, i.e. it is a plain
# state dict. weights_only=True restricts unpickling to tensors and basic
# containers, so a tampered .pth file cannot execute arbitrary code on load
# (this is also the default behaviour from PyTorch 2.6 onwards).
model.load_state_dict(
    torch.load(
        BASE_PATH + "mlp_tabular_baseline.pth",
        map_location=device,
        weights_only=True,
    )
)

# Switch to inference mode: BatchNorm uses its running statistics and Dropout
# is disabled. Left as the last expression so the notebook shows the model.
model.eval()


TabularMLP(
  (net): Sequential(
    (0): Linear(in_features=13, out_features=128, bias=True)
    (1): ReLU()
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [8]:
# Build the float32 input tensor directly on the target device.
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32, device=device)

# Single forward pass over the whole test set; gradients are not needed for
# inference. The result is brought back to host memory and flattened to 1-D.
with torch.no_grad():
    log_price_preds = model(X_test_tensor).cpu().numpy().reshape(-1)


In [9]:
# Map the model outputs back to the price scale with exp(x) - 1.
# NOTE(review): assumes the model was trained on log1p(price) -- TODO confirm
# against the training notebook.
price_preds = np.expm1(log_price_preds)


In [10]:
# Pair every test id with its predicted price. The ids supply the row index
# and the predictions are attached positionally as a second column.
submission = pd.DataFrame({"id": test_ids}).assign(predicted_price=price_preds)

submission.head()


Unnamed: 0,id,predicted_price
0,2591820310,383845.4
1,7974200820,900697.7
2,7701450110,1308822.0
3,9522300010,1950412.0
4,9510861140,635696.6


In [11]:
# Destination of the predictions file on the mounted Drive.
SUBMISSION_PATH = "/content/drive/MyDrive/IIT_Roorkee_Project/predictions.csv"
# index=False keeps the DataFrame's row index out of the CSV, so the file
# contains only the `id` and `predicted_price` columns.
submission.to_csv(SUBMISSION_PATH, index=False)

print("Saved to:", SUBMISSION_PATH)


Saved to: /content/drive/MyDrive/IIT_Roorkee_Project/predictions.csv
