In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from src.dataset import HouseDataset
from src.model import MultimodalModel


In [None]:
# Read the raw training spreadsheet and discard every row that has a missing
# value in any column.
df = pd.read_excel("../data/raw/train.xlsx").dropna()

# Tabular columns fed to both the random-forest baseline and the multimodal
# model; the order here is the column order of X.
features = [
    "bedrooms", "bathrooms",
    "sqft_living", "sqft_lot",
    "floors", "waterfront", "view",
    "condition", "grade",
    "sqft_above", "sqft_basement",
    "lat", "long",
]

# Feature matrix and regression target.
X, y = df[features], df["price"]


In [None]:
# Hold out 20% of the rows for validation. The fixed random_state keeps the
# split identical across kernel restarts.
# `train_test_split` comes from the notebook's top import cell, so all
# dependencies are visible in one place.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Tabular-only baseline: a 100-tree random forest with a fixed seed.
rf_model = RandomForestRegressor(n_jobs=-1, random_state=42, n_estimators=100)

# Fit on the training split; the fitted estimator is the cell's output.
rf_model.fit(X_train, y_train)


In [None]:
# Score the baseline on the held-out validation split.
y_pred = rf_model.predict(X_val)

r2 = r2_score(y_val, y_pred)
# RMSE is the square root of the mean squared error, so it is in the same
# units as the target column.
rmse = np.sqrt(mean_squared_error(y_val, y_pred))

# Display both metrics as the cell output.
(rmse, r2)


In [None]:
# Work on a copy so the frame used by the tabular baseline is left untouched.
df_mm = df.copy()

# Attach an image path to every row.
# NOTE(review): the paths cycle through house_0..house_4 by row position and
# ignore the row's own id — this looks like a placeholder; confirm.
df_mm["image_path"] = [
    f"../data/images/train/house_{row % 5}.png" for row in range(len(df_mm))
]

# Dataset built from the frame and the list of tabular feature columns.
dataset = HouseDataset(df_mm, features)


In [None]:
# Iterate the dataset one sample per batch, in a freshly shuffled order on
# every pass.
loader = DataLoader(dataset, shuffle=True, batch_size=1)


In [None]:
# Seed torch's global RNG before any weights are created so that parameter
# initialisation and the DataLoader's shuffle order are repeatable across
# Restart & Run All. 42 matches the seed used for the split and the forest.
torch.manual_seed(42)

# Width of the tabular input: one value per selected feature column.
tabular_dim = len(features)

model = MultimodalModel(tabular_dim)
# Mean squared error on the predicted price.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [None]:
epochs = 5

# Explicitly enter training mode. The prediction cell further down calls
# model.eval(), so re-running this cell afterwards would otherwise train with
# eval-mode layer behaviour.
model.train()

for epoch in range(epochs):
    # Sum of the per-batch losses over this epoch.
    total_loss = 0.0

    for tab, img, price in loader:
        optimizer.zero_grad()

        # Flatten predictions and targets to the same 1-D shape. A bare
        # .squeeze() collapses a single-sample batch to a 0-d tensor, which no
        # longer matches the (1,)-shaped target and makes MSELoss fall back to
        # broadcasting (with a size-mismatch warning).
        # Assumes the model emits one value per sample — TODO confirm.
        output = model(tab, img).reshape(-1)
        target = price.float().reshape(-1)
        loss = criterion(output, target)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.2f}")


In [None]:
# Run the trained model over every row of `dataset`, one sample at a time,
# and save the id / predicted-price pairs to CSV.
# NOTE(review): `dataset` is the same data the training loop iterated over,
# so these are in-sample predictions — confirm that is intended.
predictions = []

# Inference mode: eval-mode layers, and no gradient tracking.
model.eval()
with torch.no_grad():
    for idx in range(len(dataset)):
        tab, img, _ = dataset[idx]
        # Add a leading batch dimension of size 1 to each input.
        pred = model(tab.unsqueeze(0), img.unsqueeze(0))
        predictions.append(pred.item())

output_df = pd.DataFrame({
    "id": df_mm["id"],
    "predicted_price": predictions
})

# Create the output directory first: to_csv raises OSError when the parent
# directory does not exist (e.g. on a fresh checkout).
output_path = Path("../outputs/predictions.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(output_path, index=False)

output_df.head()
