In [2]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch
import torch.nn as nn


In [5]:
train_df = pd.read_excel("data/train(1).xlsx")
train_df.head()


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,9117000170,20150505T000000,268643,4,2.25,1810,9240,2.0,0,0,...,7,1810,0,1961,0,98055,47.4362,-122.187,1660,9240
1,6700390210,20140708T000000,245000,3,2.5,1600,2788,2.0,0,0,...,7,1600,0,1992,0,98031,47.4034,-122.187,1720,3605
2,7212660540,20150115T000000,200000,4,2.5,1720,8638,2.0,0,0,...,8,1720,0,1994,0,98003,47.2704,-122.313,1870,7455
3,8562780200,20150427T000000,352499,2,2.25,1240,705,2.0,0,0,...,7,1150,90,2009,0,98027,47.5321,-122.073,1240,750
4,7760400350,20141205T000000,232000,3,2.0,1280,13356,1.0,0,0,...,7,1280,0,1994,0,98042,47.3715,-122.074,1590,8071


In [6]:
TABULAR_FEATURES = [
    "bedrooms", "bathrooms", "sqft_living", "sqft_lot",
    "floors", "waterfront", "view", "condition", "grade",
    "sqft_above", "sqft_basement",
    "lat", "long",
    "sqft_living15", "sqft_lot15"
]

TARGET = "price"


In [7]:
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [8]:
class HousePriceDataset(Dataset):
    """Dataset yielding (image, tabular features[, target]) for each row of a table.

    Args:
        dataframe: Table with one row per house. Must contain every column named
            in ``tabular_features``, the ``target`` column when one is given, and
            an ``id`` column when ``image_dir`` is set.
        image_dir: Directory holding ``<id>.png`` files, or ``None`` when no
            images are available (every sample then gets the all-zero image).
        tabular_features: Column names used to build the feature vector.
        target: Name of the target column, or ``None`` for unlabeled data.
        transform: Optional callable applied to the loaded RGB PIL image.

    Each item is ``(image, tabular_data, target)`` when ``target`` is set and
    ``(image, tabular_data)`` otherwise.
    """

    def __init__(self, dataframe, image_dir, tabular_features, target=None, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.tabular_features = tabular_features
        self.target = target
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def _load_image(self, idx):
        """Return the image for row ``idx``, or a zero tensor when none is available."""
        if self.image_dir is not None:
            # Read the id from its own column: pulling it out of an `iloc` row
            # can upcast it to float (e.g. "123.0") when all columns are numeric.
            house_id = self.df["id"].iloc[idx]
            image_path = os.path.join(self.image_dir, f"{house_id}.png")

            if os.path.exists(image_path):
                # Close the file handle deterministically; `convert` returns an
                # independent in-memory copy.
                with Image.open(image_path) as handle:
                    image = handle.convert("RGB")
                if self.transform:
                    image = self.transform(image)
                return image

        # No image directory or file missing: fall back to a black image.
        return torch.zeros(3, 224, 224)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # ---- Image ----
        image = self._load_image(idx)

        # ---- Tabular ----
        tabular_data = torch.tensor(
            row[self.tabular_features].astype(float).values,
            dtype=torch.float32
        )

        # ---- Target ----
        if self.target:
            target = torch.tensor(row[self.target], dtype=torch.float32)
            return image, tabular_data, target

        return image, tabular_data


In [9]:
train_dataset = HousePriceDataset(
    dataframe=train_df,
    image_dir="../data/images",
    tabular_features=TABULAR_FEATURES,
    target=TARGET,
    transform=image_transform
)


In [10]:
img, tab, y = train_dataset[0]

img.shape, tab.shape, y


(torch.Size([3, 224, 224]), torch.Size([15]), tensor(268643.))

In [11]:
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)


In [12]:
images, tabular, targets = next(iter(train_loader))

images.shape, tabular.shape, targets.shape


(torch.Size([8, 3, 224, 224]), torch.Size([8, 15]), torch.Size([8]))

In [13]:
import torch.nn as nn
import torchvision.models as models


In [14]:
resnet = models.resnet18(pretrained=True)
resnet




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
class ImageEncoder(nn.Module):
    """ResNet-18 trunk (final ``fc`` layer dropped) that emits flat feature vectors.

    ``output_dim`` advertises the feature width (512) to downstream modules.
    """

    def __init__(self):
        super().__init__()
        resnet18 = models.resnet18(pretrained=True)
        # Keep every child module except the last one (the classification head).
        trunk_layers = list(resnet18.children())
        trunk_layers.pop()
        self.features = nn.Sequential(*trunk_layers)
        self.output_dim = 512

    def forward(self, x):
        """Encode a batch of images and flatten everything after the batch axis."""
        pooled = self.features(x)          # (B, 512, 1, 1) for the ResNet-18 trunk
        batch_size = pooled.size(0)
        return pooled.view(batch_size, -1)  # (B, 512)


In [16]:
image_encoder = ImageEncoder()
image_encoder




ImageEncoder(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats

In [17]:
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0
)


In [18]:
images, tabular, targets = next(iter(train_loader))
image_features = image_encoder(images)

image_features.shape


torch.Size([4, 512])

In [19]:
class TabularModel(nn.Module):
    """Three-layer MLP regressor over tabular features (input_dim -> 64 -> 32 -> 1)."""

    def __init__(self, input_dim):
        super().__init__()
        layers = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map a (B, input_dim) batch to a (B,) vector of predictions."""
        scores = self.net(x)
        # Drop the trailing singleton dimension produced by the last Linear layer.
        return scores.squeeze(1)


In [20]:
tabular_model = TabularModel(input_dim=len(TABULAR_FEATURES))
tabular_model


TabularModel(
  (net): Sequential(
    (0): Linear(in_features=15, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [21]:
images, tabular, targets = next(iter(train_loader))

preds = tabular_model(tabular)

preds.shape, targets.shape


(torch.Size([4]), torch.Size([4]))

In [22]:
class MultimodalHousePriceModel(nn.Module):
    """Regressor that concatenates image-encoder features with tabular features.

    Args:
        image_encoder: Module exposing ``output_dim`` and returning a
            (B, output_dim) tensor for a batch of images.
        tabular_dim: Number of tabular features per sample.
    """

    def __init__(self, image_encoder, tabular_dim):
        super().__init__()
        self.image_encoder = image_encoder

        fused_dim = image_encoder.output_dim + tabular_dim
        head = [
            nn.Linear(fused_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.fusion = nn.Sequential(*head)

    def forward(self, images, tabular):
        """Return a (B,) prediction vector for paired image and tabular batches."""
        visual = self.image_encoder(images)
        # Join both modalities along the feature axis before the fusion head.
        joint = torch.cat((visual, tabular), dim=1)
        return self.fusion(joint).squeeze(1)


In [23]:
multimodal_model = MultimodalHousePriceModel(
    image_encoder=image_encoder,
    tabular_dim=len(TABULAR_FEATURES)
)

multimodal_model


MultimodalHousePriceModel(
  (image_encoder): ImageEncoder(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (b

In [24]:
images, tabular, targets = next(iter(train_loader))

preds = multimodal_model(images, tabular)

preds.shape, targets.shape


(torch.Size([4]), torch.Size([4]))

In [26]:
from torch.utils.data import Subset
import numpy as np

subset_size = 500
# Draw the subset from a seeded generator so the same rows are selected on
# every Restart & Run All; an unseeded draw makes the training metrics below
# change from run to run.
subset_rng = np.random.default_rng(42)
indices = subset_rng.choice(len(train_dataset), subset_size, replace=False)
train_subset = Subset(train_dataset, indices)

train_loader_small = DataLoader(
    train_subset,
    batch_size=16,
    shuffle=True,
    num_workers=0
)


In [27]:
import torch.optim as optim
from sklearn.metrics import mean_squared_error, r2_score


In [28]:
def train_tabular_model(model, dataloader, epochs=5, lr=1e-3):
    """Train a tabular-only regressor with Adam and MSE loss, printing per-epoch metrics.

    Args:
        model: Module called as ``model(tabular)`` that returns one prediction
            per sample.
        dataloader: Iterable of ``(images, tabular, targets)`` batches; the
            images are ignored.
        epochs: Number of passes over ``dataloader``.
        lr: Adam learning rate.

    Raises:
        ValueError: If ``dataloader`` yields no batches.

    The printed ``Loss`` is the sum of the per-batch mean losses for the epoch;
    RMSE and R2 are computed over the predictions made during that epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Adam rejects an empty parameter list, so at least one parameter exists here.
    device = next(model.parameters()).device

    model.train()

    for epoch in range(epochs):
        all_preds, all_targets = [], []
        epoch_loss = 0

        for _images, tabular, targets in dataloader:
            # Keep the batch on the same device as the model (no-op on CPU).
            tabular = tabular.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()

            preds = model(tabular)
            loss = criterion(preds, targets)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            # `.numpy()` only works on CPU tensors, so move before converting.
            all_preds.extend(preds.detach().cpu().numpy())
            all_targets.extend(targets.detach().cpu().numpy())

        if not all_targets:
            raise ValueError("dataloader yielded no batches; nothing to train on")

        rmse = mean_squared_error(all_targets, all_preds) ** 0.5
        r2 = r2_score(all_targets, all_preds)

        print(f"[Tabular] Epoch {epoch+1}: Loss={epoch_loss:.2f}, RMSE={rmse:.2f}, R2={r2:.3f}")


In [29]:
def train_multimodal_model(model, dataloader, epochs=5, lr=1e-3):
    """Train an image+tabular regressor with Adam and MSE loss, printing per-epoch metrics.

    Args:
        model: Module called as ``model(images, tabular)`` that returns one
            prediction per sample.
        dataloader: Iterable of ``(images, tabular, targets)`` batches.
        epochs: Number of passes over ``dataloader``.
        lr: Adam learning rate.

    Raises:
        ValueError: If ``dataloader`` yields no batches.

    The printed ``Loss`` is the sum of the per-batch mean losses for the epoch;
    RMSE and R2 are computed over the predictions made during that epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Adam rejects an empty parameter list, so at least one parameter exists here.
    device = next(model.parameters()).device

    model.train()

    for epoch in range(epochs):
        all_preds, all_targets = [], []
        epoch_loss = 0

        for images, tabular, targets in dataloader:
            # Keep the batch on the same device as the model (no-op on CPU).
            images = images.to(device)
            tabular = tabular.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()

            preds = model(images, tabular)
            loss = criterion(preds, targets)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            # `.numpy()` only works on CPU tensors, so move before converting.
            all_preds.extend(preds.detach().cpu().numpy())
            all_targets.extend(targets.detach().cpu().numpy())

        if not all_targets:
            raise ValueError("dataloader yielded no batches; nothing to train on")

        rmse = mean_squared_error(all_targets, all_preds) ** 0.5
        r2 = r2_score(all_targets, all_preds)

        print(f"[Multimodal] Epoch {epoch+1}: Loss={epoch_loss:.2f}, RMSE={rmse:.2f}, R2={r2:.3f}")


In [30]:
print("Training Tabular Model")
train_tabular_model(tabular_model, train_loader_small, epochs=5)

print("\nTraining Multimodal Model")
train_multimodal_model(multimodal_model, train_loader_small, epochs=5)


Training Tabular Model
[Tabular] Epoch 1: Loss=12059924873216.00, RMSE=616926.79, R2=-2.284
[Tabular] Epoch 2: Loss=11694270472192.00, RMSE=609304.36, R2=-2.204
[Tabular] Epoch 3: Loss=11090367758336.00, RMSE=593312.83, R2=-2.038
[Tabular] Epoch 4: Loss=10748838477824.00, RMSE=576014.77, R2=-1.863
[Tabular] Epoch 5: Loss=10198678536192.00, RMSE=566416.48, R2=-1.769

Training Multimodal Model
[Multimodal] Epoch 1: Loss=12039566606336.00, RMSE=617896.48, R2=-2.295
[Multimodal] Epoch 2: Loss=11859022757888.00, RMSE=603622.93, R2=-2.144
[Multimodal] Epoch 3: Loss=10584716394496.00, RMSE=569041.75, R2=-1.794
[Multimodal] Epoch 4: Loss=10715321409536.00, RMSE=583462.96, R2=-1.938
[Multimodal] Epoch 5: Loss=9836991029248.00, RMSE=557387.15, R2=-1.681


In [31]:
import numpy as np

train_df = train_df.copy()
train_df["price_log"] = np.log1p(train_df["price"])


In [32]:
TARGET = "price_log"


In [33]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_df[TABULAR_FEATURES] = scaler.fit_transform(
    train_df[TABULAR_FEATURES]
)


In [34]:
train_dataset = HousePriceDataset(
    dataframe=train_df,
    image_dir="../data/images",
    tabular_features=TABULAR_FEATURES,
    target=TARGET,
    transform=image_transform
)


In [35]:
img, tab, y = train_dataset[0]
img.shape, tab.shape, y


(torch.Size([3, 224, 224]), torch.Size([15]), tensor(12.5011))

In [36]:
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)


In [37]:
# Tabular-only model
tabular_model = TabularModel(
    input_dim=len(TABULAR_FEATURES)
)

# Multimodal model: build a fresh encoder instead of reusing the global
# `image_encoder`. That instance was part of the earlier multimodal model and
# its weights were already updated by the training run on raw prices, so
# reusing it would start this experiment from a modified backbone.
multimodal_model = MultimodalHousePriceModel(
    image_encoder=ImageEncoder(),
    tabular_dim=len(TABULAR_FEATURES)
)


In [38]:
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0   # IMPORTANT for Jupyter/macOS
)


In [39]:
tabular_model = TabularModel(input_dim=len(TABULAR_FEATURES))


In [40]:
from torch.utils.data import Subset, DataLoader

small_indices = list(range(32))   # only 32 samples
small_dataset = Subset(train_dataset, small_indices)

small_loader = DataLoader(
    small_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0
)


In [41]:
print("Training Multimodal Model (FAST)")
train_multimodal_model(
    multimodal_model,
    small_loader,
    epochs=3,
    lr=1e-4
)


Training Multimodal Model (FAST)
[Multimodal] Epoch 1: Loss=1248.56, RMSE=12.49, R2=-659.637
[Multimodal] Epoch 2: Loss=1077.00, RMSE=11.60, R2=-568.861
[Multimodal] Epoch 3: Loss=932.84, RMSE=10.80, R2=-492.583


In [42]:
print("Training Tabular Model")
train_tabular_model(
    tabular_model,
    train_loader,
    epochs=20,
    lr=1e-3
)


Training Tabular Model
[Tabular] Epoch 1: Loss=14867.54, RMSE=5.42, R2=-105.949
[Tabular] Epoch 2: Loss=833.66, RMSE=1.28, R2=-4.990
[Tabular] Epoch 3: Loss=288.93, RMSE=0.76, R2=-1.078
[Tabular] Epoch 4: Loss=105.27, RMSE=0.46, R2=0.243
[Tabular] Epoch 5: Loss=55.13, RMSE=0.33, R2=0.604
[Tabular] Epoch 6: Loss=40.45, RMSE=0.28, R2=0.709
[Tabular] Epoch 7: Loss=33.79, RMSE=0.26, R2=0.757
[Tabular] Epoch 8: Loss=30.38, RMSE=0.24, R2=0.782
[Tabular] Epoch 9: Loss=28.15, RMSE=0.24, R2=0.798
[Tabular] Epoch 10: Loss=25.93, RMSE=0.23, R2=0.814
[Tabular] Epoch 11: Loss=25.49, RMSE=0.22, R2=0.817
[Tabular] Epoch 12: Loss=24.72, RMSE=0.22, R2=0.822
[Tabular] Epoch 13: Loss=24.04, RMSE=0.22, R2=0.827
[Tabular] Epoch 14: Loss=23.18, RMSE=0.21, R2=0.833
[Tabular] Epoch 15: Loss=23.59, RMSE=0.22, R2=0.830
[Tabular] Epoch 16: Loss=22.82, RMSE=0.21, R2=0.836
[Tabular] Epoch 17: Loss=21.37, RMSE=0.21, R2=0.846
[Tabular] Epoch 18: Loss=21.49, RMSE=0.21, R2=0.846
[Tabular] Epoch 19: Loss=20.82, RMSE=0.

In [43]:
for p in multimodal_model.image_encoder.parameters():
    p.requires_grad = False


In [44]:
print("Training Multimodal Model (FINAL)")
train_multimodal_model(
    multimodal_model,
    train_loader,   # full dataset
    epochs=5,       # keep small
    lr=1e-3
)


Training Multimodal Model (FINAL)
[Multimodal] Epoch 1: Loss=518.65, RMSE=1.01, R2=-2.731
[Multimodal] Epoch 2: Loss=36.99, RMSE=0.27, R2=0.734
[Multimodal] Epoch 3: Loss=38.69, RMSE=0.28, R2=0.722
[Multimodal] Epoch 4: Loss=40.07, RMSE=0.28, R2=0.712
[Multimodal] Epoch 5: Loss=42.50, RMSE=0.29, R2=0.695


In [46]:
import pandas as pd

test_df = pd.read_excel("data/test2.xlsx")


In [47]:
test_df.head()


Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,2591820310,20141006T000000,4,2.25,2070,8893,2.0,0,0,4,8,2070,0,1986,0,98058,47.4388,-122.162,2390,7700
1,7974200820,20140821T000000,5,3.0,2900,6730,1.0,0,0,5,8,1830,1070,1977,0,98115,47.6784,-122.285,2370,6283
2,7701450110,20140815T000000,4,2.5,3770,10893,2.0,0,2,3,11,3770,0,1997,0,98006,47.5646,-122.129,3710,9685
3,9522300010,20150331T000000,3,3.5,4560,14608,2.0,0,2,3,12,4560,0,1990,0,98034,47.6995,-122.228,4050,14226
4,9510861140,20140714T000000,3,2.5,2550,5376,2.0,0,0,3,9,2550,0,2004,0,98052,47.6647,-122.083,2250,4050


In [48]:
test_dataset = HousePriceDataset(
    dataframe=test_df,
    image_dir=None,
    tabular_features=TABULAR_FEATURES,
    target=None,
    transform=None
)


In [49]:
from torch.utils.data import DataLoader

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False
)


In [50]:
tabular_model.eval()


TabularModel(
  (net): Sequential(
    (0): Linear(in_features=15, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [59]:
class TabularOnlyDataset(torch.utils.data.Dataset):
    """Dataset yielding only the tabular feature vector of each row.

    Args:
        dataframe: Table with one row per sample; must contain every column in
            ``tabular_features``.
        tabular_features: Column names used to build the feature vector.

    Each item is a float32 tensor with one entry per feature. Values that
    cannot be parsed as numbers, and missing values, become ``0.0``.
    """

    def __init__(self, dataframe, tabular_features):
        self.df = dataframe.reset_index(drop=True)
        self.features = tabular_features

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Coerce the whole feature slice in one vectorized call rather than
        # applying `pd.to_numeric` to each element; unparseable entries become
        # NaN and are then replaced with 0.0.
        values = (
            pd.to_numeric(row[self.features], errors="coerce")
            .fillna(0.0)
            .values
        )

        x = torch.tensor(values, dtype=torch.float32)
        return x


In [60]:
# The tabular model was trained on features standardized by `scaler`, so the
# test features must go through the same fitted transform (transform only,
# never re-fit on test data). Values are coerced to numeric first; entries that
# cannot be parsed become NaN, stay NaN through the scaler, and are then filled
# with 0.0 by TabularOnlyDataset, which is the training mean in standardized units.
test_df_scaled = test_df.copy()
test_df_scaled[TABULAR_FEATURES] = scaler.transform(
    test_df[TABULAR_FEATURES].apply(pd.to_numeric, errors="coerce")
)

test_dataset = TabularOnlyDataset(
    dataframe=test_df_scaled,
    tabular_features=TABULAR_FEATURES
)

In [61]:
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False
)


In [62]:
# Run the tabular model over the test loader and collect one prediction per row.
tabular_model.eval()

# Accumulates predictions in loader order (the loader is built with shuffle=False).
all_preds = []

# Gradients are not needed for inference.
with torch.no_grad():
    for tabular in test_loader:
        preds = tabular_model(tabular)
        # Move to CPU before converting so this also works for non-CPU tensors.
        all_preds.extend(preds.cpu().numpy())


In [63]:
# The model was trained on `price_log` = log1p(price), so its outputs are in
# log space; invert with expm1 to report actual prices.
predicted_prices = np.expm1(np.asarray(all_preds, dtype=np.float64))

submission = pd.DataFrame({
    "id": test_df["id"].values,
    "predicted_price": predicted_prices
})

submission.to_csv("enrollno_final.csv", index=False)


In [64]:
submission.head()
submission.shape
submission.isna().sum()


id                 0
predicted_price    0
dtype: int64