In [1]:
import pandas as pd
import numpy as np

# Read the training table from disk; the path is relative to the notebook's
# working directory.
train_csv_path = "../data/train_with_images.csv"
train_df = pd.read_csv(train_csv_path)

In [2]:
# Columns of train_df used as tabular model inputs.
tabular_features = [
    "bedrooms",
    "bathrooms",
    "sqft_living",
    "lat",
    "long",
]

# Select the input columns and the target column first, then convert each
# selection to a plain array for the estimator.
tabular_frame = train_df[tabular_features]
price_series = train_df["price"]

X_tab = tabular_frame.values
y = price_series.values

In [3]:
# Load the saved CNN feature array from disk.
# NOTE(review): presumably row i corresponds to row i of train_df — confirm
# against the notebook that wrote this file.
cnn_features_path = "../data/cnn_features.npy"
X_cnn = np.load(cnn_features_path)

# Display the array dimensions as the cell output.
X_cnn.shape


(21, 2048)

In [4]:
# Sanity check before combining: show the dimensions of both feature arrays
# and of the target.
print(X_tab.shape)
print(X_cnn.shape)
print(y.shape)

# Fail fast instead of relying on a visual comparison of the printed shapes.
# This only checks that the row counts agree; it cannot verify that the rows
# of the two feature arrays are in the same order.
assert X_tab.shape[0] == X_cnn.shape[0] == y.shape[0], (
    "Row-count mismatch between tabular features, CNN features and target: "
    f"{X_tab.shape[0]}, {X_cnn.shape[0]}, {y.shape[0]}"
)


(21, 5)
(21, 2048)
(21,)


In [5]:
# Join the tabular columns and the CNN feature columns side by side, keeping
# one row per sample (both inputs are 2-D, so this is a column-wise join).
feature_blocks = (X_tab, X_cnn)
X_combined = np.concatenate(feature_blocks, axis=1)

# Display the combined dimensions as the cell output.
X_combined.shape


(21, 2053)

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical across re-runs.
split_parts = train_test_split(X_combined, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_parts

In [7]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares regression on the combined tabular + CNN features.
# NOTE(review): the recorded shapes show 21 rows and 2053 feature columns, and
# the 80/20 split leaves 16 rows for training. With far more features than
# training rows the least-squares problem is underdetermined, so validation
# metrics from this fit should be treated with caution; consider a
# regularized model or reducing the CNN feature dimensionality.
model = LinearRegression()

# Kept as the last expression so the fitted estimator is the cell's output.
model.fit(X=X_train, y=y_train)

In [8]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the fitted model on the held-out validation rows.
y_pred = model.predict(X_val)

# RMSE is the square root of the mean squared error, so it is expressed in
# the same units as the target.
val_mse = mean_squared_error(y_true=y_val, y_pred=y_pred)
rmse = np.sqrt(val_mse)

# Coefficient of determination on the same validation rows.
r2 = r2_score(y_true=y_val, y_pred=y_pred)

# Display both metrics as the cell output.
(rmse, r2)


(123980.15108252506, 0.5234806660139087)

### Model Comparison

- **Tabular baseline (Day 2)**:
  - RMSE ≈ 231k
  - R² ≈ 0.57

- **Multimodal model (Day 6)**:
  - RMSE ≈ 124k
  - R² ≈ 0.52

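The multimodal model reports a substantially lower RMSE than the tabular baseline, yet its R² is also slightly lower. On a single validation set these two metrics cannot disagree: $R^2 = 1 - \mathrm{MSE} / \mathrm{Var}(y_{\text{val}})$, so a lower RMSE necessarily implies a higher R². The mismatch therefore indicates that the two models were scored on different validation data — the multimodal model here is fitted on only 21 rows (16 for training, 5 for validation), most likely a much smaller set than the Day 2 baseline used — so the figures above are not directly comparable and do not show a trade-off between error minimization and variance explanation. To judge whether the image features actually help, refit the tabular baseline on the same rows with the same train/validation split and compare both metrics there.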
