In [8]:
# 03_price_prediction.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [10]:
# Load clustered data
df = pd.read_csv("../data/listings_clustered.csv")

# Predictor columns, one per line; the list order is the column order of X.
features = [
    'accommodates',
    'bedrooms',
    'beds',
    'amenities_count',
    'host_experience_years',
    'latitude',
    'longitude',
    # NOTE(review): the next three look categorical; tree ensembles in
    # scikit-learn need numeric input, so presumably they are already encoded
    # in the CSV -- confirm against the notebook that writes it.
    'property_type',
    'room_type',
    'city',
    # Presumably the cluster label assigned by the preceding clustering step.
    'cluster_kmeans',
]

# Design matrix and regression target.
X = df[features]
y = df['price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Train model
# Gradient boosting with the library's default hyperparameters; only the seed
# is pinned so that re-running the cell reproduces the same fit.
# `fit` returns the estimator itself, so construction and training are chained.
model = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows, consumed by the evaluation cell.
y_pred = model.predict(X_test)

In [14]:
# Evaluation
# Score the held-out predictions with three complementary metrics.
#
# RMSE is taken as the square root of the MSE instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and is
# slated for removal in 1.6, so the explicit root keeps this cell working on
# both old and new releases without any extra import.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# RMSE and MAE are in the units of `price`; RMSE penalises large errors more
# heavily, so a wide gap between the two points at a few big misses.
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")
print(f"R^2: {r2:.2f}")

RMSE: 543.90
MAE: 140.04
R^2: 0.40


