In [1]:
%pip install boto3 xgboost

Note: you may need to restart the kernel to use updated packages.


Load dependencies

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

Loading data

In [64]:
csv = pd.read_csv('data/house_rent.csv')

print("Unique TIME_PERIOD values before splitting:", csv['TIME_PERIOD'].unique())

csv['TIME_PERIOD'] = pd.to_numeric(csv['TIME_PERIOD'], errors='coerce')

csv = pd.get_dummies(csv, columns=['building'])
csv = pd.get_dummies(csv, columns=['freq'])
csv = pd.get_dummies(csv, columns=['currency'])
csv = pd.get_dummies(csv, columns=['geo'])

X = csv.drop(['LAST UPDATE', 'value', 'n_bedrooms'], axis=1)

y = csv['value']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

Unique TIME_PERIOD values before splitting: [2014 2015 2016 2017 2018 2019 2020 2021 2022 2023]


In [65]:
print("Unique TIME_PERIOD values in X_test:", X_test['TIME_PERIOD'].unique())

avg_rent_price = (X['TIME_PERIOD']).mean()

print(avg_rent_price)

Unique TIME_PERIOD values in X_test: [2014 2021 2019 2015]
2018.5


Inspect data

In [66]:
print(X_test)
print(y_test)

    TIME_PERIOD  building_house  freq_Annual  currency_Euro  geo_Rijeka
0          2014            True         True           True        True
17         2021            True         True           True        True
15         2019            True         True           True        True
1          2015            True         True           True        True
0     1950
17    1700
15    1550
1     1950
Name: value, dtype: int64


Set parameters

In [67]:
N_ESTIMATORS = 67
MAX_DEPTH = 2
LEARNING_RATE = 0.26
MIN_CHILD_WEIGHT = 4
GAMMA = 0.18

POPULATION_SIZE = 2000
NUM_GENERATIONS = 200
NUMBER_OF_CHILDREN = 1000
MUTATION_CHANCE = 0.2

Setup model

In [68]:
xgb_reg = XGBRegressor(
    n_estimators=N_ESTIMATORS,
    max_depth=MAX_DEPTH,
    learning_rate=LEARNING_RATE,
    min_child_weight=MIN_CHILD_WEIGHT,
    gamma=GAMMA
)

Train model

In [69]:
xgb_reg.fit(X_train, y_train)

Predict test data

In [70]:
y_pred = xgb_reg.predict(X_test)

regression metrics for fitness evaluation, e.g., mean squared error (MSE)

In [71]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

rmse

np.float64(692.2705205781978)

Create model.json

In [72]:
model_path = './lambda/house_model.json'
xgb_reg.save_model(model_path)