In [1]:
!pip install xgboost
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from xgboost import XGBRegressor
import optuna

In [3]:
from google.colab import files
uploaded = files.upload()

df = pd.read_csv("ain_construction_duration_dataset.csv")
df.head()

Saving ain_construction_duration_dataset.csv to ain_construction_duration_dataset.csv


Unnamed: 0,area_sqm,floors,rooms,estimated_duration_days
0,331,3,7,455
1,472,2,9,446
2,195,3,4,402
3,380,1,8,365
4,473,2,7,441


In [4]:
X = df[["area_sqm", "floors", "rooms"]]
y = df["estimated_duration_days"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train.shape, X_test.shape

((800, 3), (200, 3))

In [5]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 800),
        "max_depth": trial.suggest_int("max_depth", 2, 8),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "reg_alpha": trial.suggest_float("reg_alpha", 0, 2),
        "reg_lambda": trial.suggest_float("reg_lambda", 1, 5),
        "random_state": 42
    }

    model = XGBRegressor(**params)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    mae = mean_absolute_error(y_test, preds)
    return mae

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=40)

best_params = study.best_params
best_params

[I 2025-11-18 11:17:12,844] A new study created in memory with name: no-name-f24ac49c-2a54-492b-bcfd-d907e97070d9
[I 2025-11-18 11:17:13,034] Trial 0 finished with value: 19.850008010864258 and parameters: {'n_estimators': 387, 'max_depth': 6, 'learning_rate': 0.18551283002486904, 'subsample': 0.6569962618139376, 'colsample_bytree': 0.9422638551888554, 'gamma': 4.652465080842533, 'min_child_weight': 9, 'reg_alpha': 0.21990523405946338, 'reg_lambda': 1.7041904281457292}. Best is trial 0 with value: 19.850008010864258.
[I 2025-11-18 11:17:13,109] Trial 1 finished with value: 16.32289695739746 and parameters: {'n_estimators': 407, 'max_depth': 2, 'learning_rate': 0.03979511959119309, 'subsample': 0.9954619984585051, 'colsample_bytree': 0.8834525998527165, 'gamma': 1.3669356651810123, 'min_child_weight': 7, 'reg_alpha': 0.34166743712965575, 'reg_lambda': 2.5716665322128778}. Best is trial 1 with value: 16.32289695739746.
[I 2025-11-18 11:17:13,271] Trial 2 finished with value: 21.005353927

{'n_estimators': 367,
 'max_depth': 2,
 'learning_rate': 0.033476962476256804,
 'subsample': 0.9234786469207903,
 'colsample_bytree': 0.6717170977175017,
 'gamma': 3.9906547914788257,
 'min_child_weight': 8,
 'reg_alpha': 0.07804126438744086,
 'reg_lambda': 1.263485418391518}

In [6]:
model = XGBRegressor(**best_params)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("R²:", r2)

MAE: 16.29408073425293
R²: 0.8079439401626587


In [7]:
example = pd.DataFrame({
    "area_sqm": [320],
    "floors": [2],
    "rooms": [6]
})

pred = model.predict(example)[0]
print("⏳ Estimated duration (days):", round(pred))
print("≈", round(pred/30, 1), "months")

⏳ Estimated duration (days): 392
≈ 13.1 months
