In [1]:
import sys, os
from glob import glob
import pandas as pd
from joblib import load
from datetime import datetime 

# Add the project root to the Python path
project_root = os.path.abspath("../..")
sys.path.append(project_root)

# Import constants
from utils.constants import (
    ML_READY_DATA_FILE,
    MODELS_DIR,
    PREDICTIONS_DIR 
)

# === Step 1: Load ML-ready dataset ===
if not os.path.exists(ML_READY_DATA_FILE):
    raise FileNotFoundError(f"[ERROR] ML-ready dataset not found at: {ML_READY_DATA_FILE}")

print(f"[INFO] Using ML-ready dataset: {os.path.basename(ML_READY_DATA_FILE)}")
df = pd.read_csv(ML_READY_DATA_FILE)
#pd.set_option('display.max_columns', None)
#print("[INFO] First rows of the loaded ML-ready dataset:")
#print(df.head())

# === Step 2: Select 10 random properties (any type) ===
df_sample = df.sample(n=10, random_state=42).reset_index(drop=True)
print("[INFO] 10 random properties selected.")

# Keep only model features
X_sample = df_sample.drop(columns=["id", "url"], errors="ignore")

# === Step 3: Load all .pkl models from MODELS_DIR/pkl ===
models_pkl_dir = os.path.join(MODELS_DIR, "pkl")
pkl_files = glob(os.path.join(models_pkl_dir, "*.pkl"))
print(f">> Searching in: {os.path.join(models_pkl_dir, '*.pkl')}")

if not pkl_files:
    raise ValueError(f"[ERROR] No .pkl models found in {models_pkl_dir}")

predictions = df_sample.copy()

for pkl_path in pkl_files:
    model_name = os.path.basename(pkl_path).replace(".pkl", "")
    try:
        model = load(pkl_path)
        preds = model.predict(X_sample)
        predictions[model_name] = preds
        print(f"[OK] Prediction completed for model: {model_name}")
    except Exception as e:
        print(f"[ERROR] Failed prediction for model '{model_name}': {e}")

# === Step 4: Save predictions in PREDICTIONS_DIR ===
os.makedirs(PREDICTIONS_DIR, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
output_file = f"inference_predictions_{timestamp}.csv"
output_path = os.path.join(PREDICTIONS_DIR, output_file)

predictions.to_csv(output_path, index=False)
print(f"[✓] Inference predictions saved to: {output_path}")


[INFO] Using ML-ready dataset: immoweb_real_estate_ml_ready.csv


[INFO] 10 random properties selected.
>> Searching in: E:\_SoftEng\_BeCode\real-estate-price-predictor\models\pkl\*.pkl
[OK] Prediction completed for model: catboost_optuna_all_20250629_0814_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1033_TEST


[OK] Prediction completed for model: catboost_optuna_all_20250629_1102_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1111_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1234_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1257_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1303_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1311_TEST


[OK] Prediction completed for model: catboost_optuna_all_20250629_1320_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1329_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1408_TEST
[OK] Prediction completed for model: catboost_optuna_all_20250629_1423_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_0814_TEST


[OK] Prediction completed for model: catboost_optuna_top30_20250629_1033_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1102_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1111_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1234_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1257_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1303_TEST


[OK] Prediction completed for model: catboost_optuna_top30_20250629_1311_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1320_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1329_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1408_TEST
[OK] Prediction completed for model: catboost_optuna_top30_20250629_1423_TEST


[ERROR] Failed prediction for model 'xgboost_optuna_all_20250629_0812_TEST': feature_names mismatch: ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_DUPLEX', 'subtype_GROUND_FLOOR', 'subtype_HOUSE', 'subtype_MIXED_USE_BUILDING', 'subtype_PENTHOUSE', 'subtype_TOWN_HOUSE', 'subtype_VILLA', 'province_Antwerp', 'province_Brussels', 'province_East Flanders', 'province_Flemish Brabant', 'province_Hainaut', 'province_Limburg', 'province_Liège', 'province_Luxembourg', 'province_Namur', 'province_Walloon Brabant', 'province_West Flanders', 'locality_Anderlecht', 'locality_Antwerpen', 'locality_Bruxelles', 'locality_Gent', 'locality_Ixelles', 'locality_Knokke-Heist', 'locality_Liège', 'locality_Uccle', 'buildingCondition_AS_NEW', 'buildingCondition_GOOD', 'buildingCondition_JUST_RENOV

[ERROR] Failed prediction for model 'xgboost_optuna_all_20250629_1100_TEST': feature_names mismatch: ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_DUPLEX', 'subtype_GROUND_FLOOR', 'subtype_HOUSE', 'subtype_MIXED_USE_BUILDING', 'subtype_PENTHOUSE', 'subtype_TOWN_HOUSE', 'subtype_VILLA', 'province_Antwerp', 'province_Brussels', 'province_East Flanders', 'province_Flemish Brabant', 'province_Hainaut', 'province_Limburg', 'province_Liège', 'province_Luxembourg', 'province_Namur', 'province_Walloon Brabant', 'province_West Flanders', 'locality_Anderlecht', 'locality_Antwerpen', 'locality_Bruxelles', 'locality_Gent', 'locality_Ixelles', 'locality_Knokke-Heist', 'locality_Liège', 'locality_Uccle', 'buildingCondition_AS_NEW', 'buildingCondition_GOOD', 'buildingCondition_JUST_RENOV

[ERROR] Failed prediction for model 'xgboost_optuna_all_20250629_1301_TEST': feature_names mismatch: ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_DUPLEX', 'subtype_GROUND_FLOOR', 'subtype_HOUSE', 'subtype_MIXED_USE_BUILDING', 'subtype_PENTHOUSE', 'subtype_TOWN_HOUSE', 'subtype_VILLA', 'province_Antwerp', 'province_Brussels', 'province_East Flanders', 'province_Flemish Brabant', 'province_Hainaut', 'province_Limburg', 'province_Liège', 'province_Luxembourg', 'province_Namur', 'province_Walloon Brabant', 'province_West Flanders', 'locality_Anderlecht', 'locality_Antwerpen', 'locality_Bruxelles', 'locality_Gent', 'locality_Ixelles', 'locality_Knokke-Heist', 'locality_Liège', 'locality_Uccle', 'buildingCondition_AS_NEW', 'buildingCondition_GOOD', 'buildingCondition_JUST_RENOV

[ERROR] Failed prediction for model 'xgboost_optuna_all_20250629_1326_TEST': feature_names mismatch: ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_DUPLEX', 'subtype_GROUND_FLOOR', 'subtype_HOUSE', 'subtype_MIXED_USE_BUILDING', 'subtype_PENTHOUSE', 'subtype_TOWN_HOUSE', 'subtype_VILLA', 'province_Antwerp', 'province_Brussels', 'province_East Flanders', 'province_Flemish Brabant', 'province_Hainaut', 'province_Limburg', 'province_Liège', 'province_Luxembourg', 'province_Namur', 'province_Walloon Brabant', 'province_West Flanders', 'locality_Anderlecht', 'locality_Antwerpen', 'locality_Bruxelles', 'locality_Gent', 'locality_Ixelles', 'locality_Knokke-Heist', 'locality_Liège', 'locality_Uccle', 'buildingCondition_AS_NEW', 'buildingCondition_GOOD', 'buildingCondition_JUST_RENOV

[ERROR] Failed prediction for model 'xgboost_optuna_all_20250629_1422_TEST': feature_names mismatch: ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_DUPLEX', 'subtype_GROUND_FLOOR', 'subtype_HOUSE', 'subtype_MIXED_USE_BUILDING', 'subtype_PENTHOUSE', 'subtype_TOWN_HOUSE', 'subtype_VILLA', 'province_Antwerp', 'province_Brussels', 'province_East Flanders', 'province_Flemish Brabant', 'province_Hainaut', 'province_Limburg', 'province_Liège', 'province_Luxembourg', 'province_Namur', 'province_Walloon Brabant', 'province_West Flanders', 'locality_Anderlecht', 'locality_Antwerpen', 'locality_Bruxelles', 'locality_Gent', 'locality_Ixelles', 'locality_Knokke-Heist', 'locality_Liège', 'locality_Uccle', 'buildingCondition_AS_NEW', 'buildingCondition_GOOD', 'buildingCondition_JUST_RENOV

[ERROR] Failed prediction for model 'xgboost_optuna_top30_20250629_1052_TEST': feature_names mismatch: ['habitableSurface', 'bathroomCount', 'postCode', 'toiletCount', 'buildingConstructionYear', 'locality_Knokke-Heist', 'building_age', 'surface_per_room', 'facedeCount', 'kitchenType_HYPER_EQUIPPED', 'buildingCondition_AS_NEW', 'province_West Flanders', 'subtype_VILLA', 'subtype_HOUSE', 'province_Hainaut', 'room_count', 'bedroomCount', 'buildingCondition_TO_RENOVATE', 'epcScore_B', 'hasTerrace', 'subtype_PENTHOUSE', 'epcScore_C', 'buildingCondition_GOOD', 'heatingType_nan', 'hasLivingRoom', 'locality_Ixelles', 'kitchenType_INSTALLED', 'epcScore_A', 'epcScore_F', 'locality_Gent'] ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'is_big_property', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_BUNGALOW', 'subtype_CHALET', '

[ERROR] Failed prediction for model 'xgboost_optuna_top30_20250629_1108_TEST': feature_names mismatch: ['habitableSurface', 'bathroomCount', 'postCode', 'toiletCount', 'buildingConstructionYear', 'locality_Knokke-Heist', 'building_age', 'surface_per_room', 'facedeCount', 'kitchenType_HYPER_EQUIPPED', 'buildingCondition_AS_NEW', 'province_West Flanders', 'subtype_VILLA', 'subtype_HOUSE', 'province_Hainaut', 'room_count', 'bedroomCount', 'buildingCondition_TO_RENOVATE', 'epcScore_B', 'hasTerrace', 'subtype_PENTHOUSE', 'epcScore_C', 'buildingCondition_GOOD', 'heatingType_nan', 'hasLivingRoom', 'locality_Ixelles', 'kitchenType_INSTALLED', 'epcScore_A', 'epcScore_F', 'locality_Gent'] ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'is_big_property', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_BUNGALOW', 'subtype_CHALET', '

[ERROR] Failed prediction for model 'xgboost_optuna_top30_20250629_1314_TEST': feature_names mismatch: ['habitableSurface', 'bathroomCount', 'postCode', 'toiletCount', 'buildingConstructionYear', 'locality_Knokke-Heist', 'building_age', 'surface_per_room', 'facedeCount', 'kitchenType_HYPER_EQUIPPED', 'buildingCondition_AS_NEW', 'province_West Flanders', 'subtype_VILLA', 'subtype_HOUSE', 'province_Hainaut', 'room_count', 'bedroomCount', 'buildingCondition_TO_RENOVATE', 'epcScore_B', 'hasTerrace', 'subtype_PENTHOUSE', 'epcScore_C', 'buildingCondition_GOOD', 'heatingType_nan', 'hasLivingRoom', 'locality_Ixelles', 'kitchenType_INSTALLED', 'epcScore_A', 'epcScore_F', 'locality_Gent'] ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'is_big_property', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_BUNGALOW', 'subtype_CHALET', '

[ERROR] Failed prediction for model 'xgboost_optuna_top30_20250629_1422_TEST': feature_names mismatch: ['habitableSurface', 'bathroomCount', 'postCode', 'toiletCount', 'buildingConstructionYear', 'locality_Knokke-Heist', 'building_age', 'surface_per_room', 'facedeCount', 'kitchenType_HYPER_EQUIPPED', 'buildingCondition_AS_NEW', 'province_West Flanders', 'subtype_VILLA', 'subtype_HOUSE', 'province_Hainaut', 'room_count', 'bedroomCount', 'buildingCondition_TO_RENOVATE', 'epcScore_B', 'hasTerrace', 'subtype_PENTHOUSE', 'epcScore_C', 'buildingCondition_GOOD', 'heatingType_nan', 'hasLivingRoom', 'locality_Ixelles', 'kitchenType_INSTALLED', 'epcScore_A', 'epcScore_F', 'locality_Gent'] ['bedroomCount', 'bathroomCount', 'postCode', 'habitableSurface', 'buildingConstructionYear', 'facedeCount', 'toiletCount', 'is_big_property', 'room_count', 'surface_per_room', 'building_age', 'type_APARTMENT', 'type_HOUSE', 'subtype_APARTMENT', 'subtype_APARTMENT_BLOCK', 'subtype_BUNGALOW', 'subtype_CHALET', '