In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, RobustScaler, FunctionTransformer
import category_encoders as ce

In [3]:
import dagshub
dagshub.init(repo_owner='sourav664', repo_name='real-estate-hybrid-app', mlflow=True)


In [4]:
import mlflow
mlflow.set_tracking_uri("https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow")

In [5]:
# mlflow experiment

mlflow.set_experiment("Exp 1 - Model Selection")

<Experiment: artifact_location='mlflow-artifacts:/e3f1b77bb6984c30bf68e5e81ceb1bb7', creation_time=1764310688561, experiment_id='0', last_update_time=1764310688561, lifecycle_stage='active', name='Exp 1 - Model Selection', tags={}>

In [6]:
from sklearn import set_config

set_config(transform_output="pandas")

## Load the data

In [7]:
from pathlib import Path

path = Path.cwd().parent
data_path = path / 'data/raw'

In [8]:
df = pd.read_csv(f"{data_path}/final_data.csv")

In [9]:
X = df.drop(columns=["price"])
y = df["price"]

In [10]:
df.duplicated().sum()

np.int64(5808)

In [11]:
df.drop_duplicates(inplace=True)

In [12]:
# train test split

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
     

In [13]:
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

The size of train data is (38187, 11)
The shape of test data is (9547, 11)


In [14]:
ohe_encode = ['transactiontype','region','propertytype','furnished','ageofcons']
target_encode = ['locality']
robust_scaler = ['bedrooms','bathrooms','balconies','superbuiltupareasqft']

In [15]:
pt = FunctionTransformer(np.log1p, inverse_func=np.expm1, validate=True)
y_train_pt = pt.fit_transform(y_train.values.reshape(-1,1))
y_test_pt = pt.transform(y_test.values.reshape(-1,1))



In [16]:
preprocessor = ColumnTransformer(
    [
        ('ohe_encode', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), ohe_encode),
        ('robust_scaler', RobustScaler(), robust_scaler)
    ],remainder="passthrough", n_jobs=-1, force_int_remainder_cols=False,verbose_feature_names_out=False
)


preprocessor.set_output(transform="pandas")

0,1,2
,transformers,"[('ohe_encode', ...), ('robust_scaler', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,with_centering,True
,with_scaling,True
,quantile_range,"(25.0, ...)"
,copy,True
,unit_variance,False


In [17]:
# Creating a pipeline
pipeline = Pipeline([
    ('target_encoder', ce.TargetEncoder(cols=target_encode)),
    ('preprocessor', preprocessor)
    
])

In [18]:
# do data preprocessing

X_train_trans = pipeline.fit_transform(X_train, y_train)

X_test_trans = pipeline.transform(X_test)



In [19]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna
from sklearn.metrics import r2_score, mean_absolute_error

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
def objective(trial):
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model",["RF","KNN","GB","XGB","LGBM"])
                
        if model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf",10,200)
            max_depth_rf = trial.suggest_int("max_depth_rf",2,20)
            model = RandomForestRegressor(n_estimators=n_estimators_rf,
                                        max_depth=max_depth_rf,
                                        random_state=42,
                                        n_jobs=-1)

        elif model_name == "GB":
            n_estimators_gb = trial.suggest_int("n_estimators_gb",10,200)
            learning_rate_gb = trial.suggest_float("learning_rate_gb",0,1)
            max_depth_gb = trial.suggest_int("max_depth_gb",2,20)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                                learning_rate=learning_rate_gb,
                                                max_depth=max_depth_gb,
                                                random_state=42)

        elif model_name == "KNN":
            n_neighbors_knn = trial.suggest_int("n_neighbors_knn",1,25)
            weights_knn = trial.suggest_categorical("weights_knn",["uniform","distance"])
            model = KNeighborsRegressor(n_neighbors=n_neighbors_knn,
                                        weights=weights_knn,n_jobs=-1)

        elif model_name == "XGB":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb",10,200)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb",0.1,0.5)
            max_depth_xgb = trial.suggest_int("max_depth_xgb",2,20)
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                    learning_rate=learning_rate_xgb,
                                    max_depth=max_depth_xgb,
                                    random_state=42,
                                    n_jobs=-1)

        elif model_name == "LGBM":
            n_estimators_lgbm = trial.suggest_int("n_estimators_lgbm",10,200)
            learning_rate_lgbm = trial.suggest_float("learning_rate_lgbm",0.1,0.5)
            max_depth_lgbm = trial.suggest_int("max_depth_lgbm",2,20)
            model = LGBMRegressor(n_estimators=n_estimators_lgbm,
                                    learning_rate=learning_rate_lgbm,
                                    max_depth=max_depth_lgbm,
                                    random_state=42)
        # train the model
        model.fit(X_train_trans,y_train_pt)

        # log model params
        mlflow.log_params(model.get_params())

        # get the predictions
        y_pred_train = model.predict(X_train_trans)
        y_pred_test = model.predict(X_test_trans)

        # get the actual predictions values
        y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
        y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

        # calculate the error
        error = mean_absolute_error(y_test,y_pred_test_org)
        
        # r2_score
        r2 = r2_score(y_test,y_pred_test_org)

        # log model_name
        mlflow.log_param("model",model_name)

        # log error
        mlflow.log_metric("MAE",error)

        # log r2_score
        mlflow.log_metric("r2",r2)

        return r2

In [21]:
# create optuna study
study = optuna.create_study(direction="maximize",study_name="model_selection")

with mlflow.start_run(run_name="Best Model") as parent:
    # optimize the objective function
    study.optimize(objective,n_trials=30,n_jobs=-1)

    # log the best parameters
    mlflow.log_params(study.best_params)

    # log the best score
    mlflow.log_metric("best_score",study.best_value)

[I 2025-11-30 17:39:12,827] A new study created in memory with name: model_selection
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


üèÉ View run receptive-worm-618 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/f59f99052b894700818cd6b9ed9f77f5
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:39:25,074] Trial 3 finished with value: 0.7716103976700943 and parameters: {'model': 'RF', 'n_estimators_rf': 120, 'max_depth_rf': 6}. Best is trial 3 with value: 0.7716103976700943.
  return fit_method(estimator, *args, **kwargs)


üèÉ View run serious-moose-404 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/1602d70f616e47edb14638b50f0aa107
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:39:53,489] Trial 8 finished with value: 0.8563636512784829 and parameters: {'model': 'RF', 'n_estimators_rf': 152, 'max_depth_rf': 11}. Best is trial 8 with value: 0.8563636512784829.
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


üèÉ View run inquisitive-gnu-545 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/8748dc8970644581a41d95f60d60e891
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:39:58,547] Trial 5 finished with value: 0.7235485680254639 and parameters: {'model': 'GB', 'n_estimators_gb': 38, 'learning_rate_gb': 0.03822792769227912, 'max_depth_gb': 11}. Best is trial 8 with value: 0.8563636512784829.


üèÉ View run tasteful-zebra-318 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/8bf1dd3b908a43a9a65d58e7b11e365c
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:03,064] Trial 7 finished with value: 0.8291368952661723 and parameters: {'model': 'KNN', 'n_neighbors_knn': 5, 'weights_knn': 'uniform'}. Best is trial 8 with value: 0.8563636512784829.


üèÉ View run fun-crow-977 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/a1d1a31afa5740cb82f719ee70ffc8ca
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:07,052] Trial 4 finished with value: 0.818776976070889 and parameters: {'model': 'KNN', 'n_neighbors_knn': 21, 'weights_knn': 'distance'}. Best is trial 8 with value: 0.8563636512784829.
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


üèÉ View run serious-auk-125 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/18238c4d35e4424a850fc619cd8a37f0
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:21,276] Trial 9 finished with value: 0.8514711096662799 and parameters: {'model': 'GB', 'n_estimators_gb': 48, 'learning_rate_gb': 0.15263214338979647, 'max_depth_gb': 5}. Best is trial 8 with value: 0.8563636512784829.
  return fit_method(estimator, *args, **kwargs)


üèÉ View run delightful-ant-550 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/326e0e8e26f5484dbb62e250333246a8
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:22,502] Trial 12 finished with value: 0.8285660587356897 and parameters: {'model': 'GB', 'n_estimators_gb': 53, 'learning_rate_gb': 0.8093724159441807, 'max_depth_gb': 2}. Best is trial 8 with value: 0.8563636512784829.
  return fit_method(estimator, *args, **kwargs)


üèÉ View run industrious-boar-79 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/01b16ecc44d947a2ad92367331bcd98d
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:26,998] Trial 10 finished with value: 0.8006598851572725 and parameters: {'model': 'KNN', 'n_neighbors_knn': 18, 'weights_knn': 'uniform'}. Best is trial 8 with value: 0.8563636512784829.
  return fit_method(estimator, *args, **kwargs)


üèÉ View run bright-whale-533 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/9b307ab9c49b45328adf84c62d146aa8
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:34,995] Trial 11 finished with value: 0.8187773078799222 and parameters: {'model': 'KNN', 'n_neighbors_knn': 21, 'weights_knn': 'distance'}. Best is trial 8 with value: 0.8563636512784829.
  return fit_method(estimator, *args, **kwargs)


üèÉ View run angry-whale-173 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/a169694a8ae84802b983a6c8ec41cd35
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run inquisitive-grub-1 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/0c05bcf92ade4ad7bb1ecfa4bb6fa145
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:45,129] Trial 14 finished with value: 0.47941133075128795 and parameters: {'model': 'RF', 'n_estimators_rf': 140, 'max_depth_rf': 2}. Best is trial 8 with value: 0.8563636512784829.
[I 2025-11-30 17:40:46,995] Trial 15 finished with value: 0.8441151745666374 and parameters: {'model': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 10}. Best is trial 8 with value: 0.8563636512784829.


üèÉ View run gifted-asp-250 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/a8a16e9ff7b04cb68a786880d655b81f
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:40:57,005] Trial 16 finished with value: 0.7690342505977836 and parameters: {'model': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 6}. Best is trial 8 with value: 0.8563636512784829.


üèÉ View run unleashed-bee-584 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/cc8b7071376c4d1a91bc4a88b34ad5f0
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run handsome-robin-528 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/087a0bd126e44d47bd2528dc1047361d
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run capable-ray-47 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/fee370f3e6d548eab756ab3706121489
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:41:28,030] Trial 13 finished with value: 0.873125050643491 and parameters: {'model': 'RF', 'n_estimators_rf': 173, 'max_depth_rf': 20}. Best is trial 13 with value: 0.873125050643491.
[I 2025-11-30 17:41:28,993] Trial 2 finished with value: 0.8771562172010914 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 89, 'learning_rate_lgbm': 0.4974095492866254, 'max_depth_lgbm': 5}. Best is trial 2 with value: 0.8771562172010914.


üèÉ View run clumsy-wolf-677 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/7376e56821f9447eb1833963412fea6d
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:41:30,991] Trial 1 finished with value: 0.8713287408792469 and parameters: {'model': 'XGB', 'n_estimators_xgb': 165, 'learning_rate_xgb': 0.24547076723085196, 'max_depth_xgb': 7}. Best is trial 2 with value: 0.8771562172010914.


üèÉ View run crawling-moose-883 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/1556b8a7b6d5449ca4493627835cfc39
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run rumbling-snipe-160 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/33d10f80a16a48b99957f35f5ef1e742
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:41:38,001] Trial 0 finished with value: 0.8734307997400544 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 104, 'learning_rate_lgbm': 0.21372894477616913, 'max_depth_lgbm': 14}. Best is trial 2 with value: 0.8771562172010914.
[I 2025-11-30 17:41:39,997] Trial 18 finished with value: 0.8751497944885767 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 128, 'learning_rate_lgbm': 0.17443999888225517, 'max_depth_lgbm': 9}. Best is trial 2 with value: 0.8771562172010914.
[I 2025-11-30 17:41:41,001] Trial 17 finished with value: 0.8735881572734218 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 145, 'learning_rate_lgbm': 0.10205990750049831, 'max_depth_lgbm': 20}. Best is trial 2 with value: 0.8771562172010914.


üèÉ View run rumbling-ram-315 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/d85ecff793a546ebb3e7ec958c1660d9
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0




üèÉ View run illustrious-kite-595 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/4334c5d43f8d46799c538f4ccce3140e
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:41:51,014] Trial 19 finished with value: 0.8812877995384193 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 158, 'learning_rate_lgbm': 0.3057175097839958, 'max_depth_lgbm': 9}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:41:58,005] Trial 6 finished with value: 0.8612968646836998 and parameters: {'model': 'GB', 'n_estimators_gb': 172, 'learning_rate_gb': 0.34607576305133203, 'max_depth_gb': 10}. Best is trial 19 with value: 0.8812877995384193.


üèÉ View run debonair-whale-876 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/5025c39c7989471782016b07b818a484
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run worried-flea-985 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/b03088d558bc4e588e4138bf821e3d70
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run gentle-mule-199 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/3c97efbfd5964664a6ef7e21ab2079c2
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:42:24,005] Trial 20 finished with value: 0.8647228031175584 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 83, 'learning_rate_lgbm': 0.49639499982835084, 'max_depth_lgbm': 4}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:42:24,986] Trial 21 finished with value: 0.8570570607085042 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 96, 'learning_rate_lgbm': 0.4765555175480849, 'max_depth_lgbm': 3}. Best is trial 19 with value: 0.8812877995384193.


üèÉ View run awesome-loon-696 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/6fcfbbac45ea45faaa6b64f96fdc8f9a
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:42:27,003] Trial 22 finished with value: 0.8767951596671316 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 91, 'learning_rate_lgbm': 0.23421773445040628, 'max_depth_lgbm': 11}. Best is trial 19 with value: 0.8812877995384193.


üèÉ View run able-quail-627 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/de6b6fdab2f3413f988a956badbf2385
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run melodic-grouse-580 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/7423f6951cbc4b8fa423535ec2cc97f2
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:42:33,986] Trial 23 finished with value: 0.8468361859037311 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 37, 'learning_rate_lgbm': 0.4884655613686397, 'max_depth_lgbm': 4}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:42:34,992] Trial 24 finished with value: 0.8384819119685774 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 41, 'learning_rate_lgbm': 0.4832235205789973, 'max_depth_lgbm': 3}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:42:35,990] Trial 25 finished with value: 0.8552214794158342 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 48, 'learning_rate_lgbm': 0.48339902524855577, 'max_depth_lgbm': 4}. Best is trial 19 with value: 0.8812877995384193.


üèÉ View run powerful-vole-861 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/91a51cbf28464ee2bc1a14fa09827824
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run fearless-worm-839 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/4161c7e25e9142a4953f3f1a052f822d
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:42:41,986] Trial 26 finished with value: 0.8330752795865191 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 22, 'learning_rate_lgbm': 0.48246118179864816, 'max_depth_lgbm': 4}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:42:45,037] Trial 27 finished with value: 0.7804667889383071 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 24, 'learning_rate_lgbm': 0.4798337967830924, 'max_depth_lgbm': 2}. Best is trial 19 with value: 0.8812877995384193.


üèÉ View run loud-cod-261 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/7f01f48992ad4c0db8f97ff64f98bb2d
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0
üèÉ View run delicate-slug-353 at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/4ddca7ffa8fa4ea097a30ecafc7aaf6e
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


[I 2025-11-30 17:42:51,990] Trial 28 finished with value: 0.8626626526983985 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 25, 'learning_rate_lgbm': 0.3347702114597547, 'max_depth_lgbm': 10}. Best is trial 19 with value: 0.8812877995384193.
[I 2025-11-30 17:42:53,015] Trial 29 finished with value: 0.8836617607049171 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 199, 'learning_rate_lgbm': 0.3289192832305745, 'max_depth_lgbm': 10}. Best is trial 29 with value: 0.8836617607049171.


üèÉ View run Best Model at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0/runs/d3a78c3330cd4a579e545ae270866414
üß™ View experiment at: https://dagshub.com/sourav664/real-estate-hybrid-app.mlflow/#/experiments/0


In [22]:
study.best_params

{'model': 'LGBM',
 'n_estimators_lgbm': 199,
 'learning_rate_lgbm': 0.3289192832305745,
 'max_depth_lgbm': 10}

In [23]:
# train the model on best parameters

lgbm = LGBMRegressor(**study.best_params)

lgbm.fit(X_train_trans,y_train_pt)





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.188311 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 542
[LightGBM] [Info] Number of data points in the train set: 38187, number of used features: 22
[LightGBM] [Info] Start training from score 1.335152


0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.1
,n_estimators,100
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [24]:
# get the predictions
y_pred_train = lgbm.predict(X_train_trans)
y_pred_test = lgbm.predict(X_test_trans)



In [25]:
# get the actual predictions values

y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

In [26]:
print(f"The train error is {mean_absolute_error(y_train,y_pred_train_org):.2f} lakhs")
print(f"The test error is {mean_absolute_error(y_test,y_pred_test_org):.2f} lakhs")

The train error is 0.63 lakhs
The test error is 0.67 lakhs


In [27]:
print(f"The r2 score is {r2_score(y_train,y_pred_train_org):.2f}")
print(f"The r2 score is {r2_score(y_test,y_pred_test_org):.2f}")

The r2 score is 0.89
The r2 score is 0.87


In [28]:
# dataframe of results

study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_learning_rate_gb,params_learning_rate_lgbm,params_learning_rate_xgb,params_max_depth_gb,params_max_depth_lgbm,params_max_depth_rf,params_max_depth_xgb,params_model,params_n_estimators_gb,params_n_estimators_lgbm,params_n_estimators_rf,params_n_estimators_xgb,params_n_neighbors_knn,params_weights_knn,state
0,0,0.873431,2025-11-30 17:39:13.459072,2025-11-30 17:41:38.001834,0 days 00:02:24.542762,,0.213729,,,14.0,,,LGBM,,104.0,,,,,COMPLETE
1,1,0.871329,2025-11-30 17:39:13.460920,2025-11-30 17:41:30.991789,0 days 00:02:17.530869,,,0.245471,,,,7.0,XGB,,,,165.0,,,COMPLETE
2,2,0.877156,2025-11-30 17:39:13.461864,2025-11-30 17:41:28.993023,0 days 00:02:15.531159,,0.49741,,,5.0,,,LGBM,,89.0,,,,,COMPLETE
3,3,0.77161,2025-11-30 17:39:13.462868,2025-11-30 17:39:25.073954,0 days 00:00:11.611086,,,,,,6.0,,RF,,,120.0,,,,COMPLETE
4,4,0.818777,2025-11-30 17:39:13.464204,2025-11-30 17:40:07.052111,0 days 00:00:53.587907,,,,,,,,KNN,,,,,21.0,distance,COMPLETE
5,5,0.723549,2025-11-30 17:39:13.465417,2025-11-30 17:39:58.546817,0 days 00:00:45.081400,0.038228,,,11.0,,,,GB,38.0,,,,,,COMPLETE
6,6,0.861297,2025-11-30 17:39:13.466448,2025-11-30 17:41:58.005136,0 days 00:02:44.538688,0.346076,,,10.0,,,,GB,172.0,,,,,,COMPLETE
7,7,0.829137,2025-11-30 17:39:13.467789,2025-11-30 17:40:03.063795,0 days 00:00:49.596006,,,,,,,,KNN,,,,,5.0,uniform,COMPLETE
8,8,0.856364,2025-11-30 17:39:25.084216,2025-11-30 17:39:53.489228,0 days 00:00:28.405012,,,,,,11.0,,RF,,,152.0,,,,COMPLETE
9,9,0.851471,2025-11-30 17:39:53.496159,2025-11-30 17:40:21.276043,0 days 00:00:27.779884,0.152632,,,5.0,,,,GB,48.0,,,,,,COMPLETE


In [29]:
study.trials_dataframe()['params_model'].value_counts()

params_model
LGBM    15
RF       6
KNN      4
GB       4
XGB      1
Name: count, dtype: int64

In [30]:
# avg scores for all tested models

study.trials_dataframe().groupby("params_model")['value'].mean().sort_values()

params_model
RF      0.765610
GB      0.816221
KNN     0.816838
LGBM    0.858640
XGB     0.871329
Name: value, dtype: float64