In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, RobustScaler
import category_encoders as ce

In [2]:
import dagshub
dagshub.init(repo_owner='sourav664', repo_name='real-estate-price-prediction', mlflow=True)


In [3]:
import mlflow
mlflow.set_tracking_uri("https://dagshub.com/sourav664/real-estate-price-prediction.mlflow")

In [4]:
# mlflow experiment

mlflow.set_experiment("Exp 1 - Model Selection")

<Experiment: artifact_location='mlflow-artifacts:/73393645fca64bf7978ed8616097e7bf', creation_time=1744263602599, experiment_id='0', last_update_time=1744263602599, lifecycle_stage='active', name='Exp 1 - Model Selection', tags={}>

In [5]:

from sklearn import set_config

set_config(transform_output="pandas")

## Load the Data

In [None]:
df = pd.read_csv("../data/raw/real_estatesv9.csv")

In [None]:
df = df[['price','carpet_area','bedroom','bathroom','transaction_type','floor_category','luxury_category','property_type','regions','balconies_iter','furnished_status_imputed','additional_room']]

Unnamed: 0,property_type,price,carpet_area,bedroom,bathroom,status,transaction_type,additional_room,regions,balconies_imputed,balconies_iter,floorNum_iter,furnished_status_imputed,luxury_score_iter,luxury_category,floor_category
0,flat,0.52,1200.0,3.0,2.0,ready to move,resale,0.0,bangalore,2.0,3.0,4.0,0,10.0,Medium,Mid Floor
1,flat,1.64,1286.0,3.0,3.0,under construction,new property,0.0,bangalore,3.0,3.0,6.0,0,27.0,High,Mid Floor
2,flat,1.41,952.0,2.0,2.0,ready to move,new property,0.0,bangalore,2.0,2.0,4.0,0,8.0,Low,Mid Floor
3,flat,0.67,1128.0,3.0,2.0,ready to move,new property,0.0,bangalore,2.0,3.0,1.0,0,9.0,Low,Low Floor
4,villa,1.05,960.0,3.0,3.0,ready to move,new property,1.0,bangalore,2.0,2.0,4.0,0,15.0,Medium,Mid Floor


In [None]:
df.duplicated().sum(
    
)

Unnamed: 0,property_type,price,carpet_area,bedroom,bathroom,transaction_type,additional_room,regions,balconies_iter,furnished_status_imputed,luxury_category,floor_category
0,flat,0.52,1200.0,3.0,2.0,resale,0.0,bangalore,3.0,0,Medium,Mid Floor
1,flat,1.64,1286.0,3.0,3.0,new property,0.0,bangalore,3.0,0,High,Mid Floor
2,flat,1.41,952.0,2.0,2.0,new property,0.0,bangalore,2.0,0,Low,Mid Floor
3,flat,0.67,1128.0,3.0,2.0,new property,0.0,bangalore,3.0,0,Low,Low Floor
4,villa,1.05,960.0,3.0,3.0,new property,1.0,bangalore,2.0,0,Medium,Mid Floor
...,...,...,...,...,...,...,...,...,...,...,...,...
17869,villa,2.50,2220.0,3.0,5.0,resale,0.0,bangalore,3.0,1,Medium,Mid Floor
17870,flat,5.85,2365.0,5.0,5.0,resale,0.0,bangalore,4.0,1,Medium,High Floor
17871,flat,1.48,1400.0,3.0,3.0,resale,0.0,bangalore,3.0,1,Medium,Low Floor
17872,residential,1.60,1920.0,4.0,4.0,resale,1.0,bangalore,2.0,1,Medium,Mid Floor


In [None]:
df.drop_duplicates(inplace=True)

In [35]:
X = df.drop(columns=["price"])
y = df["price"]

In [36]:
# train test split

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
     

In [37]:
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

The size of train data is (14299, 11)
The shape of test data is (3575, 11)


In [38]:
ohe_encode = ['transaction_type']
ordinal_encode = ['luxury_category','floor_category']
target_encode = ['regions','property_type']
std_encode = ['bedroom','bathroom','balconies_iter']
robust_encode = ['carpet_area']

In [39]:
luxury_order = ['Low', 'Medium', 'High']

floor_order = ['Low Floor', 'Mid Floor', 'High Floor']

In [40]:
from sklearn.preprocessing import FunctionTransformer

pt = FunctionTransformer(np.log1p, inverse_func=np.expm1, validate=True)
y_train_pt = pt.fit_transform(y_train.values.reshape(-1,1))
y_test_pt = pt.transform(y_test.values.reshape(-1,1))



In [41]:
preprocessor = ColumnTransformer(
    transformers=[
        ("ohe", OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=False), ohe_encode),
        ("ordinal", OrdinalEncoder(categories=[luxury_order, floor_order]), ordinal_encode),
        
        ("num", StandardScaler(), std_encode),
        ("robust", RobustScaler(), robust_encode)
    ], remainder="passthrough", n_jobs=-1, force_int_remainder_cols=False,verbose_feature_names_out=False
)

preprocessor.set_output(transform="pandas")

In [42]:
# Creating a pipeline
pipeline = Pipeline([
    ('target_encoder', ce.TargetEncoder(cols=target_encode)),
    ('preprocessor', preprocessor)
    
])

In [43]:
# do data preprocessing

X_train_trans = pipeline.fit_transform(X_train, y_train)

X_test_trans = pipeline.transform(X_test)

In [44]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna
from sklearn.metrics import r2_score, mean_absolute_error

In [45]:
def objective(trial):
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model",["SVM","RF","KNN","GB","XGB","LGBM"])
        
        if model_name == "SVM":
            kernel_svm = trial.suggest_categorical("kernel",["rbf","linear", "poly"])
            if kernel_svm == "linear":
                c_linear = trial.suggest_float("c_linear",0,10)
                model = SVR(kernel="linear", C=c_linear)
                
            elif kernel_svm == "poly":
                c_poly = trial.suggest_float("c_poly",0,10)
                degree_poly = trial.suggest_int("degree_poly",1,5)
                model = SVR(kernel="poly", C=c_poly, degree=degree_poly)
                
            else:
                c_rbf = trial.suggest_float("c_rbf",0,100)
                gamma_rbf = trial.suggest_float("gamma_rbf",0,10)
                model = SVR(kernel="rbf", C=c_rbf, gamma=gamma_rbf)
                
        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf",10,200)
            max_depth_rf = trial.suggest_int("max_depth_rf",2,20)
            model = RandomForestRegressor(n_estimators=n_estimators_rf,
                                        max_depth=max_depth_rf,
                                        random_state=42,
                                        n_jobs=-1)

        elif model_name == "GB":
            n_estimators_gb = trial.suggest_int("n_estimators_gb",10,200)
            learning_rate_gb = trial.suggest_float("learning_rate_gb",0,1)
            max_depth_gb = trial.suggest_int("max_depth_gb",2,20)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                                learning_rate=learning_rate_gb,
                                                max_depth=max_depth_gb,
                                                random_state=42)

        elif model_name == "KNN":
            n_neighbors_knn = trial.suggest_int("n_neighbors_knn",1,25)
            weights_knn = trial.suggest_categorical("weights_knn",["uniform","distance"])
            model = KNeighborsRegressor(n_neighbors=n_neighbors_knn,
                                        weights=weights_knn,n_jobs=-1)

        elif model_name == "XGB":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb",10,200)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb",0.1,0.5)
            max_depth_xgb = trial.suggest_int("max_depth_xgb",2,20)
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                    learning_rate=learning_rate_xgb,
                                    max_depth=max_depth_xgb,
                                    random_state=42,
                                    n_jobs=-1)

        elif model_name == "LGBM":
            n_estimators_lgbm = trial.suggest_int("n_estimators_lgbm",10,200)
            learning_rate_lgbm = trial.suggest_float("learning_rate_lgbm",0.1,0.5)
            max_depth_lgbm = trial.suggest_int("max_depth_lgbm",2,20)
            model = LGBMRegressor(n_estimators=n_estimators_lgbm,
                                    learning_rate=learning_rate_lgbm,
                                    max_depth=max_depth_lgbm,
                                    random_state=42)
        # train the model
        model.fit(X_train_trans,y_train_pt)

        # log model params
        mlflow.log_params(model.get_params())

        # get the predictions
        y_pred_train = model.predict(X_train_trans)
        y_pred_test = model.predict(X_test_trans)

        # get the actual predictions values
        y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
        y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

        # calculate the error
        error = mean_absolute_error(y_test,y_pred_test_org)

        # log model_name
        mlflow.log_param("model",model_name)

        # log error
        mlflow.log_metric("MAE",error)

        return error

In [46]:
# create optuna study
study = optuna.create_study(direction="minimize",study_name="model_selection")

with mlflow.start_run(run_name="Best Model") as parent:
    # optimize the objective function
    study.optimize(objective,n_trials=30,n_jobs=-1)

    # log the best parameters
    mlflow.log_params(study.best_params)

    # log the best score
    mlflow.log_metric("best_score",study.best_value)

[I 2025-04-15 15:49:20,874] A new study created in memory with name: model_selection
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


🏃 View run serious-fox-79 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/7cbe69f561cc42bebe3860747b4f2e44
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:49:38,516] Trial 4 finished with value: 0.7463738077966304 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 49, 'learning_rate_lgbm': 0.43608630259814796, 'max_depth_lgbm': 12}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run secretive-wasp-760 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/d5ee3985f66f406cb0e5eb3d958d14c5
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run adventurous-crab-357 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/c7c00d1599e54b89929ad0da6d5a67a4
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run big-horse-859 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/ca43bf65a6d74d90b24575f0e55d69d5
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run honorable-slug-847 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/82736da6ac8645a09cb0bdc8e0900a40
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-predi

  return fit_method(estimator, *args, **kwargs)
[I 2025-04-15 15:49:46,486] Trial 3 finished with value: 0.7721016014990688 and parameters: {'model': 'RF', 'n_estimators_rf': 18, 'max_depth_rf': 10}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:49:47,375] Trial 7 finished with value: 0.7613362803283666 and parameters: {'model': 'RF', 'n_estimators_rf': 38, 'max_depth_rf': 19}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run bustling-ape-878 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/596498f1df924036ac0bf7623305b02b
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:49:50,409] Trial 2 finished with value: 0.8073513433249236 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 18, 'learning_rate_lgbm': 0.1307231773235692, 'max_depth_lgbm': 7}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:49:51,396] Trial 1 finished with value: 0.7538729984234397 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 74, 'learning_rate_lgbm': 0.23855665142001753, 'max_depth_lgbm': 3}. Best is trial 4 with value: 0.7463738077966304.
  y = column_or_1d(y, warn=True)
[I 2025-04-15 15:49:56,375] Trial 6 finished with value: 0.7587619095275453 and parameters: {'model': 'XGB', 'n_estimators_xgb': 103, 'learning_rate_xgb': 0.17784130839430223, 'max_depth_xgb': 8}. Best is trial 4 with value: 0.7463738077966304.
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


🏃 View run angry-skunk-688 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/7e2a1849eaad442c8835e05105088505
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run intrigued-fowl-521 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/453fa6e1670e4ce1856f52d994c79faa
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run agreeable-mouse-394 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/c8e8d606b5c3417ea8ce707a50cc19b7
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:50:15,563] Trial 5 finished with value: 1.1454756946630902 and parameters: {'model': 'SVM', 'kernel': 'poly', 'c_poly': 2.3484457966940253, 'degree_poly': 1}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run awesome-shrimp-86 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/8cec9f362c6b4bf0a45dfe38223b0026
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:50:20,479] Trial 8 finished with value: 0.7576575395389794 and parameters: {'model': 'RF', 'n_estimators_rf': 187, 'max_depth_rf': 12}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:50:21,505] Trial 0 finished with value: 0.8770642732334406 and parameters: {'model': 'SVM', 'kernel': 'poly', 'c_poly': 1.8568371356493718, 'degree_poly': 2}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run fearless-skunk-898 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/fe78a799c4bf4769be47994cd5887058
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run efficient-quail-438 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/8b0aa7f606054363afa4dec9110180f1
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:50:26,378] Trial 9 finished with value: 0.8525393615960337 and parameters: {'model': 'KNN', 'n_neighbors_knn': 7, 'weights_knn': 'distance'}. Best is trial 4 with value: 0.7463738077966304.
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
[I 2025-04-15 15:50:31,538] Trial 12 finished with value: 0.7519590549125302 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 126, 'learning_rate_lgbm': 0.30505405411267744, 'max_depth_lgbm': 7}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:50:32,397] Trial 11 finished with value: 0.8049988733658423 and parameters: {'model': 'XGB', 'n_estimators_xgb': 120, 'learning_rate_xgb': 0.11556782229916829, 'max_depth_xgb': 14}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run efficient-quail-785 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/60cabda4c217409aaf003e31ce0d551c
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:50:42,386] Trial 13 finished with value: 0.9838338997887958 and parameters: {'model': 'GB', 'n_estimators_gb': 97, 'learning_rate_gb': 0.9104036445042171, 'max_depth_gb': 17}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run magnificent-wolf-360 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/0d6adaf330744994967c0f7b2762d557
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:50:52,367] Trial 14 finished with value: 0.7545744202058929 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 191, 'learning_rate_lgbm': 0.29920676134606544, 'max_depth_lgbm': 16}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run orderly-yak-322 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/732d42f633fc4ba39b2ca3fba91e87cd
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run clumsy-sheep-743 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/dff88fc170574f968d510f5a19f164af
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0




🏃 View run stylish-midge-287 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/d2ff6776f49d4d69a766f8a9e73d238a
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:04,513] Trial 16 finished with value: 0.8654556752939562 and parameters: {'model': 'GB', 'n_estimators_gb': 135, 'learning_rate_gb': 0.5696349828063663, 'max_depth_gb': 8}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:51:05,366] Trial 17 finished with value: 0.7737350690267213 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 153, 'learning_rate_lgbm': 0.48118986277910136, 'max_depth_lgbm': 14}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run hilarious-loon-483 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/219baf4467834bd98ccd55b89f872acb
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run dazzling-fish-378 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/15cc40c29ed24f24b44d8e69689c713e
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:09,380] Trial 15 finished with value: 0.7751464549413054 and parameters: {'model': 'GB', 'n_estimators_gb': 197, 'learning_rate_gb': 0.22044123200378773, 'max_depth_gb': 9}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:51:14,549] Trial 18 finished with value: 0.7724385202864785 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 164, 'learning_rate_lgbm': 0.48651974416151317, 'max_depth_lgbm': 16}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:51:15,422] Trial 19 finished with value: 0.7887796716012184 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 172, 'learning_rate_lgbm': 0.4911440054934442, 'max_depth_lgbm': 15}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run shivering-turtle-364 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/fc3f70e1eca64e8db2bb21a1aa99a581
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:24,367] Trial 20 finished with value: 0.772453246208001 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 161, 'learning_rate_lgbm': 0.48646109704804846, 'max_depth_lgbm': 16}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run tasteful-yak-706 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/e6f85fe29d8043dba0b63463dbf068b4
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:33,396] Trial 21 finished with value: 0.7815445751697805 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 126, 'learning_rate_lgbm': 0.49075943902075725, 'max_depth_lgbm': 12}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run upset-slug-441 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/024b9e9212bc48dd9a19d7ca950daec1
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run upbeat-shrike-25 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/5e485d2dd643426d8d6be758c22a5efa
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:42,608] Trial 22 finished with value: 0.7532634134381578 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 97, 'learning_rate_lgbm': 0.47014804777242486, 'max_depth_lgbm': 9}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:51:43,424] Trial 23 finished with value: 0.7516763040711403 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 77, 'learning_rate_lgbm': 0.46237954417203, 'max_depth_lgbm': 9}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run honorable-hare-22 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/b91e0d9d92034ff0846ca1d7c7642d45
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:50,533] Trial 24 finished with value: 0.8454917190615979 and parameters: {'model': 'KNN', 'n_neighbors_knn': 24, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run rambunctious-roo-883 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/c0a10333a64a4688a39c725a4104b5d8
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run resilient-chimp-791 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/2719c85b12374995b91d46008f14ad41
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0
🏃 View run tasteful-mule-702 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/7030d88b6f474f9db9b5f63b7c5ead44
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:51:57,563] Trial 25 finished with value: 0.8437934329139645 and parameters: {'model': 'KNN', 'n_neighbors_knn': 23, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:51:58,587] Trial 26 finished with value: 0.8454917190615979 and parameters: {'model': 'KNN', 'n_neighbors_knn': 24, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.
[I 2025-04-15 15:52:00,440] Trial 27 finished with value: 0.842480800712538 and parameters: {'model': 'KNN', 'n_neighbors_knn': 19, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run invincible-seal-317 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/cc8c9802b2e44f7b83ec33f603133a11
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:52:03,384] Trial 28 finished with value: 0.8454917190615979 and parameters: {'model': 'KNN', 'n_neighbors_knn': 24, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run zealous-pug-872 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/a5f99452ce9349ac832fa4a6d6b27981
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 15:52:06,366] Trial 29 finished with value: 0.8454917190615979 and parameters: {'model': 'KNN', 'n_neighbors_knn': 24, 'weights_knn': 'uniform'}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run traveling-finch-448 at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/67536f91173b46c4a8d3d5f97b3be0c1
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


[I 2025-04-15 16:45:28,627] Trial 10 finished with value: 25633.959196113632 and parameters: {'model': 'SVM', 'kernel': 'poly', 'c_poly': 5.924343628091833, 'degree_poly': 5}. Best is trial 4 with value: 0.7463738077966304.


🏃 View run Best Model at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0/runs/54022e8b33fd44fea054d1ff3b60ca06
🧪 View experiment at: https://dagshub.com/sourav664/real-estate-price-prediction.mlflow/#/experiments/0


In [47]:
# best score

study.best_value

0.7463738077966304

In [48]:
study.best_params

{'model': 'LGBM',
 'n_estimators_lgbm': 49,
 'learning_rate_lgbm': 0.43608630259814796,
 'max_depth_lgbm': 12}

In [49]:
lgbm_params = {'model': 'LGBM',
 'n_estimators_lgbm': 150,
 'learning_rate_lgbm': 0.14275108608162546,
 'max_depth_lgbm': 20}

In [50]:
# train the model on best parameters

lgbm = LGBMRegressor(**lgbm_params)

lgbm.fit(X_train_trans,y_train_pt)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002034 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 14299, number of used features: 11
[LightGBM] [Info] Start training from score 0.984896




In [51]:
# get the predictions
y_pred_train = lgbm.predict(X_train_trans)
y_pred_test = lgbm.predict(X_test_trans)



In [52]:
# get the actual predictions values

y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

In [53]:
print(f"The train error is {mean_absolute_error(y_train,y_pred_train_org):.2f} cr")
print(f"The test error is {mean_absolute_error(y_test,y_pred_test_org):.2f} cr")

The train error is 0.62 cr
The test error is 0.74 cr


In [54]:
print(f"The train r2 score is {r2_score(y_train,y_pred_train_org):.2f}")
print(f"The test r2 score is {r2_score(y_test,y_pred_test_org):.2f}")

The train r2 score is 0.76
The test r2 score is 0.68


In [29]:
# dataframe of results

study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_c_linear,params_c_poly,params_c_rbf,params_degree_poly,params_gamma_rbf,...,params_max_depth_rf,params_max_depth_xgb,params_model,params_n_estimators_gb,params_n_estimators_lgbm,params_n_estimators_rf,params_n_estimators_xgb,params_n_neighbors_knn,params_weights_knn,state
0,0,0.3768186,2025-04-15 14:49:05.936288,2025-04-15 14:49:15.949693,0 days 00:00:10.013405,,,,,,...,4.0,,RF,,,35.0,,,,COMPLETE
1,1,0.9824688,2025-04-15 14:49:05.937288,2025-04-15 14:50:42.886089,0 days 00:01:36.948801,,,91.505491,,4.187046,...,,,SVM,,,,,,,COMPLETE
2,2,0.07701416,2025-04-15 14:49:05.938532,2025-04-15 14:49:41.904691,0 days 00:00:35.966159,,,,,,...,,10.0,XGB,,,,90.0,,,COMPLETE
3,3,0.04671177,2025-04-15 14:49:05.939521,2025-04-15 14:49:18.954390,0 days 00:00:13.014869,,,,,,...,16.0,,RF,,,99.0,,,,COMPLETE
4,4,0.190864,2025-04-15 14:49:05.939521,2025-04-15 14:49:39.888318,0 days 00:00:33.948797,,,,,,...,,13.0,XGB,,,,14.0,,,COMPLETE
5,5,0.07664935,2025-04-15 14:49:05.940521,2025-04-15 14:50:08.139975,0 days 00:01:02.199454,,,,,,...,,,GB,189.0,,,,,,COMPLETE
6,6,0.398539,2025-04-15 14:49:05.941505,2025-04-15 14:49:21.889581,0 days 00:00:15.948076,,,,,,...,,,KNN,,,,,11.0,uniform,COMPLETE
7,7,0.06905378,2025-04-15 14:49:05.942535,2025-04-15 14:49:51.885267,0 days 00:00:45.942732,,,,,,...,,19.0,XGB,,,,63.0,,,COMPLETE
8,8,0.08185312,2025-04-15 14:49:15.954797,2025-04-15 14:49:56.917903,0 days 00:00:40.963106,,,,,,...,,,GB,38.0,,,,,,COMPLETE
9,9,0.4404108,2025-04-15 14:49:18.958979,2025-04-15 14:49:57.912746,0 days 00:00:38.953767,,,,,,...,,,KNN,,,,,25.0,uniform,COMPLETE


In [41]:
study.trials_dataframe()['params_model'].value_counts()

params_model
LGBM    11
XGB      8
KNN      4
GB       3
SVM      2
RF       2
Name: count, dtype: int64

In [None]:
# avg scores for all tested models

study.trials_dataframe().groupby("params_model")['value'].mean().sort_values()

params_model
LGBM    7.078952e-01
RF      7.467782e-01
KNN     7.851501e-01
XGB     7.946543e-01
GB      8.707128e-01
SVM     3.284914e+13
Name: value, dtype: float64