In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.12 (from alembic>=1.5.0->optuna)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
Downloading alembic-1.15.1-py3-none-any.whl (231 kB)
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
Installing collected packages: typing-extensions, Mako, colorlog, alembic, optuna
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.11.0
    Uninstalling typing_e

In [5]:
import numpy as np
import pandas as pd

from  sklearn.model_selection import train_test_split,KFold,cross_val_score
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.metrics import r2_score,mean_absolute_error

In [6]:
#we made all columns as it is , so that we can perform all operation in a pipeline
df=pd.read_csv("gurgaon_properties_post_feature_selection_v2.csv")

In [7]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,0.0,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,0.0,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,0.0,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,1.0,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,0.0,High,Mid Floor


In [8]:
# 0 -> unfurnished
# 1 -> semifurnished
# 2 -> furnished
df['furnishing_type'] = df['furnishing_type'].replace({0.0:'unfurnished',1.0:'semifurnished',2.0:'furnished'})

In [9]:
X=df.drop(columns='price')
y=df["price"]

In [10]:
# Applying the log1p transformation to the target variable
y_transformed = np.log1p(y)

In [14]:
import optuna
import category_encoders as ce
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score

columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Define the objective function for Optuna

def objective(trial):
    # Hyperparameter search space
    n_estimators = trial.suggest_int('n_estimators', 50, 1000)
    max_depth = trial.suggest_int('max_depth', 3, 20)
    learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.5)
    subsample = trial.suggest_uniform('subsample', 0.3, 1.0)
    colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.3, 1.0)
    gamma = trial.suggest_uniform('gamma', 0, 10)
    reg_lambda = trial.suggest_uniform('lambda', 0, 20)
    reg_alpha = trial.suggest_uniform('alpha', 0, 20)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    max_delta_step = trial.suggest_int('max_delta_step', 0, 10)

    # Define preprocessing pipeline
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
            ('cat', OrdinalEncoder(), columns_to_encode),
            ('cat1', OneHotEncoder(drop='first', sparse_output=False), ['agePossession']),
            ('target_enc', ce.TargetEncoder(), ['sector'])
        ],
        remainder='passthrough'
    )
    
    # Define the pipeline
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', XGBRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            gamma=gamma,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            min_child_weight=min_child_weight,
            max_delta_step=max_delta_step,
            objective='reg:squarederror',
            random_state=42
        ))
    ])
    
    # Perform cross-validation
    score = cross_val_score(pipeline, X, y_transformed, cv=3, scoring='r2').mean()
    
    return score  # Return R2 score for Optuna to maximize

# Run Optuna study
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=50)  # Optimize for 50 trials

# Print the best parameters
print("Best hyperparameters:", study.best_params)

[I 2025-03-06 19:26:22,252] A new study created in memory with name: no-name-11c6eaa0-5dba-4788-927e-72be4c18fc87
  learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.5)
  subsample = trial.suggest_uniform('subsample', 0.3, 1.0)
  colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.3, 1.0)
  gamma = trial.suggest_uniform('gamma', 0, 10)
  reg_lambda = trial.suggest_uniform('lambda', 0, 20)
  reg_alpha = trial.suggest_uniform('alpha', 0, 20)
Traceback (most recent call last):
  File "C:\Users\LENOVO\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 140, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "C:\Users\LENOVO\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 380, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "C:\Users\LENOVO\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 90, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^

ValueError: No trials are completed yet.