In [1]:
import mlflow
import dagshub
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.svm import SVR

In [50]:
# Load the processed property dataset. The path is relative, so it resolves
# against the directory the notebook kernel was started in.
processed_csv_path = '../../data/processed/gurgaon_properties_post_feature_selection.csv'
df = pd.read_csv(processed_csv_path)

In [51]:
# Separate the regression target from the predictor columns.
target_column = 'price'
y = df[target_column]
X = df.drop(columns=[target_column])

In [4]:
# Build the model inputs directly from `df` so this cell can be re-run safely.
# Previously it overwrote its own inputs (`y = np.log1p(y)`,
# `X = preprocessor.fit_transform(X)`): a second execution log-transformed the
# target twice and passed the already-transformed matrix back into a
# ColumnTransformer that selects columns by name.
features_raw = df.drop(columns=['price'])
y = np.log1p(df['price'])  # model log(1 + price); np.expm1 inverts it

# NOTE(review): 'property_type' is standardised as a numeric column --
# presumably it is already numerically encoded upstream; confirm.
columns_to_scale = ['property_type', 'bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']
columns_to_encode = ['sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Standardise the listed numeric columns, one-hot encode the listed categorical
# columns (dropping the first level of each), and pass any remaining column
# through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), columns_to_scale),
        ('cat', OneHotEncoder(drop='first'), columns_to_encode)
    ],
    remainder='passthrough'
)

# NOTE(review): the preprocessor is fitted on every row, before the train/test
# split done later in the notebook, so scaling statistics and encoder
# categories are learned from rows that end up in the test set and in the CV
# validation folds. Consider wrapping the preprocessor and the SVR in a
# Pipeline so it is fitted on training data only.
X = preprocessor.fit_transform(features_raw)

In [11]:
# Candidate SVR hyper-parameters explored by the grid search
# (3 * 2 * 3 * 3 = 54 combinations). 'degree' is not searched because it only
# influences the 'poly' kernel, which is not among the kernels below.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'sigmoid'],
    'gamma': [0.001, 0.1, 1],
    'epsilon': [0.01, 0.1, 0.2],
}

# Base estimator; the grid above supplies its hyper-parameters.
svr_model = SVR()

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [12]:
# Two scores are tracked for every candidate. GridSearchCV treats larger
# scores as better, so MSE is wrapped with greater_is_better=False and is
# therefore reported with its sign flipped.
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)
r2_scorer = make_scorer(r2_score)
scoring = {'mse': mse_scorer, 'r2': r2_scorer}

# 5-fold search over `param_grid` using all available cores; once the search
# finishes, the candidate with the best mean R^2 is refit on the full
# training data.
grid_search = GridSearchCV(
    svr_model,
    param_grid,
    scoring=scoring,
    refit='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [13]:
# Point MLflow at the DagsHub-hosted tracking server for this repository.
# `dagshub` and `mlflow` are imported in the notebook's first cell, so they
# are not re-imported here.
DAGSHUB_REPO_OWNER = 'kevalsakhiya'
DAGSHUB_REPO_NAME = 'property-scout'

dagshub.init(repo_owner=DAGSHUB_REPO_OWNER, repo_name=DAGSHUB_REPO_NAME, mlflow=True)

# Build the tracking URI from the same constants so the repository given to
# dagshub.init and the one MLflow logs to cannot drift apart.
mlflow.set_tracking_uri(f'https://dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.mlflow')

In [14]:
# Run the grid search inside a parent MLflow run, then record every candidate
# as its own nested child run holding its parameters and mean CV scores.
mlflow.set_experiment('SVR-hyperparameter-tuning')
mlflow.autolog()
with mlflow.start_run():
    grid_search.fit(X_train, y_train)
    results = grid_search.cv_results_

    # cv_results_ stores one entry per candidate in parallel arrays; walk them together.
    candidates = zip(results['params'], results['mean_test_r2'], results['mean_test_mse'])
    for candidate_params, cv_mean_r2, cv_mean_neg_mse in candidates:
        with mlflow.start_run(nested=True):
            mlflow.log_params(candidate_params)

            # Mean R^2 across the CV folds.
            mlflow.log_metric('mean_r2_score', cv_mean_r2)

            # The 'mse' scorer was built with greater_is_better=False, so the
            # stored value is negated; flip it back to a positive MSE.
            mlflow.log_metric('mean_mse_score', -cv_mean_neg_mse)

2024/08/02 15:29:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Fitting 5 folds for each of 54 candidates, totalling 270 fits


2024/08/02 15:29:52 INFO mlflow.sklearn.utils: Logging the 5 best runs, 49 runs will be omitted.
2024/08/02 15:29:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run popular-wasp-854 at: https://dagshub.com/kevalsakhiya/property-scout.mlflow/#/experiments/7/runs/2f7a908af8b9429a84ff6408894fd11e.
2024/08/02 15:29:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/kevalsakhiya/property-scout.mlflow/#/experiments/7.
2024/08/02 15:29:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run legendary-shrike-560 at: https://dagshub.com/kevalsakhiya/property-scout.mlflow/#/experiments/7/runs/562105126cc84c74a23d7bd4866b0bd8.
2024/08/02 15:29:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/kevalsakhiya/property-scout.mlflow/#/experiments/7.
2024/08/02 15:29:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run luminous-slug-312 at: https://dagshub.com/kevalsakhiya/property-scout.mlflow/#/ex

In [16]:
# Pull the winning configuration and the model refit with it out of the
# finished search (refit='r2', so "best" means highest mean CV R^2).
best_estimator, best_params = grid_search.best_estimator_, grid_search.best_params_

# Report the selected hyper-parameters.
print("Best parameters found: ", best_params)

Best parameters found:  {'C': 1, 'epsilon': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}


In [17]:
# Score the refit best model on the held-out rows. Both metrics are on the
# log1p-transformed target, because `y` was transformed before the split.
y_pred = best_estimator.predict(X_test)
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

(mse, r2)

(0.034955629001406585, 0.8810501881173792)