In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVR

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

import mlflow

In [2]:
# set the dagshub tracking server
# (MLflow calls made after this point use this remote tracking URI)

mlflow.set_tracking_uri("https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow")

In [3]:
import dagshub
# initialise the DagsHub client for this repository with its MLflow integration switched on
dagshub.init(repo_owner='kunalkaushik1982', repo_name='uber-demand-prediction', mlflow=True)

In [4]:
# Read the processed train and test splits, parsing the pickup timestamp
# column as datetimes and using it as the index of each frame.

train_data_path = "../data/processed/train.csv"
test_data_path = "../data/processed/test.csv"

train_df, test_df = (
    pd.read_csv(csv_path, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")
    for csv_path in (train_data_path, test_data_path)
)

# preview the training frame
train_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,187,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,194,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,180,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,197,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185,185.0,4
...,...,...,...,...,...,...,...,...
2016-02-29 22:45:00,15.0,9.0,11.0,11.0,29,12,12.0,0
2016-02-29 23:00:00,12.0,15.0,9.0,11.0,29,17,14.0,0
2016-02-29 23:15:00,17.0,12.0,15.0,9.0,29,15,14.0,0
2016-02-29 23:30:00,15.0,17.0,12.0,15.0,29,15,15.0,0


In [5]:
# count the missing values in each column of the training data

train_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [6]:
# count the missing values in each column of the test data

test_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [7]:
# Separate the training frame into the target (total_pickups) and the
# feature matrix (every remaining column).
# NOTE(review): avg_pickups stays in as a feature, and in the rows displayed
# for train_df it moves with the same row's total_pickups
# (e.g. 0.4*180 + 0.6*175 = 177 at 01:30) -- confirm it is not computed from
# the current interval's target before trusting the scores below.

y_train = train_df["total_pickups"]

X_train = train_df.drop("total_pickups", axis=1)

In [8]:
# preview the first rows of the training features
X_train.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185.0,4


In [9]:
# Separate the test frame into the target (total_pickups) and the feature
# matrix (every remaining column), the same way as for the training frame.

y_test = test_df["total_pickups"]

X_test = test_df.drop("total_pickups", axis=1)

In [10]:
# preview the first rows of the test features
X_test.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-01 00:00:00,36.0,44.0,31.0,29.0,0,39.0,1
2016-03-01 00:15:00,41.0,36.0,44.0,31.0,0,37.0,1
2016-03-01 00:30:00,35.0,41.0,36.0,44.0,0,41.0,1
2016-03-01 00:45:00,47.0,35.0,41.0,36.0,0,38.0,1
2016-03-01 01:00:00,34.0,47.0,35.0,41.0,0,35.0,1


In [11]:
from sklearn import set_config

# ask scikit-learn transformers to return pandas output from transform calls
set_config(transform_output="pandas")

In [12]:
# Feature encoder: one-hot encode "region" and "day_of_week" (first level of
# each dropped, dense output) and pass every other column through unchanged.

encoder = ColumnTransformer(
    transformers=[
        (
            "ohe",
            OneHotEncoder(sparse_output=False, drop="first"),
            ["region", "day_of_week"],
        ),
    ],
    remainder="passthrough",
    force_int_remainder_cols=False,
    n_jobs=-1,
)

In [13]:
# display the configured (not yet fitted) encoder
encoder

In [14]:
# encode the train and test data
# (the encoder is fitted on the training features only; the test features are
# transformed with that already-fitted encoder)

X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

In [15]:
import optuna
import tqdm 

In [16]:
# set the experiment
# (selects "Model Selection" as the active MLflow experiment for the runs below)

mlflow.set_experiment("Model Selection")

<Experiment: artifact_location='mlflow-artifacts:/1d7b9d6c79d24670b50a4b268594bc07', creation_time=1742924267558, experiment_id='0', last_update_time=1742924267558, lifecycle_stage='active', name='Model Selection', tags={}>

In [17]:
def objective(trial):
    """Optuna objective: fit one candidate regressor and return its MAPE.

    The trial samples a model family ("LR", "RF", "GBR" or "XGBR") and, for
    the tree-based families, their hyperparameters. The model is fitted on the
    notebook-level ``X_train_encoded`` / ``y_train`` and scored on
    ``X_test_encoded`` / ``y_test``. The model name, the estimator's
    parameters and the resulting MAPE are logged to an MLflow run opened with
    ``nested=True``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the model name and its hyperparameters.

    Returns
    -------
    float
        Mean absolute percentage error of the fitted model's predictions for
        ``X_test_encoded`` against ``y_test`` (lower is better).

    Raises
    ------
    ValueError
        If the sampled model name has no matching estimator branch.
    """
    # start the child run
    # NOTE(review): the study calls this objective with n_jobs=-1; confirm
    # that nested=True still attaches these runs to the "best_model" parent
    # when a trial executes outside the thread that opened the parent run.
    with mlflow.start_run(nested=True):

        # model name search space
        list_of_models = ["LR", "RF", "GBR", "XGBR"]
        model_name = trial.suggest_categorical("model_name", list_of_models)

        if model_name == "LR":
            model = LinearRegression()

        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf", 10, 100, step=10)
            max_depth_rf = trial.suggest_int("max_depth_rf", 3, 10)
            model = RandomForestRegressor(n_estimators=n_estimators_rf,
                                          max_depth=max_depth_rf,
                                          random_state=42, n_jobs=-1)

        elif model_name == "GBR":
            n_estimators_gb = trial.suggest_int("n_estimators_gb", 10, 100, step=10)
            learning_rate_gb = trial.suggest_float("learning_rate_gb", 1e-4, 1e-1, log=True)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                              learning_rate=learning_rate_gb,
                                              random_state=42)

        elif model_name == "XGBR":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb", 10, 100, step=10)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb", 1e-4, 1e-1, log=True)
            max_depth_xgb = trial.suggest_int("max_depth_xgb", 3, 10)
            # seeded like the RF and GBR candidates so every family is built
            # with the same fixed random_state
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                 learning_rate=learning_rate_xgb,
                                 max_depth=max_depth_xgb,
                                 random_state=42)

        else:
            # keeps a newly added name in list_of_models from surfacing later
            # as an unbound `model`
            raise ValueError(f"Unsupported model name: {model_name!r}")

        # log the model name
        mlflow.log_param("model_name", model_name)

        # log the model parameters
        mlflow.log_params(model.get_params())

        # fit on the data
        model.fit(X_train_encoded, y_train)

        # get the predictions
        y_pred = model.predict(X_test_encoded)

        # calculate the loss
        # NOTE(review): the loss is measured on the test split, so the study
        # picks the winner on the same data that is later reported as test
        # error -- consider scoring on a held-out validation split instead.
        loss = mean_absolute_percentage_error(y_test, y_pred)

        # log the metric
        mlflow.log_metric("MAPE", loss)
        return loss

In [18]:
# Run the model-selection study inside an MLflow run named "best_model" and
# record the winning trial's parameters and score on that run.

with mlflow.start_run(nested=True, run_name="best_model") as parent:

    # fresh in-memory study that minimises the value returned by `objective`
    study = optuna.create_study(
        direction="minimize",
        study_name="model_selection",
    )

    # evaluate 50 trials with n_jobs=-1
    study.optimize(objective, n_jobs=-1, n_trials=50)

    # parameters of the best trial
    mlflow.log_params(study.best_params)

    # objective value of the best trial
    mlflow.log_metric("Best_MAPE", study.best_value)

[I 2025-03-25 23:55:38,489] A new study created in memory with name: model_selection


🏃 View run resilient-hen-924 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/479e13686e3a4af8867b32ed2a967346
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run stately-rat-450 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/baee20f4b2d04222bbd5053987159d3c
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run burly-shrike-667 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/2f52416c5c75488e8351c59dc9a16779
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run bright-gnat-461 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/d4d902c1c20d48f094e614c0d5de4b21
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-pre

[I 2025-03-25 23:56:17,493] Trial 0 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run whimsical-ram-772 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/02f726d78d27472a8fbc6b16bd91e10b
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:56:19,457] Trial 4 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run upset-goat-355 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/f4a8c856c3a1413bab43a530972fe2cf
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run salty-lamb-218 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/ffb8a816b19f4662aea65c8c9a72eea2
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:56:23,468] Trial 1 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run powerful-fowl-251 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/7b0961d6d61949edb617787fde075d04
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:56:25,480] Trial 2 finished with value: 6.411798477172852 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.0002875300710319123, 'max_depth_xgb': 7}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:56:27,483] Trial 5 finished with value: 6.506432056427002 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 40, 'learning_rate_xgb': 0.0002769404638853813, 'max_depth_xgb': 5}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run casual-moose-191 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/543dba2ff9084b238ff377dbb8e77bac
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:56:30,519] Trial 11 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:56:31,478] Trial 8 finished with value: 0.17712952157922202 and parameters: {'model_name': 'RF', 'n_estimators_rf': 10, 'max_depth_rf': 7}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:56:33,459] Trial 6 finished with value: 4.647510051727295 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.003912214874511848, 'max_depth_xgb': 8}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:56:38,461] Trial 10 finished with value: 5.326450824737549 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.0035601829728731525, 'max_depth_xgb': 9}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run colorful-cod-958 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/6885edf66840446199bac3900baa7269
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run gregarious-doe-865 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/e86b29ee4331444db12e027093a88d22
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run polite-turtle-551 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/34331c8fa858469792990481e8156b6b
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:04,469] Trial 7 finished with value: 0.23638771239003423 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 90, 'learning_rate_gb': 0.08768708058073904}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run enthused-elk-343 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/c1244b5bd4e941b7834f8d5a63517af8
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:10,449] Trial 9 finished with value: 6.461465901777294 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 90, 'learning_rate_gb': 0.00020970252669769854}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:57:11,473] Trial 3 finished with value: 5.479746560548497 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 90, 'learning_rate_gb': 0.002162871296067304}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run wise-asp-541 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/09772f6b3512401e894cd2d1b0255a8d
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:15,498] Trial 12 finished with value: 1.397666573524475 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.024620381893518263, 'max_depth_xgb': 3}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run aged-goat-925 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/e750c6179fa14fa3af5112c732ec247f
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run honorable-hare-442 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/25cf96f6aa78404d913a06ff35383aa3
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:21,463] Trial 13 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run gregarious-wolf-860 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/864a368646fa4c5a9bbb88d963834345
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:26,521] Trial 14 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:57:27,461] Trial 17 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run delightful-gnat-351 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/99072528c0134ff08ecdc66fdd0bbe7b
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:57:34,466] Trial 16 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run capable-flea-216 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/fd6f7eb65ff94cacafa35200047a494a
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run puzzled-mare-420 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/9a4fd7ad716749008c4c99d7180cbf3d
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run nimble-sheep-100 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/80c9abcf7d4e45599616b9752dbac84e
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run redolent-cow-332 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/101286ce3b90468d882eae4c4866df5d
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-pr

[I 2025-03-25 23:57:55,476] Trial 19 finished with value: 0.142839040399777 and parameters: {'model_name': 'RF', 'n_estimators_rf': 30, 'max_depth_rf': 9}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:57:58,472] Trial 15 finished with value: 0.1284471279720147 and parameters: {'model_name': 'RF', 'n_estimators_rf': 90, 'max_depth_rf': 10}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run whimsical-snail-593 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/e47a1a5f95024072900cf0d486a21ee2
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:58:00,484] Trial 18 finished with value: 2.9153846570094863 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 20, 'learning_rate_gb': 0.043627655351627936}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:01,515] Trial 21 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run valuable-steed-1 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/2e8be766b82b4fc583318ffb63724e87
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:58:07,210] Trial 23 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:11,070] Trial 22 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:11,464] Trial 20 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run resilient-cat-824 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/6c22423671c14c6fa9f2f30f44b0283e
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:58:27,460] Trial 25 finished with value: 0.5443209920941997 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 3}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run enthused-lamb-783 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/5b38c956af464b5eb5c672970e19f627
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run delicate-turtle-362 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/7eb78b2a991e481aa63d766699ccd63a
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run indecisive-steed-108 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/99ad2be52888406e8a5ba88a124d47a2
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run learned-dog-935 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/935da7d07adc4d26a8dd208d3d496b19
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-de

[I 2025-03-25 23:58:44,457] Trial 24 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run upset-pug-346 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/3235a2c8b88140d88a06ea3129b6daa1
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run lyrical-hog-11 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/d066d3841a89451aacb665751a98d184
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:58:48,445] Trial 27 finished with value: 0.5443160512645059 and parameters: {'model_name': 'RF', 'n_estimators_rf': 90, 'max_depth_rf': 3}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:49,454] Trial 26 finished with value: 0.5443209920941997 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 3}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:52,454] Trial 29 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run youthful-shoat-432 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/fb137ce5c8d748f78f47888afbd93c7a
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:58:56,490] Trial 30 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:58:57,446] Trial 32 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:59:03,467] Trial 33 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run auspicious-skunk-418 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/68fc49a8b3c54953b5accd7ae4fbe843
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run learned-fowl-542 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/4d1fd79cc0664165b9002945f8abe4ef
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:59:15,485] Trial 31 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:59:20,456] Trial 34 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run thoughtful-gnat-127 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/e39f4fa328bb4d579517dae990423b2e
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run industrious-stag-187 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/acbfd20e9b814cdb8816a1610c6283c0
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run angry-conch-1000 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/ca91846576ab4d7c888e1a765b3d2aa8
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run upbeat-midge-441 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/dd4619315c034e09b55ef5e17b0ea2b0
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-de

[I 2025-03-25 23:59:41,528] Trial 36 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run salty-crane-554 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/a2a456aa15144e9a8af186e191a882df
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run handsome-koi-293 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/1a8ebce01c3e472f9eee32177eb81336
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:59:47,714] Trial 35 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run burly-hawk-618 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/b4cedd1e1c044ff9b6c2b909fa983c18
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run resilient-zebra-392 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/b702f273968940ddb207af3a31b443b9
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:59:49,495] Trial 39 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:59:52,449] Trial 40 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:59:53,492] Trial 38 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run unique-finch-612 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/eb754d76045946299a38ae789d4c591d
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-25 23:59:55,456] Trial 28 finished with value: 0.5443160512645059 and parameters: {'model_name': 'RF', 'n_estimators_rf': 90, 'max_depth_rf': 3}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-25 23:59:57,508] Trial 41 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:03,455] Trial 42 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:04,476] Trial 43 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run adventurous-toad-879 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/70ee3fd0534c4debac268a54427eb417
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run bright-eel-950 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/3326800fe7bf4e9dbc890a6ef52d82e9
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


[I 2025-03-26 00:00:11,509] Trial 37 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:12,474] Trial 44 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:15,449] Trial 45 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run sedate-ox-806 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/8315c0685e6c400fa07b238a843b7083
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run brawny-kite-43 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/10fdfc98fd154d679aa894f3460fd0da
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run angry-grouse-364 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/ca90a03668f64d039bf8c6a5a5a07504
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0
🏃 View run receptive-croc-912 at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/aa02a688482b40c29a3aa7204d16707c
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-predi

[I 2025-03-26 00:00:25,498] Trial 47 finished with value: 2.513364791870117 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.09347438961425887, 'max_depth_xgb': 10}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:26,474] Trial 46 finished with value: 6.570655445370785 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 10, 'learning_rate_gb': 0.00011031385694360848}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:27,477] Trial 48 finished with value: 3.0119144916534424 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.07646351811064608, 'max_depth_xgb': 10}. Best is trial 0 with value: 0.07934790285463077.
[I 2025-03-26 00:00:28,453] Trial 49 finished with value: 3.1740829944610596 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.07148860942297751, 'max_depth_xgb': 10}. Best is trial 0 with value: 0.07934790285463077.


🏃 View run best_model at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0/runs/6689308820f6408ea04e4a0543c6a2b1
🧪 View experiment at: https://dagshub.com/kunalkaushik1982/uber-demand-prediction.mlflow/#/experiments/0


In [19]:
# best value
# (lowest objective value, i.e. MAPE, found by the model-selection study)

study.best_value

0.07934790285463077

In [20]:
# best parameters
# (sampled parameters of the trial that achieved the best value)

study.best_params

{'model_name': 'LR'}

In [21]:
# model value counts
# (number of trials in which each model family was sampled)

study.trials_dataframe()['params_model_name'].value_counts()

params_model_name
LR      30
XGBR     8
RF       7
GBR      5
Name: count, dtype: int64

In [22]:
from optuna.visualization import (
    plot_optimization_history, 
    plot_parallel_coordinate, 
    plot_param_importances
)

In [23]:
# plot the optimization history of the model-selection study
plot_optimization_history(study)

In [24]:
# parallel-coordinate plot restricted to the model_name parameter
plot_parallel_coordinate(study, params=["model_name"])

In [25]:
# Refit a plain linear regression on the encoded training data and compare
# its MAPE on the training and test splits.

lr = LinearRegression().fit(X_train_encoded, y_train)

# predictions for both splits
y_pred_train, y_pred_test = (
    lr.predict(features) for features in (X_train_encoded, X_test_encoded)
)

# mean absolute percentage error per split
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)

# report both errors
print("The training error is ", mape_train)
print("The test error is ", mape_test)

The training error is  0.0877801330456651
The test error is  0.07934790285463077


In [26]:
# inspect the fitted linear-regression coefficients
# NOTE(review): the last coefficient (~2.49) is by far the largest positive
# one; if it belongs to avg_pickups, check that feature for target leakage.
lr.coef_

array([-2.33737604,  0.71512405, -0.55601505, -1.25311068, -3.20463231,
       -0.86685973, -2.79925402, -3.62516859,  0.41386463, -2.9376376 ,
       -1.97624678, -3.75050442,  0.51806283, -2.54033388, -2.43297463,
        0.47632075,  0.61254786, -4.7417372 , -2.03077217, -1.26960984,
       -4.03690273, -2.08863167, -1.0414428 ,  0.73561736, -0.99999442,
       -0.85944985, -2.43098478,  0.67112238,  0.57385071, -0.11719951,
       -0.28045898, -0.37180749, -0.5238324 , -0.4233113 , -0.34045774,
       -0.54170892, -0.36264553, -0.2493965 , -0.31905518,  2.4912456 ])

In [27]:
def tune_ridge(trial, alpha_low=30, alpha_high=100):
    """Optuna objective: fit a Ridge model for a sampled ``alpha`` and return its MAPE.

    The model is fitted on the notebook-level ``X_train_encoded`` / ``y_train``
    and scored on ``X_test_encoded`` / ``y_test``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``alpha``.
    alpha_low, alpha_high : float, optional
        Bounds of the ``alpha`` search range. The defaults (30 and 100) keep
        the original range, so ``study.optimize(func=tune_ridge, ...)`` behaves
        as before. The best alpha recorded in this notebook (~99.95) lies on
        the upper bound, so the range can be widened without editing this
        function, e.g. ``functools.partial(tune_ridge, alpha_high=1000)``.

    Returns
    -------
    float
        Mean absolute percentage error of the predictions for
        ``X_test_encoded`` against ``y_test`` (lower is better).
    """
    # hyperparameter space
    alpha = trial.suggest_float("alpha", alpha_low, alpha_high)

    # make the model object
    ridge = Ridge(alpha=alpha, random_state=42)

    # train the model
    ridge.fit(X_train_encoded, y_train)

    # get predictions
    y_pred = ridge.predict(X_test_encoded)

    # calculate loss
    # NOTE(review): alpha is tuned against the test split; consider a
    # held-out validation split so the reported test error stays unbiased.
    loss = mean_absolute_percentage_error(y_test, y_pred)

    return loss
        

In [28]:
# create study
# NOTE: this rebinds `study`, replacing the model-selection study created
# earlier; cells that read `study` from here on see the ridge-tuning study.

study = optuna.create_study(study_name="tune_model", direction="minimize")

[I 2025-03-26 00:00:35,449] A new study created in memory with name: tune_model


In [29]:
# optimize
# (100 trials of tune_ridge with n_jobs=-1 and a progress bar)

study.optimize(func=tune_ridge, n_trials=100, n_jobs=-1, show_progress_bar=True)

  0%|          | 0/100 [00:00<?, ?it/s]

[I 2025-03-26 00:00:36,332] Trial 4 finished with value: 0.07922751242647928 and parameters: {'alpha': 55.89936337069982}. Best is trial 4 with value: 0.07922751242647928.
[I 2025-03-26 00:00:36,468] Trial 6 finished with value: 0.07918795138338022 and parameters: {'alpha': 81.90176254994185}. Best is trial 6 with value: 0.07918795138338022.
[I 2025-03-26 00:00:36,489] Trial 0 finished with value: 0.07919378096049605 and parameters: {'alpha': 77.81903409224333}. Best is trial 6 with value: 0.07918795138338022.
[I 2025-03-26 00:00:36,493] Trial 1 finished with value: 0.07920453321044453 and parameters: {'alpha': 70.50680224040278}. Best is trial 6 with value: 0.07918795138338022.
[I 2025-03-26 00:00:36,494] Trial 3 finished with value: 0.07926524468856297 and parameters: {'alpha': 35.1863211979158}. Best is trial 6 with value: 0.07918795138338022.
[I 2025-03-26 00:00:36,495] Trial 5 finished with value: 0.07917196472235352 and parameters: {'alpha': 93.71510459770052}. Best is trial 5 wi

In [30]:
# best parameters
# NOTE(review): tune_ridge samples alpha from [30, 100]; a best alpha close to
# 100 sits on the edge of that range, so a wider range may be worth trying.

study.best_params

{'alpha': 99.94565892011927}

In [31]:
# best value
# (lowest MAPE returned by tune_ridge across the study's trials)

study.best_value

0.07916413226684169