In [1]:

import numpy as np
import pandas as pd
import data_clean_utils
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer, KNNImputer, MissingIndicator
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder, MinMaxScaler, PowerTransformer, OrdinalEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Install the experiment-tracking dependencies into the kernel's environment.
# NOTE(review): versions are not pinned; consider pinning for reproducibility.
%pip install mlflow dagshub

Note: you may need to restart the kernel to use updated packages.


In [3]:
import dagshub

# Connect this session to the DagsHub repository with MLflow integration on.
dagshub.init(
    repo_owner='manikantmnnit',
    repo_name='swiggy-delivery-time-prediction',
    mlflow=True,
)


In [4]:
import mlflow

# Make this the active MLflow experiment for every run logged below.
experiment_name = "Exp 2 - Model Selection"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/d121ded190a04346a1beeed20f436cf2', creation_time=1736879990338, experiment_id='1', last_update_time=1736879990338, lifecycle_stage='active', name='Exp 2 - Model Selection', tags={}>

In [5]:

from sklearn import set_config

# Ask scikit-learn transformers to return pandas DataFrames from
# transform / fit_transform. Later cells call `.values` on the
# power-transformed target, so this setting appears to be load-bearing —
# confirm before changing it.
set_config(transform_output="pandas")

# Load the Data


In [6]:
# Read the raw Swiggy delivery export into a DataFrame.
# NOTE(review): this is an absolute path on the author's machine; the notebook
# will not run elsewhere until it is made relative or configurable.
raw_csv_path = r'D:\Campus X\projects\swiggy-delivery-time-prediction\data\raw\swiggy.csv'
df = pd.read_csv(raw_csv_path)

df

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weatherconditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken(min)
0,0x4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,19-03-2022,11:30:00,11:45:00,conditions Sunny,High,2,Snack,motorcycle,0,No,Urban,(min) 24
1,0xb379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,25-03-2022,19:45:00,19:50:00,conditions Stormy,Jam,2,Snack,scooter,1,No,Metropolitian,(min) 33
2,0x5d6d,BANGRES19DEL01,23,4.4,12.914264,77.678400,12.924264,77.688400,19-03-2022,08:30:00,08:45:00,conditions Sandstorms,Low,0,Drinks,motorcycle,1,No,Urban,(min) 26
3,0x7a6a,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,05-04-2022,18:00:00,18:10:00,conditions Sunny,Medium,0,Buffet,motorcycle,1,No,Metropolitian,(min) 21
4,0x70a2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,26-03-2022,13:30:00,13:45:00,conditions Cloudy,High,1,Snack,scooter,1,No,Metropolitian,(min) 30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45588,0x7c09,JAPRES04DEL01,30,4.8,26.902328,75.794257,26.912328,75.804257,24-03-2022,11:35:00,11:45:00,conditions Windy,High,1,Meal,motorcycle,0,No,Metropolitian,(min) 32
45589,0xd641,AGRRES16DEL01,21,4.6,0.000000,0.000000,0.070000,0.070000,16-02-2022,19:55:00,20:10:00,conditions Windy,Jam,0,Buffet,motorcycle,1,No,Metropolitian,(min) 36
45590,0x4f8d,CHENRES08DEL03,30,4.9,13.022394,80.242439,13.052394,80.272439,11-03-2022,23:50:00,00:05:00,conditions Cloudy,Low,1,Drinks,scooter,0,No,Metropolitian,(min) 16
45591,0x5eee,COIMBRES11DEL01,20,4.7,11.001753,76.986241,11.041753,77.026241,07-03-2022,13:35:00,13:40:00,conditions Cloudy,High,0,Snack,motorcycle,1,No,Metropolitian,(min) 26


In [7]:
# Run the project's cleaning routine on the raw frame and display what it returns.
# NOTE(review): the result is not assigned here, and the next cell reloads
# 'swiggy_cleaned.csv' instead — presumably this helper (or a separate step)
# writes that file. Confirm against data_clean_utils.
data_clean_utils.perform_data_cleaning(df)

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,time_taken(min),is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,(min) 24,1,15.0,morning,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,(min) 33,0,5.0,evening,20.183530,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,(min) 26,1,15.0,morning,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,(min) 21,0,10.0,evening,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,(min) 30,1,15.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45587,35.0,4.2,windy,jam,2,drinks,motorcycle,1.0,no,metropolitian,(min) 33,0,10.0,night,16.600272,very_long
45588,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,(min) 32,0,10.0,morning,1.489846,short
45590,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,(min) 16,0,15.0,night,4.657195,short
45591,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,(min) 26,0,5.0,afternoon,6.232393,medium


In [8]:

# Columns not required for model input.
columns_to_drop = [
    'rider_id',
    'restaurant_latitude',
    'restaurant_longitude',
    'delivery_latitude',
    'delivery_longitude',
    'order_date',
    'order_time_hour',
    'order_day',
    'city_name',
    'order_day_of_week',
    'order_month',
]

# Reload the cleaned dataset and discard those columns in one chained step
# (no in-place mutation, so re-running the cell is safe).
df = pd.read_csv('swiggy_cleaned.csv').drop(columns=columns_to_drop)

df

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,time_taken,is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,24,1,15.0,morning,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,33,0,5.0,evening,20.183530,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,26,1,15.0,morning,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,21,0,10.0,evening,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,30,1,15.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45497,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,32,0,10.0,morning,1.489846,short
45498,21.0,4.6,windy,jam,0,buffet,motorcycle,1.0,no,metropolitian,36,0,15.0,evening,,
45499,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,16,0,15.0,night,4.657195,short
45500,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,26,0,5.0,afternoon,6.232393,medium


In [9]:
# Number of missing entries in each column.
missing_per_column = df.isna().sum()
missing_per_column

age                    1854
ratings                1908
weather                 525
traffic                 510
vehicle_condition         0
type_of_order             0
type_of_vehicle           0
multiple_deliveries     993
festival                228
city_type              1198
time_taken                0
is_weekend                0
pickup_time_minutes    1640
order_time_of_day      2070
distance               3630
distance_type          3630
dtype: int64

In [10]:
# Number of rows flagged as duplicates of another row.
n_duplicate_rows = df.duplicated().sum()
n_duplicate_rows

0

In [11]:

# Names of the columns that contain at least one missing value.
has_missing = df.isna().any(axis=0)
missing_cols = has_missing[has_missing].index

missing_cols

Index(['age', 'ratings', 'weather', 'traffic', 'multiple_deliveries',
       'festival', 'city_type', 'pickup_time_minutes', 'order_time_of_day',
       'distance', 'distance_type'],
      dtype='object')

# Drop Missing Values

In [12]:
# List the columns that remain after dropping the unused ones.
df.columns

Index(['age', 'ratings', 'weather', 'traffic', 'vehicle_condition',
       'type_of_order', 'type_of_vehicle', 'multiple_deliveries', 'festival',
       'city_type', 'time_taken', 'is_weekend', 'pickup_time_minutes',
       'order_time_of_day', 'distance', 'distance_type'],
      dtype='object')

In [13]:

# Keep only fully observed rows; dropna() returns a new frame, so `df`
# itself is left untouched.
temp_df = df.dropna()

# Separate the regression target from the feature columns.
target_col = 'time_taken'
y = temp_df[target_col]
X = temp_df.drop(columns=target_col)

X

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,1,15.0,morning,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,0,5.0,evening,20.183530,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,1,15.0,morning,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,0,10.0,evening,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,1,15.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45496,35.0,4.2,windy,jam,2,drinks,motorcycle,1.0,no,metropolitian,0,10.0,night,16.600272,very_long
45497,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,0,10.0,morning,1.489846,short
45499,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,0,15.0,night,4.657195,short
45500,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,0,5.0,afternoon,6.232393,medium


In [14]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

# Per-column count of missing values left in the training features.
X_train.isna().sum()



The size of train data is (30156, 15)
The shape of test data is (7539, 15)


age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [15]:
# Power-transform the target. The transformer is fitted on the training
# target only and then applied, unchanged, to the test target.
pt = PowerTransformer()

# The transformer works on 2-D input, so each target becomes a single column.
y_train_column = y_train.values.reshape(-1,1)
y_test_column = y_test.values.reshape(-1,1)

y_train_pt = pt.fit_transform(y_train_column)
y_test_pt = pt.transform(y_test_column)
     

In [16]:
# Percentage of training rows that have at least one missing value.
rows_with_missing = X_train.isna().any(axis=1)
rows_with_missing.mean().round(2) * 100

0.0

# Preprocessing Pipeline

In [17]:

# Numeric feature columns.
num_cols = [
    "age",
    "ratings",
    "pickup_time_minutes",
    "distance",
]

# Categorical columns with no inherent order.
nominal_cat_cols = [
    "weather",
    "type_of_order",
    "type_of_vehicle",
    "festival",
    "city_type",
    "is_weekend",
    "order_time_of_day",
]

# Categorical columns whose levels have a natural order.
ordinal_cat_cols = [
    "traffic",
    "distance_type",
]

# Check the training features for remaining missing values.
X_train.isna().sum()

age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [18]:
# # features to fill values with mode

# features_to_fill_mode = ['multiple_deliveries','festival','city_type']
# features_to_fill_missing = [col for col in nominal_cat_cols if col not in features_to_fill_mode]

# features_to_fill_missing

In [19]:

# # simple imputer to fill categorical vars with mode

# simple_imputer = ColumnTransformer(transformers=[
#     ("mode_imputer",SimpleImputer(strategy="most_frequent",add_indicator=True),features_to_fill_mode),
#     ("missing_imputer",SimpleImputer(strategy="constant",fill_value="missing",add_indicator=True),features_to_fill_missing)
# ],remainder="passthrough",n_jobs=-1,force_int_remainder_cols=False,verbose_feature_names_out=False)

# simple_imputer

In [20]:
# simple_imputer.fit_transform(X_train)

In [21]:

# simple_imputer.fit_transform(X_train).isna().sum()

In [22]:

# # knn imputer

# knn_imputer = KNNImputer(n_neighbors=5)


# Column groups used by the preprocessing step.

# Numeric feature columns.
num_cols = [
    "age",
    "ratings",
    "pickup_time_minutes",
    "distance",
]

# Categorical columns with no inherent order.
nominal_cat_cols = [
    "weather",
    "type_of_order",
    "type_of_vehicle",
    "festival",
    "city_type",
    "is_weekend",
    "order_time_of_day",
]

# Categorical columns whose levels have a natural order.
ordinal_cat_cols = [
    "traffic",
    "distance_type",
]

# Level order for each ordinal column, lowest first; the two lists line up
# with the entries of `ordinal_cat_cols`.
traffic_order = [
    "low",
    "medium",
    "high",
    "jam",
]

distance_type_order = [
    "short",
    "medium",
    "long",
    "very_long",
]

In [23]:
# Build the individual transformers first so the ColumnTransformer below
# reads as a plain (name, transformer, columns) table.
numeric_scaler = MinMaxScaler()

nominal_encoder = OneHotEncoder(
    drop="first",
    handle_unknown="ignore",
    sparse_output=False,
)

ordinal_encoder = OrdinalEncoder(
    categories=[traffic_order, distance_type_order],
    encoded_missing_value=-999,
    handle_unknown="use_encoded_value",
    unknown_value=-1,
)

# Columns not named in any group are passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ("scale", numeric_scaler, num_cols),
        ("nominal_encode", nominal_encoder, nominal_cat_cols),
        ("ordinal_encode", ordinal_encoder, ordinal_cat_cols),
    ],
    remainder="passthrough",
    n_jobs=-1,
    force_int_remainder_cols=False,
    verbose_feature_names_out=False,
)


preprocessor

In [24]:

# Wrap the preprocessor in a Pipeline. The imputer steps are currently
# disabled and kept here only as commented-out placeholders.
pipeline_steps = [
    # ("simple_imputer",simple_imputer),
    ("preprocess", preprocessor),
    # ("knn_imputer",knn_imputer)
]

processing_pipeline = Pipeline(steps=pipeline_steps)

processing_pipeline

In [25]:

# do data preprocessing

# Fit the preprocessing on the training features only and transform them.
X_train_trans = processing_pipeline.fit_transform(X_train)

# Reuse the already-fitted pipeline on the test features (no refit).
X_test_trans = processing_pipeline.transform(X_test)

In [26]:

# Install the hyper-parameter search library into the kernel's environment.
# NOTE(review): version is not pinned; consider pinning for reproducibility.
%pip install optuna

Note: you may need to restart the kernel to use updated packages.


In [30]:
!pip install xgboost lightgbm

Collecting lightgbm
  Downloading lightgbm-4.5.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.5.0-py3-none-win_amd64.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 18.8 MB/s eta 0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.5.0


In [32]:


from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna
from sklearn.metrics import r2_score, mean_absolute_error

In [33]:

# scikit-learn's SVR requires C to be strictly positive, so the C search
# ranges start slightly above zero instead of at 0.
_SVR_C_MIN = 1e-3


def _build_model(trial, model_name):
    """Sample hyper-parameters for ``model_name`` from ``trial`` and return the unfitted estimator."""
    if model_name == "SVM":
        kernel_svm = trial.suggest_categorical("kernel_svm",["linear","poly","rbf"])
        if kernel_svm == "linear":
            c_linear = trial.suggest_float("c_linear",_SVR_C_MIN,10)
            return SVR(C=c_linear,kernel="linear")

        if kernel_svm == "poly":
            c_poly = trial.suggest_float("c_poly",_SVR_C_MIN,10)
            degree_poly = trial.suggest_int("degree_poly",1,5)
            return SVR(C=c_poly,degree=degree_poly,
                       kernel="poly")

        c_rbf = trial.suggest_float("c_rbf",_SVR_C_MIN,100)
        gamma_rbf = trial.suggest_float("gamma_rbf",0,10)
        return SVR(C=c_rbf,gamma=gamma_rbf,
                   kernel="rbf")

    if model_name == "RF":
        n_estimators_rf = trial.suggest_int("n_estimators_rf",10,200)
        max_depth_rf = trial.suggest_int("max_depth_rf",2,20)
        return RandomForestRegressor(n_estimators=n_estimators_rf,
                                     max_depth=max_depth_rf,
                                     random_state=42,
                                     n_jobs=-1)

    if model_name == "GB":
        n_estimators_gb = trial.suggest_int("n_estimators_gb",10,200)
        learning_rate_gb = trial.suggest_float("learning_rate_gb",0,1)
        max_depth_gb = trial.suggest_int("max_depth_gb",2,20)
        return GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                         learning_rate=learning_rate_gb,
                                         max_depth=max_depth_gb,
                                         random_state=42)

    if model_name == "KNN":
        n_neighbors_knn = trial.suggest_int("n_neighbors_knn",1,25)
        weights_knn = trial.suggest_categorical("weights_knn",["uniform","distance"])
        return KNeighborsRegressor(n_neighbors=n_neighbors_knn,
                                   weights=weights_knn,n_jobs=-1)

    if model_name == "XGB":
        n_estimators_xgb = trial.suggest_int("n_estimators_xgb",10,200)
        learning_rate_xgb = trial.suggest_float("learning_rate_xgb",0.1,0.5)
        max_depth_xgb = trial.suggest_int("max_depth_xgb",2,20)
        return XGBRegressor(n_estimators=n_estimators_xgb,
                            learning_rate=learning_rate_xgb,
                            max_depth=max_depth_xgb,
                            random_state=42,
                            n_jobs=-1)

    if model_name == "LGBM":
        n_estimators_lgbm = trial.suggest_int("n_estimators_lgbm",10,200)
        learning_rate_lgbm = trial.suggest_float("learning_rate_lgbm",0.1,0.5)
        max_depth_lgbm = trial.suggest_int("max_depth_lgbm",2,20)
        return LGBMRegressor(n_estimators=n_estimators_lgbm,
                             learning_rate=learning_rate_lgbm,
                             max_depth=max_depth_lgbm,
                             random_state=42)

    # Fail loudly instead of hitting an unbound `model` further down.
    raise ValueError(f"Unknown model name: {model_name!r}")


def objective(trial):
    """Optuna objective: train one sampled regressor and return its test MAE.

    A model family and its hyper-parameters are drawn from ``trial``. The
    model is fitted on ``X_train_trans`` against the power-transformed target
    ``y_train_pt``; predictions for ``X_test_trans`` are mapped back to the
    original target scale with ``pt.inverse_transform`` before scoring, so
    the returned value is in the units of ``y_test``. Every call is logged
    as its own MLflow run (estimator params, the model name, and the "MAE"
    metric).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the model family and its hyper-parameters.

    Returns
    -------
    float
        Mean absolute error between ``y_test`` and the inverse-transformed
        test predictions.

    Raises
    ------
    ValueError
        If the sampled model name is not one of the supported families.

    Notes
    -----
    NOTE(review): the score is computed on the test split, so the study
    selects models on the same data later used to report the final error;
    consider a validation split or cross-validation.
    NOTE(review): the study runs trials with ``n_jobs=-1``; verify that
    ``nested=True`` still attaches these runs to the parent run when trials
    execute in worker threads with your MLflow version.
    """
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model",["SVM","RF","KNN","GB","XGB","LGBM"])
        model = _build_model(trial, model_name)

        # train the model on the transformed target (flattened to 1-D)
        model.fit(X_train_trans,y_train_pt.values.ravel())

        # log model params and which family was sampled
        mlflow.log_params(model.get_params())
        mlflow.log_param("model",model_name)

        # Only the test predictions feed the score, so the train-set
        # predictions the original computed (and never used) are skipped.
        y_pred_test = model.predict(X_test_trans)

        # bring the predictions back to the original target scale
        y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

        # calculate and log the error
        error = mean_absolute_error(y_test,y_pred_test_org)
        mlflow.log_metric("MAE",error)

        return error

In [34]:

# Create the Optuna study; a lower objective value (MAE) is better.
study = optuna.create_study(
    study_name="model_selection",
    direction="minimize",
)

# Parent MLflow run that summarises the whole search.
with mlflow.start_run(run_name="Best Model") as parent:
    # Run 30 trials; n_jobs=-1 lets Optuna evaluate trials in parallel.
    study.optimize(objective, n_trials=30, n_jobs=-1)

    # Record the winning configuration on the parent run ...
    winning_params = study.best_params
    mlflow.log_params(winning_params)

    # ... together with the score it achieved.
    winning_score = study.best_value
    mlflow.log_metric("best_score", winning_score)

[I 2025-01-14 12:59:18,301] A new study created in memory with name: model_selection


üèÉ View run thoughtful-asp-880 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/58f1f6d490b54d34bcb7da121e765569
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 12:59:23,616] Trial 6 finished with value: 3.1204203352663713 and parameters: {'model': 'RF', 'n_estimators_rf': 15, 'max_depth_rf': 18}. Best is trial 6 with value: 3.1204203352663713.


üèÉ View run sassy-fish-720 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/5b5bc73167ef42b6b24ac6dafb7ab7a2
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run serious-stoat-139 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/a1c8eb4711d34646b33804918319ce5d
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 12:59:42,626] Trial 7 finished with value: 4.2755217317039635 and parameters: {'model': 'KNN', 'n_neighbors_knn': 18, 'weights_knn': 'uniform'}. Best is trial 6 with value: 3.1204203352663713.
[I 2025-01-14 12:59:43,626] Trial 4 finished with value: 4.209041844409861 and parameters: {'model': 'KNN', 'n_neighbors_knn': 21, 'weights_knn': 'distance'}. Best is trial 6 with value: 3.1204203352663713.


üèÉ View run bright-midge-609 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/d40b740ee7e54611b878a9d7922d8edd
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run selective-pug-144 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/36c3d6e46a0d49f88ba88ece843d2b49
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run zealous-auk-401 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/d4c681ddc3cd412db656f39dea0a0f4f
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run salty-shark-315 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/b9d93ad3dd804fb58dad51a990e78717
üß™ View e

[I 2025-01-14 12:59:52,651] Trial 1 finished with value: 3.0508734421891153 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 71, 'learning_rate_lgbm': 0.1406599635878022, 'max_depth_lgbm': 16}. Best is trial 1 with value: 3.0508734421891153.
[I 2025-01-14 12:59:54,624] Trial 2 finished with value: 3.082502352417482 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 147, 'learning_rate_lgbm': 0.17975122531752144, 'max_depth_lgbm': 5}. Best is trial 1 with value: 3.0508734421891153.
[I 2025-01-14 12:59:55,676] Trial 3 finished with value: 3.0473246769280258 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 98, 'learning_rate_lgbm': 0.24921766302744675, 'max_depth_lgbm': 15}. Best is trial 3 with value: 3.0473246769280258.
[I 2025-01-14 12:59:57,639] Trial 5 finished with value: 3.3268835686908753 and parameters: {'model': 'GB', 'n_estimators_gb': 184, 'learning_rate_gb': 0.7397324055436043, 'max_depth_gb': 3}. Best is trial 3 with value: 3.0473246769280258.


üèÉ View run unequaled-lark-702 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/495f1acd93844921b69e7b580c18e1df
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:08,649] Trial 0 finished with value: 3.5800527869479293 and parameters: {'model': 'GB', 'n_estimators_gb': 185, 'learning_rate_gb': 0.5793031777287713, 'max_depth_gb': 11}. Best is trial 3 with value: 3.0473246769280258.


üèÉ View run funny-koi-570 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/1c866cc9bc8e4434ad8d306d32a838b0
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:14,618] Trial 9 finished with value: 4.2809560508335185 and parameters: {'model': 'RF', 'n_estimators_rf': 189, 'max_depth_rf': 5}. Best is trial 3 with value: 3.0473246769280258.


üèÉ View run zealous-fox-189 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/b37040b23da34d0690be55c8574a1d0e
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run auspicious-pug-293 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/729e8ec5a2e24a39bab4a1127505e9bb
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:20,642] Trial 10 finished with value: 3.3609817028045654 and parameters: {'model': 'XGB', 'n_estimators_xgb': 82, 'learning_rate_xgb': 0.20880061102790184, 'max_depth_xgb': 20}. Best is trial 3 with value: 3.0473246769280258.
[I 2025-01-14 13:00:22,610] Trial 13 finished with value: 4.198899564759723 and parameters: {'model': 'KNN', 'n_neighbors_knn': 9, 'weights_knn': 'distance'}. Best is trial 3 with value: 3.0473246769280258.


üèÉ View run bemused-gnu-815 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/2621461feb9a402c8591a75749a8efd4
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:30,618] Trial 11 finished with value: 3.440373260679686 and parameters: {'model': 'GB', 'n_estimators_gb': 158, 'learning_rate_gb': 0.5720997038500932, 'max_depth_gb': 7}. Best is trial 3 with value: 3.0473246769280258.


üèÉ View run youthful-squid-903 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/9ef2940315544194b78316dfa724d537
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run learned-bear-961 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/b3a17e94a68549558752383f91e0fb0c
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:41,629] Trial 17 finished with value: 3.039934534480359 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 65, 'learning_rate_lgbm': 0.1904334006887606, 'max_depth_lgbm': 18}. Best is trial 17 with value: 3.039934534480359.
[I 2025-01-14 13:00:45,626] Trial 18 finished with value: 3.056491083647486 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 54, 'learning_rate_lgbm': 0.1702618363580457, 'max_depth_lgbm': 18}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run amusing-midge-666 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/fd1446dc6401415f93f3a3b036190b2d
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:51,632] Trial 15 finished with value: 3.890621753716124 and parameters: {'model': 'GB', 'n_estimators_gb': 80, 'learning_rate_gb': 0.7986239950022745, 'max_depth_gb': 14}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run classy-ant-909 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/7514dd71fe5b47a39f50e7a2e96897dc
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run dazzling-sloth-895 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/67671ec031194fe7aae4f1e31dff7a75
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:00:55,646] Trial 19 finished with value: 3.0476885873732242 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 60, 'learning_rate_lgbm': 0.18515533117427047, 'max_depth_lgbm': 18}. Best is trial 17 with value: 3.039934534480359.
[I 2025-01-14 13:00:56,640] Trial 16 finished with value: 3.269901797510919 and parameters: {'model': 'GB', 'n_estimators_gb': 61, 'learning_rate_gb': 0.10869746253270818, 'max_depth_gb': 15}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run illustrious-pug-457 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/10c8a84ebdd94743a28ab60570c69948
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:03:49,799] Trial 24 finished with value: 4.6754115036405635 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 5.794552947681483, 'degree_poly': 1}. Best is trial 17 with value: 3.039934534480359.
[I 2025-01-14 13:04:43,328] Trial 25 finished with value: 3.093110670947102 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 122, 'learning_rate_lgbm': 0.3634482180388511, 'max_depth_lgbm': 13}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run masked-cat-363 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/8eaaf5932d154115835049fe2b64386a
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:05:01,127] Trial 26 finished with value: 3.502485752105713 and parameters: {'model': 'XGB', 'n_estimators_xgb': 200, 'learning_rate_xgb': 0.49628032495675567, 'max_depth_xgb': 2}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run victorious-wolf-345 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/399212f235eb4820a9470e76b41f03b9
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:05:21,269] Trial 22 finished with value: 3.968080840279331 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 4.541472262458507, 'degree_poly': 2}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run secretive-kit-739 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/404fb56570854bbcaaf7a08297aa41a1
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:05:35,741] Trial 14 finished with value: 3.9647845791739686 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 8.602374061148168, 'degree_poly': 2}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run sincere-boar-839 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/7b47f8e25bca4d0aa004842fa515884c
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:05:38,356] Trial 28 finished with value: 3.100437978118242 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 25, 'learning_rate_lgbm': 0.25639747471278274, 'max_depth_lgbm': 20}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run omniscient-bear-319 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/cf79094734424a9699b674ac0c2b48b9
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run calm-roo-950 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/6b8bfc1b77af4c70b8ccf20a44255929
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:05:43,646] Trial 29 finished with value: 3.048521702602399 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 32, 'learning_rate_lgbm': 0.26126684709035364, 'max_depth_lgbm': 19}. Best is trial 17 with value: 3.039934534480359.
[I 2025-01-14 13:12:12,837] Trial 8 finished with value: 6.323281912284635 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 53.150041763369096, 'gamma_rbf': 5.000818578537842}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run righteous-shrimp-654 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/fc26999be40e46ee8879f1feb03dee9e
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:14:25,015] Trial 20 finished with value: 6.485826738874771 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 50.8043548379683, 'gamma_rbf': 6.871372982217737}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run redolent-hen-719 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/8cd3a4f1d1d242cb97bc73a4b610b3f1
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:16:50,706] Trial 27 finished with value: 6.584237499487038 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 30.881235982290917, 'gamma_rbf': 8.676346216747694}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run placid-colt-32 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/3aed32a799064c7898a2c095e13cc4c1
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:38:55,583] Trial 23 finished with value: 3.6418943218476003 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 5.2800712137406505, 'degree_poly': 4}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run thundering-doe-661 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/89ba700f46cc457c9558e60951cfb1c7
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:47:59,031] Trial 21 finished with value: 3.7208872348691573 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 2.56713757609233, 'degree_poly': 5}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run grandiose-snake-942 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/d4d9eb03c593429aaf8dd5f827bb85f9
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-01-14 13:51:46,027] Trial 12 finished with value: 4.326381056056421 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 41.69942887679821, 'gamma_rbf': 0.33826083680993824}. Best is trial 17 with value: 3.039934534480359.


üèÉ View run beautiful-shrimp-998 at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/05042a2a519b4f619de8d447e895adff
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1
üèÉ View run Best Model at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/0fe76005834941c3bfc97fa0f37add2b
üß™ View experiment at: https://dagshub.com/manikantmnnit/swiggy-delivery-time-prediction.mlflow/#/experiments/1


In [35]:
# Lowest objective value (test MAE) found by the study.
best_mae = study.best_value
best_mae

3.039934534480359

In [36]:
# Hyper-parameters for the final LightGBM model.
# NOTE(review): these are hard-coded rather than read from study.best_params,
# and they do not match the best trial in the study output recorded in this
# notebook — confirm they are the intended values.
lgbm_params = dict(
    n_estimators=145,
    learning_rate=0.16632111599858262,
    max_depth=17,
)

In [37]:

# Fit the final LightGBM regressor with the chosen hyper-parameters.
# The transformed target is flattened to 1-D before fitting.
train_target = y_train_pt.values.ravel()

lgbm = LGBMRegressor(**lgbm_params)
lgbm.fit(X_train_trans, train_target)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 352
[LightGBM] [Info] Number of data points in the train set: 30156, number of used features: 25
[LightGBM] [Info] Start training from score -0.000000




In [38]:

# Predict on both splits; the outputs are still on the power-transformed
# target scale the model was trained on.
y_pred_train, y_pred_test = (
    lgbm.predict(features) for features in (X_train_trans, X_test_trans)
)

In [39]:

# Map the predictions back to the original target scale. inverse_transform
# is given a single 2-D column, hence the reshape.
y_pred_train_org, y_pred_test_org = (
    pt.inverse_transform(scaled.reshape(-1,1))
    for scaled in (y_pred_train, y_pred_test)
)
     

In [40]:
from sklearn.metrics import mean_absolute_error, r2_score

# mean absolute error of the inverse-transformed predictions, per split
train_mae = mean_absolute_error(y_train, y_pred_train_org)
test_mae = mean_absolute_error(y_test, y_pred_test_org)

print(f"The train error is {train_mae:.2f} minutes")
print(f"The test error is {test_mae:.2f} minutes")

The train error is 2.78 minutes
The test error is 3.02 minutes


In [41]:

# coefficient of determination of the inverse-transformed predictions, per split
train_r2 = r2_score(y_train, y_pred_train_org)
test_r2 = r2_score(y_test, y_pred_test_org)

print(f"The train r2 score is {train_r2:.2f}")
print(f"The test r2 score is {test_r2:.2f}")

The train r2 score is 0.86
The test r2 score is 0.84


In [42]:

# one row per Optuna trial: objective value, timings, sampled params, state
trials_overview = study.trials_dataframe()
trials_overview

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_c_poly,params_c_rbf,params_degree_poly,params_gamma_rbf,params_kernel_svm,...,params_max_depth_rf,params_max_depth_xgb,params_model,params_n_estimators_gb,params_n_estimators_lgbm,params_n_estimators_rf,params_n_estimators_xgb,params_n_neighbors_knn,params_weights_knn,state
0,0,3.580053,2025-01-14 12:59:18.682466,2025-01-14 13:00:08.649337,0 days 00:00:49.966871,,,,,,...,,,GB,185.0,,,,,,COMPLETE
1,1,3.050873,2025-01-14 12:59:18.684463,2025-01-14 12:59:52.650926,0 days 00:00:33.966463,,,,,,...,,,LGBM,,71.0,,,,,COMPLETE
2,2,3.082502,2025-01-14 12:59:18.686460,2025-01-14 12:59:54.624475,0 days 00:00:35.938015,,,,,,...,,,LGBM,,147.0,,,,,COMPLETE
3,3,3.047325,2025-01-14 12:59:18.688461,2025-01-14 12:59:55.676159,0 days 00:00:36.987698,,,,,,...,,,LGBM,,98.0,,,,,COMPLETE
4,4,4.209042,2025-01-14 12:59:18.688461,2025-01-14 12:59:43.625537,0 days 00:00:24.937076,,,,,,...,,,KNN,,,,,21.0,distance,COMPLETE
5,5,3.326884,2025-01-14 12:59:18.689662,2025-01-14 12:59:57.638079,0 days 00:00:38.948417,,,,,,...,,,GB,184.0,,,,,,COMPLETE
6,6,3.12042,2025-01-14 12:59:18.689662,2025-01-14 12:59:23.616573,0 days 00:00:04.926911,,,,,,...,18.0,,RF,,,15.0,,,,COMPLETE
7,7,4.275522,2025-01-14 12:59:18.690660,2025-01-14 12:59:42.625149,0 days 00:00:23.934489,,,,,,...,,,KNN,,,,,18.0,uniform,COMPLETE
8,8,6.323282,2025-01-14 12:59:23.619080,2025-01-14 13:12:12.836989,0 days 00:12:49.217909,,53.150042,,5.000819,rbf,...,,,SVM,,,,,,,COMPLETE
9,9,4.280956,2025-01-14 12:59:42.627042,2025-01-14 13:00:14.617171,0 days 00:00:31.990129,,,,,,...,5.0,,RF,,,189.0,,,,COMPLETE


In [43]:
# how many trials sampled each model type
model_per_trial = study.trials_dataframe()["params_model"]
model_per_trial.value_counts()

params_model
LGBM    9
SVM     9
GB      5
KNN     3
RF      2
XGB     2
Name: count, dtype: int64

In [44]:
# mean objective value per model type, sorted ascending
(
    study.trials_dataframe()
    .groupby("params_model")["value"]
    .mean()
    .sort_values()
)

params_model
LGBM    3.062987
XGB     3.431734
GB      3.501567
RF      3.700688
KNN     4.227821
SVM     4.854532
Name: value, dtype: float64

In [45]:
from sklearn.compose import TransformedTargetRegressor

# Wrap the regressor so the target is passed through `pt` before fitting and
# predictions are inverse-transformed back; lets CV score on the raw target.
model = TransformedTargetRegressor(regressor=lgbm, transformer=pt)

In [46]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the training data, using all available cores.
# The scorer reports *negated* MAE, so every returned value is <= 0.
cv_options = {
    "scoring": "neg_mean_absolute_error",
    "cv": 5,
    "n_jobs": -1,
}
scores = cross_val_score(model, X_train_trans, y_train, **cv_options)

scores



array([-3.06435673, -3.04327813, -3.0700516 , -3.06852842, -3.05699881])

In [47]:

# average the negated-MAE fold scores and flip the sign to get a positive MAE
mean_cv_mae = -scores.mean()
mean_cv_mae

3.060642738151514

In [48]:

# objective value per trial over the course of the study
history_figure = optuna.visualization.plot_optimization_history(study)
history_figure

In [49]:

# parallel coordinate plot, restricted to the "model" parameter
model_axis_figure = optuna.visualization.plot_parallel_coordinate(
    study,
    params=["model"],
)
model_axis_figure