In [1]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer, KNNImputer, MissingIndicator
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder, MinMaxScaler, PowerTransformer, OrdinalEncoder
from sklearn.model_selection import train_test_split

In [2]:
import dagshub
dagshub.init(repo_owner='kbs.kartik', repo_name='delivery-time-prediction', mlflow=True)

In [3]:
import mlflow

In [4]:
# set the tracking server

mlflow.set_tracking_uri("https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/")

In [5]:
# mlflow experiment

mlflow.set_experiment("Exp 4 - RF HP Tuning")

<Experiment: artifact_location='mlflow-artifacts:/4624c48306534d5495f0a0bdcb88e835', creation_time=1751779927531, experiment_id='3', last_update_time=1751779927531, lifecycle_stage='active', name='Exp 4 - RF HP Tuning', tags={}>

In [6]:
from sklearn import set_config

set_config(transform_output="pandas")

# Load the Data

In [7]:
# load the data

df = pd.read_csv(r'C:\Users\KARTIK\Documents\delivery-time-prediction\data\raw\swiggy.csv')

# Clean Data

In [8]:
import numpy as np
import pandas as pd


columns_to_drop =  ['rider_id',
                    'restaurant_latitude',
                    'restaurant_longitude',
                    'delivery_latitude',
                    'delivery_longitude',
                    'order_date',
                    "order_time_hour",
                    "order_day",
                    "city_name",
                    "order_day_of_week",
                    "order_month"]


def change_column_names(data: pd.DataFrame):
    return (
        data.rename(str.lower,axis=1).rename({
            "delivery_person_id" : "rider_id",
            "delivery_person_age": "age",
            "delivery_person_ratings": "ratings",
            "delivery_location_latitude": "delivery_latitude",
            "delivery_location_longitude": "delivery_longitude",
            "time_orderd": "order_time",
            "time_order_picked": "order_picked_time",
            "weatherconditions": "weather",
            "road_traffic_density": "traffic",
            "city": "city_type",
            "time_taken(min)": "time_taken"},
            axis=1)
    )


def data_cleaning(data: pd.DataFrame):
    minors_data = data.loc[data['age'].astype('float') < 18]
    minor_index = minors_data.index.tolist()
    six_star_data = data.loc[data['ratings'] == "6"]
    six_star_index = six_star_data.index.tolist()

    return (
        data
        .drop(columns="id")
        .drop(index=minor_index)                                                # Minor riders in data dropped
        .drop(index=six_star_index)                                             # six star rated drivers dropped
        .replace("NaN ",np.nan)                                                 # missing values in the data
        .assign(
            # city column out of rider id
            city_name = lambda x: x['rider_id'].str.split("RES").str.get(0),
            # convert age to float
            age = lambda x: x['age'].astype(float),
            # convert ratings to float
            ratings = lambda x: x['ratings'].astype(float),
            # absolute values for location based columns
            restaurant_latitude = lambda x: x['restaurant_latitude'].abs(),
            restaurant_longitude = lambda x: x['restaurant_longitude'].abs(),
            delivery_latitude = lambda x: x['delivery_latitude'].abs(),
            delivery_longitude = lambda x: x['delivery_longitude'].abs(),
            # order date to datetime and feature extraction
            order_date = lambda x: pd.to_datetime(x['order_date'],
                                                  dayfirst=True),
            order_day = lambda x: x['order_date'].dt.day,
            order_month = lambda x: x['order_date'].dt.month,
            order_day_of_week = lambda x: x['order_date'].dt.day_name().str.lower(),
            is_weekend = lambda x: (x['order_date']
                                    .dt.day_name()
                                    .isin(["Saturday","Sunday"])
                                    .astype(int)),
            # time based columns
            order_time = lambda x: pd.to_datetime(x['order_time'],
                                                  format='mixed'),
            order_picked_time = lambda x: pd.to_datetime(x['order_picked_time'],
                                                         format='mixed'),
            # time taken to pick order
            pickup_time_minutes = lambda x: (
                                            (x['order_picked_time'] - x['order_time'])
                                            .dt.seconds / 60
                                            ),
            # hour in which order was placed
            order_time_hour = lambda x: x['order_time'].dt.hour,
            # time of the day when order was placed
            order_time_of_day = lambda x: (
                                x['order_time_hour'].pipe(time_of_day)),
            # categorical columns
            weather = lambda x: (
                                x['weather']
                                .str.replace("conditions ","")
                                .str.lower()
                                .replace("nan",np.nan)),
            traffic = lambda x: x["traffic"].str.rstrip().str.lower(),
            type_of_order = lambda x: x['type_of_order'].str.rstrip().str.lower(),
            type_of_vehicle = lambda x: x['type_of_vehicle'].str.rstrip().str.lower(),
            festival = lambda x: x['festival'].str.rstrip().str.lower(),
            city_type = lambda x: x['city_type'].str.rstrip().str.lower(),
            # multiple deliveries column
            multiple_deliveries = lambda x: x['multiple_deliveries'].astype(float),
            # target column modifications
            time_taken = lambda x: (x['time_taken']
                                     .str.replace("(min) ","")
                                     .astype(int)))
        .drop(columns=["order_time","order_picked_time"])
    )
    
    
    
def clean_lat_long(data: pd.DataFrame, threshold=1):
    location_columns = ['restaurant_latitude',
                        'restaurant_longitude',
                        'delivery_latitude',
                        'delivery_longitude']

    return (
        data
        .assign(**{
            col: (
                np.where(data[col] < threshold, np.nan, data[col].values)
            )
            for col in location_columns
        })
    )
    
    
# extract day, day name, month and year
def extract_datetime_features(ser):
    date_col = pd.to_datetime(ser,dayfirst=True)

    return (
        pd.DataFrame(
            {
                "day": date_col.dt.day,
                "month": date_col.dt.month,
                "year": date_col.dt.year,
                "day_of_week": date_col.dt.day_name(),
                "is_weekend": date_col.dt.day_name().isin(["Saturday","Sunday"]).astype(int)
            }
        ))
    
    
def time_of_day(ser):

    return(
        pd.cut(ser,bins=[0,6,12,17,20,24],right=True,
               labels=["after_midnight","morning","afternoon","evening","night"])
    )


def drop_columns(data: pd.DataFrame, columns: list) -> pd.DataFrame:
    df = data.drop(columns=columns)
    return df


def calculate_haversine_distance(df):
    location_columns = ['restaurant_latitude',
                        'restaurant_longitude',
                        'delivery_latitude',
                        'delivery_longitude']
    
    lat1 = df[location_columns[0]]
    lon1 = df[location_columns[1]]
    lat2 = df[location_columns[2]]
    lon2 = df[location_columns[3]]

    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(
        dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2

    c = 2 * np.arcsin(np.sqrt(a))
    distance = 6371 * c

    return (
        df.assign(
            distance = distance)
    )

def create_distance_type(data: pd.DataFrame):
    return(
        data
        .assign(
                distance_type = pd.cut(data["distance"],bins=[0,5,10,15,25],
                                        right=False,labels=["short","medium","long","very_long"])
    ))


def perform_data_cleaning(data: pd.DataFrame):
    
    cleaned_data = (
        data
        .pipe(change_column_names)
        .pipe(data_cleaning)
        .pipe(clean_lat_long)
        .pipe(calculate_haversine_distance)
        .pipe(create_distance_type)
        .pipe(drop_columns,columns=columns_to_drop)
    )
    
    return cleaned_data,cleaned_data.dropna()
    
    

if __name__ == "__main__":
    # data path for data
    #DATA_PATH = "swiggy.csv"
    
    # read the data from path
    #df = pd.read_csv(DATA_PATH)
    #print('swiggy data loaded successfuly')
    
    df_with_nans, df_without_nans = perform_data_cleaning(df)

In [9]:
temp_df = df_without_nans.copy().dropna()

In [10]:
# split into X and y

X = temp_df.drop(columns='time_taken')
y = temp_df['time_taken']

X

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,1,15.0,morning,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,0,5.0,evening,20.183530,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,1,15.0,morning,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,0,10.0,evening,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,1,15.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45587,35.0,4.2,windy,jam,2,drinks,motorcycle,1.0,no,metropolitian,0,10.0,night,16.600272,very_long
45588,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,0,10.0,morning,1.489846,short
45590,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,0,15.0,night,4.657195,short
45591,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,0,5.0,afternoon,6.232393,medium


In [11]:
# train test split

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [12]:
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

The size of train data is (30156, 15)
The shape of test data is (7539, 15)


In [13]:
# missing values in train data

X_train.isna().sum()

age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [14]:
# transform target column

pt = PowerTransformer()

y_train_pt = pt.fit_transform(y_train.values.reshape(-1,1))
y_test_pt = pt.transform(y_test.values.reshape(-1,1))

In [15]:
y_train_pt

Unnamed: 0,x0
0,2.028672
1,0.554539
2,-2.024267
3,-0.173699
4,0.554539
...,...
30151,0.457580
30152,-0.173699
30153,-1.350937
30154,0.047111


In [16]:
# percentage of rows in data having missing values

(
    X_train
    .isna()
    .any(axis=1)
    .mean()
    .round(2) * 100
)


np.float64(0.0)

# Pre-Processing Pipeline

In [17]:
num_cols = ["age","ratings","pickup_time_minutes","distance"]

nominal_cat_cols = ['weather',
                    'type_of_order',
                    'type_of_vehicle',
                    "festival",
                    "city_type",
                    "is_weekend",
                    "order_time_of_day"]

ordinal_cat_cols = ["traffic","distance_type"]

In [18]:
nominal_cat_cols

['weather',
 'type_of_order',
 'type_of_vehicle',
 'festival',
 'city_type',
 'is_weekend',
 'order_time_of_day']

In [19]:
X_train.isna().sum()

age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [20]:
# # features to fill values with mode

# features_to_fill_mode = ['multiple_deliveries','festival','city_type']
# features_to_fill_missing = [col for col in nominal_cat_cols if col not in features_to_fill_mode]

# features_to_fill_missing

In [21]:
# # simple imputer to fill categorical vars with mode

# simple_imputer = ColumnTransformer(transformers=[
#     ("mode_imputer",SimpleImputer(strategy="most_frequent",add_indicator=True),features_to_fill_mode),
#     ("missing_imputer",SimpleImputer(strategy="constant",fill_value="missing",add_indicator=True),features_to_fill_missing)
# ],remainder="passthrough",n_jobs=-1,force_int_remainder_cols=False,verbose_feature_names_out=False)

# simple_imputer

In [22]:
# simple_imputer.fit_transform(X_train)

In [23]:
# simple_imputer.fit_transform(X_train).isna().sum()

In [24]:
# knn imputer

# knn_imputer = KNNImputer(n_neighbors=5)

In [25]:
# do basic preprocessing

num_cols = ["age","ratings","pickup_time_minutes","distance"]

nominal_cat_cols = ['weather','type_of_order',
                    'type_of_vehicle',"festival",
                    "city_type",
                    "is_weekend",
                    "order_time_of_day"]

ordinal_cat_cols = ["traffic","distance_type"]

In [26]:
# generate order for ordinal encoding

traffic_order = ["low","medium","high","jam"]

distance_type_order = ["short","medium","long","very_long"]

In [27]:
# unique categories the ordinal columns

for col in ordinal_cat_cols:
    print(col,X_train[col].unique())

traffic ['jam' 'medium' 'high' 'low']
distance_type ['medium', 'short', 'long', 'very_long']
Categories (4, object): ['short' < 'medium' < 'long' < 'very_long']


In [28]:
# build a preprocessor

preprocessor = ColumnTransformer(transformers=[
    ("scale", MinMaxScaler(), num_cols),
    ("nominal_encode", OneHotEncoder(drop="first",handle_unknown="ignore",
                                     sparse_output=False), nominal_cat_cols),
    ("ordinal_encode", OrdinalEncoder(categories=[traffic_order,distance_type_order],
                                      encoded_missing_value=-999,
                                      handle_unknown="use_encoded_value",
                                      unknown_value=-1), ordinal_cat_cols)
],remainder="passthrough",n_jobs=-1,force_int_remainder_cols=False,verbose_feature_names_out=False)


preprocessor

0,1,2
,transformers,"[('scale', ...), ('nominal_encode', ...), ...]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,False

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,categories,"[['low', 'medium', ...], ['short', 'medium', ...]]"
,dtype,<class 'numpy.float64'>
,handle_unknown,'use_encoded_value'
,unknown_value,-1
,encoded_missing_value,-999
,min_frequency,
,max_categories,


In [29]:
# build the pipeline

processing_pipeline = Pipeline(steps=[
                                # ("simple_imputer",simple_imputer),
                                ("preprocess",preprocessor)
                                # ("knn_imputer",knn_imputer)
                            ])

processing_pipeline

0,1,2
,steps,"[('preprocess', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('scale', ...), ('nominal_encode', ...), ...]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,False

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,categories,"[['low', 'medium', ...], ['short', 'medium', ...]]"
,dtype,<class 'numpy.float64'>
,handle_unknown,'use_encoded_value'
,unknown_value,-1
,encoded_missing_value,-999
,min_frequency,
,max_categories,


In [30]:
# do data preprocessing

X_train_trans = processing_pipeline.fit_transform(X_train)

X_test_trans = processing_pipeline.transform(X_test)



In [31]:
X_train_trans

Unnamed: 0,age,ratings,pickup_time_minutes,distance,weather_fog,weather_sandstorms,weather_stormy,weather_sunny,weather_windy,type_of_order_drinks,...,city_type_semi-urban,city_type_urban,is_weekend_1,order_time_of_day_evening,order_time_of_day_morning,order_time_of_day_night,traffic,distance_type,vehicle_condition,multiple_deliveries
8720,0.473684,0.56,1.0,0.404165,0.0,0.0,0.0,1.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,0,2.0
25245,1.000000,0.76,0.0,0.154044,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0,1.0
34118,0.473684,0.80,0.5,0.002461,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1,0.0
26036,1.000000,0.92,1.0,0.460411,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0,1.0
37194,0.526316,0.76,0.5,0.243676,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20275,0.578947,0.92,0.5,0.451895,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,3.0,2.0,0,0.0
7601,0.052632,1.00,1.0,0.612270,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,2.0,1,1.0
13632,0.526316,0.92,0.0,0.322877,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1,0.0
1045,0.947368,0.96,0.5,0.004486,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0,1.0


In [32]:
from sklearn.ensemble import RandomForestRegressor
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [33]:
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.compose import TransformedTargetRegressor

In [34]:
def objective(trial):
    with mlflow.start_run(nested=True):
        params = {
            "n_estimators": trial.suggest_int("n_estimators",10,500),
            "max_depth": trial.suggest_int("max_depth",1,30),
            "max_features": trial.suggest_categorical("max_features",[None,"sqrt","log2"]),
            "min_samples_split": trial.suggest_int("min_samples_split",2,10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf",1,10),
            "max_samples": trial.suggest_float("max_samples",0.5,1),
            "random_state": 42,
            "n_jobs": -1,
        }

        # log model parameters
        mlflow.log_params(params)

        # build the model
        rf = RandomForestRegressor(**params)
        model = TransformedTargetRegressor(regressor=rf,transformer=pt)

        # train the model
        model.fit(X_train_trans,y_train)

        # get the predictions
        y_pred_train = model.predict(X_train_trans)
        y_pred_test = model.predict(X_test_trans)


        # perform cross validation
        cv_score = cross_val_score(model,
                                X_train_trans,
                                y_train,
                                cv=5,
                                scoring="neg_mean_absolute_error",
                                n_jobs=-1)

        # mean score
        mean_score = -(cv_score.mean())

        # log avg cross val error
        mlflow.log_metric("cross_val_error",mean_score)

        return mean_score

In [35]:
from sklearn.compose import TransformedTargetRegressor

In [None]:
# create optuna study
study = optuna.create_study(direction="minimize")

with mlflow.start_run(run_name="best_model"):
    # optimize the objective function
    study.optimize(objective,n_trials=20,n_jobs=-1,show_progress_bar=True)

    # log the best parameters
    mlflow.log_params(study.best_params)

    # log the best score
    mlflow.log_metric("best_score",study.best_value)

    # train the model on best parameters
    best_rf = RandomForestRegressor(**study.best_params)

    best_rf.fit(X_train_trans,y_train_pt.values.ravel())

    # get the predictions
    y_pred_train = best_rf.predict(X_train_trans)
    y_pred_test = best_rf.predict(X_test_trans)

    # get the actual predictions values
    y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
    y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))


    # perform cross validation
    model = TransformedTargetRegressor(regressor=best_rf,
                                        transformer=pt)


    scores = cross_val_score(model,
                         X_train_trans,
                         y_train,
                         scoring="neg_mean_absolute_error",
                         cv=5,n_jobs=-1)

    # log metrics
    mlflow.log_metric("training_error",mean_absolute_error(y_train,y_pred_train_org))
    mlflow.log_metric("test_error",mean_absolute_error(y_test,y_pred_test_org))
    mlflow.log_metric("training_r2",r2_score(y_train,y_pred_train_org))
    mlflow.log_metric("test_r2",r2_score(y_test,y_pred_test_org))
    mlflow.log_metric("cross_val",- scores.mean())

    # log the best model
    mlflow.sklearn.log_model(best_rf,artifact_path="model")

[I 2025-07-06 11:16:43,196] A new study created in memory with name: no-name-cb41e41f-1151-4e82-9373-5ff0c354ad8f
  0%|          | 0/20 [00:00<?, ?it/s]

🏃 View run abrasive-ram-886 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/57296b40604c4067980fb4210adf9a85
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run judicious-horse-989 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/d3ca486f1dbb4968a0b231b06d2ffe97
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 8. Best value: 3.79912:   5%|▌         | 1/20 [00:27<08:35, 27.11s/it]

[I 2025-07-06 11:17:10,580] Trial 8 finished with value: 3.7991178039250144 and parameters: {'n_estimators': 245, 'max_depth': 8, 'max_features': 'sqrt', 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_samples': 0.6036028718182975}. Best is trial 8 with value: 3.7991178039250144.


Best trial: 1. Best value: 3.54781:  10%|█         | 2/20 [00:28<03:40, 12.27s/it]

[I 2025-07-06 11:17:12,571] Trial 1 finished with value: 3.547810076778299 and parameters: {'n_estimators': 37, 'max_depth': 13, 'max_features': 'log2', 'min_samples_split': 5, 'min_samples_leaf': 10, 'max_samples': 0.5968713959554343}. Best is trial 1 with value: 3.547810076778299.
🏃 View run efficient-squid-225 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/708ab0d92334491180442a199c91f5e9
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  15%|█▌        | 3/20 [00:34<02:33,  9.01s/it]

[I 2025-07-06 11:17:17,691] Trial 11 finished with value: 3.2361609199020505 and parameters: {'n_estimators': 302, 'max_depth': 25, 'max_features': 'log2', 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_samples': 0.9593891615191918}. Best is trial 11 with value: 3.2361609199020505.
🏃 View run whimsical-worm-385 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/1e202c805c7f44d1bbb255da12552da8
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  20%|██        | 4/20 [00:42<02:18,  8.65s/it]

[I 2025-07-06 11:17:25,654] Trial 5 finished with value: 3.396175581460772 and parameters: {'n_estimators': 410, 'max_depth': 22, 'max_features': 'log2', 'min_samples_split': 4, 'min_samples_leaf': 7, 'max_samples': 0.6402391470509248}. Best is trial 11 with value: 3.2361609199020505.
🏃 View run whimsical-bat-905 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/c03ca718fdc74b0b96972a1f39539758
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run fearless-horse-165 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/aa6273365b204818b794d78293e7c13e
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  25%|██▌       | 5/20 [00:56<02:37, 10.51s/it]

[I 2025-07-06 11:17:39,605] Trial 7 finished with value: 4.13590175416816 and parameters: {'n_estimators': 222, 'max_depth': 7, 'max_features': 'log2', 'min_samples_split': 7, 'min_samples_leaf': 7, 'max_samples': 0.909603917658351}. Best is trial 11 with value: 3.2361609199020505.


Best trial: 11. Best value: 3.23616:  30%|███       | 6/20 [00:57<01:41,  7.27s/it]

[I 2025-07-06 11:17:40,581] Trial 2 finished with value: 3.7983905823482496 and parameters: {'n_estimators': 358, 'max_depth': 9, 'max_features': 'log2', 'min_samples_split': 5, 'min_samples_leaf': 9, 'max_samples': 0.7029296913596572}. Best is trial 11 with value: 3.2361609199020505.
🏃 View run hilarious-pig-45 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/2f69d815790f40999c1dd43da19ab109
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  35%|███▌      | 7/20 [01:02<01:25,  6.59s/it]

[I 2025-07-06 11:17:45,654] Trial 4 finished with value: 3.534952577435862 and parameters: {'n_estimators': 385, 'max_depth': 12, 'max_features': 'log2', 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_samples': 0.594146950054738}. Best is trial 11 with value: 3.2361609199020505.
🏃 View run monumental-shoat-902 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/79e8e63722764e1cb4c1e14d1e7097c2
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  40%|████      | 8/20 [01:07<01:12,  6.02s/it]

[I 2025-07-06 11:17:50,593] Trial 3 finished with value: 3.764969252080346 and parameters: {'n_estimators': 435, 'max_depth': 9, 'max_features': 'log2', 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_samples': 0.9893115582438932}. Best is trial 11 with value: 3.2361609199020505.
🏃 View run efficient-kit-65 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/4819fd8bb4d5493981098901b5f7b74a
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run grandiose-wolf-565 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/92ad39bb0e0c476596177b952860a779
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 11. Best value: 3.23616:  45%|████▌     | 9/20 [01:22<01:37,  8.85s/it]

[I 2025-07-06 11:18:05,633] Trial 10 finished with value: 3.3156334451408283 and parameters: {'n_estimators': 461, 'max_depth': 21, 'max_features': 'sqrt', 'min_samples_split': 5, 'min_samples_leaf': 8, 'max_samples': 0.6117247513831847}. Best is trial 11 with value: 3.2361609199020505.


Best trial: 9. Best value: 3.08644:  50%|█████     | 10/20 [01:23<01:04,  6.41s/it]

[I 2025-07-06 11:18:06,596] Trial 9 finished with value: 3.0864438548114417 and parameters: {'n_estimators': 431, 'max_depth': 20, 'max_features': None, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_samples': 0.6810868704856865}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run rare-penguin-381 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/b448162627194a8e8061ce87b9df7b6d
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run abundant-wasp-322 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/fc5045d861364215bf60adbad1eb103b
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  55%|█████▌    | 11/20 [01:29<00:56,  6.28s/it]

[I 2025-07-06 11:18:12,580] Trial 12 finished with value: 3.416996580497705 and parameters: {'n_estimators': 33, 'max_depth': 18, 'max_features': 'log2', 'min_samples_split': 3, 'min_samples_leaf': 8, 'max_samples': 0.7688132031449728}. Best is trial 9 with value: 3.0864438548114417.


Best trial: 9. Best value: 3.08644:  60%|██████    | 12/20 [01:29<00:37,  4.66s/it]

[I 2025-07-06 11:18:13,563] Trial 6 finished with value: 3.564952381722039 and parameters: {'n_estimators': 313, 'max_depth': 8, 'max_features': None, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_samples': 0.9815201628264763}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run salty-calf-548 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/7d857ac33ca34fddb79d53d6c1caf0fb
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  65%|██████▌   | 13/20 [01:35<00:33,  4.77s/it]

[I 2025-07-06 11:18:18,581] Trial 13 finished with value: 3.9790385201798424 and parameters: {'n_estimators': 329, 'max_depth': 7, 'max_features': 'sqrt', 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_samples': 0.6655258006871209}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run rare-rook-268 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/e69d0a38c1a64f65bbfd1f5176850487
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run nervous-shrew-461 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/3ce6426a014b46eeacf3e6a0310cf781
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  70%|███████   | 14/20 [01:44<00:36,  6.06s/it]

[I 2025-07-06 11:18:27,610] Trial 15 finished with value: 3.1680879266225785 and parameters: {'n_estimators': 209, 'max_depth': 23, 'max_features': 'sqrt', 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_samples': 0.8668382208855056}. Best is trial 9 with value: 3.0864438548114417.


Best trial: 9. Best value: 3.08644:  75%|███████▌  | 15/20 [01:45<00:22,  4.53s/it]

[I 2025-07-06 11:18:28,597] Trial 0 finished with value: 3.0901227447531965 and parameters: {'n_estimators': 417, 'max_depth': 23, 'max_features': None, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_samples': 0.9503219452999954}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run fun-midge-878 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/aa59836b794b4c2aa3662d7fb12e3345
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  80%|████████  | 16/20 [01:49<00:18,  4.66s/it]

[I 2025-07-06 11:18:33,569] Trial 16 finished with value: 4.88132066832368 and parameters: {'n_estimators': 413, 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_samples': 0.5756417823298761}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run gifted-bear-263 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/27ef0fb3915949498a82668ed4e869d2
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  85%|████████▌ | 17/20 [01:58<00:17,  5.96s/it]

[I 2025-07-06 11:18:42,563] Trial 14 finished with value: 3.3206941834153687 and parameters: {'n_estimators': 402, 'max_depth': 10, 'max_features': None, 'min_samples_split': 9, 'min_samples_leaf': 8, 'max_samples': 0.5525483759953078}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run charming-auk-104 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/afd2b39a85f34d3bb0136a8bf0021a7f
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3
🏃 View run colorful-stoat-86 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/61206b2b6cc241a6b359b5a4f3246f9e
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  90%|█████████ | 18/20 [02:06<00:13,  6.57s/it]

[I 2025-07-06 11:18:50,550] Trial 17 finished with value: 5.254997951767299 and parameters: {'n_estimators': 379, 'max_depth': 3, 'max_features': None, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_samples': 0.8859878090455375}. Best is trial 9 with value: 3.0864438548114417.
🏃 View run bouncy-kite-164 at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/3aa1e6b83e8f4890ace1b813427d8440
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


Best trial: 9. Best value: 3.08644:  95%|█████████▌| 19/20 [02:08<00:05,  5.20s/it]

[I 2025-07-06 11:18:52,563] Trial 18 finished with value: 3.925301271794433 and parameters: {'n_estimators': 176, 'max_depth': 8, 'max_features': 'log2', 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_samples': 0.8657520789963067}. Best is trial 9 with value: 3.0864438548114417.


Best trial: 9. Best value: 3.08644: 100%|██████████| 20/20 [02:09<00:00,  6.50s/it]


[I 2025-07-06 11:18:53,565] Trial 19 finished with value: 3.2832923719936096 and parameters: {'n_estimators': 344, 'max_depth': 22, 'max_features': 'sqrt', 'min_samples_split': 8, 'min_samples_leaf': 7, 'max_samples': 0.7153708627295792}. Best is trial 9 with value: 3.0864438548114417.




🏃 View run best_model at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3/runs/3f91ad852b964bb2af87db612c815cb1
🧪 View experiment at: https://dagshub.com/kbs.kartik/delivery-time-prediction.mlflow/#/experiments/3


RestException: INTERNAL_ERROR: Response: {'error': 'unsupported endpoint, please contact support@dagshub.com'}

In [None]:
# optimization history plot

optuna.visualization.plot_optimization_history(study)

In [None]:
# plot hyperparameter importance plot

optuna.visualization.plot_param_importances(study)

In [None]:
# slice plot

optuna.visualization.plot_slice(study)