In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [18]:
# Load cleaned data after EDA
df = pd.read_csv("cleaned_delivary_data.csv")
df.head()

Unnamed: 0,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,19.03,Clear,Low,Morning,Bike,16,5.0,68


In [19]:
df.isnull().sum()

Distance_km               0
Weather                   0
Traffic_Level             0
Time_of_Day               0
Vehicle_Type              0
Preparation_Time_min      0
Courier_Experience_yrs    0
Delivery_Time_min         0
dtype: int64

# Separate Features & Target

In [46]:
X = df.drop("Delivery_Time_min", axis=1)
y = df["Delivery_Time_min"]

In [47]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler


In [48]:
categorical_cols = ["Weather", "Traffic_Level", "Time_of_Day", "Vehicle_Type"]
numerical_cols = ["Distance_km", "Preparation_Time_min", "Courier_Experience_yrs"]
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(drop="first", handle_unknown="ignore"), categorical_cols)
    ]
)
# Apply preprocessing
X_processed = preprocessor.fit_transform(X)


In [49]:
X_processed

array([[-0.37316642, -0.6917298 , -1.24630819, ...,  0.        ,
         0.        ,  1.        ],
       [ 1.12014266,  0.41079261, -0.90390305, ...,  0.        ,
         0.        ,  0.        ],
       [-0.09350076,  1.51331501, -1.24630819, ...,  1.        ,
         0.        ,  1.        ],
       ...,
       [ 0.97943038,  1.23768441, -0.90390305, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.72438937, -1.242991  , -1.58871334, ...,  0.        ,
         0.        ,  0.        ],
       [-0.60182387,  0.96205381, -0.5614979 , ...,  1.        ,
         0.        ,  1.        ]])

# Elastic model

In [50]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_processed,y,test_size=0.2,random_state=42)
from sklearn.linear_model import ElasticNet

model = ElasticNet(alpha=0.01, l1_ratio=0.5)
model.fit(X_train, y_train)
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

y_pred = model.predict(X_test)

print("R2 Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score: 0.8339769679061956
RMSE: 8.23753366062938


In [51]:
import pandas as pd

test_data = pd.DataFrame([{
    "Distance_km": 8.5,
    "Weather": "Rainy",
    "Traffic_Level": "High",
    "Time_of_Day": "Evening",
    "Vehicle_Type": "Bike",
    "Preparation_Time_min": 15,
    "Courier_Experience_yrs": 3
}])
test_processed = preprocessor.transform(test_data)
prediction = model.predict(test_processed)

print("Predicted Delivery Time (minutes):", prediction[0])

Predicted Delivery Time (minutes): 60.94908666996824


In [53]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
import pickle

elasticnet_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("model", ElasticNet(alpha=0.01, l1_ratio=0.5))
])


# TRAIN PIPELINE ON FULL DATA

⚠️ Important: Train on raw X, NOT X_processed.

In [54]:

elasticnet_pipeline.fit(X, y)


# Pickiling

In [55]:
with open("elasticnet_delivery_model.pkl", "wb") as f:
    pickle.dump(elasticnet_pipeline, f)


In [56]:
with open("elasticnet_delivery_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)


In [57]:
import pandas as pd

sample_input = pd.DataFrame([{
    "Distance_km": 8.5,
    "Weather": "Rainy",
    "Traffic_Level": "High",
    "Time_of_Day": "Evening",
    "Vehicle_Type": "Bike",
    "Preparation_Time_min": 15,
    "Courier_Experience_yrs": 3
}])

prediction = loaded_model.predict(sample_input)

print("Predicted Delivery Time (minutes):", prediction[0])


Predicted Delivery Time (minutes): 60.350055664547675
