In [103]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [104]:
# Load the raw food-delivery dataset from disk and preview the first rows.
df = pd.read_csv(
    r"C:\Projects\food-delivery-regression\data\raw\Food_Delivery_Time_Prediction.csv"
)
df.head()

Unnamed: 0,Order_ID,Customer_Location,Restaurant_Location,Distance,Weather_Conditions,Traffic_Conditions,Delivery_Person_Experience,Order_Priority,Order_Time,Vehicle_Type,Restaurant_Rating,Customer_Rating,Delivery_Time,Order_Cost,Tip_Amount
0,ORD0001,"(17.030479, 79.743077)","(12.358515, 85.100083)",1.57,Rainy,Medium,4,Medium,Afternoon,Car,4.1,3.0,26.22,1321.1,81.54
1,ORD0002,"(15.398319, 86.639122)","(14.174874, 77.025606)",21.32,Cloudy,Medium,8,Low,Night,Car,4.5,4.2,62.61,152.21,29.02
2,ORD0003,"(15.687342, 83.888808)","(19.594748, 82.048482)",6.95,Snowy,Medium,9,High,Night,Bike,3.3,3.4,48.43,1644.38,64.17
3,ORD0004,"(20.415599, 78.046984)","(16.915906, 78.278698)",13.79,Cloudy,Low,2,Medium,Evening,Bike,3.2,3.7,111.63,541.25,79.23
4,ORD0005,"(14.786904, 78.706532)","(15.206038, 86.203182)",6.72,Rainy,High,6,Low,Night,Bike,3.5,2.8,32.38,619.81,2.34


In [105]:
# Remove the Order_ID column, which is not used as a model feature.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: running it a second time is a no-op rather
# than a KeyError on the already-removed column.
df = df.drop(columns=['Order_ID'], errors='ignore')
df.columns

Index(['Customer_Location', 'Restaurant_Location', 'Distance',
       'Weather_Conditions', 'Traffic_Conditions',
       'Delivery_Person_Experience', 'Order_Priority', 'Order_Time',
       'Vehicle_Type', 'Restaurant_Rating', 'Customer_Rating', 'Delivery_Time',
       'Order_Cost', 'Tip_Amount'],
      dtype='object')

In [106]:
# Columns used as model inputs, in the order they appear in the design matrix.
feature_cols = [
    # numeric features
    'Distance', 'Delivery_Person_Experience',
    'Restaurant_Rating', 'Customer_Rating',
    'Order_Cost', 'Tip_Amount',
    # categorical features (one-hot encoded in a later cell)
    'Traffic_Conditions', 'Vehicle_Type', 'Weather_Conditions',
    'Order_Time', 'Order_Priority',
]

# Feature matrix and regression target.
y = df['Delivery_Time']
X = df[feature_cols]

In [107]:
# One-hot encode the categorical features. drop_first=True drops the first
# level of each category so the dummy columns are not perfectly collinear.
X_encoded = pd.get_dummies(
    data=X,
    columns=['Traffic_Conditions', 'Vehicle_Type', 'Weather_Conditions',
             'Order_Time', 'Order_Priority'],
    drop_first=True,
)
X_encoded

Unnamed: 0,Distance,Delivery_Person_Experience,Restaurant_Rating,Customer_Rating,Order_Cost,Tip_Amount,Traffic_Conditions_Low,Traffic_Conditions_Medium,Vehicle_Type_Bike,Vehicle_Type_Car,Weather_Conditions_Rainy,Weather_Conditions_Snowy,Weather_Conditions_Sunny,Order_Time_Evening,Order_Time_Morning,Order_Time_Night,Order_Priority_Low,Order_Priority_Medium
0,1.57,4,4.1,3.0,1321.10,81.54,False,True,False,True,True,False,False,False,False,False,False,True
1,21.32,8,4.5,4.2,152.21,29.02,False,True,False,True,False,False,False,False,False,True,True,False
2,6.95,9,3.3,3.4,1644.38,64.17,False,True,True,False,False,True,False,False,False,True,False,False
3,13.79,2,3.2,3.7,541.25,79.23,True,False,True,False,False,False,False,True,False,False,False,True
4,6.72,6,3.5,2.8,619.81,2.34,False,False,True,False,True,False,False,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,23.82,8,4.7,4.0,1432.26,66.34,False,False,True,False,False,False,False,False,False,True,False,True
196,6.09,8,3.0,3.6,1720.25,40.27,False,True,False,False,False,True,False,False,False,True,True,False
197,20.61,4,2.9,3.4,1356.58,5.10,False,False,True,False,False,True,False,False,False,False,False,True
198,24.06,9,3.9,4.8,354.39,85.25,False,False,False,True,True,False,False,False,False,True,True,False


In [108]:
# Traffic level is ordinal by nature, but its effect on delivery time is non-linear and asymmetric,
# so it is encoded as a categorical (one-hot) feature to avoid imposing a false linear structure.

In [109]:
# Ordinal encoding is deliberately avoided: it would force equal, linear spacing between traffic levels,
# a constraint that linear regression has no way to relax.

In [110]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

In [111]:
# Sanity check: (rows, columns) of the train and test feature matrices.
tuple(part.shape for part in (X_train, X_test))

((160, 18), (40, 18))

In [112]:
# Continuous features that get standardized; the one-hot dummy columns are left as-is.
num_cols = [
    'Distance', 'Delivery_Person_Experience',
    'Restaurant_Rating', 'Customer_Rating',
    'Order_Cost', 'Tip_Amount',
]

In [113]:
# Scaler for the numeric features; it is fitted on the training split only
# (in the next cell) and then reused to transform the test split.
scaler = StandardScaler()

In [114]:
# train_test_split returns frames derived from X_encoded; assigning columns
# onto them directly can trigger pandas' SettingWithCopyWarning. Taking explicit
# copies first makes the ownership unambiguous and guarantees X_encoded is
# never touched by the scaling step.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training split only, then apply the same fitted
# parameters to the test split so no test-set statistics leak into training.
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

In [115]:
# Verify the scaling: each numeric training column should show mean ~0 and std ~1.
X_train.loc[:, num_cols].describe().round(2)

Unnamed: 0,Distance,Delivery_Person_Experience,Restaurant_Rating,Customer_Rating,Order_Cost,Tip_Amount
count,160.0,160.0,160.0,160.0,160.0,160.0
mean,-0.0,0.0,-0.0,-0.0,0.0,0.0
std,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.58,-1.6,-1.79,-1.53,-1.66,-1.55
25%,-0.85,-0.87,-0.77,-0.86,-0.89,-0.88
50%,-0.19,0.22,0.11,0.02,-0.02,0.02
75%,0.73,0.95,0.85,0.86,0.9,0.81
max,1.94,1.68,1.87,1.85,1.72,1.85


In [116]:
# Preview the encoded feature frame before it is written out.
X_encoded.head(n=5)

Unnamed: 0,Distance,Delivery_Person_Experience,Restaurant_Rating,Customer_Rating,Order_Cost,Tip_Amount,Traffic_Conditions_Low,Traffic_Conditions_Medium,Vehicle_Type_Bike,Vehicle_Type_Car,Weather_Conditions_Rainy,Weather_Conditions_Snowy,Weather_Conditions_Sunny,Order_Time_Evening,Order_Time_Morning,Order_Time_Night,Order_Priority_Low,Order_Priority_Medium
0,1.57,4,4.1,3.0,1321.1,81.54,False,True,False,True,True,False,False,False,False,False,False,True
1,21.32,8,4.5,4.2,152.21,29.02,False,True,False,True,False,False,False,False,False,True,True,False
2,6.95,9,3.3,3.4,1644.38,64.17,False,True,True,False,False,True,False,False,False,True,False,False
3,13.79,2,3.2,3.7,541.25,79.23,True,False,True,False,False,False,False,True,False,False,False,True
4,6.72,6,3.5,2.8,619.81,2.34,False,False,True,False,True,False,False,False,False,True,True,False


In [117]:
# Assemble the frame to persist: encoded features plus the target column.
# .assign returns a new frame, so X_encoded itself is left unmodified;
# y.values attaches the target positionally (row order), not by index label.
processed_df = X_encoded.assign(Delivery_Time=y.values)

In [119]:
# Persist the processed dataset; index=False keeps the row index out of the file.
processed_df.to_csv(
    r"C:\Projects\food-delivery-regression\data\processed\food_delivery_processed.csv",
    index=False,
)

In [121]:
# Read the saved file back in to confirm the export round-trips.
df1 = pd.read_csv(
    r"C:\Projects\food-delivery-regression\data\processed\food_delivery_processed.csv"
)

In [123]:
# Preview the reloaded processed data (features followed by Delivery_Time).
df1.head(n=5)

Unnamed: 0,Distance,Delivery_Person_Experience,Restaurant_Rating,Customer_Rating,Order_Cost,Tip_Amount,Traffic_Conditions_Low,Traffic_Conditions_Medium,Vehicle_Type_Bike,Vehicle_Type_Car,Weather_Conditions_Rainy,Weather_Conditions_Snowy,Weather_Conditions_Sunny,Order_Time_Evening,Order_Time_Morning,Order_Time_Night,Order_Priority_Low,Order_Priority_Medium,Delivery_Time
0,1.57,4,4.1,3.0,1321.1,81.54,False,True,False,True,True,False,False,False,False,False,False,True,26.22
1,21.32,8,4.5,4.2,152.21,29.02,False,True,False,True,False,False,False,False,False,True,True,False,62.61
2,6.95,9,3.3,3.4,1644.38,64.17,False,True,True,False,False,True,False,False,False,True,False,False,48.43
3,13.79,2,3.2,3.7,541.25,79.23,True,False,True,False,False,False,False,True,False,False,False,True,111.63
4,6.72,6,3.5,2.8,619.81,2.34,False,False,True,False,True,False,False,False,False,True,True,False,32.38
