In [334]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline,make_pipeline
import numpy as np


### Loaded the Data

In [256]:
df = pd.read_csv('/Users/shivendragupta/Downloads/Food_delivery_Times.csv')

In [257]:
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


### Finding na values

In [258]:
df.isna().sum()

Order_ID                   0
Distance_km                0
Weather                   30
Traffic_Level             30
Time_of_Day               30
Vehicle_Type               0
Preparation_Time_min       0
Courier_Experience_yrs    30
Delivery_Time_min          0
dtype: int64

In [259]:
df.dropna(inplace=True)

### Selecting X and Y from Dataset

In [260]:
X = df.iloc[:,1:8]
X

Unnamed: 0,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs
0,7.93,Windy,Low,Afternoon,Scooter,12,1.0
1,16.42,Clear,Medium,Evening,Bike,20,2.0
2,9.52,Foggy,Low,Night,Scooter,28,1.0
3,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0
4,19.03,Clear,Low,Morning,Bike,16,5.0
...,...,...,...,...,...,...,...
995,8.50,Clear,High,Evening,Car,13,3.0
996,16.28,Rainy,Low,Morning,Scooter,8,9.0
997,15.62,Snowy,High,Evening,Scooter,26,2.0
998,14.17,Clear,Low,Afternoon,Bike,8,0.0


In [261]:
y = df.iloc[:,-1]
y

0      43
1      84
2      59
3      37
4      68
       ..
995    54
996    71
997    81
998    55
999    58
Name: Delivery_Time_min, Length: 883, dtype: int64

### Creating a train test split

In [276]:
from sklearn.model_selection import train_test_split

In [321]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.3,random_state = 42,shuffle=True)

In [322]:
X.isna().sum()

Distance_km               0
Weather                   0
Traffic_Level             0
Time_of_Day               0
Vehicle_Type              0
Preparation_Time_min      0
Courier_Experience_yrs    0
dtype: int64

### Defining column transformer. Applying Ordinal encoder on [Weather,Traffic_Level,Time_of_Day,Vehicle_Type]

In [324]:
trf2 = ColumnTransformer([
    ('ordinal',OrdinalEncoder(handle_unknown='use_encoded_value',unknown_value=-1), [1,2,3,4]),
],remainder='passthrough')

### Defining column transformer. Applying MinMax Scalar on all Columns

In [325]:
trf3 = ColumnTransformer([
    ('scale',MinMaxScaler(),slice(0,7))
])
print(X_test.columns)

Index(['Distance_km', 'Weather', 'Traffic_Level', 'Time_of_Day',
       'Vehicle_Type', 'Preparation_Time_min', 'Courier_Experience_yrs'],
      dtype='object')


### Defining Linear Regression Class

In [326]:
trf4 = LinearRegression()

### Making a pipe line

In [327]:
pipe_line = make_pipeline(trf2,trf3,trf4)

In [328]:
pipe_line.fit(X_train,y_train)

### Making prediction on X_test

In [329]:
y_pred = pipe_line.predict(X_test)

In [330]:
y_test

48      36
910     27
816     49
748     89
995     54
      ... 
474     62
410     36
416     49
192     46
495    115
Name: Delivery_Time_min, Length: 265, dtype: int64

### Accuracy based on R2 score

In [331]:
r2_score(y_test,y_pred)

0.7696605540600809

### Saving the pipeline as .pkl file

In [336]:
import pickle

# Assuming 'pipeline' is your trained pipeline object
with open('pipeline_model.pkl', 'wb') as f:
    pickle.dump(pipe_line, f)
