<a href="https://colab.research.google.com/github/kadirferik/Delivery_Time_Pred/blob/main/Food_Delivery_Time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load the Dataset

In [None]:
import pandas as pd

# Read the delivery records from the CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer='deliverytime.csv')

# Print the column names, non-null counts and dtypes as a quick schema check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45593 entries, 0 to 45592
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           45593 non-null  object 
 1   Delivery_person_ID           45593 non-null  object 
 2   Delivery_person_Age          45593 non-null  int64  
 3   Delivery_person_Ratings      45593 non-null  float64
 4   Restaurant_latitude          45593 non-null  float64
 5   Restaurant_longitude         45593 non-null  float64
 6   Delivery_location_latitude   45593 non-null  float64
 7   Delivery_location_longitude  45593 non-null  float64
 8   Type_of_order                45593 non-null  object 
 9   Type_of_vehicle              45593 non-null  object 
 10  Time_taken(min)              45593 non-null  int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 3.8+ MB


In [None]:
# Preview the first five raw rows.
df.head(n=5)

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min)
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26
3,7A6A,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,Buffet,motorcycle,21
4,70A2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,Snack,scooter,30


# Data Preprocessing


## Replacing Categorical Variables With Numerical Values

In [None]:
# Separate the target column from the predictors.
y_data = df["Time_taken(min)"]
x_data = df.drop(columns="Time_taken(min)")

# The raw category labels carry trailing whitespace (e.g. 'Snack '). Strip it
# before the lookup so the encoding does not silently collapse every row into
# the fallback code when the whitespace differs between data exports.
# Labels that are not listed in a mapping share the fallback code 3.
type_of_order = x_data["Type_of_order"].str.strip().map({
    'Snack': 1,
    'Meal': 2
}).fillna(3)
type_of_vehicle = x_data["Type_of_vehicle"].str.strip().map({
    'motorcycle': 1,
    'scooter': 2
}).fillna(3)

# Replace the text columns with their numeric codes.
x_data["Type_of_order"] = type_of_order
x_data["Type_of_vehicle"] = type_of_vehicle

x_data.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,1.0,1.0
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,1.0,2.0
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,3.0,1.0
3,7A6A,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,3.0,1.0
4,70A2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,1.0,2.0


## Removing Unnecessary Columns from the Dataset

In [None]:
# ID and Delivery_person_ID are string identifiers; Delivery_person_Age is
# excluded from the feature set as well. Drop all three in a single call.
x_data = x_data.drop(columns=["Delivery_person_ID", "ID", "Delivery_person_Age"])

x_data.head()

Unnamed: 0,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle
0,4.9,22.745049,75.892471,22.765049,75.912471,1.0,1.0
1,4.5,12.913041,77.683237,13.043041,77.813237,1.0,2.0
2,4.4,12.914264,77.6784,12.924264,77.6884,3.0,1.0
3,4.7,11.003669,76.976494,11.053669,77.026494,3.0,1.0
4,4.6,12.972793,80.249982,13.012793,80.289982,1.0,2.0


# Splitting the Dataset into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

# Training

In [None]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares regression on the training split.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

# Prediction

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
# Score the baseline model on the held-out test split.
y_pred = model.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same unit as the target column
r2 = r2_score(y_test, y_pred)

# Printed in the order RMSE, MSE, R^2.
for metric in (rmse, mse, r2):
    print(metric)

8.757323498101833
76.69071485040652
0.12531541257973922


# Hyperparameter Tuning

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Standardise the features, then fit a linear regression on the scaled values.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression())
])

# NOTE(review): copy_X only controls whether the input array is copied before
# fitting; it is not expected to change the fitted coefficients, so searching
# over it mostly doubles the number of fits. Consider removing it from the grid.
param_grid = {
    'regressor__fit_intercept': [True, False],
    'regressor__copy_X': [True, False]
}

# 5-fold cross-validation over every parameter combination. With refit=True
# (the default, spelled out here) the best pipeline is retrained on the full
# training split once the search finishes, so no manual re-fit is needed.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, refit=True)
grid_search.fit(x_train, y_train)

best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# best_estimator_ is already fitted on x_train / y_train by the search above.
y_pred = grid_search.best_estimator_.predict(x_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Printed in the order RMSE, MSE, R^2.
# NOTE(review): the recorded scores are identical to the untuned baseline;
# this search does not improve the model.
print(rmse)
print(mse)
print(r2)

Best Parameters: {'regressor__copy_X': True, 'regressor__fit_intercept': True}
8.757323498101833
76.69071485040652
0.12531541257973922
