<a href="https://colab.research.google.com/github/quantum-mechanics405/Food-Delivery-Time-Reg.-Model/blob/main/Food_delivey_reg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from geopy.distance import great_circle

# Scikit-learn imports
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

# TensorFlow imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
df = pd.read_excel('/content/food_del.xlsx')
df.head(3)

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min)
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26


In [3]:
df.shape

(45593, 11)

In [11]:
# Assuming df is your DataFrame and it contains the 'Type_of_order' column
# First, we need to count the occurrences of each order type
order_counts = df['Type_of_order'].value_counts().reset_index()
order_counts.columns = ['Type_of_order', 'Order_Count']

# Create the pie chart with a hole (donut chart)
fig_pie = px.pie(order_counts, names='Type_of_order', values='Order_Count', hole=0.5,
                 title="Distribution of Order Types")

# Show the pie chart
fig_pie.show()


In [12]:
# Assuming df is your DataFrame and it contains the 'Type_of_order' column
# First, we need to count the occurrences of each order type
order_counts = df['Type_of_vehicle'].value_counts().reset_index()
order_counts.columns = ['Type_of_vehicle', 'Order_Count']

# Create the pie chart with a hole (donut chart)
fig_pie = px.pie(order_counts, names='Type_of_vehicle', values='Order_Count', hole=0.5,
                 title="Distribution of Type_of_vehicle")

# Show the pie chart
fig_pie.show()


In [15]:
# Create a histogram using mean of the ratings for each age bin
fig = px.histogram(df, x='Delivery_person_Age', y='Delivery_person_Ratings', nbins=30,
                   histfunc='avg',  # Using 'avg' to compute the mean instead of the sum
                   title="Histogram of Delivery Person Age vs Mean Ratings", labels={
                       "Delivery_person_Age": "Age",
                       "Delivery_person_Ratings": "Mean Ratings"
                   })

# Show the plot
fig.show()


#Preprocessing the data


adding distance feature

In [4]:
# Function to calculate distance
def calculate_distance(row):
    restaurant_coords = (row['Restaurant_latitude'], row['Restaurant_longitude'])
    delivery_coords = (row['Delivery_location_latitude'], row['Delivery_location_longitude'])
    return great_circle(restaurant_coords, delivery_coords).kilometers  # You can also use miles

# Applying the function to create a new column 'Distance_km'
df['Distance_km'] = df.apply(calculate_distance, axis=1)

df.head(3)


Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min),Distance_km
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24,3.025154
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33,20.183558
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26,1.55276


In [5]:
# Step 1: Drop 'ID' and 'Delivery_person_ID'
df = df.drop(['ID', 'Delivery_person_ID'], axis=1)

# Step 2: Split the data into input (X) and output (y)
X = df.drop('Time_taken(min)', axis=1)  # Input features
y = df['Time_taken(min)']               # Output column

# Step 2: Identify numerical and categorical columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

# Step 4: Create a preprocessing pipeline
# Pipeline for numerical features: Standardize/Normalize
numerical_transformer = StandardScaler()

# Pipeline for categorical features: OneHotEncoder
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Combine numerical and categorical transformers
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Step 5: Create the final pipeline that includes preprocessing
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Step 6: Fit the pipeline and transform the data
X_preprocessed = pipeline.fit_transform(X)

In [11]:
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Step 2: Split the data into train and temp (validation + test) sets
train_size = 0.85
temp_size = 0.15  # Remaining size for validation + test

X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42)  # Test size is 0.1 of the original data

# Step 3: Split temp into validation and test sets
X_train, X_val, y_train, y_val = train_test_split( X_temp, y_temp, test_size=0.08, random_state=42)  # Split temp into validation

In [8]:
X_train.shape

(37750, 9)

In [9]:
X_preprocessed[3,:]

array([ 1.48434708,  0.20638504, -0.73476425,  0.29476226, -0.87409414,
        0.2926708 , -0.08321534,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  1.        ,  0.        ])

#1. Logistic Regression

In [10]:
# Step 1: Create a pipeline that includes the preprocessing and the model
pipeline_lg = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('model', LogisticRegression(max_iter=1000))  # Logistic Regression model
])

# Step 2: Perform cross-validation with 4 folds
cross_val_scores = cross_val_score(pipeline_lg, X_train, y_train, cv=4, scoring='neg_mean_squared_error')

# Convert negative MSE to positive MSE
cross_val_mse = -cross_val_scores.mean()
print(f'Cross-validated Mean Squared Error: {cross_val_mse:.2f} minutes')

# Step 3: Fit the pipeline on the training data
pipeline_lg.fit(X_train, y_train)

# Step 4: Make predictions on the validation set
y_val_pred = pipeline_lg.predict(X_val)

# Step 5: Calculate the Mean Squared Error for the validation set
val_mse = mean_squared_error(y_val, y_val_pred)
val_MAE = mean_absolute_error(y_val, y_val_pred)
print(f'Mean Squared Error on Validation Set: {val_mse:.2f} minutes')
print(f'Mean Absolute Error on Validation Set: {val_MAE:.2f} minutes')
# Step 6: Make predictions on the test set
y_test_pred = pipeline.predict(X_test)

# Step 7: Calculate the Mean Squared Error for the test set
test_mse = mean_squared_error(y_test, y_test_pred)
test_MAE = mean_absolute_error(y_test, y_test_pred)
print(f'Mean Squared Error on Test Set: {test_mse:.2f} minutes')
print(f'Mean Absolute on Validation Set: {val_MAE:.2f} minutes')

Cross-validated Mean Squared Error: 77.64 minutes
Mean Squared Error on Validation Set: 80.39 minutes
Mean Absolute Error on Validation Set: 6.81 minutes
Mean Squared Error on Test Set: 77.41 minutes
Mean Absolute on Validation Set: 6.81 minutes


In [11]:
inst = 8
# Get the 5th instance of the test set (index 4)
instance_5 = X_test.iloc[inst]  # Extract the 5th instance from the test set

# Convert the instance to a DataFrame with the same column names
instance_5_df = pd.DataFrame([instance_5], columns=X_test.columns)

# Predict using the pipeline
predicted_time_taken = pipeline.predict(instance_5_df)
print(f'Actual time taken by this delivery: {y_test.iloc[inst]:.2f} minutes')
# Print the predicted result
print(f'Predicted Time Taken for the {inst} Instance: {predicted_time_taken[0]:.2f} minutes')


Actual time taken by this delivery: 22.00 minutes
Predicted Time Taken for the 8 Instance: 24.00 minutes


In [12]:
y_train.mean()

26.250225165562913

In [13]:
y_train.std()

9.381967195678088

#2. Polynomial Regression

In [14]:
# Step 1: Create a new pipeline with polynomial features and linear regression
degree = 2  # Change this to the desired degree of the polynomial
pipeline_poly = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('poly_features', PolynomialFeatures(degree=degree, include_bias=False)),  # Polynomial features
    ('model', LinearRegression())  # Linear Regression model
])

# Step 2: Fit the polynomial regression model on the training data
pipeline_poly.fit(X_train, y_train)

# Step 3: Make predictions on the validation set
y_val_pred_poly = pipeline_poly.predict(X_val)

# Step 4: Calculate Mean Absolute Error and Mean Squared Error for the validation set
val_mae_poly = mean_absolute_error(y_val, y_val_pred_poly)
val_mse_poly = mean_squared_error(y_val, y_val_pred_poly)

print(f'Mean Absolute Error on Validation Set: {val_mae_poly:.2f} minutes')
print(f'Mean Squared Error on Validation Set: {val_mse_poly:.2f} minutes')

# Step 5: Make predictions on the test set
y_test_pred_poly = pipeline_poly.predict(X_test)

# Step 6: Calculate Mean Absolute Error and Mean Squared Error for the test set
test_mae_poly = mean_absolute_error(y_test, y_test_pred_poly)
test_mse_poly = mean_squared_error(y_test, y_test_pred_poly)

print(f'Mean Absolute Error on Test Set: {test_mae_poly:.2f} minutes')
print(f'Mean Squared Error on Test Set: {test_mse_poly:.2f} minutes')


Mean Absolute Error on Validation Set: 6.37 minutes
Mean Squared Error on Validation Set: 64.80 minutes
Mean Absolute Error on Test Set: 6.22 minutes
Mean Squared Error on Test Set: 61.26 minutes


#3. Decision tree Regression

In [15]:
# Step 1: Create a new pipeline with preprocessing and Decision Tree Regressor
pipeline_tree = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('model', DecisionTreeRegressor())  # Decision Tree Regressor model
])

# Step 2: Fit the Decision Tree Regressor on the training data
pipeline_tree.fit(X_train, y_train)

# Step 3: Make predictions on the validation set
y_val_pred_tree = pipeline_tree.predict(X_val)

# Step 4: Calculate Mean Absolute Error and Mean Squared Error for the validation set
val_mae_tree = mean_absolute_error(y_val, y_val_pred_tree)
val_mse_tree = mean_squared_error(y_val, y_val_pred_tree)

print(f'Mean Absolute Error on Validation Set: {val_mae_tree:.2f} minutes')
print(f'Mean Squared Error on Validation Set: {val_mse_tree:.2f} minutes')

# Step 5: Make predictions on the test set
y_test_pred_tree = pipeline_tree.predict(X_test)

# Step 6: Calculate Mean Absolute Error and Mean Squared Error for the test set
test_mae_tree = mean_absolute_error(y_test, y_test_pred_tree)
test_mse_tree = mean_squared_error(y_test, y_test_pred_tree)

print(f'Mean Absolute Error on Test Set: {test_mae_tree:.2f} minutes')
print(f'Mean Squared Error on Test Set: {test_mse_tree:.2f} minutes')


Mean Absolute Error on Validation Set: 8.09 minutes
Mean Squared Error on Validation Set: 109.15 minutes
Mean Absolute Error on Test Set: 8.03 minutes
Mean Squared Error on Test Set: 107.57 minutes


#4. Random Forest Regression

In [16]:
# Step 1: Create a new pipeline with preprocessing and Random Forest Regressor
pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('model', RandomForestRegressor())  # Random Forest Regressor model
])

# Step 2: Fit the Random Forest Regressor on the training data
pipeline_rf.fit(X_train, y_train)

# Step 3: Make predictions on the validation set
y_val_pred_rf = pipeline_rf.predict(X_val)

# Step 4: Calculate Mean Absolute Error and Mean Squared Error for the validation set
val_mae_rf = mean_absolute_error(y_val, y_val_pred_rf)
val_mse_rf = mean_squared_error(y_val, y_val_pred_rf)

print(f'Mean Absolute Error on Validation Set: {val_mae_rf:.2f} minutes')
print(f'Mean Squared Error on Validation Set: {val_mse_rf:.2f} minutes')

# Step 5: Make predictions on the test set
y_test_pred_rf = pipeline_rf.predict(X_test)

# Step 6: Calculate Mean Absolute Error and Mean Squared Error for the test set
test_mae_rf = mean_absolute_error(y_test, y_test_pred_rf)
test_mse_rf = mean_squared_error(y_test, y_test_pred_rf)

print(f'Mean Absolute Error on Test Set: {test_mae_rf:.2f} minutes')
print(f'Mean Squared Error on Test Set: {test_mse_rf:.2f} minutes')


Mean Absolute Error on Validation Set: 6.02 minutes
Mean Squared Error on Validation Set: 59.85 minutes
Mean Absolute Error on Test Set: 5.91 minutes
Mean Squared Error on Test Set: 57.29 minutes


#5. Support Vector Regressor

In [17]:
# Step 1: Create a new pipeline with preprocessing and Support Vector Regressor
pipeline_svr = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('model', SVR(kernel='rbf'))  # Support Vector Regressor with RBF kernel
])

# Step 2: Fit the Support Vector Regressor on the training data
pipeline_svr.fit(X_train, y_train)

# Step 3: Make predictions on the validation set
y_val_pred_svr = pipeline_svr.predict(X_val)

# Step 4: Calculate Mean Absolute Error and Mean Squared Error for the validation set
val_mae_svr = mean_absolute_error(y_val, y_val_pred_svr)
val_mse_svr = mean_squared_error(y_val, y_val_pred_svr)

print(f'Mean Absolute Error on Validation Set: {val_mae_svr:.2f} minutes')
print(f'Mean Squared Error on Validation Set: {val_mse_svr:.2f} minutes')

# Step 5: Make predictions on the test set
y_test_pred_svr = pipeline_svr.predict(X_test)

# Step 6: Calculate Mean Absolute Error and Mean Squared Error for the test set
test_mae_svr = mean_absolute_error(y_test, y_test_pred_svr)
test_mse_svr = mean_squared_error(y_test, y_test_pred_svr)

print(f'Mean Absolute Error on Test Set: {test_mae_svr:.2f} minutes')
print(f'Mean Squared Error on Test Set: {test_mse_svr:.2f} minutes')


Mean Absolute Error on Validation Set: 6.33 minutes
Mean Squared Error on Validation Set: 67.90 minutes
Mean Absolute Error on Test Set: 6.25 minutes
Mean Squared Error on Test Set: 65.92 minutes


#6. DNN

In [9]:
X_train = pipeline.fit_transform(X_train)
X_train.shape

(37750, 15)

In [13]:
X_train = pipeline.fit_transform(X_train)
X_val = pipeline.fit_transform(X_val)
X_test = pipeline.fit_transform(X_test)


# Step 2: Build the Neural Network model
model = keras.Sequential()
model.add(layers.Input(shape=[15]))  # Input layer
model.add(layers.Dense(256, activation='relu'))            # Hidden layer 1
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))            # Hidden layer 2
model.add(layers.Dense(64, activation='relu'))            # Hidden layer 3
model.add(layers.Dense(32, activation='relu'))            # Hidden layer 4
model.add(layers.Dense(1))                                # Output layer

# Step 3: Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Step 4: Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=32, verbose=1)

# Step 5: Predict on the test set
y_test_pred = model.predict(X_test)

# Step 6: Calculate evaluation metrics
mse = mean_squared_error(y_test, y_test_pred)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, y_test_pred)
r2 = r2_score(y_test, y_test_pred)

# Step 7: Print results
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R-squared (R2): {r2:.2f}")


Epoch 1/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 134.3470 - mae: 8.6139 - val_loss: 69.7893 - val_mae: 6.5922
Epoch 2/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 67.1467 - mae: 6.4705 - val_loss: 70.4715 - val_mae: 6.5271
Epoch 3/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 66.7475 - mae: 6.4151 - val_loss: 68.7966 - val_mae: 6.6772
Epoch 4/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 66.8146 - mae: 6.4167 - val_loss: 68.5088 - val_mae: 6.3916
Epoch 5/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 64.9976 - mae: 6.3181 - val_loss: 68.7693 - val_mae: 6.3635
Epoch 6/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 63.9107 - mae: 6.2480 - val_loss: 68.1781 - val_mae: 6.4241
Epoch 7/10
[1m944/944[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0