In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

In [None]:
# Read the delivery dataset into a DataFrame (parsed with pandas' default CSV settings)
data = pd.read_csv('data/deliverytime.txt')

In [None]:
# Preview the first rows to check that the file was parsed as expected
data.head()

In [None]:
# Summarise the columns: dtypes, non-null counts and memory usage
data.info()

In [None]:
# Count the missing values in each column
data.isna().sum()

In [None]:
# Add a distance feature from the latitude and longitude of two points using the haversine formula

# Mean radius of the Earth in kilometres; distances below are returned in km
r = 6371

# Convert deg to radian
def deg_to_rad(deg):
    """Convert an angle (scalar or NumPy/pandas array) from degrees to radians."""
    return deg * np.pi/180

def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle (haversine) distance in kilometres between two points.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float or array-like
        Distance in kilometres, rounded to 3 decimal places. Scalar inputs
        give a scalar; NumPy arrays / pandas Series are handled element-wise.
    """
    lat_diff = deg_to_rad(lat2-lat1)
    lon_diff = deg_to_rad(lon2-lon1)
    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2).
    # The sine must be applied to the half-angles; squaring the raw half-angle
    # is only a small-angle approximation and overestimates long distances.
    h = (np.sin(lat_diff/2)**2
         + np.cos(deg_to_rad(lat1))*np.cos(deg_to_rad(lat2))*np.sin(lon_diff/2)**2)
    # Clamp to 1 so floating-point round-off can never push arcsin out of its
    # domain; np.minimum (unlike the built-in min) also works on arrays.
    d = np.round(2*r*np.arcsin(np.minimum(1, np.sqrt(h))), 3)
    return d

In [None]:
# Sanity-check the distance helper on a single pair of nearby coordinates
calculate_distance(22.745049, 75.892471, 22.765049, 75.912471)

In [None]:
# List the available column names
data.columns

In [None]:
# Create the 'distance' column, initialised to NaN for every row

data['distance'] = np.nan

In [None]:
# Compute the restaurant-to-delivery-location distance for every order.
# Zipping the four coordinate columns avoids a label-based `.loc` read and
# write per row (very slow on large frames) and no longer depends on the
# frame having a default 0..n-1 integer index.
data['distance'] = [
    calculate_distance(rest_lat, rest_lon, drop_lat, drop_lon)
    for rest_lat, rest_lon, drop_lat, drop_lon in zip(
        data['Restaurant_latitude'],
        data['Restaurant_longitude'],
        data['Delivery_location_latitude'],
        data['Delivery_location_longitude'],
    )
]

In [None]:
# Preview the first rows again to inspect the 'distance' column
data.head()

In [None]:
# Re-count missing values per column to confirm 'distance' was filled for every row
data.isna().sum()

In [None]:
# Data exploration: delivery time against distance, with an OLS trendline in red.
# The figure size is passed straight to plotly express.

fig = px.scatter(
    data,
    x="distance",
    y="Time_taken(min)",
    size="Time_taken(min)",
    trendline="ols",
    trendline_color_override="red",
    title="Relationship Between Distance and Time Taken",
    width=700,
    height=500,
)
fig.show()

It means that most delivery partners deliver food within 25–30 minutes.

In [None]:
# List the column names available for the next plots
data.columns

In [None]:
# Delivery time against the age of the delivery partner.
# Markers are sized by delivery time and coloured by distance; the OLS trendline is drawn in red.

fig = px.scatter(
    data,
    x="Delivery_person_Age",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="distance",
    trendline="ols",
    trendline_color_override="red",
    title="Relationship Between Time Taken and Age",
    width=900,
    height=500,
)
fig.show()
                 

There is a linear relationship between the time taken to deliver the food and the age of the delivery partner. It means that younger delivery partners take less time to deliver the food than older partners.

In [None]:
# Delivery time against the ratings of the delivery partner.
# Markers are sized by delivery time and coloured by distance; the OLS trendline is drawn in red.

fig = px.scatter(
    data,
    x="Delivery_person_Ratings",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="distance",
    trendline="ols",
    trendline_color_override="red",
    title="Relationship between the time taken to deliver the food and the ratings of the delivery partner",
    width=900,
    height=500,
)
fig.show()

There is an inverse linear relationship between the time taken to deliver the food and the ratings of the delivery partner. It means delivery partners with higher ratings take less time to deliver the food compared to partners with low ratings.

In [None]:
# Does the vehicle type or the type of order affect delivery time?
# One box per vehicle type, split (coloured) by type of order.

fig = px.box(
    data,
    x="Type_of_vehicle",
    y="Time_taken(min)",
    color="Type_of_order",
    width=900,
    height=500,
)
fig.show()

So there is not much difference between the time taken by delivery partners depending on the vehicle they are driving and the type of food they are delivering.

### So the features that contribute most to the food delivery time based on our analysis are:

* age of the delivery partner
* ratings of the delivery partner
* distance between the restaurant and the delivery location

In [None]:
# List the column names before selecting the model features
data.columns

In [None]:
# Splitting the data in train and test dataset

from sklearn.model_selection import train_test_split

In [None]:
# Feature matrix: partner age, partner ratings and the computed distance
X = np.array(
    data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']]
)
# Target: delivery time, kept as a single-column 2-D array
y = np.array(data[['Time_taken(min)']])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)