In [18]:
# Import necessary libraries
import pandas as pd
import numpy as np
import plotly.express as px


In [19]:
# Load dataset from a .txt file (assumed to be in CSV format)
data = pd.read_csv("deliverytime.txt")
data.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min)
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26
3,7A6A,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,Buffet,motorcycle,21
4,70A2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,Snack,scooter,30


In [20]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45593 entries, 0 to 45592
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           45593 non-null  object 
 1   Delivery_person_ID           45593 non-null  object 
 2   Delivery_person_Age          45593 non-null  int64  
 3   Delivery_person_Ratings      45593 non-null  float64
 4   Restaurant_latitude          45593 non-null  float64
 5   Restaurant_longitude         45593 non-null  float64
 6   Delivery_location_latitude   45593 non-null  float64
 7   Delivery_location_longitude  45593 non-null  float64
 8   Type_of_order                45593 non-null  object 
 9   Type_of_vehicle              45593 non-null  object 
 10  Time_taken(min)              45593 non-null  int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 3.8+ MB


In [21]:
print(data.describe)

<bound method NDFrame.describe of          ID Delivery_person_ID  Delivery_person_Age  Delivery_person_Ratings  \
0      4607     INDORES13DEL02                   37                      4.9   
1      B379     BANGRES18DEL02                   34                      4.5   
2      5D6D     BANGRES19DEL01                   23                      4.4   
3      7A6A    COIMBRES13DEL02                   38                      4.7   
4      70A2     CHENRES12DEL01                   32                      4.6   
...     ...                ...                  ...                      ...   
45588  7C09      JAPRES04DEL01                   30                      4.8   
45589  D641      AGRRES16DEL01                   21                      4.6   
45590  4F8D     CHENRES08DEL03                   30                      4.9   
45591  5EEE    COIMBRES11DEL01                   20                      4.7   
45592  5FB2   RANCHIRES09DEL02                   23                      4.9   

     

In [22]:
data.isnull().sum()

Unnamed: 0,0
ID,0
Delivery_person_ID,0
Delivery_person_Age,0
Delivery_person_Ratings,0
Restaurant_latitude,0
Restaurant_longitude,0
Delivery_location_latitude,0
Delivery_location_longitude,0
Type_of_order,0
Type_of_vehicle,0


In [23]:
# Function to calculate the distance between two GPS points
def haversine_distance(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    return R * c


In [24]:
# Apply the Haversine function to each row in the dataset
data['distance'] = data.apply(
    lambda row: haversine_distance(
        row['Restaurant_latitude'],
        row['Restaurant_longitude'],
        row['Delivery_location_latitude'],
        row['Delivery_location_longitude']
    ), axis=1
)


In [25]:
# Convert "Time_taken(min)" column from string to float
data['Time_taken(min)'] = data['Time_taken(min)'].astype(str).str.replace('(min)', '', regex=False).astype(float)


In [26]:
# Plot relationship between distance and time taken
figure = px.scatter(
    data_frame=data,
    x="distance",
    y="Time_taken(min)",
    size="Time_taken(min)",
    trendline="ols",
    title="Relationship Between Distance and Time Taken"
)
figure.show()


In [27]:
# Plot relationship between delivery person age and time taken
figure = px.scatter(
    data_frame=data,
    x="Delivery_person_Age",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="distance",
    trendline="ols",
    title="Relationship Between Time Taken and Age"
)
figure.show()


In [28]:
# Plot relationship between delivery person ratings and time taken
figure = px.scatter(
    data_frame=data,
    x="Delivery_person_Ratings",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="distance",
    trendline="ols",
    title="Relationship Between Time Taken and Ratings"
)
figure.show()


In [29]:
# Define input features and target variable
x = np.array(data[["Delivery_person_Age", "Delivery_person_Ratings", "distance"]])
y = np.array(data[["Time_taken(min)"]])


In [30]:
from sklearn.model_selection import train_test_split
import numpy as np

# Features and target
x = np.array(data[["Delivery_person_Age", "Delivery_person_Ratings", "distance"]])
y = np.array(data[["Time_taken(min)"]])

# Train-test split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)

# Reshape input for LSTM: (samples, timesteps, features)
xtrain = xtrain.reshape((xtrain.shape[0], xtrain.shape[1], 1))
xtest = xtest.reshape((xtest.shape[0], xtest.shape[1], 1))


In [31]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Define the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(xtrain.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(xtrain, ytrain, batch_size=1, epochs=3)

# Display the model summary
model.summary()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Epoch 1/3
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 5ms/step - loss: 74.5261
Epoch 2/3
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 5ms/step - loss: 65.5820
Epoch 3/3
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 5ms/step - loss: 61.9961


In [32]:
print("🚚 Food Delivery Time Prediction")

# Get user input
a = int(input("Enter Age of Delivery Partner: "))
b = float(input("Enter Ratings of Previous Deliveries: "))
c = float(input("Enter Total Distance (in km): "))

# Prepare input and reshape for LSTM
features = np.array([[a, b, c]])
features = features.reshape((features.shape[0], features.shape[1], 1))

# Make prediction
prediction = model.predict(features)
print(f"Predicted Delivery Time in Minutes: {prediction[0][0]:.2f}")


🚚 Food Delivery Time Prediction
Enter Age of Delivery Partner: 25
Enter Ratings of Previous Deliveries: 4.6
Enter Total Distance (in km): 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
Predicted Delivery Time in Minutes: 19.58
