# 🛵 Food Delivery Time Prediction with LSTM
This notebook builds an LSTM model to predict delivery time based on delivery partner features and distance. It also includes visualizations to analyze feature relationships.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# Read the raw delivery records from CSV and preview the first rows
csv_path = 'food_delivery_data.csv'
df = pd.read_csv(csv_path)
df.head()


In [None]:
# Remove the columns that are not used anywhere in the rest of the notebook
unused_columns = ['ID', 'Delivery_person_ID', 'Type_of_order', 'Type_of_vehicle']
df = df.drop(columns=unused_columns)


In [None]:
# Calculate distance using Haversine formula
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Coordinates are given in decimal degrees. Every argument may be a scalar
    or an array-like (NumPy array, pandas Series); array-likes are combined
    element-wise, so whole columns can be processed in one call.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometres, with the same shape as the inputs.
    """
    R = 6371  # Earth radius in KM
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make arcsin(sqrt(a)) return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

# Restaurant -> drop-off distance for every row, computed column-wise
restaurant_lat = df['Restaurant_latitude']
restaurant_lon = df['Restaurant_longitude']
dropoff_lat = df['Delivery_location_latitude']
dropoff_lon = df['Delivery_location_longitude']
df['Distance_km'] = haversine(restaurant_lat, restaurant_lon, dropoff_lat, dropoff_lon)


In [None]:
# Keep only the three model inputs and the target, discarding rows that
# have a missing value in any of them.
# .dropna() is chained onto the selection instead of calling
# dropna(inplace=True) on the selected frame afterwards: the in-place call
# on a frame derived by indexing triggers pandas' SettingWithCopyWarning.
model_columns = ['Delivery_person_Age', 'Delivery_person_Ratings', 'Distance_km', 'Time_taken(min)']
df = df[model_columns].dropna()
df.head()


In [None]:
# Scatter-matrix of every column against every other column
pairplot_title = "Pairwise Feature Relationships"
sns.pairplot(df)
# y > 1 places the title just above the top of the figure
plt.suptitle(pairplot_title, y=1.02)
plt.show()


In [None]:
# Annotated heatmap of the pairwise correlations between the columns
correlations = df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()


In [None]:
# Histogram (20 bins) with a KDE overlay of the target column
plt.figure(figsize=(6, 4))
time_axes = sns.histplot(df['Time_taken(min)'], bins=20, kde=True)
time_axes.set_title("Distribution of Delivery Time")
time_axes.set_xlabel("Time Taken (min)")
time_axes.set_ylabel("Frequency")
plt.show()


In [None]:
# Feature and target split
feature_columns = ['Delivery_person_Age', 'Delivery_person_Ratings', 'Distance_km']
X = df[feature_columns].values
y = df[['Time_taken(min)']].values  # double brackets keep y 2-D, as MinMaxScaler expects

# Train-test split on the RAW values. Splitting before scaling means the
# scalers below never see the test rows, so no test-set information leaks
# into the min/max used for normalisation.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Normalize: fit on the training rows only, then apply the same transform
# to the test rows.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)
y_train = scaler_y.fit_transform(y_train_raw)
y_test = scaler_y.transform(y_test_raw)

# Reshape for LSTM [samples, time_steps, features]; each row is a sequence
# of length 1.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Full-dataset scaled arrays, kept so any code that still refers to
# X_scaled / y_scaled keeps working. They use the train-fitted scalers.
X_scaled = scaler_X.transform(X)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))
y_scaled = scaler_y.transform(y)


In [None]:
# Build the LSTM model: a single 50-unit LSTM layer followed by one linear
# output unit, trained with Adam on mean squared error.
model = Sequential()
# Take (time_steps, features) from the training tensor instead of
# hard-coding (1, 3), so the model follows any change to the feature set.
model.add(LSTM(50, activation='relu', input_shape=X_train.shape[1:]))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Train the model
# Stop once val_loss has not improved for 10 epochs, and roll the model back
# to the best epoch's weights; without restore_best_weights the model keeps
# the weights from the last (already degraded) epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# NOTE(review): the test split is used as the validation set here, so it also
# drives early stopping and is not an untouched hold-out.
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test),
                    callbacks=[early_stop], verbose=1)


In [None]:
# Persist the trained network to disk in HDF5 format
model_path = 'lstm_delivery_model.h5'
model.save(model_path)
