### **Intelligent Delivery Route Optimization System - Data Simulation**  

This notebook is responsible for **creating a dataset that closely resembles real-world delivery operations**. It is designed to simulate **realistic logistics data**, similar to the actual data I worked with during my **Intelligent Delivery Route Optimization System** freelance project.  

📌 **What this dataset contains:**  
✅ **One month of delivery data** (20 workdays) for a company operating with **2 delivery cars**.  
✅ **Each car handles 2 delivery batches per day** (morning: 9 AM - 12 PM, afternoon: 1 PM - 5 PM).  
✅ **Randomized drop-off locations, traffic conditions, fuel consumption, and costs**.  

This dataset will be useful for **route optimization, predictive modeling, and cost analysis**.

In [42]:
#uncomment the next line if you need to install dependencies
#!pip install pandas numpy matplotlib

In [43]:
import pandas as pd
import numpy as np
import math
from math import radians, sin, cos, sqrt, atan2
import random

In [44]:
HQ_LAT, HQ_LON = 36.8065, 10.1815  # random HQ Location in Tunis

In [45]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculates the great-circle distance between two latitude/longitude points
    using the Haversine formula.
    """
    R = 6371  # Earth's radius in km
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c  # Distance in km

In [46]:
def realistic_distance():
    """
    Generates a delivery distance mostly between 1 km and 12 km,
    with some between 12-15 km and rare cases reaching 15-20 km (~5% chance).
    """
    base_distance = np.random.normal(loc=6, scale=3)


    if random.random() < 0.05:  # 5% chance for long trips
        return random.uniform(15, 20)

    return max(min(base_distance, 15), 1)  # Ensure range is 1-15 km

In [47]:
def generate_dropoff_location(prev_lat, prev_lon):
    """
    Generates a nearby location with a distance between 1 km and 20 km.
    Now correctly matches the distance function!
    """
    while True:
        # Get a realistic distance
        distance_km = realistic_distance()

        # Convert distance to degrees (approximation)
        delta_lat = (distance_km / 111)  # 1 degree lat ≈ 111 km
        delta_lon = (distance_km / (111 * math.cos(math.radians(prev_lat))))  # Adjust for longitude scaling

        # Apply random direction
        new_lat = prev_lat + random.uniform(-delta_lat, delta_lat)
        new_lon = prev_lon + random.uniform(-delta_lon, delta_lon)

        # Ensure calculated distance is correct
        actual_distance = haversine_distance(prev_lat, prev_lon, new_lat, new_lon)

        if 1 <= actual_distance <= 20:  # Now allows up to 20 km!
            return new_lat, new_lon, round(actual_distance, 2)


In [48]:
def get_traffic_condition(hour):
    """
    Determines traffic conditions based on the hour of the day.
    Returns the traffic condition and average speed.
    """
    if 9 <= hour < 12 or 13 <= hour < 15.5:
        return "Moderate Traffic", 30  # Speed: 30 km/h
    elif 15.5 <= hour < 17:
        return "Rush Hour", 20  # Speed: 20 km/h

In [49]:
def estimate_delivery_time(distance_km, traffic_condition):
    """
    Estimates delivery time based on distance and traffic conditions.
    Uses lower speed for short trips and higher speed for long trips.
    """
    if distance_km < 5:
        avg_speed = 25  # Short trips → lower speed (neighborhoods, stoplights)
    elif distance_km < 10:
        avg_speed = 30  #  Medium trips → moderate speed (city roads)
    else:
        avg_speed = 40  #  Long trips → higher speed (highways, main roads)

    # we adjust speed based on traffic condition
    if traffic_condition == "Rush Hour":
        avg_speed -= 5
    elif traffic_condition == "Moderate Traffic":
        avg_speed -= 2

    return round((distance_km / avg_speed) * 60, 2)  # To convert hours to minutes


In [50]:
def generate_realistic_deliveries(num_days=20, min_orders=10, max_orders=50):
    """
    Generates a fully random but realistic dataset of deliveries over a one-month period (20 workdays).
    Each batch is assigned to one of two cars.
    """
    deliveries = []
    car_ids = ["Car_1", "Car_2"]
    batch_id = 1

    for day in range(num_days):
        for car_id in car_ids:
            # Two batches per car per day: Morning (9-12) and Afternoon (1-5)
            for shift, (start_hour, end_hour) in enumerate([(9, 12), (13, 17)]):
                num_orders = random.randint(min_orders, max_orders)  # Random orders per batch
                prev_lat, prev_lon = HQ_LAT, HQ_LON  # The start is at HQ

                for order_num in range(num_orders):
                    order_id = f"{batch_id}-{order_num+1}"
                    delivery_hour = random.randint(start_hour, end_hour - 1)  # Ensures time is within batch range
                    delivery_minute = random.randint(0, 59)
                    time_of_day = f"{delivery_hour}:{delivery_minute:02d}"

                    # Generating a nearby drop-off location and distance
                    dropoff_lat, dropoff_lon, distance_km = generate_dropoff_location(prev_lat, prev_lon)

                    # Getting traffic condition & speed based on time
                    traffic_condition, avg_speed = get_traffic_condition(delivery_hour)

                    # Estimating realistic delivery time
                    delivery_time_min = estimate_delivery_time(distance_km, traffic_condition)

                    # Fuel consumption & cost
                    fuel_efficiency = 10 if traffic_condition == "Moderate Traffic" else 7
                    fuel_consumed_L = round(distance_km / fuel_efficiency, 2)
                    fuel_cost_TND = round(fuel_consumed_L * 2.2, 2)  # Fuel price estimated as 2.2 TND/L

                    deliveries.append({
                        "Batch_ID": batch_id,
                        "Car_ID": car_id,
                        "Order_ID": order_id,
                        "Dropoff_Lat": dropoff_lat,
                        "Dropoff_Lon": dropoff_lon,
                        "Time_of_Day": time_of_day,
                        "Traffic_Condition": traffic_condition,
                        "Distance_km": distance_km,
                        "Actual_Delivery_Time_Min": delivery_time_min,
                        "Fuel_Consumed_L": fuel_consumed_L,
                        "Fuel_Cost_TND": fuel_cost_TND
                    })

                    # Updating previous location for next order in the batch
                    prev_lat, prev_lon = dropoff_lat, dropoff_lon

                batch_id += 1  # To increment batch ID

    return pd.DataFrame(deliveries)


In [51]:
monthly_deliveries_df = generate_realistic_deliveries(num_days=20)
monthly_deliveries_df

Unnamed: 0,Batch_ID,Car_ID,Order_ID,Dropoff_Lat,Dropoff_Lon,Time_of_Day,Traffic_Condition,Distance_km,Actual_Delivery_Time_Min,Fuel_Consumed_L,Fuel_Cost_TND
0,1,Car_1,1-1,36.814148,10.158456,11:17,Moderate Traffic,2.22,5.79,0.22,0.48
1,1,Car_1,1-2,36.785373,10.213686,11:57,Moderate Traffic,5.87,12.58,0.59,1.30
2,1,Car_1,1-3,36.836597,10.249003,11:47,Moderate Traffic,6.51,13.95,0.65,1.43
3,1,Car_1,1-4,36.855471,10.233147,11:28,Moderate Traffic,2.53,6.60,0.25,0.55
4,1,Car_1,1-5,36.896445,10.263521,11:25,Moderate Traffic,5.30,11.36,0.53,1.17
...,...,...,...,...,...,...,...,...,...,...,...
2274,80,Car_2,80-27,36.575055,10.016741,14:07,Moderate Traffic,3.50,9.13,0.35,0.77
2275,80,Car_2,80-28,36.553582,10.054221,15:41,Moderate Traffic,4.11,10.72,0.41,0.90
2276,80,Car_2,80-29,36.540166,10.109756,14:31,Moderate Traffic,5.18,11.10,0.52,1.14
2277,80,Car_2,80-30,36.518702,10.094426,13:11,Moderate Traffic,2.75,7.17,0.28,0.62


In [52]:
print("Max distance in dataset:", monthly_deliveries_df["Distance_km"].max())
print("Min distance in dataset:", monthly_deliveries_df["Distance_km"].min())
print("Mean distance in dataset:", monthly_deliveries_df["Distance_km"].mean())


Max distance in dataset: 19.71
Min distance in dataset: 1.0
Mean distance in dataset: 5.395458534444932


In [54]:
monthly_deliveries_df.to_csv("monthly_deliveries.csv", index=False)