### **Intelligent Delivery Route Optimization System - Data Simulation**  

This notebook is responsible for **creating a dataset that closely resembles real-world delivery operations**. It is designed to simulate **realistic logistics data**, similar to the actual data I worked with during my **Intelligent Delivery Route Optimization System** freelance project.  

📌 **What this dataset contains:**  
✅ **One month of delivery data** (20 workdays) for a company operating with **2 delivery cars**.  
✅ **Each car handles 2 delivery batches per day** (morning: 9 AM - 12 PM, afternoon: 1 PM - 5 PM).  
✅ **Randomized drop-off locations and delivery times, with traffic conditions, fuel consumption, and costs derived from them**.  

This dataset will be useful for **route optimization, predictive modeling, and cost analysis**.

In [1]:
# Uncomment the next line if you need to install dependencies
# %pip install pandas numpy matplotlib

In [2]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, atan2
import random

In [3]:
# Headquarters coordinates in decimal degrees (an arbitrarily chosen location in Tunis).
HQ_LAT = 36.8065
HQ_LON = 10.1815

In [4]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Great-circle distance, in kilometres, between two points given in
    decimal degrees.

    The Haversine formula is evaluated on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371

    # The trigonometric functions expect radians, so convert all four angles first.
    phi1, lam1, phi2, lam2 = (radians(deg) for deg in (lat1, lon1, lat2, lon2))

    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # Haversine of the central angle between the two points.
    hav = sin(half_dphi) ** 2 + cos(phi1) * cos(phi2) * sin(half_dlam) ** 2
    central_angle = 2 * atan2(sqrt(hav), sqrt(1 - hav))

    return earth_radius_km * central_angle

In [5]:
def realistic_distance(long_trip_probability=0.05):
    """
    Generates a delivery distance in kilometres.

    Most values come from |N(4, 3)| clamped to the 1-15 km range. With
    probability `long_trip_probability` (default 5%) a long trip is returned
    instead, drawn uniformly from 15-20 km.

    Parameters
    ----------
    long_trip_probability : float, optional
        Chance, in [0, 1], that the result is a long 15-20 km trip.

    Returns
    -------
    float
        A distance between 1 and 20 km.
    """
    # Draw the rare long trip explicitly. Waiting for the normal sample itself
    # to exceed 15 km (more than 3.6 standard deviations above the mean) would
    # make long trips practically impossible rather than ~5% of cases.
    if random.random() < long_trip_probability:
        return random.uniform(15, 20)

    base_distance = abs(np.random.normal(loc=4, scale=3))  # Mean: 4 km, Std Dev: 3 km

    # Keep ordinary trips inside the 1-15 km band.
    return float(max(min(base_distance, 15), 1))

In [6]:
def generate_dropoff_location(prev_lat, prev_lon):
    """
    Picks the next drop-off point near the previous one.

    A candidate is produced by shifting latitude and longitude independently
    by a uniform offset of at most 0.05 degrees. Candidates are redrawn until
    their Haversine distance from the previous point lies between 1 km and
    20 km (inclusive).

    Returns a tuple (latitude, longitude, distance_km), with the distance
    rounded to two decimals.

    NOTE(review): an offset of 0.05 degrees is only a few kilometres, so the
    20 km upper bound looks like a safety check rather than a reachable limit.
    """
    max_offset_deg = 0.05
    min_km, max_km = 1, 20

    while True:
        candidate_lat = prev_lat + random.uniform(-max_offset_deg, max_offset_deg)
        candidate_lon = prev_lon + random.uniform(-max_offset_deg, max_offset_deg)
        hop_km = haversine_distance(prev_lat, prev_lon, candidate_lat, candidate_lon)

        # Reject hops outside the allowed range and draw again.
        if not (min_km <= hop_km <= max_km):
            continue

        return candidate_lat, candidate_lon, round(hop_km, 2)

In [7]:
def get_traffic_condition(hour):
    """
    Determines traffic conditions based on the hour of the day.

    Parameters
    ----------
    hour : int or float
        Hour on a 24-hour clock. Fractional values are accepted
        (e.g. 15.5 means 15:30).

    Returns
    -------
    tuple
        (traffic_condition, average_speed_kmh):
        ("Moderate Traffic", 30) for 9:00-12:00 and 13:00-15:30,
        ("Rush Hour", 20) for 15:30-17:00.

    Raises
    ------
    ValueError
        If `hour` lies outside both delivery windows. Previously this case
        fell through and returned None, which only surfaced later as an
        unpacking error at the call site.
    """
    if 9 <= hour < 12 or 13 <= hour < 15.5:
        return "Moderate Traffic", 30  # Speed: 30 km/h
    if 15.5 <= hour < 17:
        return "Rush Hour", 20  # Speed: 20 km/h

    raise ValueError(
        f"hour must be within 9-12 or 13-17 (delivery windows), got {hour!r}"
    )

In [8]:
def estimate_delivery_time(distance_km, traffic_condition):
    """
    Returns the estimated travel time in minutes (rounded to two decimals)
    for a trip of `distance_km` under the given traffic condition.

    The base speed grows with trip length (25 km/h below 5 km, 30 km/h below
    10 km, 40 km/h otherwise) and is then reduced by 5 km/h in "Rush Hour" or
    by 2 km/h in "Moderate Traffic". Any other condition leaves it unchanged.
    """
    # (upper distance bound in km, base speed in km/h), checked in order.
    speed_bands = ((5, 25), (10, 30))

    for upper_km, band_speed in speed_bands:
        if distance_km < upper_km:
            speed_kmh = band_speed
            break
    else:
        speed_kmh = 40  # Long trips: highways and main roads

    # Traffic slows the vehicle down by a fixed amount.
    if traffic_condition == "Rush Hour":
        slowdown = 5
    elif traffic_condition == "Moderate Traffic":
        slowdown = 2
    else:
        slowdown = 0
    speed_kmh -= slowdown

    travel_hours = distance_km / speed_kmh
    return round(travel_hours * 60, 2)


In [9]:
def generate_realistic_deliveries(num_days=20, min_orders=10, max_orders=50):
    """
    Generates a random but realistic dataset of deliveries for two cars.

    For every workday each car runs two batches: morning (9:00-12:00) and
    afternoon (13:00-17:00). A batch starts at HQ and chains its drop-offs,
    so each order's distance is measured from the previous drop-off.

    Parameters
    ----------
    num_days : int
        Number of workdays to simulate (20 ~ one month).
    min_orders, max_orders : int
        Inclusive bounds for the random number of orders in a batch.

    Returns
    -------
    pandas.DataFrame
        One row per order with the columns Batch_ID, Car_ID, Order_ID,
        Dropoff_Lat, Dropoff_Lon, Time_of_Day, Traffic_Condition, Distance_km,
        Actual_Delivery_Time_Min, Fuel_Consumed_L and Fuel_Cost_TND.

    Notes
    -----
    The delivery time of each order is drawn independently within its shift,
    so times are not chronological along the route.
    """
    fuel_price_tnd_per_l = 2.2  # Estimated fuel price
    shifts = [(9, 12), (13, 17)]  # (start_hour, end_hour) of morning / afternoon batch
    car_ids = ["Car_1", "Car_2"]

    deliveries = []
    batch_id = 1

    for _day in range(num_days):
        for car_id in car_ids:
            for start_hour, end_hour in shifts:
                num_orders = random.randint(min_orders, max_orders)  # Random orders per batch
                prev_lat, prev_lon = HQ_LAT, HQ_LON  # Every batch starts at HQ

                for order_num in range(1, num_orders + 1):
                    # end_hour - 1 keeps the timestamp inside the shift window
                    delivery_hour = random.randint(start_hour, end_hour - 1)
                    delivery_minute = random.randint(0, 59)

                    # Nearby drop-off location and its distance from the previous stop
                    dropoff_lat, dropoff_lon, distance_km = generate_dropoff_location(prev_lat, prev_lon)

                    # Pass the fractional hour: the rush-hour boundary sits at 15:30,
                    # so looking only at the integer hour would label 15:30-15:59
                    # deliveries as moderate traffic.
                    traffic_condition, _ = get_traffic_condition(delivery_hour + delivery_minute / 60)

                    delivery_time_min = estimate_delivery_time(distance_km, traffic_condition)

                    # Fuel efficiency in km per litre drops outside moderate traffic
                    fuel_efficiency = 10 if traffic_condition == "Moderate Traffic" else 7
                    fuel_consumed_L = round(distance_km / fuel_efficiency, 2)
                    fuel_cost_TND = round(fuel_consumed_L * fuel_price_tnd_per_l, 2)

                    deliveries.append({
                        "Batch_ID": batch_id,
                        "Car_ID": car_id,
                        "Order_ID": f"{batch_id}-{order_num}",
                        "Dropoff_Lat": dropoff_lat,
                        "Dropoff_Lon": dropoff_lon,
                        "Time_of_Day": f"{delivery_hour}:{delivery_minute:02d}",
                        "Traffic_Condition": traffic_condition,
                        "Distance_km": distance_km,
                        "Actual_Delivery_Time_Min": delivery_time_min,
                        "Fuel_Consumed_L": fuel_consumed_L,
                        "Fuel_Cost_TND": fuel_cost_TND
                    })

                    # The next order in the batch departs from this drop-off
                    prev_lat, prev_lon = dropoff_lat, dropoff_lon

                batch_id += 1  # One ID per (day, car, shift) batch

    return pd.DataFrame(deliveries)


In [10]:
# Seed both random number generators so that Restart & Run All reproduces
# the same dataset every time.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

monthly_deliveries_df = generate_realistic_deliveries(num_days=20)
monthly_deliveries_df

Unnamed: 0,Batch_ID,Car_ID,Order_ID,Dropoff_Lat,Dropoff_Lon,Time_of_Day,Traffic_Condition,Distance_km,Actual_Delivery_Time_Min,Fuel_Consumed_L,Fuel_Cost_TND
0,1,Car_1,1-1,36.804603,10.228468,11:06,Moderate Traffic,4.19,10.93,0.42,0.92
1,1,Car_1,1-2,36.817916,10.222097,9:11,Moderate Traffic,1.59,4.15,0.16,0.35
2,1,Car_1,1-3,36.799883,10.256978,10:04,Moderate Traffic,3.70,9.65,0.37,0.81
3,1,Car_1,1-4,36.779985,10.282146,11:52,Moderate Traffic,3.15,8.22,0.32,0.70
4,1,Car_1,1-5,36.761682,10.291856,11:03,Moderate Traffic,2.21,5.77,0.22,0.48
...,...,...,...,...,...,...,...,...,...,...,...
2427,80,Car_2,80-30,36.784151,10.076086,14:44,Moderate Traffic,2.66,6.94,0.27,0.59
2428,80,Car_2,80-31,36.774119,10.094604,14:53,Moderate Traffic,1.99,5.19,0.20,0.44
2429,80,Car_2,80-32,36.740491,10.102621,16:01,Rush Hour,3.81,11.43,0.54,1.19
2430,80,Car_2,80-33,36.736264,10.124030,16:43,Rush Hour,1.96,5.88,0.28,0.62


In [11]:
# Persist the simulated dataset next to the notebook, without the row index column.
monthly_deliveries_df.to_csv(
    path_or_buf="monthly_deliveries.csv",
    index=False,
)