### **Intelligent Delivery Route Optimization System - Data Simulation**  

This notebook is responsible for **creating a dataset that closely resembles real-world delivery operations**. It is designed to simulate **realistic logistics data**, similar to the actual data I worked with during my **Intelligent Delivery Route Optimization System** freelance project.  

📌 **What this dataset contains:**  
✅ **One month of delivery data** (20 workdays) for a company operating with **2 delivery cars**.  
✅ **Each car handles 2 delivery batches per day** (morning: 9 AM - 12 PM, afternoon: 1 PM - 5 PM).  
✅ **Randomized drop-off locations and delivery times, with traffic conditions, fuel consumption, and costs derived from them**.  

This dataset will be useful for **route optimization, predictive modeling, and cost analysis**.

In [None]:
# Uncomment the next line if you need to install dependencies
# (%pip installs into the environment of the running kernel)
# %pip install pandas numpy matplotlib

In [1]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, atan2
import random

In [2]:
# Headquarters coordinates in decimal degrees (an arbitrary point in Tunis).
# Every delivery batch starts its route from this location.
HQ_LAT = 36.8065
HQ_LON = 10.1815

In [3]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculates the great-circle distance between two latitude/longitude points
    using the Haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        float: Distance between the two points in kilometres, measured on a
        sphere of radius 6371 km.
    """
    R = 6371  # Earth's radius in km
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points; without clamping, sqrt(1 - a) would raise
    # "ValueError: math domain error".
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c  # Distance in km

In [18]:
import numpy as np
import random

def realistic_distance():
    """
    Draws a single simulated delivery distance in kilometres.

    About 5% of calls return a long trip drawn uniformly from 15-20 km.
    All other calls return a sample from a normal distribution
    (mean 6 km, standard deviation 3 km) clamped to the 1-15 km range.
    """
    # The normal sample is always drawn first, before the long-trip roll.
    sampled_km = np.random.normal(loc=6, scale=3)

    is_long_trip = random.random() < 0.05
    if is_long_trip:
        return random.uniform(15, 20)

    # Clamp the regular trip: cap at 15 km, then floor at 1 km.
    capped_km = min(sampled_km, 15)
    return max(capped_km, 1)

In [19]:
def generate_dropoff_location(prev_lat, prev_lon, max_offset_deg=0.05, min_km=1, max_km=20):
    """
    Generates a random drop-off location near the previous stop.

    Candidates are drawn uniformly from a square of +/- `max_offset_deg`
    degrees around (prev_lat, prev_lon) and rejected until their haversine
    distance from the previous stop lies within [min_km, max_km].

    Note: with the default 0.05 degree offset a candidate is at most roughly
    7 km away around latitude 36.8 (about 5.6 km north-south and 4.5 km
    east-west), so the default 20 km upper bound never rejects anything.
    Pass a larger `max_offset_deg` to obtain longer legs.

    Args:
        prev_lat (float): Latitude of the previous stop, in decimal degrees.
        prev_lon (float): Longitude of the previous stop, in decimal degrees.
        max_offset_deg (float): Maximum absolute latitude/longitude offset, in degrees.
        min_km (float): Minimum accepted distance from the previous stop, in km.
        max_km (float): Maximum accepted distance from the previous stop, in km.

    Returns:
        tuple: (new_lat, new_lon, distance_km) where distance_km is rounded to 2 decimals.

    Raises:
        RuntimeError: If no candidate within [min_km, max_km] is found after
            many attempts (the bounds cannot be met with this offset).
    """
    # Bounded rejection sampling: with the defaults only a few percent of
    # candidates are rejected, so this limit is reached only when the
    # requested bounds are unreachable for the given offset.
    max_attempts = 10000
    for _ in range(max_attempts):
        new_lat = prev_lat + random.uniform(-max_offset_deg, max_offset_deg)  # Small lat variation
        new_lon = prev_lon + random.uniform(-max_offset_deg, max_offset_deg)  # Small lon variation
        distance = haversine_distance(prev_lat, prev_lon, new_lat, new_lon)

        if min_km <= distance <= max_km:
            return new_lat, new_lon, round(distance, 2)

    raise RuntimeError(
        f"Could not generate a drop-off between {min_km} and {max_km} km "
        f"with max_offset_deg={max_offset_deg} after {max_attempts} attempts"
    )

In [20]:
def get_traffic_condition(hour):
    """
    Determines traffic conditions based on the hour of the day.

    Args:
        hour (int | float): Hour on a 24-hour clock. Fractions are allowed,
            e.g. 15.5 means 15:30.

    Returns:
        tuple: (traffic condition label, average speed in km/h).
            - 9:00-12:00 and 13:00-15:30 -> ("Moderate Traffic", 30)
            - 15:30-17:00                -> ("Rush Hour", 20)

    Raises:
        ValueError: If `hour` is outside the two delivery windows. Previously
            such hours silently returned None, which only failed later when
            the caller tried to unpack the result.
    """
    if 9 <= hour < 12 or 13 <= hour < 15.5:
        return "Moderate Traffic", 30  # Speed: 30 km/h
    if 15.5 <= hour < 17:
        return "Rush Hour", 20  # Speed: 20 km/h
    raise ValueError(
        f"No traffic condition defined for hour {hour!r}; "
        "expected 9 <= hour < 12 or 13 <= hour < 17"
    )

In [21]:
def estimate_delivery_time(distance_km, traffic_condition):
    """
    Estimates how long a delivery leg takes, in minutes.

    The base speed depends on the leg length (25 km/h under 5 km,
    30 km/h under 10 km, 40 km/h otherwise) and is then reduced by
    5 km/h in "Rush Hour" or 2 km/h in "Moderate Traffic". Any other
    traffic label leaves the base speed unchanged.

    Returns the travel time in minutes, rounded to 2 decimals.
    """
    # Shorter legs are assumed to use slower roads than longer ones.
    base_speed = 25 if distance_km < 5 else (30 if distance_km < 10 else 40)

    # Speed penalty (km/h) for the current traffic label.
    if traffic_condition == "Rush Hour":
        slowdown = 5
    elif traffic_condition == "Moderate Traffic":
        slowdown = 2
    else:
        slowdown = 0

    effective_speed = base_speed - slowdown
    travel_hours = distance_km / effective_speed
    return round(travel_hours * 60, 2)


In [22]:
def generate_realistic_deliveries(num_days=20, min_orders=10, max_orders=50):
    """
    Generates a random but realistic dataset of deliveries over `num_days` workdays.

    Each workday, each of the two cars runs two batches: morning (9-12) and
    afternoon (13-17). A batch starts at HQ and every drop-off is generated
    relative to the previous stop, so `Distance_km` is the leg distance from
    the preceding stop (HQ for the first order of a batch).

    Delivery times are drawn independently per order inside the shift window,
    so they are not chronologically ordered along the route.

    Args:
        num_days (int): Number of workdays to simulate.
        min_orders (int): Minimum number of orders per batch (inclusive).
        max_orders (int): Maximum number of orders per batch (inclusive).

    Returns:
        pd.DataFrame: One row per order with the columns Batch_ID, Car_ID,
        Order_ID, Dropoff_Lat, Dropoff_Lon, Time_of_Day, Traffic_Condition,
        Distance_km, Actual_Delivery_Time_Min, Fuel_Consumed_L, Fuel_Cost_TND.
    """
    fuel_price_tnd_per_l = 2.2      # Fuel price estimated as 2.2 TND/L
    moderate_efficiency_km_per_l = 10
    congested_efficiency_km_per_l = 7

    deliveries = []
    car_ids = ["Car_1", "Car_2"]
    # (start_hour, end_hour) of each batch: Morning (9-12) and Afternoon (1-5)
    shifts = [(9, 12), (13, 17)]
    batch_id = 1

    for _ in range(num_days):
        for car_id in car_ids:
            # Two batches per car per day
            for start_hour, end_hour in shifts:
                num_orders = random.randint(min_orders, max_orders)  # Random orders per batch
                prev_lat, prev_lon = HQ_LAT, HQ_LON  # The start is at HQ

                for order_num in range(num_orders):
                    order_id = f"{batch_id}-{order_num+1}"
                    delivery_hour = random.randint(start_hour, end_hour - 1)  # Ensures time is within batch range
                    delivery_minute = random.randint(0, 59)
                    time_of_day = f"{delivery_hour}:{delivery_minute:02d}"

                    # Generating a nearby drop-off location and distance
                    dropoff_lat, dropoff_lon, distance_km = generate_dropoff_location(prev_lat, prev_lon)

                    # Traffic depends on the exact time: the rush-hour window starts
                    # at 15:30, so the minutes must be included. Passing only the
                    # integer hour labelled 15:30-15:59 as "Moderate Traffic".
                    fractional_hour = delivery_hour + delivery_minute / 60
                    traffic_condition, _avg_speed = get_traffic_condition(fractional_hour)

                    # Estimating realistic delivery time
                    delivery_time_min = estimate_delivery_time(distance_km, traffic_condition)

                    # Fuel consumption & cost (lower efficiency outside moderate traffic)
                    if traffic_condition == "Moderate Traffic":
                        fuel_efficiency = moderate_efficiency_km_per_l
                    else:
                        fuel_efficiency = congested_efficiency_km_per_l
                    fuel_consumed_L = round(distance_km / fuel_efficiency, 2)
                    fuel_cost_TND = round(fuel_consumed_L * fuel_price_tnd_per_l, 2)

                    deliveries.append({
                        "Batch_ID": batch_id,
                        "Car_ID": car_id,
                        "Order_ID": order_id,
                        "Dropoff_Lat": dropoff_lat,
                        "Dropoff_Lon": dropoff_lon,
                        "Time_of_Day": time_of_day,
                        "Traffic_Condition": traffic_condition,
                        "Distance_km": distance_km,
                        "Actual_Delivery_Time_Min": delivery_time_min,
                        "Fuel_Consumed_L": fuel_consumed_L,
                        "Fuel_Cost_TND": fuel_cost_TND
                    })

                    # Updating previous location for next order in the batch
                    prev_lat, prev_lon = dropoff_lat, dropoff_lon

                batch_id += 1  # To increment batch ID

    return pd.DataFrame(deliveries)


In [35]:
# Seed both random number generators used in this notebook so that
# "Restart Kernel -> Run All" regenerates exactly the same dataset.
random.seed(42)
np.random.seed(42)

# Simulate one month (20 workdays) of deliveries; the bare name on the last
# line displays a truncated preview of the resulting DataFrame.
monthly_deliveries_df = generate_realistic_deliveries(num_days=20)
monthly_deliveries_df

Unnamed: 0,Batch_ID,Car_ID,Order_ID,Dropoff_Lat,Dropoff_Lon,Time_of_Day,Traffic_Condition,Distance_km,Actual_Delivery_Time_Min,Fuel_Consumed_L,Fuel_Cost_TND
0,1,Car_1,1-1,36.782997,10.161383,10:28,Moderate Traffic,3.17,8.27,0.32,0.70
1,1,Car_1,1-2,36.783516,10.114312,9:39,Moderate Traffic,4.19,10.93,0.42,0.92
2,1,Car_1,1-3,36.741343,10.118382,9:26,Moderate Traffic,4.70,12.26,0.47,1.03
3,1,Car_1,1-4,36.776170,10.113040,10:52,Moderate Traffic,3.90,10.17,0.39,0.86
4,1,Car_1,1-5,36.753952,10.129740,9:07,Moderate Traffic,2.88,7.51,0.29,0.64
...,...,...,...,...,...,...,...,...,...,...,...
2451,80,Car_2,80-37,36.922247,10.220375,16:58,Rush Hour,2.84,8.52,0.41,0.90
2452,80,Car_2,80-38,36.895050,10.171239,15:27,Moderate Traffic,5.31,11.38,0.53,1.17
2453,80,Car_2,80-39,36.874738,10.136932,15:06,Moderate Traffic,3.80,9.91,0.38,0.84
2454,80,Car_2,80-40,36.866207,10.102965,14:22,Moderate Traffic,3.17,8.27,0.32,0.70


In [37]:
# Persist the simulated month of deliveries next to the notebook;
# index=False keeps the DataFrame's row index out of the file.
monthly_deliveries_df.to_csv(path_or_buf="monthly_deliveries.csv", index=False)