In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import random
from datetime import datetime, timedelta

In [3]:
# Sampling window: a square centred on San Francisco, +/- city_radius degrees
# along each axis.
city_center = Point(-122.4194, 37.7749)  # (longitude, latitude)
city_radius = 0.1  # half-width in degrees; 0.1 deg of latitude is roughly 11 km

# Bounds of the window in each direction
lon_min, lon_max = city_center.x - city_radius, city_center.x + city_radius
lat_min, lat_max = city_center.y - city_radius, city_center.y + city_radius

# Draw 1000 uniformly distributed delivery coordinates and wrap them as points
longs = np.random.uniform(lon_min, lon_max, 1000)
lats = np.random.uniform(lat_min, lat_max, 1000)
locations = gpd.GeoSeries([Point(lon, lat) for lon, lat in zip(longs, lats)])


In [6]:
# Inspect the container type of the generated points (a GeoSeries, per the output below)
type(locations)

geopandas.geoseries.GeoSeries

Define a function to generate delivery data

In [None]:

def generate_delivery_data(num_deliveries=1000, start_date='2024-01-01', num_days=30, seed=None):
    """Generate a synthetic table of deliveries around San Francisco.

    Each delivery gets a random location, start time, duration, weather
    condition and traffic condition. All values are independent draws; no
    relationship between the columns is modelled.

    Parameters
    ----------
    num_deliveries : int, default 1000
        Number of rows to generate.
    start_date : str, default '2024-01-01'
        First calendar day of the period, formatted as ``YYYY-MM-DD``.
    num_days : int, default 30
        Length of the period in days; deliveries fall on day offsets
        ``0 .. num_days - 1`` from ``start_date``.
    seed : int, optional
        When given, all random draws come from private generators seeded with
        this value, so the same arguments always yield the same data and the
        global ``random`` / ``np.random`` state is left untouched. When
        ``None`` (default), the global generators are used, exactly as before.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``delivery_id``, ``datetime``, ``duration_minutes``,
        ``weather``, ``traffic``, ``latitude``, ``longitude`` plus a point
        ``geometry`` column in EPSG:4326.

    Raises
    ------
    ValueError
        If ``start_date`` does not match ``%Y-%m-%d``.
    """
    # Two random sources are needed because the timestamps use the stdlib
    # `random` API while everything else uses NumPy. With no seed we fall back
    # to the module-level generators so existing callers (including ones that
    # seed those globally) see unchanged behaviour.
    np_rng = np.random if seed is None else np.random.RandomState(seed)
    py_rng = random if seed is None else random.Random(seed)

    # Sampling window: a square of +/- 0.1 degrees around the city centre
    # (not a circle). 0.1 deg is roughly 11 km north-south; the east-west
    # extent is shorter at this latitude.
    center_lon, center_lat = -122.4194, 37.7749  # San Francisco
    half_width_deg = 0.1

    # Generate random delivery locations
    longs = np_rng.uniform(center_lon - half_width_deg, center_lon + half_width_deg, num_deliveries)
    lats = np_rng.uniform(center_lat - half_width_deg, center_lat + half_width_deg, num_deliveries)

    # Generate delivery times: a random day in the period, at a random time
    # between 08:00 and 20:59 (randint bounds are inclusive).
    start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
    datetimes = [start_datetime + timedelta(
        days=py_rng.randint(0, num_days - 1),
        hours=py_rng.randint(8, 20),
        minutes=py_rng.randint(0, 59)
    ) for _ in range(num_deliveries)]

    # Generate delivery durations (in minutes): normal(mean=30, sd=10),
    # truncated to whole minutes, then clipped to the range [5, 120].
    durations = np_rng.normal(30, 10, num_deliveries).astype(int)
    durations = np.clip(durations, 5, 120)

    # Categorical conditions, drawn with fixed probabilities
    weather_conditions = np_rng.choice(['Clear', 'Cloudy', 'Rainy'], num_deliveries, p=[0.6, 0.3, 0.1])
    traffic_conditions = np_rng.choice(['Light', 'Moderate', 'Heavy'], num_deliveries, p=[0.5, 0.3, 0.2])

    # Create DataFrame
    df = pd.DataFrame({
        'delivery_id': range(1, num_deliveries + 1),
        'datetime': datetimes,
        'duration_minutes': durations,
        'weather': weather_conditions,
        'traffic': traffic_conditions,
        'latitude': lats,
        'longitude': longs
    })

    # Convert to GeoDataFrame; points_from_xy builds the point geometries in
    # one vectorised call instead of a Python-level loop.
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(longs, lats), crs='EPSG:4326')

    return gdf



In [None]:
# Seed both global generators that generate_delivery_data draws from (stdlib
# `random` and `np.random`) so that re-running the notebook regenerates the
# same dataset instead of overwriting the CSV with different random data.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Generate the data
delivery_data = generate_delivery_data()

# Save to CSV (you can also save to GeoJSON or other formats if needed)
delivery_data.to_csv('synthetic_delivery_data.csv', index=False)

print("Synthetic delivery data generated and saved to 'synthetic_delivery_data.csv'")
print(delivery_data.head())