# In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
from geopy.distance import geodesic

# Load the cleaned table of Philippine cities and municipalities.
locations_df = pd.read_csv("philippine_cities_and_municipalities.csv")

# Collapse the detailed class codes into two labels: each city code
# (CC, ICC, HUC) becomes "City" and "Mun" becomes "Municipality".
# Any other value in the column is left untouched by replace().
class_labels = {code: "City" for code in ("CC", "ICC", "HUC")}
class_labels["Mun"] = "Municipality"
locations_df["Class"] = locations_df["Class"].replace(class_labels)

# Shipments may only originate from a city; deliveries may go to any row.
is_city = locations_df["Class"] == "City"
city_origins = locations_df.loc[is_city].copy()
all_locations = locations_df.copy()

# Helper functions
def get_random_origin():
    """Return one randomly sampled row of ``city_origins`` as a Series."""
    picked = city_origins.sample(n=1)
    return picked.iloc[0]

def get_random_delivery():
    """Return one randomly sampled row of ``all_locations`` as a Series."""
    picked = all_locations.sample(n=1)
    return picked.iloc[0]

def generate_route(origin, delivery):
    """Build a list of coordinate pairs leading from ``origin`` to ``delivery``.

    The first entry is ``origin`` and the last entry is ``delivery``, both
    copied as given (not rounded). Between them sit 2 to 5 intermediate
    waypoints. Each one is the previous position shifted by an independent
    uniform offset in [-0.02, 0.02] on each coordinate, rounded to six
    decimals when stored. The walk therefore wanders around the origin; it
    does not steer toward the destination.

    Args:
        origin: Indexable pair; elements 0 and 1 are the start coordinates.
        delivery: Indexable pair; elements 0 and 1 are the end coordinates.

    Returns:
        A list of 2-tuples with between 4 and 7 entries.
    """
    waypoint_count = random.randint(2, 5)
    first, second = origin[0], origin[1]
    waypoints = [(first, second)]
    for _ in range(waypoint_count):
        # Accumulate the unrounded position; only the stored copy is rounded,
        # so rounding error never feeds into later steps.
        first += random.uniform(-0.02, 0.02)
        second += random.uniform(-0.02, 0.02)
        waypoints.append((round(first, 6), round(second, 6)))
    waypoints.append((delivery[0], delivery[1]))
    return waypoints

def get_delivery_days(distance):
    """Return the number of days a shipment takes for a given ``distance``.

    Tiers (upper bounds are exclusive):
        below 5    -> 1 day
        below 20   -> 2 days
        below 100  -> random 3-4 days
        below 500  -> random 5-7 days
        otherwise  -> random 8-10 days

    The two shortest tiers are fixed values and draw nothing from the
    random generator.
    """
    # (exclusive upper bound, fewest days, most days)
    tiers = (
        (5, 1, 1),
        (20, 2, 2),
        (100, 3, 4),
        (500, 5, 7),
    )
    for upper_bound, fewest, most in tiers:
        if distance < upper_bound:
            if fewest == most:
                return fewest
            return random.randint(fewest, most)
    return random.randint(8, 10)

# Record generation
#
# Builds `num_records` synthetic package-tracking rows. 15% of the package
# numbers are chosen up front to be forced to "Delivered"; every other
# package is placed at a random waypoint of a generated route and gets a
# status derived from its remaining distance and how late it is.
records = []
num_records = 100
delivered_indices = random.sample(range(1, num_records + 1), int(num_records * 0.15))
# Set copy of the list above so the per-record membership test does not
# rescan the list on every iteration.
delivered_lookup = set(delivered_indices)

# Single reference instant for the whole run. Reading the clock once keeps
# order dates and report timestamps consistent with each other even if the
# loop happens to run across midnight.
now = datetime.now()

for i in range(1, num_records + 1):
    is_delivered = i in delivered_lookup
    origin = get_random_origin()
    delivery = get_random_delivery()

    origin_str = f"{origin['City or municipality']}, {origin['Province']}"
    origin_coords = (origin["Latitude"], origin["Longitude"])

    delivery_str = f"{delivery['City or municipality']}, {delivery['Province']}"
    delivery_coords = (delivery["Latitude"], delivery["Longitude"])

    # Geodesic origin-to-destination distance in km drives the promised
    # transit time.
    distance = geodesic(origin_coords, delivery_coords).km
    delivery_days = get_delivery_days(distance)

    order_date = now - timedelta(days=random.randint(3, 6))
    delivery_date = order_date + timedelta(days=delivery_days)
    # The report timestamp falls somewhere within the last 24 hours.
    timestamp = now - timedelta(minutes=random.randint(0, 1440))
    # Positive when the report is dated after the promised delivery date.
    delay_days = (timestamp.date() - delivery_date.date()).days

    if is_delivered:
        current_coords = delivery_coords
        current_str = delivery_str
        status = "Delivered"
    else:
        route = generate_route(origin_coords, delivery_coords)
        last_step = len(route) - 1
        step = random.randint(0, last_step)
        current_coords = route[step]
        # Only the final waypoint is the destination. Every earlier waypoint
        # is the origin itself or a small random offset from it, so those
        # are labelled with the origin rather than the delivery location.
        current_str = delivery_str if step == last_step else origin_str
        coord_dist = geodesic(current_coords, delivery_coords).km
        if delay_days > 1 and coord_dist >= 5:
            status = "Delayed"
        elif coord_dist < 2:
            status = random.choice(["Delivered", "Awaiting Pickup"])
        else:
            status = random.choice(["In Transit", "Awaiting Pickup"])

    perishable = random.choice(["Yes", "No"])
    if perishable == "Yes":
        # Perishables report 2-12 °C; anything above 7 °C raises an alert.
        temp = round(np.random.uniform(2, 12), 2)
        temp_issue = "Temp Alert >7°C" if temp > 7 else "Normal"
    else:
        # Non-perishables report 10-30 °C and are never temperature-checked.
        temp = round(np.random.uniform(10, 30), 2)
        temp_issue = "N/A"

    records.append({
        "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
        "package_id": f"PKG{str(i).zfill(3)}",
        "order_date": order_date.strftime("%Y-%m-%d"),
        "delivery_date": delivery_date.strftime("%Y-%m-%d"),
        "origin": origin_str,
        "origin_coordinates": f"{origin_coords[0]},{origin_coords[1]}",
        "current_location": current_str,
        "current_coordinates": f"{current_coords[0]},{current_coords[1]}",
        "delivery_location": delivery_str,
        "delivery_coordinates": f"{delivery_coords[0]},{delivery_coords[1]}",
        "perishable": perishable,
        "temperature_celsius": temp,
        "temperature_issue": temp_issue,
        "status": status,
    })

# Persist the generated records twice: as CSV (without the index column)
# and as a JSON array with one object per record.
df = pd.DataFrame(records)
df.to_csv(path_or_buf="iot_data.csv", index=False)
df.to_json(path_or_buf="iot_data.json", orient="records")