In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Load the location table and turn each row's "LatLng" text into numeric
# latitude/longitude. Rows whose LatLng cannot be parsed are skipped.
locations_df = pd.read_csv("locations_latlng.csv")

locations = []
for _, row in locations_df.iterrows():
    try:
        # Some rows use ";" instead of "," between latitude and longitude.
        latlng_clean = row['LatLng'].replace(";", ",").strip()
        lat, lon = map(float, latlng_clean.split(","))
    except (AttributeError, ValueError):
        # AttributeError: LatLng is not a string (e.g. an empty cell that
        #   pandas read as NaN).
        # ValueError: not exactly two parts, or a part is not numeric.
        # Anything else (a missing column, Ctrl-C, ...) is a real problem
        # and is deliberately allowed to propagate instead of being hidden.
        continue  # skip invalid rows

    locations.append({
        "Province": row['Province'],
        "City": row['City'],
        "LatLng": latlng_clean,
        "Latitude": lat,
        "Longitude": lon
    })

# Number of synthetic records to generate. The first 15% are forced to have
# status "Delivered", with current and delivery positions a tiny offset apart.
num_records = 100
forced_delivered_count = int(num_records * 0.15)

if num_records > 0 and not locations:
    # Fail early with a clear message rather than an IndexError raised by
    # random.choice() inside the loop.
    raise ValueError("No valid locations available to generate records")

raw_data = []

# One reference time for the whole run, so every order date and timestamp
# is measured from the same instant.
now = datetime.now()

for i in range(1, num_records + 1):
    is_forced_delivered = i <= forced_delivered_count

    if is_forced_delivered:
        # Force locations to be close to simulate a real delivery:
        # both points are the same city, jittered by at most 0.002 per axis.
        base = random.choice(locations)
        current_location = delivery_location = f"{base['City']}, {base['Province']}"
        base_lat = base['Latitude']
        base_lon = base['Longitude']

        current_lat = round(base_lat + np.random.uniform(-0.002, 0.002), 6)
        current_lon = round(base_lon + np.random.uniform(-0.002, 0.002), 6)
        delivery_lat = round(base_lat + np.random.uniform(-0.002, 0.002), 6)
        delivery_lon = round(base_lon + np.random.uniform(-0.002, 0.002), 6)

        current_coordinates = f"{current_lat},{current_lon}"
        delivery_coordinates = f"{delivery_lat},{delivery_lon}"
    else:
        # Random (independent) locations for general records
        current = random.choice(locations)
        delivery = random.choice(locations)

        current_location = f"{current['City']}, {current['Province']}"
        delivery_location = f"{delivery['City']}, {delivery['Province']}"
        # LatLng was already cleaned (";" -> ",", stripped) when loaded.
        current_coordinates = current['LatLng']
        delivery_coordinates = delivery['LatLng']

        current_lat, current_lon = current['Latitude'], current['Longitude']
        delivery_lat, delivery_lon = delivery['Latitude'], delivery['Longitude']

    # Rough separation: sum of absolute latitude and longitude differences
    # (a coordinate-space measure, not a true geographic distance). The
    # numeric values are used directly; re-parsing the coordinate strings
    # would yield the same floats.
    distance = abs(current_lat - delivery_lat) + abs(current_lon - delivery_lon)

    # Order placed 3-5 days ago (randint's upper bound is exclusive)
    order_date = now - timedelta(days=np.random.randint(3, 6))

    # Delivery duration grows with distance
    if distance < 0.1:
        delivery_days = 1
    elif distance < 0.5:
        delivery_days = 2
    elif distance < 1.5:
        delivery_days = np.random.randint(3, 6)   # 3-5 days
    else:
        delivery_days = np.random.randint(6, 11)  # 6-10 days

    delivery_date = order_date + timedelta(days=delivery_days)

    # Last known status time: somewhere within the past 24 hours
    timestamp = now - timedelta(minutes=np.random.randint(0, 1440))
    # Positive when the status time is past the planned delivery date
    delay_by_days = (timestamp.date() - delivery_date.date()).days

    # Perishable and temperature logic: perishables are "Exceeded" above 7.0;
    # the temperature check does not apply to non-perishables.
    perishable = np.random.choice(["Yes", "No"])
    if perishable == "Yes":
        temperature = round(np.random.uniform(2.0, 12.0), 2)
        temperature_issue = "Exceeded" if temperature > 7.0 else "Normal"
    else:
        temperature = round(np.random.uniform(10.0, 30.0), 2)
        temperature_issue = "N/A"

    # Final status logic
    if is_forced_delivered:
        status = "Delivered"
    elif delay_by_days > 1 and distance >= 0.01:
        # More than a day late and not yet at the destination
        status = "Delayed"
    elif distance < 0.01:
        # Effectively at the destination
        status = np.random.choice(["Delivered", "Awaiting Pickup"])
    else:
        status = np.random.choice(["In Transit", "Awaiting Pickup"])

    # Build record (dates stay as datetime objects at this stage)
    raw_data.append({
        "timestamp": timestamp,
        "order_date": order_date,
        "delivery_date": delivery_date,
        "current_location": current_location,
        "delivery_location": delivery_location,
        "current_coordinates": current_coordinates,
        "delivery_coordinates": delivery_coordinates,
        "perishable": perishable,
        "temperature_celsius": temperature,
        "temperature_issue": temperature_issue,
        "status": status
    })

# Chronological copy of the records (oldest status time first); the
# timestamps are still datetime objects here, so they compare correctly.
sorted_data = list(raw_data)
sorted_data.sort(key=lambda rec: rec["timestamp"])

# Number the packages in chronological order (PKG001, PKG002, ...) and
# replace the datetime fields with their display strings.
for idx, record in enumerate(sorted_data, start=1):
    record.update(
        package_id=f"PKG{idx:03d}",
        timestamp=record["timestamp"].strftime("%Y-%m-%d %H:%M:%S"),
        order_date=record["order_date"].strftime("%Y-%m-%d"),
        delivery_date=record["delivery_date"].strftime("%Y-%m-%d"),
    )

# Column layout of the exported table: identification and dates first,
# then locations, then cargo condition, then status.
columns_order = [
    "timestamp", "package_id",
    "order_date", "delivery_date",
    "current_location", "delivery_location",
    "current_coordinates", "delivery_coordinates",
    "perishable", "temperature_celsius", "temperature_issue",
    "status",
]

# Build the output table with its columns in the final order, then write it
# out twice: as CSV (without the index column) and as a JSON array holding
# one object per record.
df = pd.DataFrame(sorted_data).loc[:, columns_order]
df.to_csv(path_or_buf="iot_data.csv", index=False)
df.to_json(path_or_buf="iot_data.json", orient="records")

# Show the first rows as the cell's output
df.head()

Unnamed: 0,timestamp,package_id,order_date,delivery_date,current_location,delivery_location,current_coordinates,delivery_coordinates,perishable,temperature_celsius,temperature_issue,status
0,2025-05-17 14:25:47,PKG001,2025-05-15,2025-05-23,"Alfonso, Cavite","Malitbog, Southern Leyte","14.14083, 120.85389","10.150, 125.000",No,15.06,,In Transit
1,2025-05-17 14:43:47,PKG002,2025-05-14,2025-05-20,"Sibuco, Zamboanga del Norte","Hinundayan, Southern Leyte","7.283, 122.067","10.350, 125.250",No,14.49,,Awaiting Pickup
2,2025-05-17 14:59:47,PKG003,2025-05-15,2025-05-16,"Cabucgayan, Biliran","Cabucgayan, Biliran","11.483413,124.568992","11.484093,124.5666",Yes,5.03,Normal,Delivered
3,2025-05-17 15:32:47,PKG004,2025-05-13,2025-05-22,"Dipaculao, Aurora","Kiamba, Sarangani","15.983, 121.633","5.983, 124.617",No,25.33,,Awaiting Pickup
4,2025-05-17 15:35:47,PKG005,2025-05-15,2025-05-16,"Upi, Maguindanao","Upi, Maguindanao","7.030856,124.165478","7.030856,124.163023",No,24.58,,Delivered
