In [0]:
%pip install faker

In [0]:
import random
import time
import json
from datetime import datetime, timedelta
from faker import Faker

In [0]:

fake = Faker()

# Notebook parameters, exposed as Databricks widgets so a job or a user can
# override them without editing the code. Widget values always arrive as strings.
dbutils.widgets.text("catalog_name", "cargo_fleet_dev", "Catalog")
dbutils.widgets.text("num_containers", "500", "Number of Containers")
dbutils.widgets.text("interval_seconds", "60", "Generation Interval")
dbutils.widgets.text("change_probability", "0.2", "Status Change Probability")

catalog_name = dbutils.widgets.get("catalog_name").strip()
num_containers = int(dbutils.widgets.get("num_containers"))
interval_seconds = int(dbutils.widgets.get("interval_seconds"))
change_probability = float(dbutils.widgets.get("change_probability"))

# Fail fast on nonsensical settings instead of letting them surface later
# (a negative interval would only blow up in time.sleep after the first batch
# had been written; an out-of-range probability would silently skew the data).
if not catalog_name:
    raise ValueError("catalog_name must not be empty")
if num_containers < 0:
    raise ValueError(f"num_containers must be >= 0, got {num_containers}")
if interval_seconds < 0:
    raise ValueError(f"interval_seconds must be >= 0, got {interval_seconds}")
if not 0.0 <= change_probability <= 1.0:
    raise ValueError(
        f"change_probability must be between 0 and 1, got {change_probability}"
    )

# Landing location (Unity Catalog volume path) where each batch file is written.
volume_path = f"/Volumes/{catalog_name}/bronze/status_landing"

In [0]:
# Container lifecycle expressed as a state machine: every status maps to the
# statuses it may move to next. A status that lists itself may linger.
STATUS_FLOW = dict(
    LOADED=["IN_TRANSIT"],
    IN_TRANSIT=["PORT_ARRIVAL", "IN_TRANSIT"],
    PORT_ARRIVAL=["CUSTOMS_CLEARANCE", "PORT_ARRIVAL"],
    CUSTOMS_CLEARANCE=["DELIVERED", "CUSTOMS_CLEARANCE"],
    DELIVERED=["EMPTY_RETURN"],
    EMPTY_RETURN=["LOADED"],
)

# Kinds of checkpoint event a status record can be tagged with.
CHECKPOINT_TYPES = [
    "GPS_UPDATE",
    "PORT_SCAN",
    "CUSTOMS_SCAN",
    "DELIVERY_CONFIRMATION",
    "TEMPERATURE_CHECK",
]

# Environmental envelope per cargo type, built from rows of
# (cargo type, lowest temperature, highest temperature, humidity ceiling).
CARGO_TYPES = {
    cargo: {"temp_range": (low, high), "humidity_max": humidity_cap}
    for cargo, low, high, humidity_cap in (
        ("ELECTRONICS", 15, 25, 60),
        ("PHARMACEUTICALS", 2, 8, 50),
        ("FOODSTUFFS", -20, 5, 80),
        ("CHEMICALS", 10, 30, 70),
        ("MACHINERY", -10, 40, 90),
        ("TEXTILES", 15, 30, 65),
    )
}

# Build the simulated fleet. Every container starts in the LOADED state with a
# randomly drawn cargo type and origin port, plus fake shipper/consignee names.
cargo_names = list(CARGO_TYPES.keys())
origin_ports = [
    "Shanghai Port", "Singapore Port", "Rotterdam Port",
    "Los Angeles Port", "Hamburg Port", "Dubai Port"
]

CONTAINERS = []
for i in range(num_containers):
    # Draw order (cargo first, then port) is kept so a seeded run is unchanged.
    assigned_cargo = random.choice(cargo_names)
    assigned_port = random.choice(origin_ports)

    new_container = {
        "container_id": f"CONT{str(i+1).zfill(6)}",
        "cargo_type": assigned_cargo,
        "current_status": "LOADED",
        "location": assigned_port,
        "shipper": fake.company(),
        "consignee": fake.company()
    }
    CONTAINERS.append(new_container)

print(f"Initialized {len(CONTAINERS)} containers")

In [0]:

def generate_status_change(container):
    """Generate one status/checkpoint record for a container, advancing its state.

    With probability ``change_probability`` (module-level setting) a next status
    is drawn from ``STATUS_FLOW`` for the container's current status. States that
    list themselves in ``STATUS_FLOW`` may draw their own status, in which case
    nothing changes. Otherwise the status is left untouched.

    Args:
        container: Mutable dict with ``container_id``, ``cargo_type``,
            ``current_status`` and ``location`` keys. ``current_status`` is
            updated in place on a transition, and ``location`` is re-drawn
            when the new status is ``PORT_ARRIVAL``.

    Returns:
        dict: JSON-serialisable record with the container's (possibly updated)
        status and location, a random checkpoint type, the current local time
        in ISO format, simulated temperature/humidity readings, a seal flag,
        an optional inspector name and an optional note.

    Raises:
        KeyError: If ``container`` lacks a required key or its ``cargo_type``
            is not present in ``CARGO_TYPES``.
    """
    current_status = container["current_status"]

    # random.random() is uniform on [0.0, 1.0), so `< p` fires with probability
    # exactly p: never when p == 0.0 and always when p == 1.0. (The previous
    # `> p` test could still trigger a transition at p == 0.0.)
    if random.random() < change_probability:
        # Unknown statuses fall back to staying where they are.
        possible_next = STATUS_FLOW.get(current_status, [current_status])
        next_status = random.choice(possible_next)
    else:
        next_status = current_status  # No change

    # Update container status
    if next_status != current_status:
        container["current_status"] = next_status

        # Arriving at a port moves the container to a freshly drawn port.
        # NOTE(review): the initial fleet assignment also includes "Dubai Port";
        # confirm whether its omission from this list is intentional.
        if next_status == "PORT_ARRIVAL":
            container["location"] = random.choice([
                "Shanghai Port", "Singapore Port", "Rotterdam Port",
                "Los Angeles Port", "Hamburg Port"
            ])

    # Simulated sensor readings: temperature may stray up to 2 degrees outside
    # the cargo's allowed range, humidity up to 10 points above its maximum
    # (capped at 100), so some records fall outside the configured envelope.
    cargo_type = container["cargo_type"]
    temp_range = CARGO_TYPES[cargo_type]["temp_range"]
    humidity_max = CARGO_TYPES[cargo_type]["humidity_max"]

    temperature = random.uniform(temp_range[0] - 2, temp_range[1] + 2)
    humidity = random.uniform(30, min(humidity_max + 10, 100))

    # Seal integrity check
    seal_intact = random.random() > 0.02  # 2% chance of broken seal

    # Build status change record
    status_change = {
        "container_id": container["container_id"],
        "status": container["current_status"],
        "location": container["location"],
        "checkpoint_type": random.choice(CHECKPOINT_TYPES),
        "checkpoint_time": datetime.now().isoformat(),
        "temperature_celsius": round(temperature, 1),
        "humidity_percent": round(humidity, 1),
        "seal_intact": seal_intact,
        # Roughly half of the records carry an inspector name.
        "inspected_by": fake.name() if random.random() > 0.5 else None,
        "notes": None if seal_intact else "Seal integrity compromised - requires inspection"
    }

    return status_change

# Start-up banner summarising the effective configuration.
print("📦 Starting container status generator")
print(f"   - Catalog: {catalog_name}")
print(f"   - Containers: {num_containers}")
print(f"   - Interval: {interval_seconds}s")
print(f"   - Change probability: {change_probability}")
print(f"   - Output: {volume_path}")

batch_number = 1

# Generator loop: runs until the cell is interrupted. Each iteration emits one
# record per container and writes the batch as a newline-delimited JSON file.
try:
    while True:
        timestamp = datetime.now()

        # Generate status changes for all containers
        status_changes = []
        changes_count = 0

        for container in CONTAINERS:
            # Capture the status first: generate_status_change mutates the container.
            old_status = container["current_status"]
            status_change = generate_status_change(container)
            status_changes.append(status_change)

            if status_change["status"] != old_status:
                changes_count += 1

        # Write to volume as JSON file (one JSON object per line). The batch
        # number keeps names unique even when two batches share a second.
        filename = f"{volume_path}/status_{timestamp.strftime('%Y%m%d_%H%M%S')}_{batch_number}.json"
        json_content = "\n".join(json.dumps(record) for record in status_changes)

        dbutils.fs.put(filename, json_content, overwrite=True)

        print(f"✓ Batch {batch_number}: {len(status_changes)} records ({changes_count} status changes) → {filename}")

        batch_number += 1
        time.sleep(interval_seconds)

except KeyboardInterrupt:
    # batch_number points at the next batch to write, hence the "- 1".
    print("\n🛑 Generator stopped")
    print(f"Total batches generated: {batch_number - 1}")