## Ship Telemetry Generator (Streaming)

In [0]:
%sh
pip install faker

In [0]:
import random
import time
import json
from datetime import datetime, timedelta
from faker import Faker
import math

In [0]:

# Shared Faker instance for the whole notebook; the fleet-initialisation cell
# calls fake.company() on it to build ship names.
fake = Faker()


In [0]:

# Get parameters
# Each tunable is exposed as a text widget; the values are converted below
# (int() for the numeric ones, a "true" comparison for the flag).
dbutils.widgets.text("catalog_name", "cargo_fleet_dev", "Catalog")
dbutils.widgets.text("num_ships", "100", "Number of Ships")
dbutils.widgets.text("interval_seconds", "30", "Generation Interval")
dbutils.widgets.text("simulate_delays", "true", "Simulate Transmission Delays")

# Strip stray whitespace so a padded value such as "true " or " my_catalog"
# is not silently misread (int() already tolerates surrounding whitespace).
catalog_name = dbutils.widgets.get("catalog_name").strip()
num_ships = int(dbutils.widgets.get("num_ships"))
interval_seconds = int(dbutils.widgets.get("interval_seconds"))
simulate_delays = dbutils.widgets.get("simulate_delays").strip().lower() == "true"

# Fail fast on values that would otherwise only misbehave later: an empty
# catalog yields a malformed volume path, zero ships produces empty batch
# files forever, and a negative interval makes time.sleep() raise after the
# first batch has already been written.
if not catalog_name:
    raise ValueError("catalog_name must not be empty")
if num_ships < 1:
    raise ValueError(f"num_ships must be at least 1, got {num_ships}")
if interval_seconds < 0:
    raise ValueError(f"interval_seconds must be non-negative, got {interval_seconds}")

# Landing location that the generation loop writes its JSON batch files to.
volume_path = f"/Volumes/{catalog_name}/bronze/telemetry_landing"

In [0]:
# Per-type sampling ranges used by generate_telemetry(): "speed_range" feeds
# the "speed_knots" field and "fuel_consumption" feeds "fuel_consumption_rate".
# Each value is a (low, high) pair passed to random.uniform().
SHIP_TYPES = {
    "CONTAINER": {
        "speed_range": (15, 25),
        "fuel_consumption": (0.5, 1.5),
    },
    "TANKER": {
        "speed_range": (12, 20),
        "fuel_consumption": (0.8, 2.0),
    },
    "BULK_CARRIER": {
        "speed_range": (13, 22),
        "fuel_consumption": (0.6, 1.8),
    },
    "RO_RO": {
        "speed_range": (18, 28),
        "fuel_consumption": (1.0, 2.5),
    },
}

# Simplified maritime routes. "start" and "end" are (latitude, longitude)
# pairs in degrees.
ROUTES = [
    {
        # Tokyo to LA
        "name": "Transpacific",
        "start": (35.4, 139.4),
        "end": (34.0, -118.2),
    },
    {
        # London to NY
        "name": "Transatlantic",
        "start": (51.5, -0.1),
        "end": (40.7, -74.0),
    },
    {
        # Singapore to Rotterdam
        "name": "Asia-Europe",
        "start": (1.3, 103.8),
        "end": (51.9, 4.5),
    },
    {
        # Hong Kong to Dubai
        "name": "Asia-Middle East",
        "start": (22.3, 114.2),
        "end": (25.3, 55.5),
    },
]

# Build the in-memory fleet. Every ship gets a sequential, zero-padded ID, a
# randomly chosen type and route, and starts at the beginning of its route
# with a random fuel level. These dicts are mutated by generate_telemetry().
SHIPS = []
type_names = list(SHIP_TYPES.keys())
for ship_number in range(1, num_ships + 1):
    chosen_type = random.choice(type_names)
    chosen_route = random.choice(ROUTES)
    company = fake.company()
    suffix = random.choice(["Star", "Pride", "Express", "Navigator"])

    new_ship = {
        "ship_id": f"SHIP{ship_number:04d}",
        "ship_name": f"{company} {suffix}",
        "ship_type": chosen_type,
        "route": chosen_route,
        # Progress along route (0.0 to 1.0)
        "current_position": 0.0,
        "fuel_level": random.uniform(40, 100),
    }
    SHIPS.append(new_ship)

print(f"Initialized fleet of {len(SHIPS)} ships")

In [0]:
def interpolate_position(start, end, progress):
    """Interpolate linearly between two (latitude, longitude) points.

    Longitude is interpolated along the shorter way around the globe, so a
    route whose endpoints lie on opposite sides of the antimeridian (for
    example 139.4 -> -118.2) crosses the date line instead of sweeping the
    long way round through longitude 0. Routes whose longitudes differ by at
    most 180 degrees are computed exactly as a plain linear interpolation.

    Args:
        start: (lat, lon) in degrees, the position at progress 0.0.
        end: (lat, lon) in degrees, the position at progress 1.0.
        progress: Fraction of the route covered, expected in 0.0 to 1.0.

    Returns:
        A (lat, lon) tuple in degrees, with lon wrapped into [-180, 180].
    """
    lat = start[0] + (end[0] - start[0]) * progress

    # Pick the shorter of the two longitudinal directions.
    d_lon = end[1] - start[1]
    if d_lon > 180:
        d_lon -= 360
    elif d_lon < -180:
        d_lon += 360

    # Crossing the antimeridian pushes the raw value past +/-180; wrap it back.
    lon = start[1] + d_lon * progress
    if lon > 180:
        lon -= 360
    elif lon < -180:
        lon += 360

    return lat, lon

def calculate_course(lat1, lon1, lat2, lon2):
    """Return the bearing from point 1 to point 2, in degrees within [0, 360).

    All four arguments are in degrees. The raw atan2 result lies in
    (-180, 180]; it is shifted so the returned value is never negative.
    """
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    delta_lambda = math.radians(lon2 - lon1)

    east = math.sin(delta_lambda) * math.cos(phi2)
    north = (
        math.cos(phi1) * math.sin(phi2)
        - math.sin(phi1) * math.cos(phi2) * math.cos(delta_lambda)
    )

    return (math.degrees(math.atan2(east, north)) + 360) % 360

def generate_telemetry(ship, timestamp):
    """Advance one ship a step along its route and return its telemetry record.

    Mutates ``ship`` in place: "current_position" moves forward by a fixed
    step (wrapping back to 0.0 with a refuel once the destination is reached)
    and "fuel_level" is reduced, never dropping below 5.0.

    Args:
        ship: Fleet entry dict with "ship_id", "ship_type", "route",
            "current_position" and "fuel_level".
        timestamp: datetime of the current batch; the record's timestamp is
            this value minus any simulated transmission delay.

    Returns:
        A dict of JSON-serialisable telemetry fields.
    """
    profile = SHIP_TYPES[ship["ship_type"]]
    origin = ship["route"]["start"]
    destination = ship["route"]["end"]

    # Speed is sampled fresh for every reading; route progress itself moves
    # by a fixed increment regardless of the sampled speed.
    speed = random.uniform(*profile["speed_range"])
    progress = min(ship["current_position"] + 0.001, 1.0)
    if progress >= 1.0:
        # Destination reached: restart the route and refuel.
        progress = 0.0
        ship["fuel_level"] = random.uniform(80, 100)
    ship["current_position"] = progress

    lat, lon = interpolate_position(origin, destination, progress)

    # Course is the bearing toward a point slightly further along the route.
    lookahead = min(progress + 0.01, 1.0)
    ahead_lat, ahead_lon = interpolate_position(origin, destination, lookahead)
    course = calculate_course(lat, lon, ahead_lat, ahead_lon)

    # Burn fuel, keeping the level at or above 5.0.
    burn_rate = random.uniform(*profile["fuel_consumption"])
    ship["fuel_level"] = max(ship["fuel_level"] - burn_rate * 0.1, 5.0)

    # Weather; storms draw wave height from a wider range.
    weather = random.choice(["CLEAR", "CLOUDY", "RAIN", "FOG", "STORM"])
    wave_bounds = (0.5, 5.0) if weather == "STORM" else (0.1, 2.0)
    wave_height = random.uniform(*wave_bounds)

    # Late-arriving data: when enabled, 15% of readings are back-dated by
    # 5 to 120 minutes.
    delay_minutes = 0
    if simulate_delays:
        if random.random() < 0.15:
            delay_minutes = random.randint(5, 120)
    reported_at = timestamp - timedelta(minutes=delay_minutes)

    return {
        "ship_id": ship["ship_id"],
        "timestamp": reported_at.isoformat(),
        "latitude": round(lat, 6),
        "longitude": round(lon, 6),
        "speed_knots": round(speed, 2),
        "course_degrees": round(course, 1),
        "fuel_level_percent": round(ship["fuel_level"], 2),
        "fuel_consumption_rate": round(burn_rate, 3),
        "engine_status": random.choice(["NORMAL", "NORMAL", "NORMAL", "WARNING"]),
        "weather_condition": weather,
        "wave_height_meters": round(wave_height, 2),
        "transmission_delay_minutes": delay_minutes,
    }

# Startup banner summarising the run configuration.
print("🚢 Starting ship telemetry generator")
print(f"   - Catalog: {catalog_name}")
print(f"   - Ships: {num_ships}")
print(f"   - Interval: {interval_seconds}s")
print(f"   - Simulate delays: {simulate_delays}")
print(f"   - Output: {volume_path}")

In [0]:
# Continuous generation loop: each pass produces one reading per ship, writes
# the batch to the landing volume as a single file, then sleeps. It runs until
# the cell is interrupted.
batch_number = 1

try:
    while True:
        # NOTE(review): datetime.now() is naive local time, so the emitted
        # ISO timestamps carry no UTC offset; confirm downstream expects that.
        timestamp = datetime.now()

        # Generate telemetry for all ships (this also advances each ship's state)
        telemetry_batch = [generate_telemetry(ship, timestamp) for ship in SHIPS]

        # Write to volume as newline-delimited JSON, one record per line.
        # The batch number keeps names unique within a run even when two
        # batches share the same second.
        filename = f"{volume_path}/telemetry_{timestamp.strftime('%Y%m%d_%H%M%S')}_{batch_number}.json"
        json_content = "\n".join(json.dumps(record) for record in telemetry_batch)

        dbutils.fs.put(filename, json_content, overwrite=True)

        delayed_count = sum(1 for t in telemetry_batch if t["transmission_delay_minutes"] > 0)
        print(f"✓ Batch {batch_number}: {len(telemetry_batch)} records ({delayed_count} delayed) → {filename}")

        batch_number += 1
        time.sleep(interval_seconds)

except KeyboardInterrupt:
    # batch_number is always one ahead of the last completed batch.
    print("\n🛑 Generator stopped")
    print(f"Total batches generated: {batch_number - 1}")