In [0]:
# 00_data_generator: Smart Manufacturing – Production Line IoT stream generator

import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone, timedelta

# -----------------------------
# Generic stream generator
# -----------------------------
def _build_batch(column_name, low, high, device_count, latency_max_s, now):
    """Return one simulated reading (a dict) per device for a single batch."""
    return [
        {
            "device_id": device_id,
            # Shift each event back by a random lag so it looks late-arriving.
            "event_time": now - timedelta(seconds=random.uniform(0, latency_max_s)),
            column_name: round(random.uniform(low, high), 4),
        }
        for device_id in range(1, device_count + 1)
    ]


def generate_stream(
    path: str,
    column_name: str,
    low: float,
    high: float,
    device_count: int,
    batch_interval_s: int,
    latency_max_s: int,
    max_batches: "int | None" = None,
):
    """
    Generic IoT stream generator.

    Emits one batch of `device_count` rows every `batch_interval_s` seconds
    and appends it, in Delta format, to the location `path`. Each row has:
        device_id:     1..device_count
        event_time:    UTC now - random(0..latency_max_s) seconds
        <column_name>: random value in [low, high], rounded to 4 decimals

    Relies on a `spark` session object being available in the global scope;
    it is not created here.

    Args:
        path: Target location passed to the Delta writer's `save()`.
        column_name: Name of the measurement column.
        low, high: Bounds of the uniformly drawn measurement value.
        device_count: Number of devices (rows) per batch; must be >= 1.
        batch_interval_s: Seconds between batch starts; must be >= 0.
        latency_max_s: Maximum simulated event lateness in seconds; must be >= 0.
        max_batches: Stop after this many batches. `None` (the default) keeps
            the original behaviour of running until the thread is killed.

    Raises:
        ValueError: If `device_count`, `batch_interval_s` or `latency_max_s`
            is out of range. Raised before anything is written.
    """
    # Fail fast with a clear message instead of an obscure Spark schema
    # inference error (empty batch) or a time.sleep() error after a write.
    if device_count < 1:
        raise ValueError(f"device_count must be >= 1, got {device_count}")
    if batch_interval_s < 0:
        raise ValueError(f"batch_interval_s must be >= 0, got {batch_interval_s}")
    if latency_max_s < 0:
        raise ValueError(f"latency_max_s must be >= 0, got {latency_max_s}")

    emitted = 0
    # Schedule against a monotonic deadline so the time spent writing does
    # not accumulate as drift on top of batch_interval_s.
    next_deadline = time.monotonic()
    while max_batches is None or emitted < max_batches:
        rows = _build_batch(
            column_name, low, high, device_count, latency_max_s,
            datetime.now(timezone.utc),
        )
        spark.createDataFrame(rows).write.format("delta").mode("append").save(path)
        emitted += 1

        if max_batches is not None and emitted >= max_batches:
            break  # no point sleeping after the final batch

        next_deadline += batch_interval_s
        delay = next_deadline - time.monotonic()
        if delay > 0:
            time.sleep(delay)
        else:
            # The write overran the interval: restart the schedule from now
            # rather than emitting a burst of batches to catch up.
            next_deadline = time.monotonic()

In [0]:
# -----------------------------
# Configuration
# -----------------------------
# Settings shared by every stream.
device_count = 6        # 6 machines
batch_interval_s = 60   # seconds between batches
latency_max_s = 60      # upper bound on simulated event lateness, in seconds

# Stream specs grouped by equipment unit.
# Every spec is a tuple: (path, column_name, low, high)
_streams_by_unit = {
    "Main Drive Motor": [
        ("00_landing/main_motor_temperature", "temperature", 40, 90),
        ("00_landing/main_motor_vibration", "vibration", 0, 0.08),
        ("00_landing/main_motor_power", "power", 10, 50),
    ],
    "Conveyor Belt Motor": [
        ("00_landing/conveyor_motor_temperature", "temperature", 35, 75),
        ("00_landing/conveyor_motor_vibration", "vibration", 0, 0.05),
        ("00_landing/conveyor_motor_speed", "speed", 0.5, 2.5),
    ],
    "Hydraulic Press": [
        ("00_landing/hydraulic_press_temperature", "temperature", 40, 85),
        ("00_landing/hydraulic_press_pressure", "pressure", 100, 300),
        ("00_landing/hydraulic_press_cycles", "cycle_count", 0, 60),
    ],
    "CNC Spindle": [
        ("00_landing/cnc_spindle_temperature", "temperature", 35, 80),
        ("00_landing/cnc_spindle_vibration", "vibration", 0, 0.06),
        ("00_landing/cnc_spindle_rpm", "rpm", 500, 5000),
    ],
    "Cooling Pump": [
        ("00_landing/cooling_pump_temperature", "temperature", 20, 60),
        ("00_landing/cooling_pump_flow", "flow_rate", 50, 200),
    ],
    "Packaging Unit Motor": [
        ("00_landing/packaging_motor_temperature", "temperature", 30, 70),
        ("00_landing/packaging_motor_vibration", "vibration", 0, 0.05),
        ("00_landing/packaging_motor_current", "current", 5, 20),
    ],
}

# Flat list consumed by the launcher; dict insertion order keeps the
# units (and the specs inside each unit) in the order written above.
streams = [
    spec
    for unit_specs in _streams_by_unit.values()
    for spec in unit_specs
]

In [0]:
# -----------------------------
# Start all generators concurrently
# -----------------------------
# One worker per stream: every generator loops indefinitely, so a smaller
# pool would leave the remaining streams queued and never started.
# max(1, ...) avoids ThreadPoolExecutor's ValueError when `streams` is empty.
with ThreadPoolExecutor(max_workers=max(1, len(streams))) as executor:
    # Keep each future together with its target path so a stopped stream
    # can be identified.
    futures = {
        executor.submit(
            generate_stream,
            path,
            column_name,
            low,
            high,
            device_count,
            batch_interval_s,
            latency_max_s,
        ): path
        for path, column_name, low, high in streams
    }

    # Blocks for as long as any stream is still running (normally forever).
    # A future only completes when its generator returns or raises, so
    # report it here instead of letting the thread die silently. The other
    # streams are left running.
    for future in as_completed(futures):
        stream_path = futures[future]
        error = future.exception()
        if error is not None:
            print(f"Stream '{stream_path}' stopped with an error: {error!r}")
        else:
            print(f"Stream '{stream_path}' finished.")
    # Leaving the `with` block calls executor.shutdown(wait=True).