In [0]:
# 00_data_generator: Smart Manufacturing – Sensor-type based IoT stream generator

import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone

# -----------------------------
# Generic stream generator
# -----------------------------
def _build_batch(sensor_type, low, high, machine_count, latency_max_s, now):
    """Return one reading dict per machine (ids 1..machine_count) for one batch."""
    return [
        {
            "machine_id": machine_id,
            # Back-date the event by a random lag of up to `latency_max_s` seconds.
            "event_time": now - timedelta(seconds=random.uniform(0, latency_max_s)),
            "sensor_type": sensor_type,
            "value": round(random.uniform(low, high), 4),
        }
        for machine_id in range(1, machine_count + 1)
    ]


def generate_stream(
    path: str,
    sensor_type: str,
    low: float,
    high: float,
    machine_count: int,
    batch_interval_s: int,
    latency_max_s: int,
    max_batches: "int | None" = None,
):
    """
    Generic IoT stream generator.

    Appends one batch of `machine_count` rows (one row per machine) to the
    Delta location `path`, then waits so that batches start
    `batch_interval_s` seconds apart. The time spent building and writing a
    batch is subtracted from the wait, so the cadence does not drift.

    Row schema:
        machine_id   integer, 1..machine_count
        event_time   UTC timestamp, back-dated by up to `latency_max_s` seconds
        sensor_type  the `sensor_type` argument, unchanged
        value        random float in [low, high], rounded to 4 decimals

    Args:
        path: Target location passed to the Delta writer.
        sensor_type: Label stored in every row.
        low: Lower bound of the generated values.
        high: Upper bound of the generated values.
        machine_count: Number of machines, i.e. rows per batch. Must be >= 1.
        batch_interval_s: Seconds between the start of consecutive batches.
            Must be >= 0.
        latency_max_s: Maximum random lag subtracted from the batch time.
        max_batches: Stop after this many batches. ``None`` (the default)
            keeps the original behaviour of running forever.

    Raises:
        ValueError: If `machine_count` < 1 or `batch_interval_s` < 0.

    Note:
        Relies on a global `spark` session that this function does not
        create - presumably injected by the notebook runtime; confirm when
        running elsewhere.
    """
    # Fail fast with a clear message instead of an obscure schema-inference
    # error on an empty batch raised from inside a worker thread.
    if machine_count < 1:
        raise ValueError(f"machine_count must be >= 1, got {machine_count}")
    if batch_interval_s < 0:
        raise ValueError(f"batch_interval_s must be >= 0, got {batch_interval_s}")

    batches_written = 0
    next_tick = time.monotonic()

    while max_batches is None or batches_written < max_batches:
        now = datetime.now(timezone.utc)
        rows = _build_batch(sensor_type, low, high, machine_count, latency_max_s, now)

        df = spark.createDataFrame(rows)
        df.write.format("delta").mode("append").save(path)
        batches_written += 1

        # No point waiting after the final batch of a bounded run.
        if max_batches is not None and batches_written >= max_batches:
            break

        # Sleep until the next scheduled tick rather than for a fixed duration,
        # so write time does not stretch the interval.
        next_tick += batch_interval_s
        delay = next_tick - time.monotonic()
        if delay > 0:
            time.sleep(delay)
        else:
            # The write overran the interval: resynchronise instead of
            # firing a burst of catch-up batches.
            next_tick = time.monotonic()

In [0]:
# -----------------------------
# Configuration
# -----------------------------
machine_count = 6        # machines simulated per stream (one row each per batch)
batch_interval_s = 60    # seconds between batches
latency_max_s = 60       # max seconds an event_time is back-dated

# Landing directory shared by all streams; each sensor type gets a subfolder.
_landing_root = "/Volumes/industrial-iot/00_landing/streaming"

# sensor_type -> (low, high) bounds of the generated values
_value_ranges = {
    "temperature": (20, 90),
    "vibration": (0.0, 0.08),
    "power": (5, 50),
    "pressure": (100, 300),
    "flow_rate": (50, 200),
    "rpm": (500, 5000),
    "speed": (0.5, 2.5),
    "current": (5, 20),
}

# One (path, sensor_type, low, high) tuple per stream
streams = [
    (f"{_landing_root}/{sensor}", sensor, low_bound, high_bound)
    for sensor, (low_bound, high_bound) in _value_ranges.items()
]

In [0]:
# -----------------------------
# Start all generators concurrently
# -----------------------------
# One worker per stream so every generator runs concurrently.
with ThreadPoolExecutor(max_workers=len(streams)) as executor:
    # Keep each future, keyed to its sensor type: discarding them would let a
    # generator crash inside its worker thread without anyone noticing.
    futures = {}
    for path, sensor_type, low, high in streams:
        future = executor.submit(
            generate_stream,
            path,
            sensor_type,
            low,
            high,
            machine_count,
            batch_interval_s,
            latency_max_s,
        )
        futures[future] = sensor_type

    # generate_stream loops forever, so this blocks for as long as any stream
    # is alive. A future completing means that generator has stopped; report
    # it right away, since the remaining streams keep this cell blocked.
    failed = {}
    for future in as_completed(futures):
        error = future.exception()
        if error is not None:
            failed[futures[future]] = error
            print(f"[generator:{futures[future]}] stopped with {error!r}", flush=True)

# Leaving the `with` block already waits for all workers, so no explicit
# shutdown call is needed. This point is reached only once every generator
# has ended.
if failed:
    raise RuntimeError(
        "stream generators failed: "
        + ", ".join(f"{name} ({err!r})" for name, err in failed.items())
    )