# 01 — AIoT Ingestion (Synthetic Telemetry)

Creates a synthetic AIoT telemetry dataset to drive the DSPM lifecycle stages. No external data or credentials are used.

In [4]:
import os, json, math, random, re, time
from datetime import datetime, timedelta, timezone
import pandas as pd

BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
OUT_DIR = os.path.join(BASE_DIR, "outputs")
os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUT_DIR, "audit_logs"), exist_ok=True)

print("BASE_DIR:", BASE_DIR)
print("OUT_DIR:", OUT_DIR)

BASE_DIR: /
OUT_DIR: /outputs


In [5]:

# Generate synthetic AIoT telemetry (public-safe) — deterministic seed
random.seed(42)

def make_event(i: int) -> dict:
    device_id = f"dev-{random.randint(1000, 9999)}"
    ts = datetime.now(timezone.utc) - timedelta(minutes=random.randint(0, 60*24))
    # A few fields that may contain sensitive patterns (PII-like) for classification demo
    payload = {
        "event_id": f"evt-{i:06d}",
        "device_id": device_id,
        "timestamp": ts.isoformat(),
        "sensor_temp_c": round(random.uniform(18.0, 95.0), 2),
        "sensor_vibration": round(random.uniform(0.0, 9.9), 3),
        "location": random.choice(["plant-A", "plant-B", "warehouse-3", "field-7"]),
        "operator_note": random.choice([
            "normal operation",
            "maintenance required",
            "call +1-415-555-0199",          # PII-like phone
            "email tech@example.com",        # PII-like email
            "badge id: 123-45-6789",         # PII-like (SSN pattern) for demo
        ])
    }
    return payload

events = [make_event(i) for i in range(2000)]
df = pd.DataFrame(events)
df.head()


Unnamed: 0,event_id,device_id,timestamp,sensor_temp_c,sensor_vibration,location,operator_note
0,evt-000000,dev-2824,2026-01-30T15:13:28.569111+00:00,75.1,2.424,plant-B,normal operation
1,evt-000001,dev-9935,2026-01-30T13:06:28.569155+00:00,63.47,0.315,plant-A,maintenance required
2,evt-000002,dev-4811,2026-01-29T22:50:28.569171+00:00,64.36,5.556,field-7,maintenance required
3,evt-000003,dev-8359,2026-01-29T19:58:28.569183+00:00,39.42,8.606,plant-B,email tech@example.com
4,evt-000004,dev-6574,2026-01-30T06:35:28.569193+00:00,29.97,9.476,warehouse-3,normal operation


In [6]:

# Persist for downstream DSPM stages
data_path = os.path.join(OUT_DIR, "aiot_telemetry.csv")
df.to_csv(data_path, index=False)
print("Wrote:", data_path, "rows:", len(df))


Wrote: /outputs/aiot_telemetry.csv rows: 2000
