## Prepare data

In [None]:
import random
import uuid
import pandas as pd
from datetime import datetime, timedelta

# -----------------------------
# Attacker base & topic
# -----------------------------
# Base names for the client IDs of rows labelled as attacks; generate_dataset
# picks one at random and append_suffix adds a random tail to it.
# Order is preserved from the original definition.
ATTACKERS_BASE = [
    "unknown_client",
    "unauth_device",
    "unregistered_node",
    "legacy_client",
    "temp_client",
    "mqtt_explorer",
    "mosquitto_pub",
    "mosquitto_sub",
    "paho_test_client",
    "mqtt_debugger",
    "scanner_node",
    "net_probe",
    "port_scanner",
    "edge_scanner",
    "recon_client",
    "rogue_plc",
    "ghost_plc",
    "fake_plc_node",
    "plc_emulator",
    "plc_testbench",
    "sensor_emulator",
    "sensor_spoofer",
    "virtual_sensor",
    "fake_temp_sensor",
    "fake_humidity_node",
    "esp32_gateway",
    "esp8266_node",
    "arduino_gateway",
    "edge_iot_node",
    "iot_gateway_test",
    "maintenance_client",
    "field_service_tool",
    "hmi_debug_client",
    "engineering_workstation",
    "diagnostic_laptop",
    "vendor_support_tool",
    "remote_access_client",
    "backup_node",
]

# Topics assigned to rows labelled as attacks (one is picked at random per
# row by generate_dataset). The first five entries contain `#` / `+`
# wildcard segments; the rest are concrete paths.
# Order is preserved from the original definition.
ATTACKERS_TOPIC = [
    "iot/factory/#",
    "iot/factory/+/cmd",
    "iot/+/+/cmd",
    "iot/system/#",
    "iot/debug/#",
    "iot/factory/admin",
    "iot/factory/root",
    "iot/factory/supervisor",
    "iot/factory/engineering",
    "iot/factory/maintenance",
    "iot/factory/cmd",
    "iot/factory/control",
    "iot/factory/line1/plc01/write",
    "iot/factory/line1/plc02/write",
    "iot/factory/line2/plc01/override",
    "iot/system/restart",
    "iot/system/shutdown",
    "iot/system/update",
    "iot/system/firmware",
    "iot/system/config",
    "iot/factory/config/upload",
    "iot/factory/config/download",
    "iot/factory/config/backup",
    "iot/factory/credentials",
    "iot/factory/secrets",
    "iot/debug/raw",
    "iot/debug/test",
    "iot/debug/memory",
    "iot/factory/logs",
    "iot/factory/diagnostics",
    "iot/factory/broadcast",
    "iot/factory/line99/sensor01",
    "iot/factory/line1/hmi01/control",
    "iot/factory/line3/plc07/write",
    "iot/factory/line5/plc02/cmd",
    "iot/system/time_sync",
    "iot/system/network/reset",
    "iot/system/service/stop",
]

# Base names for the client IDs of rows labelled as normal traffic.
NORMAL_CLIENTS = [
    "sensor_temp_01",
    "sensor_humid_01",
    "sensor_pressure_01",
    "plc_line1",
    "plc_line2",
    "edge_gateway_01",
]

# Topics assigned to rows labelled as normal traffic.
NORMAL_TOPICS = [
    "iot/factory/line1/sensor01/temp",
    "iot/factory/line1/sensor01/humidity",
    "iot/factory/line2/sensor02/temp",
    "iot/factory/line2/sensor02/humidity",
]

# -----------------------------
# Append suffix (the part that was missing)
# -----------------------------
def append_suffix(base):
    """Return ``base`` followed by a hyphen and a randomly generated suffix.

    One of three suffix styles is chosen uniformly at random:

    * ``"hex"``  -- a random integer in [4096, 65535] written as lowercase
      hexadecimal without the ``0x`` prefix (always four digits).
    * ``"ip"``   -- an address of the form ``192.168.<0-10>.<2-254>``.
    * ``"uuid"`` -- the first four characters of a new ``uuid.uuid4()``.

    Note: the ``"uuid"`` style draws from ``uuid.uuid4()`` rather than the
    ``random`` module, so seeding ``random`` does not make that style
    reproducible.
    """
    style = random.choice(["hex", "ip", "uuid"])

    if style == "ip":
        # Octets are drawn left to right, third before fourth.
        third_octet = random.randint(0, 10)
        fourth_octet = random.randint(2, 254)
        tail = "192.168.{}.{}".format(third_octet, fourth_octet)
    elif style == "hex":
        # 4096 == 0x1000 and 65535 == 0xffff, so the result has four digits.
        tail = format(random.randint(4096, 65535), "x")
    else:
        tail = str(uuid.uuid4())[:4]

    return "{}-{}".format(base, tail)

# -----------------------------
# Dataset generator
# -----------------------------
def generate_dataset(n_rows=5000, attack_ratio=0.3, start_time=None):
    """Build a synthetic, labelled table of normal and attack-like messages.

    Rows are generated sequentially. For each row the clock advances by a
    random 1-5 seconds, the row is labelled as an attack with probability
    ``attack_ratio``, and the remaining fields are drawn from the value
    ranges associated with that label:

    ============  =======================  =====================
    field         attack row               normal row
    ============  =======================  =====================
    client_id     ATTACKERS_BASE + suffix  NORMAL_CLIENTS + suffix
    topic         ATTACKERS_TOPIC          NORMAL_TOPICS
    temp          uniform(80, 150)         uniform(20, 35)
    humidity      uniform(5, 95)           uniform(30, 70)
    message_rate  randint(20, 200)         randint(1, 10)
    ============  =======================  =====================

    Args:
        n_rows: Number of rows to generate. Zero (or a negative value)
            yields an empty frame that still carries every column.
        attack_ratio: Probability that a row is labelled as an attack; each
            row is decided by comparing ``random.random()`` against it.
        start_time: Optional ``datetime`` the clock starts from. Defaults to
            the current local time truncated to whole seconds. The first row
            is stamped 1-5 seconds after this value.

    Returns:
        A ``pandas.DataFrame`` with the columns ``timestamp``, ``client_id``,
        ``topic``, ``temp``, ``humidity``, ``message_rate`` and ``is_attack``
        (1 for attack rows, 0 otherwise). ``temp`` and ``humidity`` are
        rounded to two decimals.
    """
    # Explicit column list so that an empty result keeps its schema (and the
    # CSV written from it keeps its header row).
    columns = [
        "timestamp",
        "client_id",
        "topic",
        "temp",
        "humidity",
        "message_rate",
        "is_attack",
    ]

    if start_time is None:
        start_time = datetime.now().replace(microsecond=0)
    current_time = start_time

    data = []
    for _ in range(n_rows):
        # Timestamps are strictly increasing: every row moves the clock on.
        current_time += timedelta(seconds=random.randint(1, 5))
        is_attack = 1 if random.random() < attack_ratio else 0

        if is_attack:
            base = append_suffix(random.choice(ATTACKERS_BASE))
            topic = random.choice(ATTACKERS_TOPIC)
            temp = random.uniform(80, 150)
            humidity = random.uniform(5, 95)
            message_rate = random.randint(20, 200)
        else:
            base = append_suffix(random.choice(NORMAL_CLIENTS))
            topic = random.choice(NORMAL_TOPICS)
            temp = random.uniform(20, 35)
            humidity = random.uniform(30, 70)
            message_rate = random.randint(1, 10)

        data.append({
            "timestamp": current_time,
            "client_id": base,
            "topic": topic,
            "temp": round(temp, 2),
            "humidity": round(humidity, 2),
            "message_rate": message_rate,
            "is_attack": is_attack,
        })

    return pd.DataFrame(data, columns=columns)

# -----------------------------
# Generate & save
# -----------------------------
# Build a 10,000-row dataset in which each row is labelled as an attack with
# probability 0.35.
df = generate_dataset(n_rows=10000, attack_ratio=0.35)
# Write the rows to iot_dataset.csv (relative path), omitting the DataFrame index.
df.to_csv("iot_dataset.csv", index=False)

# Last expression of the notebook cell: displays the first five rows.
df.head()


Unnamed: 0,timestamp,client_id,topic,temp,humidity,message_rate,is_attack
0,2026-02-05 18:34:35,esp8266_node-db1b,iot/system/restart,110.2,61.88,179,1
1,2026-02-05 18:34:37,edge_gateway_01-192.168.0.193,iot/factory/line1/sensor01/humidity,20.48,68.76,3,0
2,2026-02-05 18:34:41,plc_line2-192.168.1.254,iot/factory/line2/sensor02/temp,31.04,63.62,6,0
3,2026-02-05 18:34:45,sensor_temp_01-f644,iot/factory/line1/sensor01/temp,32.48,62.34,2,0
4,2026-02-05 18:34:48,plc_line2-192.168.7.52,iot/factory/line2/sensor02/humidity,22.0,36.66,9,0
