In [None]:
import yaml

# Base sensor type names (taken from sensor_type.txt), one per line.
# The names contain no whitespace, so splitting the text block on whitespace
# yields the list in the order written here.
BASE_SENSOR_TYPES = """
    Air_Quality_Sensor
    Air_Quality_Level_Sensor
    Ethyl_Alcohol_C2H5CH_Gas_Sensor
    CO_Level_Sensor
    CO2_Level_Sensor
    PM1_Level_Sensor_Atmospheric
    PM2.5_Level_Sensor_Atmospheric
    PM10_Level_Sensor_Atmospheric
    Formaldehyde_Level_Sensor
    Zone_Air_Humidity_Sensor
    Illuminance_Sensor
    LPG_Natural_Gas_Town_MQ5_Gas_Sensor
    Carbon_Monoxide_Coal_Gas_Liquefied_MQ9_Gas_Sensor
    Alcohol_Vapor_MQ3_Gas_Sensor
    Combustible_Gas_Smoke_MQ2_Sensor
    NO2_Level_Sensor
    Oxygen_O2_Percentage_Gas_Sensor
    Sound_Noise_Sensor_MEMS
    Air_Temperature_Sensor
    TVOC_Level_Sensor
""".split()

# Location identifiers "5.01" through "5.36": the number after the dot is
# zero-padded to two digits (range's upper bound of 37 is exclusive).
LOCATIONS = ["5.{:02d}".format(room) for room in range(1, 37)]

# Synonym mappings for each sensor type (generic terms and variations).
# Keys must match the base sensor type names exactly: the generator looks
# synonyms up by base name, so a key that matches no base type is silently
# ignored and the corresponding sensor ends up with no synonyms.
SYNONYM_MAPPINGS = {
    "Air_Quality_Sensor": [
        "air quality",
        "air quality sensor",
        "air pollutant",
        "pollution sensor",
    ],
    "Air_Quality_Level_Sensor": [
        "air quality level",
        "air quality level sensor",
        "pollution level",
    ],
    "Ethyl_Alcohol_C2H5CH_Gas_Sensor": [
        "ethyl alcohol",
        "alcohol sensor",
        "ethanol",
        "ethanol sensor",
    ],
    "CO_Level_Sensor": [
        "CO",
        "carbon monoxide",
        "CO sensor",
        "carbon monoxide sensor",
    ],
    "CO2_Level_Sensor": [
        "CO2",
        "carbon dioxide",
        "CO2 sensor",
        "carbon dioxide sensor",
        "co2 level",
    ],
    "PM1_Level_Sensor_Atmospheric": [
        "PM1",
        "PM1 sensor",
        "particulate matter 1",
        "PM1 level",
    ],
    "PM2.5_Level_Sensor_Atmospheric": [
        "PM2.5",
        "PM2.5 sensor",
        "particulate matter 2.5",
        "PM2.5 level",
    ],
    # Was keyed as "PM34_..." with "PM34" synonyms, which matches no base
    # sensor type; the base list defines PM10, so PM10 had no synonyms.
    "PM10_Level_Sensor_Atmospheric": [
        "PM10",
        "PM10 sensor",
        "particulate matter 10",
        "PM10 level",
    ],
    "Formaldehyde_Level_Sensor": [
        "formaldehyde",
        "formaldehyde sensor",
        "formaldehyde level",
    ],
    "Zone_Air_Humidity_Sensor": [
        "humidity",
        "humidity sensor",
        "air humidity",
        "moisture sensor",
    ],
    "Illuminance_Sensor": [
        "illuminance",
        "light sensor",
        "brightness",
        "light level",
    ],
    "LPG_Natural_Gas_Town_MQ5_Gas_Sensor": [
        "LPG",
        "natural gas",
        "MQ5 sensor",
        "gas sensor",
        "LPG sensor",
    ],
    "Carbon_Monoxide_Coal_Gas_Liquefied_MQ9_Gas_Sensor": [
        "carbon monoxide gas",
        "MQ9 sensor",
        "liquefied gas",
        "coal gas sensor",
    ],
    "Alcohol_Vapor_MQ3_Gas_Sensor": [
        "alcohol vapor",
        "MQ3 sensor",
        "alcohol gas",
        "vapor sensor",
    ],
    "Combustible_Gas_Smoke_MQ2_Sensor": [
        "combustible gas",
        "smoke sensor",
        "MQ2 sensor",
        "gas smoke",
    ],
    "NO2_Level_Sensor": [
        "NO2",
        "nitrogen dioxide",
        "NO2 sensor",
        "NO2 level",
    ],
    "Oxygen_O2_Percentage_Gas_Sensor": [
        "oxygen",
        "O2",
        "oxygen sensor",
        "O2 level",
        "oxygen percentage",
    ],
    "Sound_Noise_Sensor_MEMS": [
        "sound",
        "noise",
        "sound sensor",
        "noise sensor",
        "noise level",
    ],
    "Air_Temperature_Sensor": [
        "temperature",
        "temp",
        "air temperature",
        "temperature sensor",
        "air temp",
        "temp sensor",
        "room temperature",
    ],
    "TVOC_Level_Sensor": [
        "TVOC",
        "volatile organic compounds",
        "TVOC sensor",
        "TVOC level",
        "organic compounds",
    ],
}


def _format_examples(items):
    """Render *items* as one "- item" line each, newline-terminated.

    This is the text an ``examples: |`` block holds in Rasa-style NLU data:
    each line starts with "- " at column 0, with no header lines.
    """
    return "".join(f"- {item}\n" for item in items)


def generate_nlu_synonyms():
    """Write the sensor lookup table and synonym entries to ``nlu_synonyms.yml``.

    The output is a mapping with ``version: "3.1"`` and an ``nlu`` list that
    contains:

    * one ``lookup: sensor_type`` entry listing every
      ``<base sensor type>_<location>`` combination, and
    * one ``synonym`` entry per base sensor type that has synonyms in
      ``SYNONYM_MAPPINGS``, targeting that type's ``_5.36`` sensor.

    Base sensor types without any synonyms are skipped. The file is written
    to the current working directory and a summary line is printed.

    Returns:
        None.
    """
    # Every sensor is "<base type>_<location>", base types outermost.
    sensor_names = [
        f"{sensor_base}_{loc}"
        for sensor_base in BASE_SENSOR_TYPES
        for loc in LOCATIONS
    ]

    # The "examples" value must hold only the "- item" lines. The entry's own
    # "lookup"/"synonym" key already names it, so repeating "lookup: ..." and
    # "examples: |" inside the string (or indenting the items) yields text
    # that is not a valid examples block.
    nlu_entries = [
        {"lookup": "sensor_type", "examples": _format_examples(sensor_names)}
    ]

    for sensor_base in BASE_SENSOR_TYPES:
        synonyms = SYNONYM_MAPPINGS.get(sensor_base, [])
        if not synonyms:
            continue
        # Map synonyms to a single primary sensor (5.36) to avoid ambiguity
        # between the per-location variants of the same sensor type.
        primary_sensor = f"{sensor_base}_5.36"
        nlu_entries.append(
            {"synonym": primary_sensor, "examples": _format_examples(synonyms)}
        )

    nlu_data = {"version": "3.1", "nlu": nlu_entries}

    # Explicit UTF-8: allow_unicode=True lets non-ASCII text through
    # unescaped, so the file encoding must not depend on the platform default.
    with open("nlu_synonyms.yml", "w", encoding="utf-8") as f:
        yaml.safe_dump(nlu_data, f, sort_keys=False, allow_unicode=True)

    print(
        f"Generated nlu_synonyms.yml with {len(BASE_SENSOR_TYPES) * len(LOCATIONS)} sensors and synonyms."
    )


# Generate the file only when this module is run as the main program,
# not when it is imported.
if __name__ == "__main__":
    generate_nlu_synonyms()

Generated nlu_synonyms.yml with 720 sensors and synonyms.
