# Version 2

In [1]:
import json
import random

# Canonical phrasings for each intent label. The dict keys are the labels
# written to the dataset; the lists are the base phrases sampled for them.
templates = {
    "fan_on": [
        "turn on the fan", "switch on the fan", "can you turn on the fan", "please start the fan",
        "I want the fan on", "fan on please", "activate the fan", "start the fan now"
    ],
    "fan_off": [
        "turn off the fan", "switch off the fan", "please stop the fan", "can you shut the fan off",
        "I want the fan off", "fan off please", "deactivate the fan", "kill the fan"
    ],
    "lights_on": [
        "turn on the lights", "switch on the lights", "lights on please", "I want light",
        "please turn the lights on", "activate the lights", "light it up", "start the lights"
    ],
    "lights_off": [
        "turn off the lights", "switch off the lights", "lights off please", "I want it dark",
        "please turn the lights off", "deactivate the lights", "cut the lights", "kill the lights"
    ]
}

# Optional decorations placed around a base phrase; "" means "no decoration".
PREFIXES = ["", "hey,", "ok", "yo", "hey assistant,", "please"]
SUFFIXES = ["", "thanks", "now", "right away", "if you can", "ASAP"]

# Fixed seed so that Restart & Run All regenerates exactly the same dataset.
SEED = 42
samples_per_label = 250  # 250 samples × 4 labels = 1000 samples
OUTPUT_PATH = "synthetic_dataset.json"


def _compose_command(prefix, base, suffix):
    """Join prefix, base phrase and suffix into one lower-cased command.

    A decoration is dropped when it would merely repeat the adjacent word of
    the base phrase (e.g. "please" + "please start the fan", or
    "start the fan now" + "now"), which would otherwise produce stuttered
    samples such as "please please start the fan".
    """
    words = base.split()
    if words and prefix.strip(",").lower() == words[0].lower():
        prefix = ""
    if words and suffix.lower() == words[-1].lower():
        suffix = ""
    # split()/join() trims the ends and collapses any run of whitespace left
    # behind by an empty prefix or suffix.
    return " ".join(f"{prefix} {base} {suffix}".split()).lower()


def generate_dataset(templates, samples_per_label, seed=None):
    """Build a shuffled list of ``{"text": ..., "label": ...}`` records.

    Args:
        templates: mapping of label -> non-empty list of base phrases.
        samples_per_label: number of records to draw (with replacement) for
            each label.
        seed: seed for a private ``random.Random`` instance; the same seed
            always yields the same records. ``None`` seeds from system entropy.

    Returns:
        A list of ``len(templates) * samples_per_label`` dicts in shuffled order.

    Raises:
        ValueError: if a label has no phrases to sample from.
    """
    # A dedicated generator keeps the result reproducible without disturbing
    # the global ``random`` state used elsewhere.
    rng = random.Random(seed)
    records = []
    for label, phrases in templates.items():
        if not phrases:
            raise ValueError(f"No phrases defined for label {label!r}")
        for _ in range(samples_per_label):
            base = rng.choice(phrases)
            prefix = rng.choice(PREFIXES)
            suffix = rng.choice(SUFFIXES)
            records.append({
                "text": _compose_command(prefix, base, suffix),
                "label": label
            })
    # Shuffle so that labels are not grouped together in the output file.
    rng.shuffle(records)
    return records


# Generate dataset
dataset = generate_dataset(templates, samples_per_label, seed=SEED)

# Save to JSON
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(dataset, f, indent=2, ensure_ascii=False)

# Report the real sample count rather than a hard-coded number.
print(f"Dataset of {len(dataset)} samples generated and saved to '{OUTPUT_PATH}'")

Dataset of 1000 samples generated and saved to 'synthetic_dataset.json'


# Version 1

In [None]:
import itertools
import json

# Adjective -> integer weight. calculate_weight() adds this to the verb weight.
# Insertion order matters: it fixes the order of the generated combinations.
adjectives = {
    "fast": 4,
    "bright": 5,
    "cold": 2,
    "warm": 3,
    "powerful": 6,
    "heavy": 4,
    "strong": 5,
    "intense": 6,
    "vibrant": 5,
    "sturdy": 4,
    "steady": 4,
    "flashing": 6,
    "refreshing": 5,
    "blinding": 6,
    "brightened": 5,
    "eco-friendly": 3,
    "electric": 5,
    "brilliant": 5,
    "dynamic": 5,
    "luminous": 6,
    "sparkling": 6,
}

# Verb (or short verb phrase) -> integer weight.
verbs = {
    "turn": 3,
    "make": 4,
    "activate": 5,
    "stop": 2,
    "increase": 6,
    "decrease": 3,
    "start": 4,
    "control": 7,
    "adjust": 5,
    "boost": 6,
    "power": 7,
    "optimize": 6,
    "trigger": 6,
    "enhance": 6,
    "improve": 5,
    "intensify": 6,
    "brighten": 5,
    "elevate": 6,
    "supercharge": 7,
    "recharge": 5,
    "start-up": 6,
    "reboot": 5,
    "adjust intensity": 6,
    "speed up": 6,
    "increase speed": 7,
    "boost power": 7,
    "light up": 5,
    "flicker": 3,
    "disable": 2,
    "activate power": 6,
}

# Devices a phrase can refer to, in singular and plural form.
nouns = ["fan", "light", "fans", "lights"]

# Numeric id -> label string written to the dataset.
action_labels = {
    0: "lights_on",
    1: "lights_off",
    2: "fan_on",
    3: "fan_off",
}

# Every (adjective, noun, verb) triple, with the verb varying fastest and the
# adjective slowest -- the same order itertools.product would produce.
adjective_noun_verb = [
    (adjective, noun, verb)
    for adjective in adjectives
    for noun in nouns
    for verb in verbs
]

def calculate_weight(adjective, verb):
    """Return the combined weight of an adjective/verb pair.

    The result is the adjective's entry in the module-level ``adjectives``
    table plus the verb's entry in the module-level ``verbs`` table.

    Raises:
        KeyError: if either word is missing from its table.
    """
    return adjectives[adjective] + verbs[verb]

# For each noun: (label when the weight exceeds the threshold, label otherwise).
noun_labels = {
    "light": (action_labels[0], action_labels[1]),
    "lights": (action_labels[0], action_labels[1]),
    "fan": (action_labels[2], action_labels[3]),
    "fans": (action_labels[2], action_labels[3]),
}

# The threshold is compared against the summed adjective + verb weight.
raw_threshold = input("Enter the threshold for classification (1-10): ")
try:
    threshold = int(raw_threshold)
except ValueError:
    raise ValueError(
        f"Threshold must be an integer, got {raw_threshold!r}"
    ) from None

json_output = []
for adjective, noun, verb in adjective_noun_verb:
    # Fail loudly on a noun without labels instead of silently reusing the
    # label computed for the previous combination.
    if noun not in noun_labels:
        raise ValueError(f"No labels defined for noun {noun!r}")
    above_label, below_label = noun_labels[noun]

    total_weight = calculate_weight(adjective, verb)
    label = above_label if total_weight > threshold else below_label

    json_output.append({
        "text": f"{adjective} {noun} {verb}",
        "label": label
    })

# NOTE: this is the same path the "Version 2" cell writes to, so running both
# cells leaves only the output of whichever ran last.
with open("synthetic_dataset.json", "w", encoding="utf-8") as f:
    json.dump(json_output, f, indent=2)

# Report success only once the file has been closed and flushed.
print("Synthetic dataset saved to synthetic_dataset.json")