In [None]:
#Generate the dataset
import pandas as pd
import random
import colorsys
import numpy as np

# Convert RGB to HSL
def rgb_to_hsl(R, G, B):
    """Convert RGB channel values to a rounded (H, S, L) tuple.

    Args:
        R, G, B: Channel values; each one is divided by 255 before being
            handed to ``colorsys.rgb_to_hls``.

    Returns:
        A 3-tuple ``(H, S, L)`` where ``H`` is the colorsys hue multiplied
        by 360, and ``S`` / ``L`` are the colorsys saturation / lightness.
        Every component is rounded to 2 decimal places.
    """
    scaled = (channel / 255 for channel in (R, G, B))
    # colorsys yields hue, lightness, saturation (HLS order); the result is
    # rearranged below so callers receive H, S, L.
    hue, lightness, saturation = colorsys.rgb_to_hls(*scaled)
    hsl = (hue * 360, saturation, lightness)
    return tuple(round(component, 2) for component in hsl)

# Compute perceived brightness
def brightness(R, G, B):
    """Return the weighted channel sum used as perceived brightness.

    The weights (0.299, 0.587, 0.114) match the ITU-R BT.601 luma
    coefficients for red, green and blue respectively.

    Args:
        R, G, B: Channel values.

    Returns:
        ``0.299*R + 0.587*G + 0.114*B`` rounded to 2 decimal places.
    """
    red_term = 0.299 * R
    green_term = 0.587 * G
    blue_term = 0.114 * B
    # Terms are added left to right (red, green, blue) so the floating-point
    # result is the same as a single inline expression in that order.
    weighted_sum = red_term + green_term + blue_term
    return round(weighted_sum, 2)

# Assign emotional label
def assign_label(R, G, B):
    """Map an RGB colour to one of the labels "Good", "Medium" or "Bad".

    Rules are evaluated in order and the first match wins:

    1. Exact black (0, 0, 0)                         -> "Bad"
    2. Exact white (255, 255, 255)                   -> "Medium"
    3. All pairwise channel differences below 30     -> "Medium" (neutrals)
    4. G > 150 and B > 150 and R < 150               -> "Good" (calm blues/greens)
    5. R > 200 and G < 100 and B < 100               -> "Bad" (aggressive reds)
    6. Anything else                                 -> "Medium"

    Args:
        R, G, B: Channel values.

    Returns:
        The label string for the colour.
    """
    channels = (R, G, B)

    # Exact extremes come first: black would otherwise be swallowed by the
    # neutral rule below and end up labelled "Medium".
    if all(value == 0 for value in channels):
        return "Bad"
    if all(value == 255 for value in channels):
        return "Medium"

    # Grays / neutrals: every pair of channels lies within 30 of each other.
    channel_pairs = ((R, G), (G, B), (R, B))
    if all(abs(first - second) < 30 for first, second in channel_pairs):
        return "Medium"

    # General rules.
    is_calm = R < 150 and G > 150 and B > 150
    if is_calm:
        return "Good"

    is_aggressive = R > 200 and G < 100 and B < 100
    return "Bad" if is_aggressive else "Medium"

# Build the synthetic colour dataset: one row per random RGB triple, with the
# derived HSL, brightness and label columns, then write it to dataset.csv.
rows = 5000
data = []

for _ in range(rows):
    # Draw all three channels (R, G, B order) before any missing-value roll.
    channels = [random.randint(0, 255) for _channel in range(3)]

    # Introduce missing values: each channel independently has a ~5% chance
    # of being replaced by NaN (rolls happen in R, G, B order).
    channels = [np.nan if random.random() < 0.05 else value for value in channels]

    if all(pd.notna(value) for value in channels):
        # Derived columns in output order: H, S, L, Brightness, Target.
        derived = [
            *rgb_to_hsl(*channels),
            brightness(*channels),
            assign_label(*channels),
        ]
    else:
        # Any missing channel leaves every derived column, and the label, missing.
        derived = [np.nan] * 5

    data.append(channels + derived)

df = pd.DataFrame(
    data,
    columns=['R', 'G', 'B', 'H', 'S', 'L', 'Brightness', 'Target'],
)
df.to_csv("dataset.csv", index=False)
print(f"dataset.csv generated with {rows} rows including missing values and edge colors.")

dataset.csv generated with 5000 rows including missing values and edge colors.
