In [4]:
import pandas as pd
import random
import colorsys
import numpy as np
#Converts RGB (each channel 0–255) to HSL (Hue 0–360, Saturation 0–1, Lightness 0–1)
def rgb_to_hsl(R, G, B):
    """Convert an RGB colour to a rounded (hue, saturation, lightness) triple.

    Args:
        R, G, B: channel values on the 0-255 scale.

    Returns:
        Tuple ``(H, S, L)`` where H is in degrees (hue fraction * 360) and
        S and L are fractions, each rounded to two decimals.
    """
    # Scale every channel down to the 0-1 range that colorsys works with.
    red, green, blue = (channel / 255 for channel in (R, G, B))
    # colorsys yields hue, LIGHTNESS, saturation in that order (HLS, not HSL).
    hue, lightness, saturation = colorsys.rgb_to_hls(red, green, blue)
    hue_degrees = round(hue * 360, 2)
    return hue_degrees, round(saturation, 2), round(lightness, 2)
#Perceived-brightness formula; this channel weighting is standard in color perception
def brightness(R, G, B):
    """Return the weighted perceived brightness of an RGB colour.

    The channels are combined as 0.299*R + 0.587*G + 0.114*B and the
    result is rounded to two decimals, so it stays on the same scale as
    the inputs.
    """
    red_term = 0.299 * R
    green_term = 0.587 * G
    blue_term = 0.114 * B
    # Summed left to right, in the same order as the formula above.
    weighted_sum = red_term + green_term + blue_term
    return round(weighted_sum, 2)
#Research‑inspired emotional labeler
def research_label(H, S, L):
    """Assign a 'Good' / 'Medium' / 'Bad' label to an HSL colour.

    Args:
        H: hue in degrees.
        S: saturation as a fraction.
        L: lightness as a fraction.

    Returns:
        One of the strings 'Bad', 'Good' or 'Medium'. Rules are applied in
        priority order; the first matching rule wins.
    """
    # Rule 1: very dark colours are labelled negative.
    too_dark = L < 0.15
    if too_dark:
        return 'Bad'

    # Rule 2: very light colours are labelled positive.
    very_light = L > 0.75
    # Rule 3: hue inside the 180-300 band (treated here as the "cool" range)
    # combined with moderate-or-higher lightness is also positive.
    cool_and_bright = 180 <= H <= 300 and L > 0.4
    if very_light or cool_and_bright:
        return 'Good'

    # Rule 4: nearly achromatic colours are neutral, whatever their hue.
    washed_out = S < 0.10
    # Rule 5: hue near red (below 30 or above 330) that is not too dark.
    strong_red = (H < 30 or H > 330) and L > 0.20
    if strong_red and not washed_out:
        return 'Bad'

    # Everything else falls back to neutral.
    return 'Medium'
# Seed the stdlib RNG so that Restart & Run All writes the same CSV every
# time (42 is the same seed value the NumPy-based generator cell uses).
random.seed(42)

data=[]
rows=5500
for _ in range(rows):
    # Draw a uniformly random colour, one integer per channel.
    R=random.randint(0,255)
    G=random.randint(0,255)
    B=random.randint(0,255)
    # Each channel is blanked independently with probability 0.05, so
    # roughly 14% of rows (1 - 0.95**3) end up with at least one missing
    # channel.
    if random.random()<0.05:
        R=np.nan
    if random.random()<0.05:
        G=np.nan
    if random.random()<0.05:
        B=np.nan
    if pd.notna(R) and pd.notna(G) and pd.notna(B):
        # Derived features and the label are only computable from a
        # complete RGB triple.
        H,S,L=rgb_to_hsl(R, G, B)
        Bright=brightness(R, G, B)
        Target=research_label(H, S, L)
    else:
        # Any missing channel propagates NaN to every derived column,
        # including the Target label.
        H=S=L=Bright=Target=np.nan
    data.append([R, G, B, H, S, L, Bright, Target])
# Build the frame once from the accumulated records and persist it.
df=pd.DataFrame(data, columns=['R','G','B','H','S','L','Brightness','Target'])
df.to_csv('color_dataset.csv', index=False)


In [None]:
import numpy as np
import pandas as pd

# Fix NumPy's global RNG state so every draw made below is repeatable.
np.random.seed(42)

# Total number of synthetic rows, split evenly across the three classes.
N = 12_000
N_PER_CLASS = N // 3

# Shared accumulator that add_rows() appends records to.
rows = []

def add_rows(n, b_range, s_range, l_range, label):
    """Append ``n`` synthetic samples of one class to the module-level ``rows``.

    Args:
        n: number of records to generate.
        b_range: bounds unpacked into ``np.random.uniform`` for Brightness.
        s_range: bounds unpacked into ``np.random.uniform`` for S.
        l_range: bounds unpacked into ``np.random.uniform`` for L.
        label: value stored in the last position of each record.

    Each appended record is ``[R, G, B, H, S, L, Brightness, label]``.
    R, G and B are noisy copies of the brightness value clipped to
    [0, 255]; they are not computed from H, S and L, which are sampled
    independently.
    """
    uniform = np.random.uniform
    normal = np.random.normal

    for _ in range(n):
        brightness_value = uniform(*b_range)
        saturation = uniform(*s_range)
        lightness = uniform(*l_range)

        # One shared jitter on the brightness, then a separate jitter per
        # channel (drawn in R, G, B order), each clipped to the 0-255 range.
        gray_level = brightness_value + normal(0, 5)
        red, green, blue = (
            np.clip(gray_level + normal(0, 10), 0, 255) for _channel in range(3)
        )

        # Hue is sampled uniformly over the whole circle, whatever the class.
        hue = uniform(0, 360)

        record = [
            red, green, blue, hue, saturation, lightness, brightness_value, label
        ]
        rows.append(record)

# Sampling windows per class, in generation order:
# (label, brightness range, saturation range, lightness range).
class_specs = [
    ("Bad", (0, 70), (0, 25), (0, 25)),          # dark, dull
    ("Medium", (70, 120), (25, 45), (25, 45)),   # acceptable but not pleasant
    ("Good", (120, 200), (45, 70), (45, 70)),    # comfortable colors
]
for class_label, class_b, class_s, class_l in class_specs:
    add_rows(
        N_PER_CLASS,
        b_range=class_b,
        s_range=class_s,
        l_range=class_l,
        label=class_label,
    )

column_names = ["R", "G", "B", "H", "S", "L", "Brightness", "Target"]
df = pd.DataFrame(rows, columns=column_names)

# Inject a controlled share of missing values for later preprocessing work.
# Every column gets its own independent random sample of rows to blank;
# Target is never blanked.
missing_plan = (
    (("R", "G", "B"), 0.03),
    (("H", "S", "L"), 0.05),
    (("Brightness",), 0.04),
)
for cols, frac in missing_plan:
    for col in cols:
        df.loc[df.sample(frac=frac).index, col] = np.nan

df.to_csv("color_perception.csv", index=False)

# Report the class balance as a quick sanity check.
print(df["Target"].value_counts())
print("Dataset generated successfully.")

Target
Bad       4000
Medium    4000
Good      4000
Name: count, dtype: int64
Dataset generated successfully.
