In [None]:
import pandas as pd
import random
import colorsys
import numpy as np
#Converts RGB (each channel 0–255) to HSL: Hue 0–360, Saturation 0–1, Lightness 0–1
def rgb_to_hsl(R, G, B):
    """Convert 0-255 RGB channels to an ``(H, S, L)`` tuple.

    Hue is returned in degrees (the 0-1 hue from ``colorsys`` times 360);
    saturation and lightness stay as 0-1 fractions. Each value is rounded
    to two decimals.
    """
    # colorsys expects 0-1 floats and returns (hue, lightness, saturation),
    # so the last two are swapped back into H, S, L order on return.
    hue, lightness, saturation = colorsys.rgb_to_hls(R / 255, G / 255, B / 255)
    return (
        round(hue * 360, 2),
        round(saturation, 2),
        round(lightness, 2),
    )
#Perceived brightness formula, this is standard in color perception
def brightness(R, G, B):
    """Return the weighted channel sum 0.299*R + 0.587*G + 0.114*B.

    The result is on the same scale as the inputs and is rounded to two
    decimals.
    """
    red_part = 0.299 * R
    green_part = 0.587 * G
    blue_part = 0.114 * B
    # Summed left to right, red + green first, then blue.
    return round(red_part + green_part + blue_part, 2)
#Research‑inspired emotional labeler
def research_label(H, S, L):
    """Map an HSL colour to one of the labels 'Bad', 'Good' or 'Medium'.

    H is compared against degree values (30, 180, 300, 330); S and L are
    compared against fractions (0.10, 0.15, 0.20, 0.4, 0.75). The rules are
    tried in order and the first one that matches decides the label.
    """
    if L < 0.15:
        # Very dark colours.
        label = 'Bad'
    elif L > 0.75:
        # Very light colours.
        label = 'Good'
    elif 180 <= H <= 300 and L > 0.4:
        # Hue between 180 and 300 degrees with lightness above 0.4.
        label = 'Good'
    elif S < 0.10:
        # Nearly unsaturated colours; checked before the red rule, so a
        # washed-out red ends up here.
        label = 'Medium'
    elif (H < 30 or H > 330) and L > 0.20:
        # Hue within 30 degrees of 0/360 (red) that is not too dark.
        label = 'Bad'
    else:
        # Nothing matched.
        label = 'Medium'
    return label
# Build the dataset row by row: random RGB, derived HSL / brightness / label.
rows = 5500
data = []
for _ in range(rows):
    # Three channel draws first, in R, G, B order.
    R, G, B = (random.randint(0, 255) for _ in range(3))
    # Then one independent 5% chance per channel of blanking it to NaN,
    # again in R, G, B order.
    R, G, B = (
        np.nan if random.random() < 0.05 else channel
        for channel in (R, G, B)
    )
    if pd.isna(R) or pd.isna(G) or pd.isna(B):
        # Any missing channel makes every derived field missing too.
        H = S = L = Bright = Target = np.nan
    else:
        H, S, L = rgb_to_hsl(R, G, B)
        Bright = brightness(R, G, B)
        Target = research_label(H, S, L)
    data.append([R, G, B, H, S, L, Bright, Target])

column_names = ['R', 'G', 'B', 'H', 'S', 'L', 'Brightness', 'Target']
df = pd.DataFrame(data, columns=column_names)
df.to_csv('color_dataset.csv', index=False)


In [None]:
import numpy as np
import pandas as pd

# Synthetic colour dataset: random RGB triples, HSL and brightness derived
# from them, rule-based labels, then randomly blanked cells.
np.random.seed(42)  # seeds NumPy's global generator used by the draws below
n_samples = 12000

# RGB values: integers 0..255 (randint's upper bound is exclusive)
R = np.random.randint(0, 256, n_samples)
G = np.random.randint(0, 256, n_samples)
B = np.random.randint(0, 256, n_samples)

# Brightness: weighted channel sum, on the 0..255 scale
Brightness = (0.299 * R + 0.587 * G + 0.114 * B)

# Convert RGB → HSL
max_rgb = np.maximum.reduce([R, G, B])
min_rgb = np.minimum.reduce([R, G, B])
delta = max_rgb - min_rgb

# Rows with delta == 0 (all channels equal) get H = 0 and S = 0. Dividing
# by 1 on those rows keeps the arithmetic free of 0/0 warnings; their
# quotients are never used.
mask = delta != 0
safe_delta = np.where(mask, delta, 1)

L = (max_rgb + min_rgb) / 2  # still on the 0..255 scale at this point

# Saturation = chroma / (1 - |2 * lightness - 1|) with chroma AND lightness
# as 0..1 fractions. The chroma has to be divided by 255 as well; without
# that S came out 255 times too large and the percentage thresholds used
# for labelling could never produce "Good".
chroma = delta / 255
lightness_span = 1 - np.abs(2 * L / 255 - 1)  # zero only where delta == 0
S = np.where(mask, chroma / np.where(mask, lightness_span, 1), 0.0)
S = np.clip(S, 0.0, 1.0)  # absorb float round-off just above 1

H = np.zeros(n_samples)
red_is_max = mask & (max_rgb == R)
green_is_max = mask & (max_rgb == G)
blue_is_max = mask & (max_rgb == B)
# Assigned in R, G, B order, so on ties the later channel's formula wins.
H[red_is_max] = (60 * ((G - B) / safe_delta) + 360)[red_is_max]
H[green_is_max] = (60 * ((B - R) / safe_delta) + 120)[green_is_max]
H[blue_is_max] = (60 * ((R - G) / safe_delta) + 240)[blue_is_max]
H = H % 360

# Scale S and L to percentages (0..100)
S = S * 100
L = (L / 255) * 100

# Label assignment: np.select takes the first matching condition, which
# mirrors an if / elif / else chain ("Bad" wins over "Good").
is_bad = (Brightness < 60) | (L < 20) | (S < 20)
is_good = (Brightness >= 60) & (Brightness <= 180) & (S >= 20) & (S <= 70)
Target = np.select([is_bad, is_good], ["Bad", "Good"], default="Medium").tolist()

# Create DataFrame. R/G/B are stored as floats up front because NaN is
# written into them below; assigning NaN into an integer column relies on
# an implicit upcast that newer pandas versions deprecate or reject.
df = pd.DataFrame({
    "R": R.astype(float),
    "G": G.astype(float),
    "B": B.astype(float),
    "H": H,
    "S": S,
    "L": L,
    "Brightness": Brightness,
    "Target": Target
})

# Inject missing values: each column gets its own independently sampled
# set of rows (3% for R/G/B, 5% for H/S/L, 4% for Brightness).
for col in ["R", "G", "B"]:
    df.loc[df.sample(frac=0.03).index, col] = np.nan

for col in ["H", "S", "L"]:
    df.loc[df.sample(frac=0.05).index, col] = np.nan

df.loc[df.sample(frac=0.04).index, "Brightness"] = np.nan

# Save
df.to_csv("color_dataset.csv", index=False)
