In [3]:
!pip install opencv-python



In [5]:
!pip install pandas numpy



In [9]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pickle

# Path dataset
celebA_folder = "dataset/celebA/img"
faceshape_folder = "dataset/faceshape"
celebA_attr_file = "dataset/celebA/list_attr_celeba.csv"

# Fungsi untuk ekstraksi warna kulit menggunakan K-Means
def extract_skin_color(image_path, k=5):
    image = cv2.imread(image_path)
    
    if image is None:
        print(f"Gambar tidak ditemukan: {image_path}")
        return [0, 0, 0]  # Mengembalikan warna default (hitam)

    image = cv2.resize(image, (100, 100))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.reshape(-1, 3)

    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(image)
    dominant_color = kmeans.cluster_centers_[0]
    return dominant_color

# Ekstraksi Data dari CelebA
celebA_image_paths = [os.path.join(celebA_folder, img) for img in os.listdir(celebA_folder) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]

# Membaca file anotasi CelebA
celebA_attr = pd.read_csv(celebA_attr_file, sep=r'\s+', skiprows=1)
celebA_attr.reset_index(inplace=True)
celebA_attr.rename(columns={'index': 'image_id'}, inplace=True)

# Membersihkan nama kolom dari spasi atau karakter tersembunyi
celebA_attr.columns = celebA_attr.columns.str.strip()

# Memastikan kolom yang tersedia dalam file anotasi CelebA
print("\n Kolom yang tersedia dalam file anotasi CelebA:")
print(celebA_attr.columns)

# Validasi keberadaan kolom 'Pale_Skin'
if 'Pale_Skin' not in celebA_attr.columns:
    print("Kolom 'Pale_Skin' tidak ditemukan dalam file anotasi!")
    # Jika kolom 'Pale_Skin' tidak ada, kita bisa membuat kolom dummy
    celebA_attr['Pale_Skin'] = np.random.randint(0, 2, size=len(celebA_attr))  # Contoh: membuat kolom dummy
else:
    print("Kolom 'Pale_Skin' ditemukan dan siap digunakan!")

# Menambahkan padding nol pada image_id agar sesuai dengan format file gambar
celebA_attr['image_id'] = celebA_attr['image_id'].apply(lambda x: f"{int(x):06d}.jpg")

# Ekstraksi warna kulit dari gambar CelebA
valid_celebA_images = celebA_attr[celebA_attr['image_id'].isin(os.listdir(celebA_folder))]
celebA_skin_colors = [extract_skin_color(os.path.join(celebA_folder, img)) for img in valid_celebA_images['image_id']]

# Membuat DataFrame untuk CelebA
celebA_df = pd.DataFrame(celebA_skin_colors, columns=['R', 'G', 'B'])

# Jika tidak ada kolom 'Pale_Skin', tentukan 'Tone' berdasarkan nilai R (merah)
celebA_df['Tone'] = ['Light' if r > 150 else 'Medium' for r in celebA_df['R']]

# Ekstraksi Data dari FaceShape
faceshape_image_paths = []
faceshape_labels = []
valid_labels = ['Heart', 'Oblong', 'Oval', 'Round', 'Square']

for set_type in ['training_set', 'testing_set']:
    set_folder = os.path.join(faceshape_folder, set_type)
    if os.path.exists(set_folder):
        for label in os.listdir(set_folder):
            label_folder = os.path.join(set_folder, label)
            if os.path.isdir(label_folder) and label in valid_labels:
                for img in os.listdir(label_folder):
                    if img.lower().endswith(('.png', '.jpg', '.jpeg')):
                        img_path = os.path.join(label_folder, img)
                        faceshape_image_paths.append(img_path)
                        faceshape_labels.append(label)

# Ekstraksi warna kulit dari gambar FaceShape
faceshape_skin_colors = [extract_skin_color(img) for img in faceshape_image_paths]
faceshape_df = pd.DataFrame(faceshape_skin_colors, columns=['R', 'G', 'B'])
faceshape_df['Tone'] = faceshape_labels

# Menggabungkan kedua dataset
df = pd.concat([celebA_df, faceshape_df], ignore_index=True)

print("\n Contoh DataFrame:")
print(df.head())

# Validasi DataFrame sebelum pembagian dataset
if df.empty:
    print(" DataFrame kosong! Tidak dapat melanjutkan pelatihan model.")
else:
    # Pembagian dataset untuk pelatihan model
    X = df[['R', 'G', 'B']]
    y = df['Tone']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Pelatihan model Random Forest
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Evaluasi model
    y_pred = model.predict(X_test)
    print("\n Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))

    # Menyimpan model dalam format .pkl
    model_path = 'backend/model/skin_tone_model.pkl'
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    print(f"\n Model berhasil disimpan di: {model_path}")


 Kolom yang tersedia dalam file anotasi CelebA:
Index(['image_id', '000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,1,1,-1,1,-1,-1,1,-1,-1,1,-1,-1,-1,1,1,-1,1,-1,1,-1,-1,1'], dtype='object')
Kolom 'Pale_Skin' tidak ditemukan dalam file anotasi!


Premature end of JPEG file
Premature end of JPEG file



 Contoh DataFrame:
            R           G           B   Tone
0  251.694985  231.900075  200.387350  Light
1  185.025803  165.225917  158.207569  Light
2  156.688329  116.305886   89.495747  Light
3  170.353371  118.959383   91.560520  Light
4  232.305898  206.739753  193.077641  Light

 Laporan Klasifikasi:
              precision    recall  f1-score   support

       Heart       0.20      0.21      0.20       196
       Light       0.18      0.04      0.07        46
      Medium       0.00      0.00      0.00        10
      Oblong       0.19      0.17      0.18       210
        Oval       0.18      0.20      0.19       199
       Round       0.23      0.28      0.25       195
      Square       0.24      0.23      0.23       192

    accuracy                           0.21      1048
   macro avg       0.17      0.16      0.16      1048
weighted avg       0.20      0.21      0.20      1048


 Model berhasil disimpan di: backend/model/skin_tone_model.pkl
