# K-Prototypes Clustering: Mencari K Terbaik (Elbow & Silhouette)
Notebook ini berisi perbaikan alur preprocessing dan loop otomatis untuk menentukan jumlah cluster (K) yang paling optimal.

In [72]:
import pandas as pd
import numpy as np
from kmodes.kprototypes import KPrototypes
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

In [73]:
df = pd.read_csv('DataCleanLabel.csv')

In [80]:
df['BMI'] = df['Weight'] / (df['Height'] ** 2)

features_selected = [
    'BMI', 'FAF', 'FCVC', # Numerik
    'family_history_with_overweight', 'CAEC' , 'FAVC' # Kategorik
]
data_kproto = df[features_selected].copy()


In [82]:
scaler = MinMaxScaler()
num_cols = ['BMI', 'FAF', 'FCVC'] 
data_kproto[num_cols] = scaler.fit_transform(data_kproto[num_cols])

cat_cols_index = [3, 4,5] # Index kolom kategorikal (0=BMI, 1=FAF, 2=FCVC, 3=History...)
data_matrix = data_kproto.values



In [83]:
kproto = KPrototypes(n_clusters=4, init='Cao', verbose=1)
clusters = kproto.fit_predict(data_matrix, categorical=cat_cols_index)

Initialization method and algorithm are deterministic. Setting n_init to 1.
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run: 1, iteration: 1/100, moves: 514, ncost: 334.0976654587752
Run: 1, iteration: 2/100, moves: 326, ncost: 297.5204009836637
Run: 1, iteration: 3/100, moves: 298, ncost: 284.9534936673045
Run: 1, iteration: 4/100, moves: 133, ncost: 277.8353172214227
Run: 1, iteration: 5/100, moves: 74, ncost: 275.62465030886557
Run: 1, iteration: 6/100, moves: 47, ncost: 274.3118462225146
Run: 1, iteration: 7/100, moves: 42, ncost: 273.1848106849919
Run: 1, iteration: 8/100, moves: 10, ncost: 273.16357400578704
Run: 1, iteration: 9/100, moves: 0, ncost: 273.16357400578704
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run: 2, iteration: 1/100, moves: 848, ncost: 307.4288048847685
Run: 2, iteration: 2/100, moves: 288, ncost: 291.178181512879
Run: 2, iteration: 3/100, moves: 291, ncost: 273.71818754973236
Run: 2, ite

In [85]:
df_eval = pd.get_dummies(data_kproto, columns=['family_history_with_overweight', 'CAEC'])
df_eval_scaled = MinMaxScaler().fit_transform(df_eval)

# Hitung Skor
score = silhouette_score(df_eval_scaled, clusters)
print(f"Silhouette Score (K-Prototypes): {score}")

Silhouette Score (K-Prototypes): 0.1437709854607381
