Gaussian Mixture Models (GMM)

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score

In [2]:
# The first CSV column has no header and holds 0, 1, 2, ... — it appears to be
# a row index that was written out with the data. Read it as the index so it
# does not show up as a spurious "Unnamed: 0" data column.
df = pd.read_csv('CleanDataLabelAndOneHot.csv', index_col=0)
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,SMOKE,CH2O,...,CAEC_Frequently,CAEC_Sometimes,CAEC_no,CALC_Frequently,CALC_Sometimes,CALC_no,MTRANS_Bike,MTRANS_Motorbike,MTRANS_Public_Transportation,MTRANS_Walking
0,0,21.0,1.62,64.0,1,0,2.0,3.0,0,2.0,...,0,1,0,0,0,1,0,0,1,0
1,0,21.0,1.52,56.0,1,0,3.0,3.0,1,3.0,...,0,1,0,0,1,0,0,0,1,0
2,1,23.0,1.8,77.0,1,0,2.0,3.0,0,2.0,...,0,1,0,1,0,0,0,0,1,0
3,1,27.0,1.8,87.0,0,0,3.0,3.0,0,2.0,...,0,1,0,1,0,0,0,0,0,1
4,1,22.0,1.78,89.8,0,0,2.0,1.0,0,2.0,...,0,1,0,0,1,0,0,0,1,0


In [3]:
# Columns used as clustering inputs, assembled group by group.
# Order matters: it is the column order of the scaled matrix and the row
# order of the cluster-profile table.
features = (
    ['Weight', 'Height']                                          # Physical
    + ['family_history_with_overweight']                          # History
    + ['FAVC', 'CAEC_Sometimes', 'CAEC_no', 'CAEC_Frequently']    # Diet Habits
    + ['FAF']                                                     # Activity
    + ['NCP', 'SMOKE', 'SCC']                                     # Lifestyle
)

In [4]:
# 3. Scale (MinMax)
# Learn per-column min/max on the selected feature columns, then rescale those
# same columns. `scaler` is kept so the transform can be reused or inverted.
scaler = MinMaxScaler()
scaler.fit(df[features])
X_scaled = scaler.transform(df[features])

In [13]:
# 4. Train GMM (n=4 components)
# random_state is pinned so repeated runs start from the same initialisation.
gmm = GaussianMixture(
    n_components=4,
    random_state=42,
)
# Fit the mixture on the scaled features and get one component label per row.
labels = gmm.fit_predict(X_scaled)

In [14]:
# 5. Evaluate
# Silhouette score of the GMM label assignment, computed on the same scaled
# feature matrix the model was fitted on.
score = silhouette_score(X_scaled, labels)
# Take the component count from the fitted model instead of repeating the
# literal, so the message cannot drift if n_components is changed in the
# training cell.
print(f"GMM Silhouette Score (n={gmm.n_components}): {score:.4f}")

GMM Silhouette Score (n=4): 0.4704


In [15]:
# 6. Profile Clusters
# Attach each row's cluster label to the frame so rows can be grouped by it.
df['Cluster_GMM'] = labels
print("\n--- GMM Cluster Profiles ---")
# Mean of every input feature within each cluster, transposed so features run
# down the rows and clusters across the columns.
print(
    df.groupby(by='Cluster_GMM')[features]
    .mean()
    .transpose()
)


--- GMM Cluster Profiles ---
Cluster_GMM                             0          1          2          3
Weight                          92.245424  58.455132  66.768832  63.681398
Height                           1.708499   1.663481   1.688041   1.682715
family_history_with_overweight   0.896104   0.000000   1.000000   0.622881
FAVC                             0.938017   0.000000   0.000000   1.000000
CAEC_Sometimes                   1.000000   0.406250   0.236842   0.000000
CAEC_no                          0.000000   0.031250   0.157895   0.118644
CAEC_Frequently                  0.000000   0.489583   0.473684   0.711864
FAF                              0.985360   1.304552   1.488130   1.048362
NCP                              2.689936   2.763105   2.918626   2.712385
SMOKE                            0.013577   0.000000   0.157895   0.046610
SCC                              0.024203   0.229167   0.368421   0.072034
