In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler 
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score


In [None]:
# --- Load the dataset and select the two clustering features ----------------
Path = "spending_l9_dataset.csv"
Features = ["Income_$", "SpendingScore"]
df = pd.read_csv(Path)
x = df[Features].copy()  # work on a copy so imputation never touches df

# Fill gaps column by column with that column's median; columns that have no
# missing values are skipped and left exactly as loaded.
for col in Features:
    if not x[col].isna().any():
        continue
    x[col] = x[col].fillna(x[col].median())

# --- Standardise the features, then fit K-Means on the scaled matrix --------
K = 4
Scaler = StandardScaler()
x_sacler = Scaler.fit_transform(x)
km = KMeans(n_clusters=K, n_init="auto", random_state=42)
labels = km.fit_predict(x_sacler)
df["cluster"] = labels.astype(int)  # attach the assignment to the full frame

# --- Clustering quality metrics, computed on the scaled features ------------
sil = silhouette_score(x_sacler, labels)
dbi = davies_bouldin_score(x_sacler, labels)
print(
    "\n=== METRICS ===",
    f"Silhouette Score : {sil:.3f}",
    f"Davies–Bouldin   : {dbi:.3f} ",
    sep="\n",
)

# --- Cluster centers, mapped back through the scaler for readability --------
centers_scaled = km.cluster_centers_
centers_original = Scaler.inverse_transform(centers_scaled)
centers_df = pd.DataFrame(centers_original, columns=Features).rename_axis("cluster")

print("\n=== CLUSTER CENTERS (Original Units) ===")
print(centers_df.round(2))

# --- Spot-check: rows labelled 0, 1 and 2 with their assigned cluster -------
sample = [0, 1, 2]
sanity = df.loc[sample, [*Features, "cluster"]]
print("\n=== SANITY CHECK (3 Customers) ===")
print(sanity)

# --- Persist the labelled dataset (row index is not written) ----------------
OUT_PATH = "spending_labeled_clusters.csv"
df.to_csv(OUT_PATH, index=False)
print(f"\nSaved clustered dataset → {OUT_PATH}")



=== METRICS ===
Silhouette Score : 0.729
Davies–Bouldin   : 0.387 

=== CLUSTER CENTERS (Original Units) ===
         Income_$  SpendingScore
cluster                         
0           56.32          53.58
1           28.92          19.60
2           24.14          83.10
3           99.16          79.24

=== SANITY CHECK (3 Customers) ===
   Income_$  SpendingScore  cluster
0        33             78        2
1        25             87        2
2        24             88        2

Saved clustered dataset → spending_labeled_clusters.csv


