In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import pickle
import os

In [2]:
# Read the mall-customer table, then keep only the two feature columns
# (annual income and spending score) that the rest of the notebook works on.
df = pd.read_csv("Mall_Customers.csv")
X = df.loc[:, ['Annual Income (k$)', 'Spending Score (1-100)']]

In [3]:
# ----- ELBOW METHOD -----
# Fit one KMeans model per candidate cluster count and record its inertia
# (within-cluster sum of squares, WCSS). The resulting curve is saved to
# static/elbow.png so the "elbow" can be read off when choosing k.
k_values = range(1, 11)  # single source of truth for both the sweep and the x-axis
wcss = []
for k in k_values:
    # A dedicated name keeps these throw-away fits from overwriting the
    # notebook-level `kmeans`, which is the model that gets pickled later.
    elbow_model = KMeans(n_clusters=k, random_state=42)
    elbow_model.fit(X)
    wcss.append(elbow_model.inertia_)

# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure (no-op when it already does).
os.makedirs("static", exist_ok=True)

fig, ax = plt.subplots()
ax.plot(k_values, wcss, marker='o')
ax.set_xlabel("Number of Clusters")
ax.set_ylabel("WCSS")
ax.set_title("Elbow Method")
fig.savefig("static/elbow.png")
plt.close(fig)  # release the figure; it is only needed on disk

In [4]:
# ----- FINAL MODEL -----
# Fit the 5-cluster model on X (fixed seed for repeatability) and take the
# per-row cluster assignments from the fitted estimator.
kmeans = KMeans(random_state=42, n_clusters=5).fit(X)
labels = kmeans.labels_


In [5]:
# ----- SILHOUETTE SCORE -----
# Evaluate the final clustering: silhouette score of X under the assigned labels.
sil_score = silhouette_score(X=X, labels=labels)

In [6]:
# ----- CLUSTER VISUALIZATION -----
# Scatter the customers (first feature column on x, second on y) coloured by
# their assigned cluster, overlay the fitted cluster centres as large red
# X markers, and write the figure to static/clusters.png.

# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure (no-op when it already does).
os.makedirs("static", exist_ok=True)

fig, ax = plt.subplots()
ax.scatter(X.iloc[:, 0], X.iloc[:, 1], c=labels, cmap="viridis")

centers = kmeans.cluster_centers_
ax.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X')

ax.set_xlabel("Annual Income (k$)")
ax.set_ylabel("Spending Score")
ax.set_title("Customer Segments")
fig.savefig("static/clusters.png")
plt.close(fig)  # release the figure; it is only needed on disk

In [7]:
# Persist the training artifacts as pickle files in the working directory:
# the fitted KMeans model and a small dict holding its silhouette score.
artifacts = (
    ("model.pkl", kmeans),
    ("metrics.pkl", {"silhouette": sil_score}),
)
for filename, payload in artifacts:
    with open(filename, "wb") as f:
        pickle.dump(payload, f)

# Report completion and the score that was just saved.
print("Training completed")
print("Silhouette Score:", sil_score)

Training completed
Silhouette Score: 0.553931997444648
