In [1]:
# Week 8: K-Means Clustering (Mall Customers Dataset)

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read the customer table from the working directory.
df = pd.read_csv("Mall_Customers.csv")

# Keep only the two columns that drive the segmentation.
X = df.loc[:, ["Annual Income (k$)", "Spending Score (1-100)"]]

# Fit the scaler once and keep it around: it is needed later to map the
# centroids back to the original units.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Cluster in the standardised space; the fixed seed keeps runs repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X_scaled)

# Attach each customer's cluster id to the original table.
df["Cluster"] = kmeans.labels_

# Plot clusters and centroids (in the standardised feature space).
# The number of clusters is read from the fitted model rather than repeated
# as a literal, so the figure stays complete if n_clusters is changed.
cluster_labels = df["Cluster"].to_numpy()  # plain ndarray for NumPy boolean masking
centroids = kmeans.cluster_centers_

fig, ax = plt.subplots()
for i in range(kmeans.n_clusters):
    in_cluster = cluster_labels == i
    ax.scatter(
        X_scaled[in_cluster, 0],
        X_scaled[in_cluster, 1],
        label=f"Cluster {i}",
    )

# Centroids are drawn last and in a fixed colour so they sit on top of the
# points and cannot be confused with any single cluster's colour.
ax.scatter(
    centroids[:, 0],
    centroids[:, 1],
    marker="X",
    s=200,
    color="black",
    label="Centroids",
)

ax.set_xlabel("Annual Income (Scaled)")
ax.set_ylabel("Spending Score (Scaled)")
ax.set_title("K-Means Clustering of Mall Customers")
ax.legend()  # ties the colours to the cluster ids printed below
plt.show()

# Report the cluster centres in the original feature units by undoing the
# standardisation applied before clustering.
original_centroids = scaler.inverse_transform(centroids)

# One summary line per centre; the row index is the cluster id.
for i in range(len(original_centroids)):
    center = original_centroids[i]
    print(f"Cluster {i}: Income = {center[0]:.2f}, Spending = {center[1]:.2f}")


KeyboardInterrupt: 