In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# Customer segmentation: K-Means on annual income vs. spending score,
# visualised as an interactive Plotly scatter plot.

# Load dataset (Mall Customers Data)
df = pd.read_csv('Mall_Customers.csv')

# Display first few rows and the overall size. A bare `df.shape` in the middle
# of a cell is evaluated and discarded, so print it explicitly.
print(df.head())
print(df.shape)

# Selecting relevant features: "Annual Income (k$)" & "Spending Score (1-100)"
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]

# Standardise so both features contribute equally to the Euclidean distances
# that K-Means minimises.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply K-Means Clustering.
# NOTE(review): k=5 is said to come from the Elbow Method, but that analysis
# is not part of this cell -- confirm before changing the feature set.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
df['Cluster'] = kmeans.fit_predict(X_scaled)

# Project onto principal components for plotting. The input already has
# exactly two features, so this does not reduce dimensionality; it only
# rotates the axes onto the directions of greatest variance.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Convert to DataFrame for visualization
df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
df_pca['Cluster'] = df['Cluster']
df_pca['CustomerID'] = df['CustomerID']
# String copy of the cluster id so Plotly treats it as a discrete category.
# Keeping it as a real column (rather than passing an ad-hoc Series to
# `color=`) lets the `labels` mapping below rename the legend title.
df_pca['Segment'] = df_pca['Cluster'].astype(str)

# Interactive Scatter Plot using Plotly
fig = px.scatter(
    df_pca,
    x="PCA1",
    y="PCA2",
    color="Segment",
    title="K-Means Clustering on Mall Customers Dataset",
    labels={
        "Segment": "Customer Segment",
        "PCA1": "Principal Component 1",
        "PCA2": "Principal Component 2",
    },
    # List legend entries in numeric cluster order, not order of appearance.
    category_orders={"Segment": sorted(df_pca["Segment"].unique(), key=int)},
    hover_data=["CustomerID"],
)
fig.show()

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40
