In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the customer-segmentation table; the path is relative to the notebook's working directory.
df = pd.read_csv("datasets/cust_segmentation.csv")

# Preview only the first rows (rich notebook display) instead of printing the entire frame.
df.head()

In [None]:
# Numerical features used for clustering. The column order here fixes the
# column indices of X (0 = Age, 1 = Edu, 2 = Years Employed, 3 = Income, ...).
FEATURE_COLUMNS = ["Age", "Edu", "Years Employed", "Income", "Card Debt", "Other Debt", "DebtIncomeRatio"]
features = df[FEATURE_COLUMNS]

# Standardize so every feature contributes on the same scale to the distances.
X = StandardScaler().fit_transform(features)


def kmedians_manhattan(X, n_clusters, random_state=None, n_init=10, max_iter=300):
    """Cluster the rows of ``X`` with k-medians, the Manhattan (L1) analogue of k-means.

    Points are assigned to the center with the smallest L1 distance, and each
    center is then moved to the coordinate-wise median of its members, which is
    the point minimizing the summed L1 distance to them.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster.
    n_clusters : int
        Number of clusters; must satisfy ``1 <= n_clusters <= n_samples``.
    random_state : int or None, default=None
        Seed for choosing the initial centers.
    n_init : int, default=10
        Number of random restarts; the run with the lowest total L1 cost wins.
    max_iter : int, default=300
        Maximum assignment/update rounds per restart.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Cluster index (``0 .. n_clusters - 1``) for every row of ``X``.

    Raises
    ------
    ValueError
        If ``n_clusters`` is outside ``1 .. n_samples``.
    """
    X = np.asarray(X, dtype=float)
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(f"n_clusters must be between 1 and {n_samples}, got {n_clusters}")

    rng = np.random.default_rng(random_state)
    best_labels, best_cost = None, np.inf

    for _run in range(n_init):
        # Start from distinct data points; fancy indexing returns a copy we can update.
        centers = X[rng.choice(n_samples, size=n_clusters, replace=False)]

        for _step in range(max_iter):
            labels = pairwise_distances(X, centers, metric="manhattan").argmin(axis=1)
            new_centers = centers.copy()
            for k in range(n_clusters):
                members = X[labels == k]
                if len(members):  # an empty cluster keeps its previous center
                    new_centers[k] = np.median(members, axis=0)
            if np.array_equal(new_centers, centers):
                break  # converged: centers stopped moving
            centers = new_centers

        # Score the final centers so restarts are compared on a consistent footing.
        dists = pairwise_distances(X, centers, metric="manhattan")
        cost = dists.min(axis=1).sum()
        if cost < best_cost:
            best_cost, best_labels = cost, dists.argmin(axis=1)

    return best_labels


# NOTE(review): the two runs below use different cluster counts (7 vs 4), so
# their labels are not directly comparable -- confirm this is intended.

# K-Means (Euclidean Distance)
# n_init is set explicitly so the result does not depend on the installed
# scikit-learn version's default and matches the Manhattan run.
kmeans_euclidean = KMeans(n_clusters=7, random_state=42, n_init=10).fit_predict(X)

# K-Medians (Manhattan Distance)
# KMeans always minimizes squared Euclidean distance; handing it a precomputed
# distance matrix only makes it treat each row of distances as a feature
# vector, so the L1 clustering is done with k-medians instead.
distance_matrix = pairwise_distances(X, metric="manhattan")  # full pairwise L1 matrix; not used for the clustering
kmeans_manhattan = kmedians_manhattan(X, n_clusters=4, random_state=42, n_init=10)

In [None]:
# Attach each run's cluster assignment to the customer table as its own column.
df["Cluster_Euclidean"], df["Cluster_Manhattan"] = kmeans_euclidean, kmeans_manhattan

In [None]:
# 2D scatter of the Euclidean K-Means labels.
# Columns of X follow the order of `features`: 0 is Age, 3 is Income.

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X[:, 0], X[:, 3], c=kmeans_euclidean, cmap="rainbow")
ax.set(
    title="K-Means Clustering (Euclidean Distance) -2D",
    xlabel="Age (standardized)",
    ylabel="Income (standardized)",
)
plt.show()

In [None]:
# 3D Plot (Manhattan Distance)

# Look up column positions by name so the plotted data always matches the axis
# labels (positions 0, 1, 2 of X are Age, Edu and Years Employed, not the
# Age / Income / Card Debt shown on the axes).
age_idx, income_idx, card_debt_idx = (
    features.columns.get_loc(name) for name in ("Age", "Income", "Card Debt")
)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection="3d")
ax.scatter(
    X[:, age_idx],
    X[:, income_idx],
    X[:, card_debt_idx],
    c=kmeans_manhattan,
    cmap="rainbow",
)
ax.set_title("K-Means Clustering (Manhattan Distance)-3D")
# X holds standardized values, so the labels say so (consistent with the 2D plot).
ax.set_xlabel("Age (standardized)")
ax.set_ylabel("Income (standardized)")
ax.set_zlabel("Card Debt (standardized)")
plt.show()