# Load Libraries

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer

from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import Birch
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import MeanShift
from sklearn.cluster import SpectralClustering
from sklearn.cluster import OPTICS
from sklearn.mixture import GaussianMixture

from sklearn.metrics import silhouette_score, davies_bouldin_score

import warnings
warnings.filterwarnings("ignore")

# Load Data

In [None]:
# Read the customer CSV and work on a copy so `data` keeps the table as loaded.
data = pd.read_csv("/mnt/hdd/Datasets/Mall_Customers.csv")
df = data.copy()
# Preview the first rows.
df.head()

In [None]:
# Drop the first three columns by position; every model below is fitted on what remains.
# NOTE(review): presumably this leaves only the income and spending-score
# columns that plot_clustering reads - confirm against the preview.
df = df.iloc[:, 3:]
df.head()

# Functions

In [None]:
def plot_clustering(labels, centers, title):
    """Scatter the customers coloured by cluster label, with cluster centers overlaid.

    Reads the module-level ``df`` for the point coordinates.

    Args:
        labels: One cluster label per row of ``df``; used as the colour values.
        centers: 2-D array whose columns 0 and 1 are drawn as the x and y
            positions of the red center markers.
        title: Text placed above the plot.
    """
    _, ax = plt.subplots(figsize=(12, 7))

    income = df["Annual Income (k$)"]
    spending = df["Spending Score (1-100)"]
    sns.scatterplot(x=income, y=spending, c=labels, cmap="viridis", s=100, ax=ax)

    # Centers go on top as large red hexagons.
    center_x, center_y = centers[:, 0], centers[:, 1]
    sns.scatterplot(x=center_x, y=center_y, color="red", marker="h", s=300, ax=ax)

    ax.set_title(title)
    plt.show()

In [None]:
def plot_elbow(model, k):
    """Fit a Yellowbrick elbow visualizer for ``model`` on ``df`` and display it.

    Reads the module-level ``df`` as the training data.

    Args:
        model: Clustering estimator handed to ``KElbowVisualizer``.
        k: Cluster-count specification forwarded unchanged as the
            visualizer's ``k`` argument.
    """
    visualizer = KElbowVisualizer(model, k=k)
    visualizer.fit(df)
    # show() is the supported name; poof() is the deprecated alias.
    visualizer.show()

In [None]:
def plot_silhouette(model):
    """Fit a Yellowbrick silhouette visualizer for ``model`` on ``df`` and display it.

    Reads the module-level ``df`` as the training data.

    Args:
        model: Clustering estimator handed to ``SilhouetteVisualizer``.
    """
    visualizer = SilhouetteVisualizer(model)
    visualizer.fit(df)
    # show() is the supported name; poof() is the deprecated alias.
    visualizer.show()

# 1. K-Means

In [None]:
# Baseline: K-Means with two clusters, fitted on df.
kmeans = KMeans(n_clusters=2)
kmeans.fit(df)

In [None]:
# Keep the fitted labels and cluster centers for the scoring and plotting cells.
kmeans_labels = kmeans.labels_
kmeans_centers = kmeans.cluster_centers_

In [None]:
# Score the two-cluster K-Means labelling on df with both metrics.
kmeans_sil = silhouette_score(df, kmeans_labels)
kmeans_dav = davies_bouldin_score(df, kmeans_labels)

print("Silhouette Score:", kmeans_sil)
print("Davies Bouldin Score:", kmeans_dav)

In [None]:
# Points coloured by K-Means label, with the two cluster centers overlaid.
plot_clustering(kmeans_labels, kmeans_centers, "K-Means(2)")

In [None]:
# Elbow plot for K-Means over the k specification (1, 15).
plot_elbow(KMeans(), k=(1, 15))

In [None]:
# Refit K-Means with five clusters.
# NOTE(review): presumably the value read off the elbow plot - confirm.
kmeans = KMeans(n_clusters=5)
kmeans.fit(df)

In [None]:
# Overwrite the stored labels and centers with the five-cluster fit.
kmeans_labels = kmeans.labels_
kmeans_centers = kmeans.cluster_centers_

In [None]:
# Score the five-cluster K-Means labelling on df with both metrics.
kmeans_sil = silhouette_score(df, kmeans_labels)
kmeans_dav = davies_bouldin_score(df, kmeans_labels)

print("Silhouette Score:", kmeans_sil)
print("Davies Bouldin Score:", kmeans_dav)

In [None]:
# Points coloured by K-Means label, with the five cluster centers overlaid.
plot_clustering(kmeans_labels, kmeans_centers, "K-Means(5)")

# 2. DBSCAN

In [None]:
# Plain array of the first two df columns; the per-cluster scatter plots below index into it.
X = df.iloc[:, [0,1]].values

In [None]:
# Fit DBSCAN on X and keep the per-row labels in yhat.
# NOTE(review): eps=0.5 is applied to X as-is (no scaling step is visible) -
# confirm the radius suits the feature units.
dbscan = DBSCAN(eps=0.5, min_samples=5)
yhat = dbscan.fit_predict(X)

In [None]:
# Draw every DBSCAN label with its own scatter call so each label takes the
# next colour from matplotlib's colour cycle.
clusters = np.unique(yhat)
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("DBSCAN(eps=0.5, min_samples=5)")
plt.show()

# 3. Agglomerative Clustering

In [None]:
# Agglomerative clustering with two clusters, fitted on X; yhat holds the per-row labels.
agg = AgglomerativeClustering(n_clusters=2)
yhat = agg.fit_predict(X)

In [None]:
# Draw every agglomerative label with its own scatter call so each label takes
# the next colour from matplotlib's colour cycle.
clusters = np.unique(yhat)
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("AgglomerativeClustering(n_clusters=2)")
plt.show()

In [None]:
# Score the two-cluster agglomerative labelling against df with both metrics.
agg_labels = agg.labels_
agg_sil = silhouette_score(df, agg_labels)
agg_dav = davies_bouldin_score(df, agg_labels)

print("Silhouette Score:", agg_sil)
print("Davies Bouldin Score:", agg_dav)

In [None]:
# Elbow plot for agglomerative clustering over the k specification (1, 15).
plot_elbow(AgglomerativeClustering(), k=(1, 15))

In [None]:
# Refit agglomerative clustering with five clusters on X.
agg = AgglomerativeClustering(n_clusters=5)
yhat = agg.fit_predict(X)

In [None]:
# Draw every agglomerative label with its own scatter call so each label takes
# the next colour from matplotlib's colour cycle.
clusters = np.unique(yhat)
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("AgglomerativeClustering(n_clusters=5)")
plt.show()

In [None]:
# Score the five-cluster agglomerative labelling against df with both metrics.
agg_labels = agg.labels_
agg_sil = silhouette_score(df, agg_labels)
agg_dav = davies_bouldin_score(df, agg_labels)

print("Silhouette Score:", agg_sil)
print("Davies Bouldin Score:", agg_dav)

# 4. Affinity Propagation

In [None]:
# Affinity propagation with damping 0.8, fitted on df; yhat holds the per-row labels.
ap = AffinityPropagation(damping=0.8)
yhat = ap.fit_predict(df)

In [None]:
# Distinct labels found, plus the fitted labels and cluster centers for plotting and scoring.
clusters = np.unique(yhat)
ap_labels = ap.labels_
ap_centers = ap.cluster_centers_

In [None]:
# Points coloured by affinity-propagation label, with the cluster centers overlaid.
plot_clustering(ap_labels, ap_centers, "AffinityPropagation(damping=0.8)")

In [None]:
# Score the damping=0.8 labelling on df with both metrics.
ap_sil = silhouette_score(df, ap_labels)
ap_dav = davies_bouldin_score(df, ap_labels)

print("Silhouette Score:", ap_sil)
print("Davies Bouldin Score:", ap_dav)

In [None]:
# Grid-search the damping factor over np.arange(0.5, 1.0, 0.1), keeping the
# value whose labelling has the highest silhouette score on df.
best_damping = 0.5
best_silhouette_score = -1

for damping in np.arange(0.5, 1.0, 0.1):
    affinity_propagation = AffinityPropagation(damping=damping)
    affinity_propagation.fit(df)
    labels = affinity_propagation.labels_

    # silhouette_score only accepts between 2 and n_samples - 1 distinct
    # labels; skip a candidate outside that range instead of aborting the search.
    n_found = np.unique(labels).size
    if not 2 <= n_found < len(labels):
        continue

    # Use the directly imported function; no `metrics` module is imported in this file.
    silhouette = silhouette_score(df, labels, metric='euclidean')

    # Strict '>' keeps the first damping value on ties.
    if silhouette > best_silhouette_score:
        best_silhouette_score = silhouette
        best_damping = damping

print("En iyi damping değeri:", best_damping)

In [None]:
# Refit affinity propagation with damping 0.9.
# NOTE(review): the value is hard-coded rather than read from best_damping -
# confirm it matches what the search cell printed.
ap = AffinityPropagation(damping=0.9)
yhat = ap.fit_predict(df)

In [None]:
# Distinct labels found, plus the labels and cluster centers of the damping=0.9 fit.
clusters = np.unique(yhat)
ap_labels = ap.labels_
ap_centers = ap.cluster_centers_

In [None]:
# Points coloured by affinity-propagation label, with the cluster centers overlaid.
plot_clustering(ap_labels, ap_centers, "AffinityPropagation(damping=0.9)")

In [None]:
# Score the damping=0.9 labelling on df with both metrics.
ap_sil = silhouette_score(df, ap_labels)
ap_dav = davies_bouldin_score(df, ap_labels)

print("Silhouette Score:", ap_sil)
print("Davies Bouldin Score:", ap_dav)

# 5. BIRCH

In [None]:
# BIRCH with threshold 0.01 and two clusters, fitted on df; yhat holds the per-row labels.
birch = Birch(threshold=0.01, n_clusters=2)
yhat = birch.fit_predict(df)

In [None]:
# Distinct labels for the plotting loop, and the fitted labels for scoring.
clusters = np.unique(yhat)
birch_labels = birch.labels_

In [None]:
# Draw every BIRCH label in `clusters` with its own scatter call so each label
# takes the next colour from matplotlib's colour cycle.
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("Birch(threshold=0.01, n_clusters=2)")
plt.show()

In [None]:
# Score the two-cluster BIRCH labelling on df with both metrics.
birch_sil = silhouette_score(df, birch_labels)
birch_dav = davies_bouldin_score(df, birch_labels)

print("Silhouette Score:", birch_sil)
print("Davies Bouldin Score:", birch_dav)

In [None]:
# Elbow plot for BIRCH (default constructor arguments) over the k specification (1, 15).
plot_elbow(Birch(), k=(1, 15))

In [None]:
# Refit BIRCH with five clusters, threshold unchanged at 0.01.
birch = Birch(threshold=0.01, n_clusters=5)
yhat = birch.fit_predict(df)

In [None]:
# Distinct labels for the plotting loop, and the fitted labels for scoring.
clusters = np.unique(yhat)
birch_labels = birch.labels_

In [None]:
# Draw every BIRCH label in `clusters` with its own scatter call so each label
# takes the next colour from matplotlib's colour cycle.
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("Birch(threshold=0.01, n_clusters=5)")
plt.show()

In [None]:
# Score the five-cluster BIRCH labelling (threshold 0.01) on df with both metrics.
birch_sil = silhouette_score(df, birch_labels)
birch_dav = davies_bouldin_score(df, birch_labels)

print("Silhouette Score:", birch_sil)
print("Davies Bouldin Score:", birch_dav)

In [None]:
# Grid-search the BIRCH threshold over np.arange(0.5, 1.0, 0.1) with five
# clusters, keeping the value with the highest silhouette score on df.
best_threshold = 0.5
best_silhouette_score = -1

for threshold in np.arange(0.5, 1.0, 0.1):
    # Note: rebinds the module-level `birch` to each candidate model in turn.
    birch = Birch(n_clusters=5, threshold=threshold)
    birch.fit(df)
    labels = birch.labels_
    silhouette = silhouette_score(df, labels)

    # Strict '>' keeps the first threshold on ties.
    if silhouette > best_silhouette_score:
        best_silhouette_score = silhouette
        best_threshold = threshold

print("Best threshold:", best_threshold)

In [None]:
# Refit BIRCH with threshold 0.5 and five clusters.
# NOTE(review): the value is hard-coded rather than read from best_threshold -
# confirm it matches what the search cell printed.
birch = Birch(threshold=0.5, n_clusters=5)
yhat = birch.fit_predict(df)

In [None]:
# Distinct labels for the plotting loop, and the fitted labels for scoring.
clusters = np.unique(yhat)
birch_labels = birch.labels_

In [None]:
# Draw every BIRCH label in `clusters` with its own scatter call so each label
# takes the next colour from matplotlib's colour cycle.
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("Birch(threshold=0.5, n_clusters=5)")
plt.show()

In [None]:
# Score the threshold=0.5, five-cluster BIRCH labelling on df with both metrics.
birch_sil = silhouette_score(df, birch_labels)
birch_dav = davies_bouldin_score(df, birch_labels)

print("Silhouette Score:", birch_sil)
print("Davies Bouldin Score:", birch_dav)

# 6. Mini Batch K-Means

In [None]:
# Mini-batch K-Means with two clusters, fitted on df; yhat holds the per-row labels.
mini = MiniBatchKMeans(n_clusters=2)
yhat = mini.fit_predict(df)

In [None]:
# Fitted labels and cluster centers (despite its name, mini_clusters holds the centers).
mini_labels = mini.labels_
mini_clusters = mini.cluster_centers_

In [None]:
# Points coloured by mini-batch K-Means label, with the cluster centers overlaid.
plot_clustering(mini_labels, mini_clusters, "MiniBatchKMeans(n_clusters=2)")

In [None]:
# Score the two-cluster mini-batch K-Means labelling on df with both metrics.
mini_sil = silhouette_score(df, mini_labels)
mini_dav = davies_bouldin_score(df, mini_labels)

print("Silhouette Score:", mini_sil)
print("Davies Bouldin Score:", mini_dav)

In [None]:
# Elbow plot for mini-batch K-Means over the k specification (1, 15).
plot_elbow(MiniBatchKMeans(), k=(1, 15))

In [None]:
# Refit mini-batch K-Means with five clusters.
mini = MiniBatchKMeans(n_clusters=5)
yhat = mini.fit_predict(df)

In [None]:
# Labels and cluster centers of the five-cluster fit (mini_clusters holds the centers).
mini_labels = mini.labels_
mini_clusters = mini.cluster_centers_

In [None]:
# Points coloured by mini-batch K-Means label, with the five cluster centers overlaid.
plot_clustering(mini_labels, mini_clusters, "MiniBatchKMeans(n_clusters=5)")

In [None]:
# Score the five-cluster mini-batch K-Means labelling on df with both metrics.
mini_sil = silhouette_score(df, mini_labels)
mini_dav = davies_bouldin_score(df, mini_labels)

print("Silhouette Score:", mini_sil)
print("Davies Bouldin Score:", mini_dav)

# 7. Mean Shift

In [None]:
# Mean shift with bandwidth 10, fitted on df; yhat holds the per-row labels.
ms = MeanShift(bandwidth=10)
yhat = ms.fit_predict(df)

In [None]:
# Fitted labels and cluster centers (despite its name, ms_clusters holds the centers).
ms_labels = ms.labels_
ms_clusters = ms.cluster_centers_

In [None]:
# Points coloured by mean-shift label, with the cluster centers overlaid.
plot_clustering(ms_labels, ms_clusters, "MeanShift(bandwidth=10)")

In [None]:
# Score the bandwidth=10 mean-shift labelling on df with both metrics.
ms_sil = silhouette_score(df, ms_labels)
ms_dav = davies_bouldin_score(df, ms_labels)

print("Silhouette Score:", ms_sil)
print("Davies Bouldin Score:", ms_dav)

In [None]:
# Grid-search the mean-shift bandwidth over np.arange(5, 30, 5), keeping the
# value whose labelling has the highest silhouette score on df.
best_bandwidth = 5
best_silhouette_score = -1

for bandwidth in np.arange(5, 30, 5):
    meanshift = MeanShift(bandwidth=bandwidth)
    # fit() instead of fit_predict(): the labels are read from labels_ below,
    # and the shared `yhat` is left untouched, as in the other search cells.
    meanshift.fit(df)

    labels = meanshift.labels_
    # silhouette_score only accepts between 2 and n_samples - 1 distinct
    # labels; a bandwidth that merges everything into one cluster is skipped
    # instead of aborting the search.
    n_found = np.unique(labels).size
    if not 2 <= n_found < len(labels):
        continue

    silhouette = silhouette_score(df, labels)
    # Strict '>' keeps the first bandwidth on ties.
    if silhouette > best_silhouette_score:
        best_silhouette_score = silhouette
        best_bandwidth = bandwidth

print("Best bandwidth:", best_bandwidth)

In [None]:
# Refit mean shift with bandwidth 25.
# NOTE(review): the value is hard-coded rather than read from best_bandwidth -
# confirm it matches what the search cell printed.
ms = MeanShift(bandwidth=25)
yhat = ms.fit_predict(df)

In [None]:
# Labels and cluster centers of the bandwidth=25 fit (ms_clusters holds the centers).
ms_labels = ms.labels_
ms_clusters = ms.cluster_centers_

In [None]:
# Points coloured by mean-shift label, with the cluster centers overlaid.
plot_clustering(ms_labels, ms_clusters, "MeanShift(bandwidth=25)")

In [None]:
# Score the bandwidth=25 mean-shift labelling on df with both metrics.
ms_sil = silhouette_score(df, ms_labels)
ms_dav = davies_bouldin_score(df, ms_labels)

print("Silhouette Score:", ms_sil)
print("Davies Bouldin Score:", ms_dav)

# 8. Spectral Clustering

In [None]:
# Spectral clustering with two clusters, fitted on df; yhat holds the per-row labels.
sc = SpectralClustering(n_clusters=2)
yhat = sc.fit_predict(df)

In [None]:
# Fitted labels for scoring, and the distinct labels for the plotting loop.
sc_labels = sc.labels_
clusters = np.unique(yhat)

In [None]:
# Draw every spectral-clustering label in `clusters` with its own scatter call
# so each label takes the next colour from matplotlib's colour cycle.
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("SpectralClustering(n_clusters=2)")
plt.show()

In [None]:
# Score the two-cluster spectral labelling on df with both metrics.
sc_sil = silhouette_score(df, sc_labels)
sc_dav = davies_bouldin_score(df, sc_labels)

print("Silhouette Score:", sc_sil)
print("Davies Bouldin Score:", sc_dav)

# 9. OPTICS

In [None]:
# Fit OPTICS on df and keep the per-row labels in yhat.
# NOTE(review): eps=0.5 is applied to df as-is (no scaling step is visible) -
# confirm the value suits the feature units and has the intended effect.
op = OPTICS(eps=0.5, min_samples=10)
yhat = op.fit_predict(df)

In [None]:
# Fitted OPTICS labels, kept for the scoring cell.
op_labels = op.labels_

In [None]:
# Recompute the label set from the OPTICS prediction. Without this the loop
# iterates whatever `clusters` an earlier cell left behind, so any OPTICS label
# missing from that stale set would never be drawn.
clusters = np.unique(yhat)
for cluster in clusters:
    # Boolean row mask for this label (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

# The title states min_samples=10, the value the OPTICS model is constructed with.
plt.title("OPTICS(eps=0.5, min_samples=10)")
plt.show()

In [None]:
# Score the OPTICS labelling on df with both metrics.
op_sil = silhouette_score(df, op_labels)
op_dav = davies_bouldin_score(df, op_labels)

print("Silhouette Score:", op_sil)
print("Davies Bouldin Score:", op_dav)

# 10. Gaussian Mixture

In [None]:
# Gaussian mixture with five components, fitted on df; yhat holds the predicted component per row.
gb = GaussianMixture(n_components=5)
yhat = gb.fit_predict(df)

In [None]:
# Draw every mixture component with its own scatter call so each component
# takes the next colour from matplotlib's colour cycle.
clusters = np.unique(yhat)
for cluster in clusters:
    # Boolean row mask for this component (keeps the plotted arrays 1-D).
    idx = yhat == cluster
    # No `cmap`: without a `c` array matplotlib has nothing to colour-map.
    plt.scatter(X[idx, 0], X[idx, 1], s=100)

plt.title("GaussianMixture(n_components=5)")
plt.show()