In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy import unique
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MeanShift
from sklearn.mixture import GaussianMixture
from sklearn import metrics
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the mall-customers CSV; column 0 of the file is used as the row index.
MALL_CUSTOMERS_CSV = '/kaggle/input/mall-customers/Mall_Customers.csv'
data = pd.read_csv(MALL_CUSTOMERS_CSV, index_col=0)
data.head()

In [None]:
# Remove the two columns that the clustering cells below do not use.
# Rebinding `data` (instead of two in-place drops) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# for columns that were already removed.
data = data.drop(columns=['Genre', 'Age'], errors='ignore')

data.head()

In [None]:
# Shuffle all rows. The fixed seed keeps the row order (and anything that
# depends on it downstream) identical across "Restart & Run All" executions.
data = data.sample(frac=1, random_state=42)
data.head()

In [None]:
# Pairwise relationships between every column left in `data`.
sns.pairplot(data=data)

In [None]:
# Unclustered view of the two features: annual income on x, spending score on y.
plt.figure(figsize=(10, 8))

income = data['Annual Income (k$)']
spending = data['Spending Score (1-100)']
plt.scatter(income, spending)

plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()

# **K-Means Clustering**

In [None]:
# Fit K-Means with 5 clusters on every column left in `data`.
# random_state pins the centroid initialisation so the labels, centres and
# silhouette score are reproducible; n_init is set explicitly because its
# default differs between scikit-learn releases.
k_means = KMeans(n_clusters=5, n_init=10, random_state=42)
k_means.fit(data)

In [None]:
# Distinct cluster ids assigned by the fitted K-Means model.
unique(k_means.labels_)

In [None]:
# Keep the fitted K-Means cluster centres under a short name; the plotting
# cell below reads columns 0 and 1 of this array as x and y coordinates.
centers = k_means.cluster_centers_

# Bare expression so the notebook displays the array.
centers

In [None]:
# Customers coloured by their K-Means label, with the cluster centres on top.
plt.figure(figsize=(7, 6))

income = data['Annual Income (k$)']
spending = data['Spending Score (1-100)']
plt.scatter(income, spending, s=100, c=k_means.labels_)

# Centres are drawn as yellow squares; columns 0 and 1 of `centers` are x and y.
center_x, center_y = centers[:, 0], centers[:, 1]
plt.scatter(center_x, center_y, s=200, marker='s', color='yellow')

plt.title('K-Means with 5 clusters')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')

plt.show()

In [None]:
# Silhouette score of the K-Means labelling, computed over all of `data`.
score = metrics.silhouette_score(X=data, labels=k_means.labels_)

print("Score = ", score)

# **Affinity Propagation**

In [None]:
# Fit Affinity Propagation on the same frame used for K-Means.
# random_state fixes the estimator's pseudo-random starting state so the
# resulting clusters (and the silhouette score computed from them) are
# reproducible between runs.
model_aff = AffinityPropagation(damping=0.9, random_state=42)
model_aff.fit(data)

# Cluster assignment for every row, and the distinct cluster ids that occur.
yhat_aff = model_aff.predict(data)
clusters_aff = unique(yhat_aff)
print("Clusters of Affinity Prop.",clusters_aff)

# Training-time labels and cluster centres, reused by the plot and score cells.
labels_aff = model_aff.labels_
centroids_aff = model_aff.cluster_centers_

In [None]:
# Customers coloured by Affinity Propagation label; cluster centres as red stars.
plt.figure(figsize=(7, 6))

plt.scatter(x=data['Annual Income (k$)'],
            y=data['Spending Score (1-100)'],
            s=100, c=labels_aff)

# Columns 0 and 1 of the centre array are used as x and y.
aff_center_x, aff_center_y = centroids_aff[:, 0], centroids_aff[:, 1]
plt.scatter(aff_center_x, aff_center_y, s=200, marker='*', color='red')

plt.title('Affinity Propagation')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.grid()
plt.show()

In [None]:
# Silhouette score of the Affinity Propagation labelling over all of `data`.
score_aff = metrics.silhouette_score(X=data, labels=labels_aff)

print("Score of Affinity Propagation = ", score_aff)

# **Mean Shift Algorithm**

In [None]:
# Mean Shift with a fixed bandwidth of 25; fit() returns the estimator itself,
# so construction and fitting are chained.
model_ms = MeanShift(bandwidth=25).fit(data)

# Training-time labels and cluster centres, reused by the plot and score cells.
labels_ms = model_ms.labels_
centroids_ms = model_ms.cluster_centers_

# Cluster assignment for every row, and the distinct cluster ids that occur.
yhat_ms = model_ms.predict(data)
clusters_ms = unique(yhat_ms)
print("Clusters of Mean Shift.",clusters_ms)

In [None]:
# Customers coloured by Mean Shift label; cluster centres as red stars.
plt.figure(figsize=(10, 8))

ms_income = data['Annual Income (k$)']
ms_spending = data['Spending Score (1-100)']
plt.scatter(ms_income, ms_spending, s=100, c=labels_ms)

# Columns 0 and 1 of the centre array are used as x and y.
plt.scatter(x=centroids_ms[:, 0], y=centroids_ms[:, 1],
            s=200, marker='*', color='red')

plt.title('Mean Shift')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.grid()
plt.show()

In [None]:
# Silhouette score of the Mean Shift labelling over all of `data`.
score_ms = metrics.silhouette_score(X=data, labels=labels_ms)

print("Score of Mean Shift = ", score_ms)

# **Gaussian Mixture Models**

In [None]:
from numpy import unique, where

# Take the first two columns of `data` by position and expose them as a plain
# NumPy array for the mixture model.
first_two_columns = data.iloc[:, [0, 1]]
data_X = first_two_columns.values

In [None]:
# Fit a 5-component Gaussian mixture. random_state pins the initialisation so
# the component assignment is reproducible between runs.
model_gb = GaussianMixture(n_components=5, random_state=42)
model_gb.fit(data_X)

# Predicted component for every row, and the distinct component ids that occur.
yhat_gb = model_gb.predict(data_X)
clusters_gb = unique(yhat_gb)

# One scatter call per component so each gets its own colour and legend entry.
# A boolean mask keeps the selected columns 1-D (indexing with the tuple
# returned by where() yields a (1, n) array instead).
plt.figure(figsize=(7, 6))
for cluster in clusters_gb:
    in_cluster = yhat_gb == cluster
    plt.scatter(data_X[in_cluster, 0], data_X[in_cluster, 1], s=100,
                label=f'Component {cluster}')

# Axis names mirror the other cluster plots; this assumes columns 0 and 1 of
# data_X are annual income and spending score — TODO confirm against `data`.
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.title('Gaussian Mixture with 5 components')
plt.legend()
plt.grid()
plt.show()