In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning so cell output stays clean.
# NOTE(review): this also hides deprecation/future warnings raised by the
# plotting and clustering libraries used below — consider a narrower filter.
warnings.filterwarnings('ignore')
# Apply seaborn's 'darkgrid' style to all figures drawn after this point.
sns.set_style('darkgrid')

In [None]:
# Read the market-segmentation CSV into a DataFrame and preview the first rows
csv_path = '../input/market-segmentationcsv/3.12. Example.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Count the missing entries in each column and show them as a one-column table
null_counts = data.isna().sum()
null_counts.to_frame('Null Values')

In [None]:
# Shape of the data as a (rows, columns) tuple
data.shape

In [None]:
# Descriptive statistics of the data, as produced by pandas' describe()
data.describe()

In [None]:
# Plotting distribution of data
# NOTE(review): no x/y variables are passed, so seaborn is left to decide how
# the frame's columns map onto the axes — confirm the figure shows the
# intended pair of variables.
sns.jointplot(data=data, s=200, palette='crest', height=7)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns
corr_matrix = data.corr()
corr_fig, corr_ax = plt.subplots(figsize=(5,4))
sns.heatmap(corr_matrix, annot=True, cbar=False, cmap='crest', annot_kws={'size':18}, ax=corr_ax)
plt.show()

In [None]:
# Pairwise relationships between all columns; kind='reg' requests
# regression-style panels and height=3 sets the size of each facet.
sns.pairplot(data, kind='reg', height=3)
plt.show()

In [None]:
# Four views of the per-column value distribution, drawn side by side.
# Each entry is (panel title, seaborn plotting function, extra keyword args).
panels = [
    ('BOX PLOT', sns.boxplot, {}),
    ('STRIP PLOT', sns.stripplot, {'s': 8}),
    ('SWARM PLOT', sns.swarmplot, {'s': 9}),
    ('VIOLIN PLOT', sns.violinplot, {}),
]
fig, ax = plt.subplots(1, 4, figsize=(16,5))
for axis, (title, plotter, extra) in zip(ax, panels):
    axis.set_title(title, fontsize=14)
    plotter(data=data, ax=axis, palette='crest', **extra)
plt.show()

# K-Means Clustering

In [None]:
# Importing Libraries
from sklearn.cluster import KMeans
from sklearn import metrics

In [None]:
# Feature matrix for clustering: all columns of the frame as a NumPy array
X = data.to_numpy()

In [None]:
# Fit K-Means for k = 1..10 and record the within-cluster sum of squares
# (the model's inertia_) for each k; the values feed the elbow curve.
wcss = []
for n_clusters in range(1, 11):
    model = KMeans(n_clusters=n_clusters, init='k-means++', random_state=0).fit(X)
    wcss.append(model.inertia_)
    print(f'WCSS {n_clusters}: {model.inertia_}')

In [None]:
# Elbow curve: WCSS plotted against the number of clusters tried (1..10)
elbow_fig, elbow_ax = plt.subplots(figsize=(10,5))
elbow_ax.plot(range(1,11), wcss, color='#2F4F4F')
elbow_ax.set_title('The Elbow Curve', fontsize=15)
elbow_ax.set_xlabel('Number of Clusters')
elbow_ax.set_ylabel('WCSS')
plt.show()

In [None]:
# Silhouette score for k = 2..5, rounded to three decimals, as a second
# criterion for choosing the number of clusters.
for n_clusters in range(2, 6):
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=0)
    y_means = kmeans.fit_predict(X)
    score = np.round(metrics.silhouette_score(X, y_means), decimals=3)
    print(f'Silhouette Score for n={n_clusters}: {score}')

In [None]:
# Fit the final K-Means model with two clusters and assign a cluster to each row of X.
# NOTE(review): k=2 is presumably chosen from the elbow/silhouette results
# above — confirm against those outputs.
kmeans = KMeans(n_clusters=2, init='k-means++', random_state=0)
y_means = kmeans.fit_predict(X)
# Display the cluster index predicted for every row
y_means

In [None]:
# Cluster Centers: coordinates of the centroid found for each cluster of the fitted model
kmeans.cluster_centers_

In [None]:
# Side-by-side comparison: raw points (left) and the same points coloured by
# their K-Means cluster, with centroids overlaid (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: unclustered data (first column on x, last column on y).
# The vectors are passed as x=/y= keywords: seaborn >= 0.12 raises a TypeError
# when they are given positionally.
ax[0].set_title('Unclustered Data', fontsize=15)
ax[0].set_xlabel('Satisfaction', fontsize=15)
ax[0].set_ylabel('Loyalty', fontsize=15)
sns.scatterplot(x=data.iloc[:,0], y=data.iloc[:,-1], ax=ax[0], s=150, color='#2F4F4F')

# Right panel: one scatter call per cluster so each gets its own colour and
# legend entry. Both axes are labelled so the panel can be read on its own.
ax[1].set_title('Clustered Data', fontsize=15)
ax[1].set_xlabel('Satisfaction', fontsize=15)
ax[1].set_ylabel('Loyalty', fontsize=15)
# Iterate over the fitted model's cluster count rather than a hard-coded 2,
# so the plot stays correct if the model above is refit with a different k.
for i in range(kmeans.n_clusters):
    in_cluster = y_means == i
    sns.scatterplot(x=X[in_cluster, 0], y=X[in_cluster, 1], s=150, label='Cluster '+str(i+1), ax=ax[1])
# Centroids drawn last so the star markers sit on top of the points.
sns.scatterplot(x=kmeans.cluster_centers_[:,0], y=kmeans.cluster_centers_[:,1], s=800, marker='*',
                color='purple', label='Centroids', ax=ax[1])
# plt.show() matches the other cells and avoids the non-GUI-backend warning
# that fig.show() triggers under the notebook's inline backend.
plt.show()

## Thank You!