In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation/future warnings
# raised by the plotting and ML libraries — consider narrowing it.
warnings.filterwarnings('ignore')
# Apply seaborn's 'darkgrid' style to all figures drawn after this point.
sns.set_style('darkgrid')

In [None]:
# Loading Data
# Read the CSV into a DataFrame; the path is relative to the notebook's
# working directory.
data = pd.read_csv('../input/clustering/musteriler.csv')
# Bare last expression: the notebook renders the frame as the cell output.
data

# Data Exploration

In [None]:
# Promote the 'No' column to the row index.
# The guard keeps this cell idempotent: after the first run 'No' is the index
# and no longer a column, so an unguarded re-run would raise KeyError.
if 'No' in data.columns:
    data = data.set_index('No')
# Preview the first rows of the re-indexed frame.
data.head()

In [None]:
# Structure of data: dimensions of the frame as a (rows, columns) tuple.
data.shape

In [None]:
# Data Statistics: per-column summary produced by pandas' describe()
# with its default settings.
data.describe()

In [None]:
# Count the missing entries in every column, then present the counts as a
# single-column table labelled 'Null Values'.
null_counts = data.isna().sum()
null_counts.to_frame(name='Null Values')

In [None]:
# Pairwise correlation between the numeric columns.
# Non-numeric columns are dropped explicitly: pandas >= 2.0 no longer skips
# them silently inside DataFrame.corr() and raises on values it cannot
# convert to float. On an all-numeric frame the result is unchanged.
data.select_dtypes(include='number').corr()

In [None]:
# Joint plot of the frame on a 7-inch figure with large (s=150),
# semi-transparent markers.
# NOTE(review): no x/y variables are named, so seaborn decides what goes on
# each axis from `data` itself — confirm this is the intended view.
sns.jointplot(data=data, s=150, alpha=0.7, height=7)
plt.show()

In [None]:
# Annotated heatmap of the correlation matrix (colour bar hidden because the
# cell annotations already show the values).
plt.figure(figsize=(5,4))
# Restrict to numeric columns first: pandas >= 2.0 raises inside corr() when
# the frame still contains non-numeric columns instead of skipping them.
sns.heatmap(data.select_dtypes(include='number').corr(), annot=True, cbar=False)
plt.show()

In [None]:
# Pairwise relationship grid for the frame; kind='reg' draws a fitted
# regression line on each off-diagonal panel, and height=3 sets the size of
# each facet in inches.
sns.pairplot(data, kind='reg', height=3)
plt.show()

In [None]:
# Keep only the columns from position 2 onward; `ndata` is reused later as
# the clustering input.
ndata = data.iloc[:, 2:]

# Draw each column's distribution as a white violin and overlay the
# individual observations as a swarm on the same axes.
fig, ax = plt.subplots(figsize=(9, 9))
sns.violinplot(data=ndata, color='white', linewidth=3, ax=ax)
sns.swarmplot(data=ndata, s=8, alpha=0.7, ax=ax)
plt.show()

# K-Means Clustering

In [None]:
from sklearn.cluster import KMeans

# Preparing Data
# Feature matrix as a plain NumPy array: one row per observation, one column
# per column of `ndata`.
X = ndata.values

In [None]:
# Elbow method: fit K-Means for k = 1..10 and record the within-cluster sum
# of squares (the fitted model's `inertia_`) for each k.
wcss = []
for i in range(1,11):
    # n_init is pinned explicitly: scikit-learn 1.4 changed the default from
    # 10 restarts to 'auto' (a single run for k-means++), which would make the
    # recorded WCSS values depend on the installed library version.
    kmeans_c = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=0)
    kmeans_c.fit(X)
    wcss.append(kmeans_c.inertia_)
    print(f'wcss {i}: {kmeans_c.inertia_}')

In [None]:
# Elbow curve: WCSS plotted against the number of clusters tried (1 to 10).
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(range(1, 11), wcss)
ax.set(
    title='The Elbow Curve',
    xlabel='Number of Clusters',
    ylabel='WCSS',
)
plt.show()

* Number of clusters chosen from the elbow curve above: 4

In [None]:
# Fit the final 4-cluster model and obtain one cluster label per row of X.
# n_init is set explicitly so the result does not depend on the scikit-learn
# version (the default changed from 10 restarts to 'auto' in 1.4).
kmeans = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=0)
y_means = kmeans.fit_predict(X)
# Display the array of cluster labels as the cell output.
y_means

In [None]:
# Coordinates of the fitted cluster centres: one row per cluster, one column
# per feature of X.
kmeans.cluster_centers_

# Visualization

In [None]:
# Side-by-side comparison: the raw points (left) and the same feature space
# coloured by K-Means cluster, with the centroids marked (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: the data before any cluster labels are applied.
ax[0].set_title('Unclustered Data', fontsize=15)
ax[0].set_xlabel('Hacim', fontsize=12)
ax[0].set_ylabel('Mass', fontsize=12)
# x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
# positionally and raises TypeError on positional x/y vectors.
sns.scatterplot(x=data.iloc[:,2], y=data.iloc[:,-1], ax=ax[0], s=150, color='brown', alpha=0.7)

# Right panel: one scatter call per cluster so each gets its own colour and
# legend entry.
ax[1].set_title('Clustered Data', fontsize=15)
ax[1].set_xlabel('Hacim', fontsize=12)
# Same y-axis label as the left panel; the NumPy inputs carry no name, so the
# axis would otherwise be left unlabelled.
ax[1].set_ylabel('Mass', fontsize=12)
# Iterate over the fitted model's own cluster count instead of a literal 4 so
# the plot stays in sync if n_clusters is changed.
for i in range(kmeans.n_clusters):
    members = y_means == i
    sns.scatterplot(x=X[members, 0], y=X[members, 1], s=150, label='Cluster'+str(i+1), ax=ax[1])

# Mark the centroids with large purple stars on top of the clusters.
centers = kmeans.cluster_centers_
sns.scatterplot(x=centers[:,0], y=centers[:,1], s=800, marker='*',
                color='purple', label='Centroids', ax=ax[1])

# plt.show() instead of fig.show(): the latter needs a GUI backend and only
# emits a warning under the notebook's inline backend.
plt.show()

## Thank You!