# Clustering

## K-Means Algorithm

# Importing the libraries and the data

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

# Directory the notebook is running from; the dataset path is built from it.
cwd = os.getcwd()

In [None]:
# Load the dataset from the current working directory
dataset_path = os.path.join(cwd, 'xclara.csv')
data = pd.read_csv(dataset_path)
# Report the frame's dimensions, then preview its first rows
print(data.shape)
data.head()

In [None]:
# Pull the two feature columns out as arrays and draw the raw points
x, y = data['V1'].values, data['V2'].values
plt.scatter(x, y, s=7, c='black')

# K-Means using Scikit Learn

In [None]:
from sklearn.cluster import KMeans

# Number of clusters
k = 3
# Stack the two coordinate arrays into a two-column feature matrix
X = np.column_stack((x, y))

# Fit the model on the input data
kmeans = KMeans(n_clusters=k)
kmeans.fit(X)
# Cluster index assigned to each sample
labels = kmeans.labels_
# Fitted centroid coordinates, one row per cluster
centroids = kmeans.cluster_centers_
print(centroids)

# Colour cycle indexed by cluster label
colors = ['r', 'g', 'b', 'y', 'c', 'm'] * 3
fig, ax = plt.subplots()
for i in range(k):
    # A boolean mask selects the rows of cluster i without a Python-level loop
    points = X[labels == i]
    ax.scatter(points[:, 0], points[:, 1], s=7, c=colors[i])
# Mark the centroids with large stars on top of the clustered points
ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='#050505')

In [None]:
# Colour name for each cluster index 0, 1, 2
dct_center = dict(enumerate(("red", "green", "blue")))

In [None]:
# Predict the cluster index for a handful of hand-picked 2-D points
p = kmeans.predict([
    [100, -20],
    [20, 80],
    [0, 0],
    [50, 20],
    [-20, -40],
])

In [None]:
# Print the colour name mapped to each predicted cluster index
for cluster_id in p:
    print(dct_center[cluster_id])

In [None]:
# Score of the fitted model on the data it was trained on (scikit-learn
# documents this value as the opposite of the K-means objective on X).
kmeans.score(X)

In [None]:
# IPython help syntax (not plain Python): shows the documentation of kmeans.score
kmeans.score?

## Optimal Clustering

### Elbow Method

In [None]:
# Fit one model per candidate cluster count and record its score on X
Nc = range(1, 20)
kmeans_lst = [KMeans(n_clusters=i) for i in Nc]
score = [model.fit(X).score(X) for model in kmeans_lst]

plt.figure(figsize=(15, 15))
plt.plot(Nc, score)
plt.xlabel('Number of Clusters')
plt.ylabel('Score')
# Nc is plotted on the x-axis, so the per-cluster-count ticks belong there;
# putting them on the y-axis would place ticks 1..19 on the score scale.
plt.xticks(Nc)
plt.title('Elbow Curve')
plt.show()

### Silhouette Method

In [None]:
from sklearn.metrics import silhouette_score

In [None]:
# Silhouette score for each candidate cluster count from 2 to 24.
# The loop uses its own names rather than `k`, `kmeans` and `labels`, so the
# 3-cluster model fitted earlier in the notebook is not overwritten and the
# prediction cells that rely on it can still be re-run afterwards.
sil = []
for n_clusters in range(2, 25):
    candidate = KMeans(n_clusters=n_clusters).fit(X)
    candidate_labels = candidate.predict(X)
    sil.append(silhouette_score(X, candidate_labels))

In [None]:
from matplotlib.ticker import AutoMinorLocator

ax = plt.subplot()

# Enable automatically placed minor ticks on both axes
ax.xaxis.set_minor_locator(AutoMinorLocator())
ax.yaxis.set_minor_locator(AutoMinorLocator())

# tick_params applies to both axes by default, so one call per tick kind
# styles the x- and y-axis together.
ax.tick_params(which='minor', length=4, color='red')
ax.tick_params(which='major', length=8, color='blue')

# Silhouette score against the number of clusters tried (2 to 24)
ax.plot(list(range(2, 25)), sil)