In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings  
warnings.filterwarnings("ignore")   # ignore warnings


# Input data files live in the "../input/" directory.
# Running this cell (click run or press Shift+Enter) prints the names of the files found there.

import os
input_dir = "../input"
print(os.listdir(input_dir))

# Any results you write to the current directory are saved as output.

**In this kernel, two different clustering algorithms used in machine learning are explained.**

**CLUSTERING ALGORITHMS**

* Clustering is the technique of dividing the data points into a number of groups such that data points in the same group are more similar to each other than to the data points in other groups.
* Clustering is an unsupervised learning method. 

**1. K - Means**

* K-means clustering is a type of unsupervised learning, which is used when you have unlabeled data (i.e., data without defined categories or groups). 
* The goal of this algorithm is to find groups in the data, with the number of groups represented by the variable K. 
* The algorithm works iteratively to assign each data point to one of K groups based on the features that are provided. 
* Data points are clustered based on feature similarity.

In [None]:
# Read the mall customers CSV from the input directory into a DataFrame.
csv_path = '../input/Mall_Customers.csv'
data = pd.read_csv(csv_path)

In [None]:
# Print a concise summary of the DataFrame to check its columns before clustering.
data.info()

In [None]:
# Preview the first few rows of the loaded data.
data.head()

In [None]:
# Build the feature matrix from every column at positional index 3 and beyond.
# (According to the notes further down, the first of these is Annual Income (k$).)
feature_frame = data.iloc[:, 3:]
X = feature_frame.values
X

In [None]:
# Loading Library
from sklearn.cluster import KMeans

**WCSS (Within-Cluster Sums of Squares)**
* Suppose there are 3 clusters. That means we have 3 center points (C1, C2, C3). Each data point falls into the zone of either C1, C2 or C3. 
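* First, we calculate the sum of the squared distances of each data point in cluster 1 from its center point C1. 
* This is the cluster 1 sum of squares. For example, if cluster 1 contains the three points c1p1, c1p2 and c1p3:
  $[\mathrm{dist}(C_1, c_1p_1)]^2 + [\mathrm{dist}(C_1, c_1p_2)]^2 + [\mathrm{dist}(C_1, c_1p_3)]^2$. 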
* We do the same for C2 and C3. 
* We add up the sums of squares of all 3 clusters to get **WCSS**.
* WCSS always decreases with the increase in the number of clusters.

In [None]:
# To choose K, fit K-Means for K = 1..10 and record the WCSS of each fit
# (exposed by scikit-learn as the fitted model's inertia_ attribute).
result = [
    KMeans(n_clusters=k, init='k-means++', random_state=123).fit(X).inertia_
    for k in range(1, 11)
]

In [None]:
# Show the recorded WCSS values, one per number of clusters tried.
result

In [None]:
# Plot WCSS against the number of clusters (the "elbow" plot).
# The x values are derived from len(result) so they always line up with the
# WCSS list; this assumes the search started at K = 1, as in the fitting cell.
plt.plot(range(1, len(result) + 1), result)
plt.xlabel('the number of clusters')
# Label the axis with the quantity that is plotted rather than the variable name.
plt.ylabel('WCSS')
plt.title('The Elbow Method')
plt.show()
# From the figure below, we choose K = 3 because the decrease in WCSS slows down at that point.

In [None]:
# Applying K-Means Algorithm
# Creation of model
# random_state is fixed (same seed as the elbow search) so that the cluster
# labels, the centers and the plot colors are reproducible on every run.
kmeans = KMeans(n_clusters = 3, init='k-means++', random_state = 123)
kmeans.fit(X)

In [None]:
# The fitted model's cluster_centers_ attribute holds the center point of each cluster.
# For example, the first column is Annual Income (k$); in the author's run its center
# for the first cluster was 44.15447154 (the exact values depend on the fitted model).
kmeans.cluster_centers_

In [None]:
# Prediction
# Assign each point to a cluster with the model that was already fitted above.
# fit_predict would run the fit a second time, which can yield clusters that
# no longer match the centers displayed earlier.
y_kmeans = kmeans.predict(X)
y_kmeans

In [None]:
# Visualising the clusters: one color per cluster label, with the centers drawn larger in yellow.
for cluster_id, colour in enumerate(('red', 'blue', 'green')):
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour)
centres = kmeans.cluster_centers_
plt.scatter(centres[:, 0], centres[:, 1], s=300, c='yellow')
plt.title('K-Means Clustering')
plt.show()

**2. Hierarchical Clustering**

* Hierarchical clustering is a general family of clustering algorithms that build nested clusters by merging or splitting them successively. 
* This hierarchy of clusters is represented as a dendrogram. 
* Hierarchical clustering can be performed with a distance matrix.

**Agglomerative :** bottom up approach, each observation starts in its own cluster, and clusters are successively merged together.

**Divisive :** top down approach, all observations start in one cluster, and splits are performed successively.

In [None]:
# Creation of model & prediction
from sklearn.cluster import AgglomerativeClustering
# The `affinity` keyword is not passed: it was deprecated in scikit-learn 1.2 and
# removed in 1.4 (renamed to `metric`). Ward linkage only works with Euclidean
# distance, which is already the default, so the clustering is unchanged.
ac = AgglomerativeClustering(n_clusters=3, linkage='ward')
Y_predict = ac.fit_predict(X)
Y_predict

In [None]:
# Visualising the clusters: draw each predicted label in its own color.
for cluster_id, colour in enumerate(('red', 'blue', 'green')):
    members = X[Y_predict == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour)
plt.title('Hierarchical Clustering')
plt.show()

**Dendrogram**

In [None]:
import scipy.cluster.hierarchy as sch

In [None]:
# Compute the Ward linkage of X, then draw it as a dendrogram.
ward_links = sch.linkage(X, method='ward')
dendrogram = sch.dendrogram(ward_links)
plt.show()

**CONCLUSION**

My other kernels are here:

https://www.kaggle.com/armagansarikey/machine-learning-1-data-preprocessing

https://www.kaggle.com/armagansarikey/machine-learning-2-prediction-algorithms

https://www.kaggle.com/armagansarikey/machine-learning-3-classification-algorithms

If you have any questions or suggestions, I will be happy to hear them.