# Hierarchical Clustering Example

Based on the [original post](https://stackabuse.com/hierarchical-clustering-with-python-and-scikit-learn/) on Stack Abuse.

# First example using Scipy and Scikit-learn

### Prepare Data

In [None]:
import numpy as np

# Ten 2-D sample points, one [x, y] pair per row.
X = np.array([
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])

In [None]:
import matplotlib.pyplot as plt

# Scatter the sample points and tag each one with its 1-based index.
labels = range(1, 11)

plt.figure(figsize=(10, 7))
plt.subplots_adjust(bottom=0.1)
plt.scatter(X[:, 0], X[:, 1], label='True Position')

# Each row of X is one (x, y) point; pair it with its label.
for label, (x, y) in zip(labels, X):
    # Shift the text slightly up and to the left of the marker so the two
    # do not overlap.
    plt.annotate(label, xy=(x, y), xytext=(-3, 3),
                 textcoords='offset points', ha='right', va='bottom')

plt.show()

### Agglomerative clustering with single linkage (nearest-point distance) using SciPy

1.   Build the linkage matrix with `linkage`.
2.   Plot the resulting hierarchy with `dendrogram`.


SciPy API:

*  [linkage](https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html)

*  [dendrogram](https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.dendrogram.html)

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt

# Agglomerative clustering with single linkage: clusters are merged based on
# the distance between their nearest points.
linked = linkage(X, method='single')

# The points carry no names of their own, so label the leaves 1..10.
labelList = range(1, 11)

plt.figure(figsize=(10, 7))
# dendrogram() draws the merge tree described by the linkage matrix.
dendrogram(
    linked,
    orientation='top',
    labels=labelList,
    distance_sort='descending',
    show_leaf_counts=True,
)
plt.show()

### Agglomerative clustering with complete linkage (farthest-point distance) using scikit-learn

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import numpy as np

In [None]:
# Ten 2-D sample points, one [x, y] pair per row.
X = np.array([
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])

In [None]:
from sklearn.cluster import AgglomerativeClustering

# Two clusters, complete linkage (clusters are compared by their farthest
# points). The distance metric is left at its default, which is Euclidean:
# the `affinity` keyword was deprecated in scikit-learn 1.2 and removed in
# 1.4 (replaced by `metric`), so omitting it keeps this cell working on both
# older and newer releases.
cluster = AgglomerativeClustering(n_clusters=2, linkage='complete')
cluster.fit_predict(X)  # fit the model and return one cluster label per point

In [None]:
print(cluster.labels_) # Show the cluster label assigned to each point

In [None]:
# Plot the points, colored by their assigned cluster label
plt.scatter(X[:,0],X[:,1], c=cluster.labels_, cmap='rainbow')

# Second Example using Scikit-learn and Iris

Helper function to plot a dendrogram from a fitted scikit-learn model. It builds the linkage matrix first and then passes it to SciPy's `dendrogram` function.

In [None]:
def plot_dendrogram(model, **kwargs):
    """Draw a dendrogram for a fitted hierarchical-clustering model.

    Builds a linkage matrix from the model's ``children_``, ``distances_``
    and ``labels_`` attributes and hands it to ``dendrogram``.

    Args:
        model: Fitted estimator exposing ``children_`` (the child node ids
            of every merge), ``distances_`` and ``labels_``.
        **kwargs: Passed through unchanged to ``dendrogram``.
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # Number of original samples contained in the node created by each merge.
    sizes = np.zeros(merges.shape[0])
    for row, children in enumerate(merges):
        size = 0
        for node in children:
            # Ids below n_leaves are single samples; larger ids refer to the
            # node created by merge (id - n_leaves), whose size is already set.
            size += 1 if node < n_leaves else sizes[node - n_leaves]
        sizes[row] = size

    # One row per merge: child ids, merge distance, sample count.
    linkage_matrix = np.column_stack(
        [merges, model.distances_, sizes]
    ).astype(float)

    # `dendrogram` is SciPy's function, imported earlier in the notebook.
    dendrogram(linkage_matrix, **kwargs)



## Load iris data and fit Agglomerative Clustering

In [None]:
from sklearn.datasets import load_iris

In [None]:
iris = load_iris()
X = iris.data

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

plt.title('Hierarchical Clustering Dendrogram')
# truncate_mode='level' limits the plot to the top `p` levels of the tree.
# `p` is not passed here, so SciPy's default (p=30) applies; pass p=3 to show
# only the top three levels.
plot_dendrogram(model, truncate_mode='level')
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()

# First scikit-learn example revisited, now with a dendrogram

In [None]:
# Same ten 2-D sample points as in the first example, one [x, y] pair per row.
X = np.array([
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])

# distance_threshold=0 (with n_clusters=None) ensures the full tree is computed.
# The distance metric is left at its default, which is Euclidean: the
# `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4
# (replaced by `metric`), so omitting it keeps this cell working on both
# older and newer releases.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None,
                                linkage='complete')
model = model.fit(X)

plt.title('Hierarchical Clustering Dendrogram')
# truncate_mode='level' limits the plot to the top `p` levels of the tree.
# `p` is not passed here, so SciPy's default (p=30) applies and, with only ten
# points, the whole tree is drawn; pass p=3 to show only the top three levels.
plot_dendrogram(model, truncate_mode='level')
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()