# Hierarchical clustering and image segmentation

In [1]:
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from kneed import KneeLocator

## Hierarchical clustering

Remember that you can find an example of a dendrogram [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html)

### Loading the dataset 'cities_coordinates.txt'

In [3]:
def load_cities(filename="cities_coordinates.txt"):
    """Return city coordinates (X) and their names (y).

    Each non-blank line of the file is split on commas: every field except
    the last is parsed as a float coordinate, and the last field is kept
    as the city name (its trailing newline removed).

    Parameters
    ----------
    filename : str
        Path of the comma-separated text file to read.

    Returns
    -------
    X : numpy.ndarray of float
        One row of coordinates per city.
    y : numpy.ndarray of str
        City names, in the same order as the rows of X.

    Raises
    ------
    ValueError
        If a coordinate field cannot be converted to float.
    """
    X = []
    y = []
    with open(filename, 'r') as cities:
        # Iterate the file lazily rather than materialising all lines.
        for line in cities:
            line = line.rstrip("\n")
            # Blank lines (e.g. a trailing empty line) would otherwise add
            # an empty row and break the float conversion below.
            if not line.strip():
                continue
            *coords, name = line.split(",")
            X.append(coords)
            y.append(name)
    X = np.array(X).astype(float)
    y = np.array(y).astype(str)
    return X, y

## Image segmentation 

### Loading the image 'scenery_184_233.txt'

In [None]:
def load_image(filename="scenery_184_233.txt"):
    """Return the image stored in a text file as a float numpy array.

    Every non-blank line contributes one row: the first
    whitespace-delimited token of the line is split on commas and the
    resulting fields are converted to float. For an RGB image stored with
    one pixel per line the result has shape (H*W, 3).

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    X : numpy.ndarray of float
        One row per non-blank line of the file.

    Raises
    ------
    ValueError
        If a field cannot be converted to float.
    """
    X = []
    with open(filename, 'r') as scenery:
        for line in scenery:
            tokens = line.split()
            # A blank or whitespace-only line has no tokens; indexing
            # tokens[0] would raise IndexError, so skip it.
            if not tokens:
                continue
            X.append(tokens[0].split(','))
    # Make sure we have the right format:
    X = np.array(X).astype(float)
    return X

def plot_image(X, height=184, width=233):
    """
    Plot the image represented by X of shape (H*W, 3)

    Parameters
    ----------
    X : array-like
        Flattened image with height*width rows of 3 channel values.
    height : int, optional
        Number of pixel rows H of the image (default 184).
    width : int, optional
        Number of pixel columns W of the image (default 233).

    Returns
    -------
    fig, ax
        The matplotlib figure and axes holding the image.

    Raises
    ------
    ValueError
        If X does not contain exactly height*width*3 values.
    """
    # Copy of X for visualization purposes (of shape (H, W, 3)),
    # cast to 8-bit unsigned integers before display.
    X_vis = np.uint8(np.asarray(X).reshape(height, width, 3))

    fig, ax = plt.subplots()
    ax.axis('off')
    ax.imshow(X_vis)
    return fig, ax

def get_masks(model, X):
    """
    Return masks and segmentation of the image X using a trained model.

    Parameters
    ----------
    model : object
        Fitted clustering model exposing ``predict(X)`` (one cluster index
        per row of X) and ``cluster_centers_`` (one representative row per
        cluster).
    X : array-like
        Pixel data with one row per pixel.

    Returns
    -------
    masks : list of numpy.ndarray
        One array per cluster, shaped like X. Rows assigned to that
        cluster hold the cluster's representative; all other rows are 0.
    segmentation : numpy.ndarray
        Array shaped like X where each row is replaced by the
        representative of its cluster (equal to the sum of all masks).
    """
    data = np.asarray(X)
    labels = np.asarray(model.predict(X))
    centers = np.asarray(model.cluster_centers_)

    # Use a dtype able to hold the centers. Allocating the outputs with X's
    # own dtype fails for integer-typed X because float centers cannot be
    # accumulated in place into an integer array.
    out_dtype = np.result_type(data.dtype, centers.dtype)

    masks = []
    for i, center in enumerate(centers):
        mask = np.zeros(data.shape, dtype=out_dtype)
        mask[labels == i] = center
        masks.append(mask)

    # Each pixel belongs to exactly one cluster, so summing the masks is
    # the same as looking up every pixel's center directly.
    segmentation = centers[labels].astype(out_dtype, copy=False)
    return masks, segmentation