In [1]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance

In [8]:
class GreedyAgglomerative:
    """
    Greedy agglomerative (bottom-up) hierarchical clustering on cityblock
    (Manhattan) distances.

    Every sample starts as its own cluster. The two closest clusters are merged
    repeatedly until a single cluster remains. Each merge is recorded as
    ``[id_a, id_b, distance, 0]``: the original samples carry the ids
    ``0 .. n-1`` and every newly formed cluster receives the next free id
    (``n``, ``n+1``, ...). The trailing ``0`` is a placeholder column kept so
    that each row has four entries, as expected by dendrogram plotting.

    :param linkage: ``'complete'`` (default; distance between clusters is the
        largest pairwise distance) or ``'single'`` (the smallest pairwise distance)
    :raises ValueError: if ``linkage`` is not one of the supported names
    """

    _LINKAGES = ('complete', 'single')

    def __init__(self, linkage='complete'):
        if linkage not in self._LINKAGES:
            raise ValueError(
                "linkage must be one of %r, got %r" % (self._LINKAGES, linkage)
            )
        self._linkage = linkage

        # attributes
        self._merges = []

        # internally used variables
        self._X = None
        self._clusters = None
        # Number of samples right after fit(); __greedy() then keeps incrementing
        # it because it doubles as the id handed to the next merged cluster.
        self._n = None
        self.__distance_matrix = None

    # Feature-wise normalization is currently disabled; kept for reference.
    # def __normalize(self, x):
    #     """
    #     Normalizes the data feature wise
    #     :param x: numpy.array of the input data
    #     :return: Normalized data
    #     """
    #     scaler = MinMaxScaler()
    #     return scaler.fit_transform(x)

    def __greedy(self):
        """
        Greedy Algorithm to find the Hierarchical Clustering.

        Repeatedly merges the closest pair of clusters, updating the distance
        matrix in place of the surviving cluster and appending one row per
        merge to ``self._merges``.
        :return: None
        """
        # Complete linkage keeps the larger of the two distances, single linkage the smaller.
        combine = np.maximum if self._linkage == 'complete' else np.minimum

        while len(self._clusters) > 1:
            dist = self.__distance_matrix

            # argmin works on the flattened matrix; unravel_index maps it back to (row, col).
            # The matrix is symmetric, so the first hit always has keep < drop.
            keep, drop = (int(v) for v in np.unravel_index(np.argmin(dist), dist.shape))
            min_dist = dist[keep, drop]

            # Distances from the merged cluster to every other cluster.
            merged_row = combine(dist[keep], dist[drop])
            # The diagonal must stay inf so a cluster is never paired with itself
            # (with single linkage the line above would otherwise put min_dist there).
            merged_row[keep] = np.inf
            dist[keep, :] = merged_row
            dist[:, keep] = merged_row

            # The absorbed cluster no longer exists: drop its row and column.
            dist = np.delete(dist, drop, axis=0)
            dist = np.delete(dist, drop, axis=1)
            self.__distance_matrix = dist

            # Record merge (the last component of merge is a placeholder 0 for it
            # to work with dendrogram plotting).
            self._merges.append(
                [self._clusters[keep][0], self._clusters[drop][0], min_dist, 0]
            )

            # The surviving slot is renamed to the next available cluster id ...
            self._clusters[keep] = [self._n]
            self._n += 1
            # ... and the absorbed cluster is removed.
            self._clusters.pop(drop)

    def __compute_distance_matrix(self):
        """
        Builds the symmetric matrix of pairwise cityblock distances between the
        rows of ``self._X``. The diagonal is ``inf`` so that the minimum search
        in ``__greedy`` never selects a cluster paired with itself.
        """
        n = self._n
        self.__distance_matrix = np.full((n, n), np.inf)
        for i in range(n - 1):
            for j in range(i + 1, n):
                pair_dist = distance.cityblock(self._X[i], self._X[j])
                self.__distance_matrix[i, j] = pair_dist
                self.__distance_matrix[j, i] = pair_dist

    def fit(self, x):
        """
        Fits the model according to x
        :param x: dataset; indexable sequence of samples (one per row)
        """
        self._X = x
        self._n = len(self._X)
        self._clusters = [[i] for i in range(self._n)]
        # Start from a fresh merge history so that refitting the same instance
        # does not append to the merges of a previous fit.
        self._merges = []
        self.__compute_distance_matrix()

    def predict(self):
        """
        Runs the greedy merging on the fitted data and returns the merge history.
        :return: list of ``[id_a, id_b, distance, 0]`` rows, one per merge, in merge order
        :raises RuntimeError: if called before ``fit``
        """
        if self._clusters is None:
            raise RuntimeError("fit must be called before predict")
        self.__greedy()
        return self._merges

    def fit_predict(self, x):
        """
        Convenience wrapper: fits the model on x and returns the merge history.
        :param x: dataset; indexable sequence of samples (one per row)
        :return: list of ``[id_a, id_b, distance, 0]`` rows, one per merge
        """
        self.fit(x)
        return self.predict()

In [9]:
# Smoke test: cluster six 2-D points (raw, unnormalized coordinates)
# and print the recorded merge history.
X = np.array([
    [0, 0],
    [10, 10],
    [21, 21],
    [33, 33],
    [5, 27],
    [28, 6],
])

clustering = GreedyAgglomerative()
clustering.fit(X)
print(clustering.predict())

[[0, 1, 0.6060606060606061, 0], [2, 4, 0.6666666666666667, 0], [3, 5, 0.9696969696969696, 0], [6, 7, 1.2727272727272727, 0], [9, 8, 2.0, 0]]


In [203]:
# Merge-lists scratch example: keep only the head of the list.
A = [[1, 1], [1, 2], [1, 3], [1, 4]]
counter = 0

while len(A) >= 2:
    elem = A[0]
    print(elem)
    counter = counter + 1
    # A is rebound to a one-element list, so the loop condition fails
    # after this first pass.
    A = [elem]

[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]
[1, 1]

KeyboardInterrupt: 

In [16]:
# Scratch example: repeatedly fuse the two trailing items into one pair
# placed at the front, until a single nested item is left.
A = [[1, 1], [1, 2], [1, 3], [1, 4]]
while len(A) >= 2:
    print('AAAA')
    for i in A:
        print(i)
    # Last item first, then the one before it (same order as two pops from the end).
    a, b = A[-1], A[-2]
    A = [[a, b], *A[:-2]]

AAAA
[1, 1]
[1, 2]
[1, 3]
[1, 4]
AAAA
[[1, 4], [1, 3]]
[1, 1]
[1, 2]
AAAA
[[1, 2], [1, 1]]
[[1, 4], [1, 3]]
