In [2]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [138]:
class GreedyAgglomerative:
    """
    Work-in-progress greedy agglomerative (hierarchical) clustering.

    ``fit`` normalizes the data feature wise and stores every sample as its own
    initial cluster. ``predict`` currently only computes and prints the pairwise
    Euclidean distance matrix between those clusters; the merge step is not
    implemented yet, so ``labels_`` is never populated.
    """

    def __init__(self):
        # attributes
        self.labels_ = []  # TODO: no method writes labels here yet

        # internally used variables
        self._X = None  # normalized data set by fit(); None means "not fitted"
        self._clusters = []  # current clusters; after fit() this is the same array as _X

    def __normalize(self, x):
        """
        Normalizes the data feature wise
        :param x: numpy.array of the input data (2-D, one sample per row)
        :return: Normalized data as returned by ``MinMaxScaler.fit_transform``
        """
        scaler = MinMaxScaler()
        return scaler.fit_transform(x)

    def __greedy(self):
        """
        Greedy Algorithm to find the Hierarchical Clustering.

        Only the first step exists so far: the pairwise Euclidean distance matrix
        between all current clusters is computed and printed.
        :return: square numpy.array where entry [i][j] is the distance between
                 cluster i and cluster j
        """
        clusters = np.asarray(self._clusters)

        # Broadcasting (n, 1, d) against (1, n, d) yields all n*n pairs at once,
        # replacing the former Python-level double loop.
        distance_matrix = self.__distance_euclidian(
            clusters[:, np.newaxis, :], clusters[np.newaxis, :, :]
        )

        # TODO: repeatedly merge the closest pair of clusters until one remains.

        print('distance matrix : ', distance_matrix)
        return distance_matrix

    def __distance_euclidian(self, a, b):
        """
        Euclidean distance between a and b, taken along the last axis.
        :param a: numpy.array holding one point or a broadcastable stack of points
        :param b: numpy.array broadcastable against a
        :return: scalar for two single points, otherwise an array of distances
        """
        return np.linalg.norm(a - b, axis=-1)

    def fit(self, x):
        """
        Fits the model according to x: the data is normalized feature wise and
        every sample becomes its own initial cluster. The normalized data is
        printed.
        :param x: dataset
        """

        self._X = self.__normalize(x)
        self._clusters = self._X
        print(self._clusters)

    def predict(self, x):
        """
        Runs the greedy step on the data given to fit. For now this only prints
        the pairwise distance matrix; labels_ is not written yet (TODO).
        :param x: Dataset (currently unused, the fitted data is used instead)
        :raises AttributeError: if fit has not been called before
        """
        # Before fit() the clusters are still the empty placeholder list; fail
        # with a clear message instead of an unrelated "no attribute 'shape'".
        if self._X is None:
            raise AttributeError(
                "GreedyAgglomerative is not fitted yet; call fit() before predict()."
            )
        self.__greedy()

In [139]:
# Smoke test: six points on a 2 x 3 grid (first feature in {1, 4},
# second feature in {2, 4, 0}), listed column by column.
X = np.array([[first, second] for first in (1, 4) for second in (2, 4, 0)])

# Fit prints the normalized data, predict prints the pairwise distance matrix.
clustering = GreedyAgglomerative()
clustering.fit(X)
clustering.predict(X)

[[0.  0.5]
 [0.  1. ]
 [0.  0. ]
 [1.  0.5]
 [1.  1. ]
 [1.  0. ]]
distance matrix :  [[0.         0.5        0.5        1.         1.11803399 1.11803399]
 [0.5        0.         1.         1.11803399 1.         1.41421356]
 [0.5        1.         0.         1.11803399 1.41421356 1.        ]
 [1.         1.11803399 1.11803399 0.         0.5        0.5       ]
 [1.11803399 1.         1.41421356 0.5        0.         1.        ]
 [1.11803399 1.41421356 1.         0.5        1.         0.        ]]
