In [1]:
import numpy as np

In [2]:
# Load the labels: they are the double-quoted tokens of labels.txt. After
# splitting the text on '"', the quoted pieces sit at the odd positions.
with open("labels.txt", "r") as fp:
    labels = np.array(fp.read().split('"')[1::2])

# Load the distance matrix into a square two-dimensional array with one row
# and one column per label.
with open("distance_matrix.txt", "r") as fp:
    n_labels = labels.shape[0]
    distance_matrix = np.zeros((n_labels, n_labels))
    for idx, line in enumerate(fp.read().splitlines()):
        # Drop the blanks and the first/last character of the line (the
        # enclosing brackets), then parse the comma-separated numbers.
        cells = line.replace(' ', '')[1:-1].split(',')
        line_data = np.array(cells, dtype=float)
        # Rows may hold fewer values than there are labels; right-pad them
        # with zeros so every row has the full width.
        missing = n_labels - line_data.shape[0]
        distance_matrix[idx] = np.pad(line_data, (0, missing), 'constant')

print(labels)
print(distance_matrix)

['Man' 'Monkey' 'Dog' 'Horse' 'Donkey' 'Pig' 'Rabbit']
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.]
 [13. 12.  0.  0.  0.  0.  0.]
 [17. 16. 10.  0.  0.  0.  0.]
 [16. 15.  8.  1.  0.  0.  0.]
 [13. 12.  4.  5.  4.  0.  0.]
 [12. 11.  6. 11. 10.  6.  0.]]


In [3]:
# Agglomerative clustering: repeatedly join the two closest clusters until the
# distance matrix has collapsed to a single element.
#
# `distance_matrix` is kept lower-triangular throughout (diagonal and upper
# triangle are 0), so the pair picked in each round always has
# row index > column index. Each round prints the pair that was joined and
# rebinds both `labels` and `distance_matrix` to their reduced versions.
while distance_matrix.shape[0] > 1:
    # Work on a copy so the masking below never touches the real distances.
    temp_distance_matrix = distance_matrix.copy()
    # Hide the diagonal and the upper triangle from argmin. They are masked by
    # position rather than by value, so that a genuine distance of 0 between
    # two different clusters is still a valid (and the best possible) candidate
    # instead of being mistaken for an empty cell.
    temp_distance_matrix[np.triu_indices(temp_distance_matrix.shape[0])] = np.inf
    # (row, column) of the smallest remaining distance
    min_position = np.unravel_index(np.argmin(temp_distance_matrix, axis=None), temp_distance_matrix.shape)
    # The column index is the "first" cluster (it keeps its slot), the row
    # index is the "second" cluster (its slot is removed); second > first.
    second, first = min_position

    # Distances from every cluster to each member of the pair. Adding the row
    # and the column rebuilds the full distance vector from the triangular
    # storage, because one of the two is always zero at any given position.
    sum_of_first = distance_matrix[first, :] + distance_matrix[:, first]
    sum_of_second = distance_matrix[second, :] + distance_matrix[:, second]
    # Distance of the joined cluster to every other cluster: the plain mean of
    # the two members' distances (cluster sizes are not taken into account).
    mean_of_union = (sum_of_first + sum_of_second) / 2.0
    # The joined cluster has distance 0 to itself.
    mean_of_union[first] = 0.0
    # Drop the entry that belonged to the second cluster; since second > first
    # this does not shift the position of the first cluster.
    mean_of_union = np.delete(mean_of_union, second, 0)

    # log the elements that will be joined
    joined_label = '(' + labels[first] + ', ' + labels[second] + ')'
    print(joined_label)

    # Replace the first label with the joined label and drop the second one.
    # The array is rebuilt with concatenate (instead of assigning into it) so
    # numpy can widen the fixed-width string dtype for the longer label.
    labels = np.concatenate((labels[:first], [joined_label],
                             labels[first + 1:second], labels[second + 1:]), axis=0)

    # Remove the second cluster's row and column from the distance matrix.
    distance_matrix = np.delete(np.delete(distance_matrix, second, 0), second, 1)

    # Store the joined cluster's distances in the first cluster's row and column.
    distance_matrix[:, first] = mean_of_union
    distance_matrix[first, :] = mean_of_union
    # Restore the lower-triangular layout: zero the diagonal and upper triangle.
    distance_matrix[np.triu_indices(distance_matrix.shape[1])] = 0

(Man, Monkey)
(Horse, Donkey)
(Dog, Pig)
((Dog, Pig), Rabbit)
(((Dog, Pig), Rabbit), (Horse, Donkey))
((Man, Monkey), (((Dog, Pig), Rabbit), (Horse, Donkey)))
