In [9]:
import itertools
import numpy as np

K = 4

In [10]:
"""
MakeSet(x) initializes a disjoint set containing only object x
Find(x) returns the representative object of the set containing x
Union(x,y) merges the two sets containing x and y, respectively, into one set

Some Applications:
- Kruskal's algorithm for finding minimum spanning trees
- Finding connected components in graphs
- Finding connected components in images (binary)

Source: https://code.activestate.com/recipes/577225-union-find/
"""


class Node:
    """Element of a disjoint-set forest, identified by a label.

    Only ``label`` is set here; the ``parent`` and ``rank`` attributes used by
    ``Find`` and ``Union`` are attached by ``MakeSet``.
    """

    def __init__(self, label):
        # Stored exactly as given (no conversion).
        self.label = label

    def __str__(self):
        # __str__ must return a str; coerce so that non-string labels
        # (e.g. ints) do not raise TypeError when the node is printed.
        return str(self.label)


def MakeSet(x):
    """Turn ``x`` into a singleton set: its own parent, with rank 0."""
    x.rank = 0
    x.parent = x


def Union(x, y):
    """Merge the set containing ``x`` with the set containing ``y``.

    The root of lower rank is attached beneath the root of higher rank.
    When both ranks are equal, ``y``'s root is attached beneath ``x``'s root
    and that root's rank grows by one. Nothing changes if ``x`` and ``y``
    already share a root.
    """
    root_x, root_y = Find(x), Find(y)

    if root_x.rank == root_y.rank:
        # Equal ranks: either the same root (nothing to do) or a tie,
        # which is broken in favour of x's root.
        if root_x != root_y:
            root_y.parent = root_x
            root_x.rank += 1
        return

    # Ranks differ, so the roots are distinct: hang the shallower tree
    # under the deeper one; ranks stay unchanged.
    if root_x.rank > root_y.rank:
        root_y.parent = root_x
    else:
        root_x.parent = root_y


def Find(x):
    """Return the representative (root) of the set containing ``x``.

    Every node visited on the way up is re-pointed directly at the root
    (path compression). Loops are used instead of recursion so that a long
    parent chain cannot raise ``RecursionError``.
    """
    # Pass 1: climb until reaching the node that is its own parent.
    root = x
    while root.parent != root:
        root = root.parent

    # Pass 2: re-point each node on the path straight at the root.
    while x != root:
        next_node = x.parent
        x.parent = root
        x = next_node

    return root

## Main

In [11]:
path = "./data/clustering1.txt"
# path = "./data/test1.txt"

# Parse the input file. A line with a single token is read as the node count;
# every other non-blank line is read as a tuple of integers, which the rest of
# the notebook unpacks as (node1, node2, distance).
edge_list = []
num_of_node = None
with open(path, "r") as fh:
    for row in fh:
        row = row.split()  # split() with no separator also strips whitespace
        if not row:
            # Blank line: skip it instead of appending an empty tuple,
            # which would break the sort on x[2] below.
            continue
        if len(row) == 1:
            num_of_node = int(row[0])
            continue

        edge_list.append(tuple(int(x) for x in row))

if num_of_node is None:
    # Fail with a clear message rather than a TypeError on `None + 1`.
    raise ValueError("{}: no node-count line found".format(path))

# Nodes are labelled "1".."num_of_node"; node k lives at index k - 1 of `l`.
node_list = [str(x) for x in range(1, num_of_node + 1)]
l = [Node(num) for num in node_list]  # list of distinct nodes

# starting with every object in its own set
for i in l:
    MakeSet(i)
Num_of_set = len(np.unique([str(Find(x)) for x in l]))
print("Num_of_set:", Num_of_set)

# Sorted by descending distance so that list.pop() yields the shortest
# remaining edge.
sorted_edge_list = sorted(edge_list, key=lambda x: x[2], reverse=True)

Num_of_set: 500


In [12]:
# Repeatedly take the shortest remaining edge and merge the two sets it
# connects, until only K sets remain.
while Num_of_set > K:
    if not sorted_edge_list:
        # Explicit error instead of a bare IndexError from pop() on an
        # empty list when the edges cannot connect the nodes into K sets.
        raise ValueError(
            "ran out of edges with {} sets left; cannot reach K={}".format(
                Num_of_set, K
            )
        )
    node1, node2, distance = sorted_edge_list.pop()
    first, second = l[node1 - 1], l[node2 - 1]
    if Find(first) != Find(second):
        Union(first, second)
        # Distance of the most recently merged edge; the cell that follows
        # recomputes maximum_spacing from the edges that still cross sets.
        maximum_spacing = distance
        Num_of_set -= 1
        # Invariant check. Only a merge changes the partition, so checking
        # here avoids an O(n) scan on iterations that merged nothing.
        assert Num_of_set == len(np.unique([str(Find(x)) for x in l]))

In [13]:
# Spacing of the clustering: the smallest distance among edges whose two
# endpoints have different representatives. Stays at np.inf when no such
# edge exists.
maximum_spacing = np.inf
for node1, node2, distance in edge_list:
    if Find(l[node1 - 1]) == Find(l[node2 - 1]):
        continue  # both endpoints are in the same set
    maximum_spacing = min(maximum_spacing, distance)
print("maximum_spacing:", maximum_spacing)

maximum_spacing: 106
