In [1]:
from queue import PriorityQueue

In [2]:
# Define graph edge
class Edge:
    """Weighted edge joining two graph nodes.

    Edges compare by weight only, so a min-ordered container such as
    ``queue.PriorityQueue`` yields the lightest edge first.
    """

    def __init__(self, node1, node2, weight=1.0):
        # The two endpoints, followed by the cost of connecting them.
        self.node1, self.node2 = node1, node2
        self.weight = weight

    def __lt__(self, other):
        """Return True when this edge is strictly lighter than ``other``."""
        return self.weight < other.weight


## Union Find data structure example

The key part is an array `position[]`, where `position[i]` is the cluster that node `i` belongs to.

Example: A graph of size 5. We add edge (3, 0), (2, 3), (0, 2), (1, 4) and (0, 4)

- Initially [0, 1, 2, 3, 4]
    - node 0 belongs to cluster 0, node 1 belongs to cluster 1 ...
- Add (3, 0): [0, 1, 2, 0, 4]
    - node 3 now belongs to cluster 0
- Add (2, 3): [0, 1, 0, 0, 4]
    - original node 2 -> 2, node 3 -> 0
    - now node2 -> 0, node3 -> 0
- Add (0, 2)
    - node 0 and node 2 are already in the same cluster
- Add (1, 4): [0, 1, 0, 0, 1]
- Add (0, 4): [0, 0, 0, 0, 0]
    - KEY PART: node 1 also changes
    - HOW TO IMPLEMENT?

In [3]:
class UnionFind:
    """Disjoint-set (union-find) structure over nodes ``0 .. num_nodes - 1``.

    ``position[i]`` stores the parent of node ``i``. A node whose entry equals
    its own index is the representative (cluster index) of its cluster.
    """

    def __init__(self, num_nodes):
        # Initially position[i] = i: every node is alone in its own cluster.
        self.position = list(range(num_nodes))

    def find(self, node):
        """Return the cluster index (root) of ``node``.

        Every node visited on the way to the root is re-pointed directly at
        the root (path compression). The walk is iterative rather than
        recursive so that a long parent chain cannot exceed Python's
        recursion limit.

        Raises:
            IndexError: if ``node`` is outside ``0 .. num_nodes - 1``.
        """
        # Pass 1: follow parent links until reaching a self-parented node.
        root = node
        while self.position[root] != root:
            root = self.position[root]

        # Pass 2: re-point every node on the walked path straight at the root.
        while node != root:
            parent = self.position[node]
            self.position[node] = root
            node = parent

        return root

    def union(self, node1, node2):
        """Merge the clusters containing ``node1`` and ``node2``.

        Does nothing when both nodes already share a cluster. Otherwise the
        root with the smaller index becomes the representative of the merged
        cluster.
        """
        a = self.find(node1)
        b = self.find(node2)
        # no need to union
        if a == b:
            return
        # union is needed: attach the larger-index root under the smaller one
        if a < b:
            self.position[b] = a
        else:
            self.position[a] = b

In [17]:
def kruskal(edge_list, num_nodes):
    """Select minimum-spanning-tree edges with Kruskal's algorithm.

    All edges are loaded into a ``PriorityQueue`` and taken out lightest
    first. An edge is kept only when its endpoints currently sit in different
    ``UnionFind`` clusters, after which those two clusters are merged.

    Args:
        edge_list: iterable of edge objects exposing ``node1``, ``node2`` and
            ``weight`` and orderable with ``<``.
        num_nodes: number of nodes; node ids index into a ``UnionFind`` of
            this size.

    Returns:
        A list of newly created ``Edge`` objects, in the order they were
        accepted. Extraction stops once the list holds ``num_nodes - 1``
        edges or the queue runs dry.
    """
    uf = UnionFind(num_nodes)
    MST = []   # accepted edges, returned to the caller

    # Step 1: put every candidate edge into the priority queue.
    pq = PriorityQueue()
    for candidate in edge_list:
        pq.put(candidate)

    # Step 2: take edges lightest-first, keeping those that join two clusters.
    wanted = num_nodes - 1
    while True:
        if len(MST) == wanted or pq.empty():
            break

        lightest = pq.get()
        u, v, w = lightest.node1, lightest.node2, lightest.weight
        root_u = uf.find(u)
        root_v = uf.find(v)
        if root_u == root_v:
            # Both endpoints are already connected; this edge would form a cycle.
            continue

        MST.append(Edge(u, v, w))
        uf.union(root_u, root_v)

    return MST

## Example of how class PriorityQueue works
- serves as a min-heap (the smallest item comes out first)
- can work with our class Edge

In [18]:
# Demo: PriorityQueue hands Edge objects back in ascending weight order,
# relying on Edge.__lt__ for the comparison.
edges = [
    Edge(0, 1, 0.1),
    Edge(0, 2, 0.3),
    Edge(1, 2, 0.2),
]

pq = PriorityQueue()
for e in edges:
    pq.put(e)

# Drain the queue, showing its size before each removal and then the edge
# that was removed.
while pq.qsize() > 0:
    print(pq.qsize())
    e = pq.get()
    print(e.node1, e.node2, e.weight)

3
0 1 0.1
2
1 2 0.2
1
0 2 0.3


## Generate a random graph 
- only generates an edge list
- each node adds 5 edges to randomly chosen other nodes (parallel edges are possible)
- each edge has random weight

In [19]:
import random

# Randomly generate a graph of size 20. The fixed seed keeps the generated
# edge list identical from run to run.
random.seed(100)

num_nodes = 20
edges_per_node = 5   # number of edges started from each node
num_preview = 15     # number of edges printed below
edge_list = []

# For each node, add five random edges. Self-loops are rejected; parallel
# edges are allowed, so the same pair of nodes may appear more than once.
for node1 in range(num_nodes):
    count = 0
    while count < edges_per_node:
        node2 = random.randint(0, num_nodes - 1)   # inclusive on both ends
        if node2 != node1:
            count += 1
            weight = random.random()   # uniform float in [0.0, 1.0)
            edge_list.append(Edge(node1, node2, weight))

# Only show the first few edges; slicing stays safe on a shorter list.
for e in edge_list[:num_preview]:
    print("{:2d}, {:2d}, {:.5f}".format(e.node1, e.node2, e.weight))

 0,  4, 0.45953
 0,  5, 0.70551
 0, 11, 0.43351
 0,  3, 0.53290
 0,  2, 0.73671
 1,  8, 0.04789
 1,  6, 0.33535
 1,  9, 0.83890
 1,  6, 0.96086
 1,  4, 0.18859
 2, 11, 0.37006
 2, 13, 0.96332
 2,  6, 0.40307
 2, 14, 0.55540
 2, 12, 0.16042


## Test code

Run the following code to test the implementation.

In [20]:
MST = kruskal(edge_list, num_nodes)

# The MST is expected to hold num_nodes - 1 edges (19 for this graph).
print("The returned MST has {} edges".format(len(MST)))

# List every selected edge while accumulating the overall weight.
total_weight = 0.0
for e in MST:
    print("{:2d}, {:2d}, {:.5f}".format(e.node1, e.node2, e.weight))
    total_weight = total_weight + e.weight

print("The returned MST has total weight: {:.3f}".format(total_weight))

The returned MST has 19 edges
14,  8, 0.00284
 3,  5, 0.00565
14, 12, 0.00974
16,  2, 0.02284
15, 14, 0.03370
11, 12, 0.03686
14,  9, 0.03775
19,  7, 0.04714
 1,  8, 0.04789
10,  5, 0.04938
19,  2, 0.06946
18,  8, 0.07317
18, 16, 0.11105
 4, 12, 0.12662
16, 17, 0.14365
12, 13, 0.14533
 3, 12, 0.14760
 6, 13, 0.25253
 0, 11, 0.43351
The returned MST has total weight: 1.797
