# Kruskal's algorithm

In [20]:
def find(parent, i):
    """Return the representative (root) of the set containing ``i``.

    ``parent`` maps each element index to its parent index; a root is an
    element that is its own parent. The structure is not modified.
    """
    node = i
    # Walk up the parent links until reaching a self-parented element.
    while parent[node] != node:
        node = parent[node]
    return node


def union(parent, rank, x, y):
    """Merge the sets containing ``x`` and ``y`` using union by rank.

    The root with the smaller rank is attached under the root with the
    larger rank. When both ranks are equal, the root of ``y`` is attached
    under the root of ``x`` and the rank of ``x``'s root grows by one.
    Both ``parent`` and ``rank`` are updated in place.
    """
    root_x = find(parent, x)
    root_y = find(parent, y)

    if rank[root_x] < rank[root_y]:
        parent[root_x] = root_y
        return

    # Here rank[root_x] >= rank[root_y]: root_x becomes the new root.
    parent[root_y] = root_x
    if rank[root_x] == rank[root_y]:
        rank[root_x] += 1


def kruskal(nodes, edges, cols):
    """Build a minimum spanning tree (or forest) with Kruskal's algorithm.

    Args:
        nodes: Number of vertices; vertices are the integers
            ``0 .. nodes - 1``.
        edges: Iterable of ``(u, v, w)`` triples, where ``u`` and ``v`` are
            vertex indices and ``w`` is the edge weight. Edges whose weight
            equals ``-1`` are skipped.
        cols: Sequence of vertex labels, indexed by vertex number.

    Returns:
        A list of ``[cols[u], cols[v], w]`` entries, one per accepted edge,
        in the order they were accepted (ascending weight). Each accepted
        edge is also printed. If the usable edges do not connect all
        vertices, the result is a spanning forest with fewer than
        ``nodes - 1`` entries instead of an IndexError.
    """
    result = []  # Accepted edges of the spanning tree

    parent = list(range(nodes))
    rank = [0] * nodes

    # Consider edges in ascending order of weight (sorted() is stable).
    for u, v, w in sorted(edges, key=lambda edge: edge[2]):
        # A spanning tree has exactly nodes - 1 edges; stop once complete.
        if len(result) >= nodes - 1:
            break

        if w == -1:
            continue

        x = find(parent, u)
        y = find(parent, v)

        # Accept the edge only if its endpoints are in different
        # components, i.e. adding it does not create a cycle.
        if x != y:
            result.append([cols[u], cols[v], w])
            print("{} -- {} == {}".format(cols[u], cols[v], w))
            union(parent, rank, x, y)

    return result


In [21]:
import pandas as pd

In [22]:
def read_mat(mat):
    """Extract the edge list from the upper triangle of a distance matrix.

    For row position ``i`` and every column position ``j > i``, the cell
    value is taken as the weight of edge ``(i, j)``. Cells equal to ``-1``
    are skipped.

    Returns a list of ``(i, j, weight)`` tuples in row-major order.
    """
    columns = mat.columns
    total = len(columns)
    edges = []
    for i, (_, row) in enumerate(mat.iterrows()):
        # Only look at columns strictly to the right of the diagonal.
        for j in range(i + 1, total):
            weight = row[columns[j]]
            if weight != -1:
                edges.append((i, j, weight))
    return edges

In [23]:
# Load the distance matrix with the default index.
# NOTE: `mat` is reassigned in the next cell, which reloads the same file
# with index_col="name".
mat = pd.read_csv('../data/ru_dist_mat_v1.csv')

In [24]:
# Load the distance matrix, using the "name" column as the row index.
mat = pd.read_csv("../data/ru_dist_mat_v1.csv", index_col="name")

# Column labels double as vertex labels; one vertex per column.
cols = mat.columns
edges = read_mat(mat)
nodes = len(cols)

In [25]:
# Build the spanning tree; kruskal() prints each accepted edge as
# "<label> -- <label> == <weight>" and returns them as a list.
s_edges = kruskal(nodes, edges, cols)

Akademgorodok -- Arsen’yev == 0.0
Akademgorodok -- Bugul’ma == 0.0
Akademgorodok -- Dal'negorsk == 0.0
Akademgorodok -- Dal’nerechensk == 0.0
Akademgorodok -- Dolgoprudnyy == 0.0
Akademgorodok -- Gul’kevichi == 0.0
Akademgorodok -- Izobil’nyy == 0.0
Akademgorodok -- Kotel’nich == 0.0
Akademgorodok -- Medvezh’yegorsk == 0.0
Akademgorodok -- Novoul’yanovsk == 0.0
Akademgorodok -- Orekhovo-Borisovo == 0.0
Akademgorodok -- Petrovsk-Zabaykal’skiy == 0.0
Akademgorodok -- Severobaykal’sk == 0.0
Akademgorodok -- Severoural’sk == 0.0
Akademgorodok -- Spassk-Dal’niy == 0.0
Akademgorodok -- Tal’menka == 0.0
Akademgorodok -- Tsotsin-Yurt == 0.0
Akademgorodok -- Vasil’yevo == 0.0
Akademgorodok -- Vyaz’ma == 0.0
Akademgorodok -- Yuzhnoural’sk == 0.0
Aleksandrovskoye -- Zelenokumsk == 0.0
Bogorodsk -- Noginsk == 0.0
Chapayevsk -- Nagornyy == 0.0
Kuybyshev -- Samara == 0.0
Malgobek -- Staryy Malgobek == 0.0
Peterhof -- Petrodvorets == 0.0
Rostov -- Rostov-na-Donu == 0.0
Ryazanskiy -- Ryazan’ == 0.0
Ry

Aykhal -- Mirny == 0.05096844705267882
Aldan -- Yakutsk == 0.05955237095019561
Chita -- Ulan-Ude == 0.06361564901067976
Bodaybo -- Chita == 0.06610304280914624
Gusev -- Nevel’ == 0.0789008949729713
Dudinka -- Novyy Urengoy == 0.07906736444621668
Akademgorodok -- Tynda == 0.08132910337607759
Magadan -- Okha == 0.10177042104216223
Magadan -- Yelizovo == 0.1030950454628792


In [26]:
import csv
def edges_to_csv(file_name, edges):
    """Write an edge list to ``../data/<file_name>.csv``.

    Args:
        file_name: Base name of the output file, without directory or
            extension.
        edges: Iterable of rows; each row is an iterable of
            ``node1, node2, dist`` values.

    The file starts with the header row ``node1,node2,dist``. It is opened
    with ``newline=''`` as the csv module requires (otherwise extra blank
    rows appear on platforms that translate line endings), and encoded as
    UTF-8 so non-ASCII node names do not depend on the locale's default
    encoding.
    """
    path = '../data/' + file_name + '.csv'
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['node1', 'node2', 'dist'])
        writer.writerows(edges)

In [27]:
# Save the spanning-tree edges to ../data/mst_kruskal.csv.
edges_to_csv('mst_kruskal', s_edges)