1. Compute Core distances

2. Compute MRG

3. Build MST

4. Dendrogram

5. GLOSH

6. Cluster Stability


In [None]:
import csv
import math

In [3]:
def get_data(wiht_label: bool = False, file_path: str = "data/min_example.csv") -> list[list]:
    """Load the sample points from a CSV file.

    The file needs a header row with the columns ``name``, ``x`` and ``y``;
    ``x`` and ``y`` are parsed with ``int``.

    Args:
        wiht_label: When falsy (the default) only the points are returned.
            When truthy the labels are returned together with the points.
            The misspelled parameter name is kept on purpose so existing
            keyword callers keep working.
        file_path: CSV file to read. Defaults to the notebook's example data.

    Returns:
        ``[[x, y], ...]`` when ``wiht_label`` is falsy, otherwise
        ``[labels, points]`` where ``labels[i]`` is the name of ``points[i]``.
    """
    labels = []
    points = []

    # The csv module asks for newline="" so that line breaks inside quoted
    # fields are handed to the reader untouched.
    with open(file_path, "r", newline="") as file:
        for row in csv.DictReader(file):
            labels.append(row["name"])
            points.append([int(row["x"]), int(row["y"])])

    return [labels, points] if wiht_label else points

def get_data_with_label(file_path: str = "data/min_example.csv") -> dict:
    """Load the sample points from a CSV file, keyed by their label.

    The file needs a header row with the columns ``name``, ``x`` and ``y``;
    ``x`` and ``y`` are parsed with ``int``.

    Args:
        file_path: CSV file to read. Defaults to the notebook's example data.

    Returns:
        ``{label: [x, y]}``. If a name occurs more than once, the last row
        with that name wins.
    """
    # The csv module asks for newline="" so that line breaks inside quoted
    # fields are handed to the reader untouched.
    with open(file_path, "r", newline="") as file:
        return {
            row["name"]: [int(row["x"]), int(row["y"])]
            for row in csv.DictReader(file)
        }



1. Calculating core distances

In [None]:
def calc_distance(x1: list, x2: list) -> float:
    """Return the Euclidean distance between the points ``x1`` and ``x2``.

    Each point is a sequence of coordinates. A ``ValueError`` is raised when
    the two points do not have the same number of coordinates.
    """
    if len(x1) != len(x2):
        raise ValueError("x1 and x2 must have equal sizes")

    # Sum the squared per-coordinate differences, then take the root.
    squared_diffs = ((a - b) ** 2 for a, b in zip(x1, x2))
    return math.sqrt(sum(squared_diffs))

def calc_core_distances(data: list[list], label: list, minPts: int) -> dict:
    """Compute the core distance of every point.

    For each point the distances to all points in ``data`` (the point itself
    included) are sorted in ascending order; the core distance is the entry at
    position ``minPts`` (1-based) of that ranking.

    Args:
        data: Points, each a list of coordinates.
        label: ``label[i]`` is the name under which ``data[i]`` is stored.
        minPts: 1-based rank of the neighbour that defines the core distance.
            Must be at least 1.

    Returns:
        ``{label: core_distance}`` with one entry per point; an empty dict
        when ``data`` is empty.

    Raises:
        ValueError: If ``minPts`` is smaller than 1.
        IndexError: If ``minPts`` is larger than the number of points, or
            ``label`` is shorter than ``data``.
    """
    if minPts < 1:
        # Without this guard ``minPts - 1`` turns into a negative index and
        # silently picks a distance from the far end of the sorted ranking.
        raise ValueError("minPts must be at least 1")

    result = {}
    for i, x1 in enumerate(data):
        point_distances = sorted(calc_distance(x1, x2) for x2 in data)
        result[label[i]] = point_distances[minPts - 1]

    return result

{'a': 1.4142135623730951,
 'b': 1.0,
 'c': 1.4142135623730951,
 'd': 1.4142135623730951,
 'e': 1.0,
 'f': 1.4142135623730951,
 'g': 1.4142135623730951,
 'h': 1.4142135623730951,
 'i': 1.4142135623730951,
 'j': 1.4142135623730951,
 'k': 1.4142135623730951,
 'l': 1.4142135623730951,
 'm': 1.4142135623730951,
 'n': 1.4142135623730951,
 'o': 1.4142135623730951,
 'p': 1.4142135623730951,
 'q': 1.4142135623730951,
 'r': 1.4142135623730951,
 's': 3.0,
 't': 4.0}

2. Computing MRG


In [None]:
def calc_mrg(data: list, labels: list, minPts: int) -> dict:
    """Build the mutual reachability graph as an adjacency dict.

    For every ordered pair of points (a point paired with itself included)
    the mutual reachability distance is the largest of: the core distance of
    the first point, the core distance of the second point, and the distance
    between the two points.

    Args:
        data: Points, each a list of coordinates.
        labels: ``labels[i]`` is the name of ``data[i]``.
        minPts: Passed through to ``calc_core_distances``.

    Returns:
        ``{label: [[mutual_reachability_distance, other_label], ...]}`` with
        the inner entries in the order of ``data``.
    """
    core = calc_core_distances(data, labels, minPts)
    graph = {}

    for src_idx, src_point in enumerate(data):
        src_label = labels[src_idx]
        src_core = core[src_label]
        # One adjacency row per source label, created on first use.
        row = graph.setdefault(src_label, [])

        for dst_idx, dst_point in enumerate(data):
            dst_label = labels[dst_idx]
            reach = max(src_core, core[dst_label], calc_distance(src_point, dst_point))
            row.append([reach, dst_label])

    return graph


def save_mrg(mrg: dict, file_path: str = "data/mrg.csv") -> str:
    """Write the mutual reachability graph to a CSV distance matrix.

    The first CSV row is ``"0"`` followed by the sorted labels. Every further
    row starts with a label and holds that label's distance to each column
    label, in column order.

    Args:
        mrg: ``{label: [[distance, other_label], ...]}``.
        file_path: Destination file. Defaults to ``data/mrg.csv``; the
            directory must already exist.

    Returns:
        The path that was written.
    """
    labels = sorted(mrg.keys())

    rows = []
    for row_label in labels:
        # Index the row by neighbour label so each distance lands under its
        # own column header, whatever order the neighbours were stored in.
        distance_to = {neighbour: distance for distance, neighbour in mrg[row_label]}
        # A neighbour missing from this row is written as an empty cell.
        rows.append([row_label, *[distance_to.get(column, "") for column in labels]])

    # newline="" stops the text layer from rewriting the line endings that
    # csv.writer emits (otherwise blank lines appear on some platforms).
    with open(file_path, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["0", *labels])
        writer.writerows(rows)
    return file_path


'data/mrg.csv'

3. Building MST

In [None]:
def is_connected(node1, node2, connected_nodes):
    """Tell whether ``node2`` can be reached from ``node1``.

    ``connected_nodes`` maps a node to the list of its direct neighbours;
    nodes without an entry are treated as having no neighbours. The graph is
    walked depth-first starting at ``node1``. A node is always reachable from
    itself.
    """
    visited = set()
    stack = [node1]

    while stack:
        current = stack.pop()
        visited.add(current)

        if current == node2:
            return True

        # Schedule every neighbour that has not been expanded yet.
        stack.extend(
            nxt for nxt in connected_nodes.get(current, []) if nxt not in visited
        )

    return False

def build_mst(mrg: dict):
    """Build a minimum spanning tree of the mutual reachability graph.

    Uses Kruskal's algorithm: edges are visited in ascending weight order
    (ties keep the order in which they appear in ``mrg``) and an edge is kept
    when its endpoints are not connected yet. Connectivity is tracked with a
    union-find structure, so no graph traversal is needed per candidate edge.

    Args:
        mrg: ``{node: [[weight, other_node], ...]}``. Self-edges are ignored.

    Returns:
        ``[[node1, node2, weight], ...]`` in the order the edges were
        accepted. If the graph is not connected, the result is a spanning
        forest.
    """
    edges = [
        [node1, node2, weight]
        for node1, neighbours in mrg.items()
        for (weight, node2) in neighbours
    ]
    # list.sort is stable, so equally heavy edges keep their original order.
    edges.sort(key=lambda item: item[-1])

    # A spanning tree over n nodes has exactly n - 1 edges.
    node_count = len({node for edge in edges for node in edge[:2]})

    parent = {}  # union-find forest: node -> parent, roots point at themselves

    def find_root(node):
        """Return the representative of the component that contains node."""
        parent.setdefault(node, node)
        root = node
        while parent[root] != root:
            root = parent[root]
        # Path compression: hang every visited node directly under the root.
        while parent[node] != root:
            next_node = parent[node]
            parent[node] = root
            node = next_node
        return root

    mst = []
    for edge in edges:
        node1, node2, _ = edge
        if node1 == node2:
            continue

        root1 = find_root(node1)
        root2 = find_root(node2)
        if root1 != root2:
            parent[root2] = root1
            mst.append(edge)
            if len(mst) == node_count - 1:
                # Tree is complete; every remaining edge would close a cycle.
                break

    return mst

19
[['b', 'e', 1.0], ['a', 'b', 1.4142135623730951], ['a', 'd', 1.4142135623730951], ['b', 'c', 1.4142135623730951], ['b', 'f', 1.4142135623730951], ['g', 'h', 1.4142135623730951], ['g', 'i', 1.4142135623730951], ['g', 'j', 1.4142135623730951], ['k', 'l', 1.4142135623730951], ['k', 'm', 1.4142135623730951], ['k', 'n', 1.4142135623730951], ['o', 'p', 1.4142135623730951], ['o', 'q', 1.4142135623730951], ['o', 'r', 1.4142135623730951], ['d', 'h', 2.0], ['l', 'q', 2.23606797749979], ['l', 's', 3.0], ['f', 's', 3.605551275463989], ['f', 't', 4.0]]


4. Dendrogram