# Hierarchical clustering program using different distance measures

Uses pm4py to import the different event logs found in the logs folder

In [None]:
# Reloads the local files if they change
%load_ext autoreload
%autoreload 2

# import local files
import Jaccard
import Simrank
import Log_processing
import Clustering

import matplotlib.pyplot as plt

In [None]:
# TESTING
def weighted_simrank(G, node1, node2, C=0.8, max_iterations=100, tolerance=1e-6):
    """Return the weighted SimRank similarity of ``node1`` and ``node2``.

    The score is the fixed point of

        s(a, a) = 1
        s(a, b) = C / (|I(a)| * |I(b)|)
                  * sum(w(x, a) * w(y, b) * s(x, y) for x in I(a) for y in I(b))

    where ``I(n)`` is the set of predecessors of ``n`` and ``w(x, n)`` is the
    ``'weight'`` attribute of the edge ``x -> n``. A pair in which either node
    has no predecessors scores 0.0.

    The fixed point is approximated iteratively, starting from 0 for every
    pair of distinct nodes, so the computation also terminates on graphs that
    contain cycles. Weights are used as stored on the edges; they are not
    normalised here.

    Args:
        G: directed graph exposing ``G.predecessors(n)`` and
            ``G[u][v]['weight']`` for every edge ``u -> v``.
        node1: first node.
        node2: second node.
        C: decay factor applied at every step.
        max_iterations: upper bound on the number of update sweeps.
        tolerance: iteration stops once no score changes by more than this.

    Returns:
        float: the similarity score; 1.0 when ``node1 == node2``.
    """
    if node1 == node2:
        return 1.0

    in_edges = {}

    def incoming(node):
        # (predecessor, weight of the edge predecessor -> node), cached per node.
        if node not in in_edges:
            in_edges[node] = [
                (pred, G[pred][node]['weight'])
                for pred in set(G.predecessors(node))
            ]
        return in_edges[node]

    # Collect every pair of distinct nodes that the requested score depends on
    # by walking predecessor pairs backwards from (node1, node2).
    pairs = {(node1, node2)}
    stack = [(node1, node2)]
    while stack:
        a, b = stack.pop()
        for x, _ in incoming(a):
            for y, _ in incoming(b):
                if x != y and (x, y) not in pairs:
                    pairs.add((x, y))
                    stack.append((x, y))

    scores = dict.fromkeys(pairs, 0.0)
    for _ in range(max_iterations):
        updated = {}
        for a, b in pairs:
            ins_a, ins_b = in_edges[a], in_edges[b]
            if not ins_a or not ins_b:
                # No shared "ancestry" is possible; also avoids dividing by zero.
                updated[(a, b)] = 0.0
                continue
            total = sum(
                w_a * w_b * (1.0 if x == y else scores[(x, y)])
                for x, w_a in ins_a
                for y, w_b in ins_b
            )
            updated[(a, b)] = C * total / (len(ins_a) * len(ins_b))

        delta = max(abs(updated[pair] - scores[pair]) for pair in pairs)
        scores = updated
        if delta <= tolerance:
            break

    return scores[(node1, node2)]

def full_weighted_simrank(G):
    """Compute the weighted SimRank score for every ordered pair of nodes.

    Args:
        G: graph handed to ``weighted_simrank``; its nodes are read through
            ``G.nodes()``.

    Returns:
        dict: ``similarities[node1][node2]`` is the value of
        ``weighted_simrank(G, node1, node2)`` with that function's default
        settings.
    """
    nodes = list(G.nodes())
    # Store each computed row and return the table: the previous version
    # saved an empty dict per node and returned nothing.
    return {
        node1: {node2: weighted_simrank(G, node1, node2) for node2 in nodes}
        for node1 in nodes
    }

Analyze the Sepsis event log

In [None]:
# Sepsis log: one panel per distance measure on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(20, 10))
fig.suptitle("Sepsis clustering")

# ax.flat walks the grid row by row, which matches the order of the titles.
for axis, title in zip(
    ax.flat,
    ("Simple Jaccard", "Weighted Jaccard", "Simple Simrank", "Weighted Simrank"),
):
    axis.set_title(title)

sepsis = Log_processing.get_log("../logs/sepsis_event_log.xes")

# One model per distance measure, all built from the same log.
simple_jaccard_sepsis = Jaccard.Simple_Jaccard(sepsis)
weighted_jaccard_sepsis = Jaccard.Weighted_Jaccard(sepsis)
simple_simrank_sepsis = Simrank.Simple_Simrank(sepsis)
weighted_simrank_sepsis = Simrank.Weighted_Simrank(sepsis)

# Cluster each model on the panel that carries its title.
for model, axis in zip(
    (
        simple_jaccard_sepsis,
        weighted_jaccard_sepsis,
        simple_simrank_sepsis,
        weighted_simrank_sepsis,
    ),
    ax.flat,
):
    model.perform_clustering(ax=axis)

fig.tight_layout()
plt.show()
fig.savefig("../out/sepsis.jpg")

Analyze the Coselog event log

In [None]:
# Coselog log: one panel per distance measure on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(20, 10))
fig.suptitle("Coselog clustering")

# ax.flat walks the grid row by row, which matches the order of the titles.
for axis, title in zip(
    ax.flat,
    ("Simple Jaccard", "Weighted Jaccard", "Simple Simrank", "Weighted Simrank"),
):
    axis.set_title(title)

coselog = Log_processing.get_log("../logs/coselog.xes")

# One model per distance measure, all built from the same log.
simple_jaccard_coselog = Jaccard.Simple_Jaccard(coselog)
weighted_jaccard_coselog = Jaccard.Weighted_Jaccard(coselog)
simple_simrank_coselog = Simrank.Simple_Simrank(coselog)
weighted_simrank_coselog = Simrank.Weighted_Simrank(coselog)

# Cluster each model on the panel that carries its title.
for model, axis in zip(
    (
        simple_jaccard_coselog,
        weighted_jaccard_coselog,
        simple_simrank_coselog,
        weighted_simrank_coselog,
    ),
    ax.flat,
):
    model.perform_clustering(ax=axis)

fig.tight_layout()
plt.show()
fig.savefig("../out/coselog.jpg")

Analyze the BPI Challenge event log

In [None]:
# BPI Challenge log: one panel per distance measure on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(20, 10))
fig.suptitle("BPIC clustering")

# ax.flat walks the grid row by row, which matches the order of the titles.
for axis, title in zip(
    ax.flat,
    ("Simple Jaccard", "Weighted Jaccard", "Simple Simrank", "Weighted Simrank"),
):
    axis.set_title(title)

bpic = Log_processing.get_log("../logs/BPI_Challenge_2013_incidents.xes")

# One model per distance measure, all built from the same log.
simple_jaccard_bpic = Jaccard.Simple_Jaccard(bpic)
weighted_jaccard_bpic = Jaccard.Weighted_Jaccard(bpic)
simple_simrank_bpic = Simrank.Simple_Simrank(bpic)
weighted_simrank_bpic = Simrank.Weighted_Simrank(bpic)

# Cluster each model on the panel that carries its title.
for model, axis in zip(
    (
        simple_jaccard_bpic,
        weighted_jaccard_bpic,
        simple_simrank_bpic,
        weighted_simrank_bpic,
    ),
    ax.flat,
):
    model.perform_clustering(ax=axis)

fig.tight_layout()
plt.show()
fig.savefig("../out/bpic.jpg")

Analyze the Road traffic fine event log

In [None]:
# Road traffic fine log: one panel per distance measure on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(20, 10))
fig.suptitle("Road traffic fine clustering")

# ax.flat walks the grid row by row, which matches the order of the titles.
for axis, title in zip(
    ax.flat,
    ("Simple Jaccard", "Weighted Jaccard", "Simple Simrank", "Weighted Simrank"),
):
    axis.set_title(title)

road_traffic = Log_processing.get_log("../logs/Road_Traffic_Fine_Management_Process.xes")

# One model per distance measure, all built from the same log.
simple_jaccard_road_traffic = Jaccard.Simple_Jaccard(road_traffic)
weighted_jaccard_road_traffic = Jaccard.Weighted_Jaccard(road_traffic)
simple_simrank_road_traffic = Simrank.Simple_Simrank(road_traffic)
weighted_simrank_road_traffic = Simrank.Weighted_Simrank(road_traffic)

# Cluster each model on the panel that carries its title.
for model, axis in zip(
    (
        simple_jaccard_road_traffic,
        weighted_jaccard_road_traffic,
        simple_simrank_road_traffic,
        weighted_simrank_road_traffic,
    ),
    ax.flat,
):
    model.perform_clustering(ax=axis)

fig.tight_layout()
plt.show()
fig.savefig("../out/road_traffic_fines.jpg")