## Hierarchical clustering program using different distance measures

#### Import all necessary files and libraries

In [None]:
# Reloads the local files if they change
%load_ext autoreload
%autoreload 2

# import local files
import Jaccard
import Simrank
import Log_processing
import Clustering
import Comparison
import Label_Similarity
import Role_Comparison

import matplotlib.pyplot as plt

#### Load the event logs

In [None]:
# Load the Sepsis and Coselog event logs from their XES files via the
# project-local Log_processing helper. The resulting objects are passed to the
# Comparison.* plotting calls in the analysis cells.
sepsis = Log_processing.get_log("../logs/sepsis_event_log.xes")
coselog = Log_processing.get_log("../logs/coselog.xes")

In [None]:
# Load the BPI Challenge 2013 incidents log and the Road Traffic Fine
# Management log from their XES files. They are loaded in a separate cell from
# the Sepsis/Coselog logs (presumably because they are larger -- confirm).
bpic = Log_processing.get_log("../logs/BPI_Challenge_2013_incidents.xes")
road_traffic = Log_processing.get_log("../logs/Road_Traffic_Fine_Management_Process.xes")

#### Analyze the Sepsis event log

In [None]:
# Sepsis log: dendrograms based on the Jaccard measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_jaccard_dendrograms_for_event_log(
    sepsis,
    "Sepsis Clustering using Jaccard",
    "sepsis_jaccard.jpg",
)

In [None]:
# Sepsis log: dendrograms based on the Simrank measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_simrank_dendrograms_for_event_log(
    sepsis,
    "Sepsis Clustering using Simrank",
    "sepsis_simrank.jpg",
)

In [None]:
# Sepsis log: dendrograms based on the n-gram measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_n_gram_dendrograms_for_event_log(
    sepsis,
    "Sepsis Clustering using N_grams",
    "sepsis_n_gram.jpg",
)

In [None]:
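# Disabled: replace the "#InsertRoleHere" placeholder with the log's role attribute before enabling.
# NOTE(review): the other logs pass "concept:name" as the second argument -- confirm "case:concept:name" is intended here.
#Comparison.show_role_comparison_dendrograms_for_event_log(sepsis, "case:concept:name", "#InsertRoleHere", "Sepsis Clustering using role comparison", "sepsis_role_comp.jpg")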

In [None]:
# Sepsis log: dendrograms based on label similarity.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_label_similarity_dendrograms_for_event_log(
    sepsis,
    "Sepsis Clustering using label similarity",
    "sepsis_label_sim.jpg",
)

#### Analyze the Coselog event log

In [None]:
# Coselog log: dendrograms based on the Jaccard measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_jaccard_dendrograms_for_event_log(
    coselog,
    "Coselog Clustering using Jaccard",
    "coselog_jaccard.jpg",
)

In [None]:
# Coselog log: dendrograms based on the Simrank measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_simrank_dendrograms_for_event_log(
    coselog,
    "Coselog Clustering using Simrank",
    "coselog_simrank.jpg",
)

In [None]:
# Coselog log: dendrograms based on the n-gram measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_n_gram_dendrograms_for_event_log(
    coselog,
    "Coselog Clustering using N_grams",
    "coselog_n_gram.jpg",
)

In [None]:
# Coselog log: dendrograms based on role comparison.
# "concept:name" and "case:responsible" are presumably the activity and role
# attribute keys -- confirm against Comparison. The remaining strings are the
# label and (presumably) the output image name.
Comparison.show_role_comparison_dendrograms_for_event_log(
    coselog,
    "concept:name",
    "case:responsible",
    "Coselog Clustering using role comparison",
    "coselog_role_comp.jpg",
)

In [None]:
# Coselog log: dendrograms based on label similarity.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_label_similarity_dendrograms_for_event_log(
    coselog,
    "Coselog Clustering using label similarity",
    "coselog_label_sim.jpg",
)

#### Analyze the BPI Challenge event log

In [None]:
# BPIC log: dendrograms based on the Jaccard measure.
# The label names the measure ("using Jaccard"), like the Sepsis/Coselog
# Jaccard cells and the other BPIC cells, so the figures can be told apart.
# NOTE(review): the file name "bpic.jpg" lacks the "_jaccard" suffix used for
# the Sepsis/Coselog outputs; it is kept as-is in case it is referenced elsewhere.
Comparison.show_jaccard_dendrograms_for_event_log(
    bpic,
    "BPIC Clustering using Jaccard",
    "bpic.jpg",
)

In [None]:
# BPIC log: dendrograms based on the Simrank measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_simrank_dendrograms_for_event_log(
    bpic,
    "BPIC Clustering using Simrank",
    "bpic_simrank.jpg",
)

In [None]:
# BPIC log: dendrograms based on the n-gram measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_n_gram_dendrograms_for_event_log(
    bpic,
    "BPIC Clustering using N_grams",
    "bpic_n_gram.jpg",
)

In [None]:
#Comparison.show_role_comparison_dendrograms_for_event_log(bpic, "concept:name", "#InsertRole", "BPIC Clustering using role comparison", "bpic_role_comp.jpg")

In [None]:
# BPIC log: dendrograms based on label similarity.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_label_similarity_dendrograms_for_event_log(
    bpic,
    "BPIC Clustering using label similarity",
    "bpic_label_sim.jpg",
)

#### Analyze the Road traffic fine event log

In [None]:
# Road traffic fines log: dendrograms based on the Jaccard measure.
# The label names the measure ("using Jaccard"), like the Sepsis/Coselog
# Jaccard cells and the other road traffic cells, so the figures can be told
# apart.
# NOTE(review): the file name "Road_traffic_fines.jpg" does not follow the
# "road_traffic_<measure>.jpg" pattern of the sibling cells; it is kept as-is
# in case it is referenced elsewhere.
Comparison.show_jaccard_dendrograms_for_event_log(
    road_traffic,
    "Road traffic fines Clustering using Jaccard",
    "Road_traffic_fines.jpg",
)

In [None]:
# Road traffic fines log: dendrograms based on the Simrank measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_simrank_dendrograms_for_event_log(
    road_traffic,
    "Road traffic fines Clustering using Simrank",
    "road_traffic_simrank.jpg",
)

In [None]:
# Road traffic fines log: dendrograms based on the n-gram measure.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_n_gram_dendrograms_for_event_log(
    road_traffic,
    "Road traffic fines Clustering using N_grams",
    "road_traffic_n_gram.jpg",
)

In [None]:
#Comparison.show_role_comparison_dendrograms_for_event_log(road_traffic, "concept:name", "#InsertRole", "Road traffic fines Clustering using role comparison", "road_traffic_role_comp.jpg")

In [None]:
# Road traffic fines log: dendrograms based on label similarity.
# The string arguments are the label and (presumably) the output image name.
Comparison.show_label_similarity_dendrograms_for_event_log(
    road_traffic,
    "Road traffic fines Clustering using label similarity",
    "road_traffic_label_sim.jpg",
)

#### Testing area

In [None]:
# Testing / scratch area.
# Everything below is wrapped in a triple-quoted string literal, so none of it
# is executed; it is kept as a record of earlier experiments:
#   * hierarchy CSV export for the Sepsis and BPI 2013 logs using the simple and
#     weighted Jaccard/Simrank variants,
#   * a "with dummies" hierarchy export for the Coselog log,
#   * a Jaccard-vs-Simrank dendrogram comparison using the rand and
#     mutual-info scores.
# NOTE(review): the Coselog dummy export writes to
# "out_sepsis_simple_jaccard_dummy.csv" -- the "sepsis" in that name looks like
# a copy-paste leftover; fix before re-enabling.
# NOTE(review): as the cell's final expression, the string is presumably echoed
# as cell output when the cell is run.
'''
log = Log_processing.get_log("../logs/sepsis_event_log.xes")
num_levels = 5

simple_jaccard = Jaccard.Simple_Jaccard(log)
weighted_jaccard = Jaccard.Weighted_Jaccard(log)
simple_simrank = Simrank.Simple_Simrank(log)
weighted_simrank = Simrank.Weighted_Simrank(log)

simple_jaccard.perform_clustering(no_plot=True)
weighted_jaccard.perform_clustering(no_plot=True)
simple_simrank.perform_clustering(no_plot=True)
weighted_simrank.perform_clustering(no_plot=True)

Clustering.generate_hierarchy_file(simple_jaccard.get_linkage(), simple_jaccard.get_activities(), num_levels, "output_sepsis_simple_jaccard.csv")
Clustering.generate_hierarchy_file(weighted_jaccard.get_linkage(), weighted_jaccard.get_activities(), num_levels, "output_sepsis_weighted_jaccard.csv")
Clustering.generate_hierarchy_file(simple_simrank.get_linkage(), simple_simrank.get_activities(), num_levels, "output_sepsis_simple_simrank.csv")
Clustering.generate_hierarchy_file(weighted_simrank.get_linkage(), weighted_simrank.get_activities(), num_levels, "output_sepsis_weighted_simrank.csv")


log = Log_processing.get_log("../logs/BPI_Challenge_2013_incidents.xes")
num_levels = 5

simple_jaccard = Jaccard.Simple_Jaccard(log)
weighted_jaccard = Jaccard.Weighted_Jaccard(log)
simple_simrank = Simrank.Simple_Simrank(log)
weighted_simrank = Simrank.Weighted_Simrank(log)

simple_jaccard.perform_clustering(no_plot=True)
weighted_jaccard.perform_clustering(no_plot=True)
simple_simrank.perform_clustering(no_plot=True)
weighted_simrank.perform_clustering(no_plot=True)

Clustering.generate_hierarchy_file(simple_jaccard.get_linkage(), simple_jaccard.get_activities(), num_levels, "output_bpi_simple_jaccard.csv")
Clustering.generate_hierarchy_file(weighted_jaccard.get_linkage(), weighted_jaccard.get_activities(), num_levels, "output_bpi_weighted_jaccard.csv")
Clustering.generate_hierarchy_file(simple_simrank.get_linkage(), simple_simrank.get_activities(), num_levels, "output_bpi_simple_simrank.csv")
Clustering.generate_hierarchy_file(weighted_simrank.get_linkage(), weighted_simrank.get_activities(), num_levels, "output_bpi_weighted_simrank.csv")

simple_jaccard = Jaccard.Simple_Jaccard(coselog)
simple_jaccard.perform_clustering(no_plot=True)
Clustering.generate_hierarchy_file_with_dummies(simple_jaccard.get_activities(), simple_jaccard.get_distance_matrix(), simple_jaccard.get_linkage(), "out_sepsis_simple_jaccard_dummy.csv")

coselog = Log_processing.get_log("../logs/coselog.xes")
compare_jacc = Jaccard.Simple_Jaccard(coselog)
compare_simrank = Simrank.Simple_Simrank(coselog)

rand_scores = Comparison.compare_dendrogram_using_rand_score(compare_jacc, compare_simrank)
mutual_info_scores = Comparison.compare_dendrogram_using_mutual_info_score(compare_jacc, compare_simrank)
'''