In [13]:
import pandas as pd
import pm4py
import networkx as nx

def get_log(path: str):
    return pm4py.read_xes(path)

log = get_log("../logs/sepsis_event_log.xes")

parsing log, completed traces ::   0%|          | 0/1050 [00:00<?, ?it/s]

  df[col] = pd.to_datetime(df[col], utc=True)
  df[col] = pd.to_datetime(df[col], utc=True)
  df[col] = pd.to_datetime(df[col], utc=True)
  df[col] = pd.to_datetime(df[col], utc=True)


In [56]:
def print_log_info(log):
    start_activities = pm4py.stats.get_start_activities(log)
    end_activities = pm4py.stats.get_end_activities(log)
    event_attributes = pm4py.stats.get_event_attributes(log)
    trace_attributes = pm4py.stats.get_trace_attributes(log)
#    event_attribute_values = [
#        {attribute: pm4py.stats.get_event_attribute_values(log, attribute)}
#        for attribute in event_attributes]
#    trace_attribute_values = [
#        {attribute: pm4py.stats.get_trace_attribute_values(log, attribute)}
#        for attribute in trace_attributes
#    ]
#    variants = pm4py.stats.get_variants(log)

    info = f"Event log information:\n"
    info += f"Start activities: {start_activities}\n"
    info += f"End activities: {end_activities}\n"
    info += f"Event attributes: {event_attributes}\n"
    info += f"Trace attributes: {trace_attributes}\n"
#    info += f"Event attribute values: {event_attribute_values}\n"
#    info += f"Trace attribute values: {trace_attribute_values}\n"
#    info += f"Variants: {variants}"
    print(info)

def save_dfg_of_log(log):
    dfg, start_activities, end_activities = pm4py.discovery.discover_dfg(log, 
                                                                         case_id_key= "case:concept:name",
                                                                         activity_key= "concept:name")
    pm4py.vis.save_vis_dfg(dfg, start_activities, end_activities, file_path="../out/dfg/sepsis.svg")

def get_jaccard_distance_of_dfg(log):
    # convert event log to an undirected NetworkX graph
    dfg, start, end = pm4py.discover_dfg(log)
    edge_list = [(edge[0], edge[1], weight) for edge, weight in dfg.items()]
    dg = nx.DiGraph()
    dg.add_weighted_edges_from(edge_list)

    #viz = nx.nx_agraph.to_agraph(dg)
    #with open("../out/dfg/sepsis_dfg.png", "w"):
    #    viz.draw("../out/dfg/sepsis_dfg.png", prog="dot")

    # compute the jaccard distances of the nodes
    distances = nx.jaccard_coefficient(dg.to_undirected())

    return distances

def convert_pairwise_distances_to_distance_matrix(log, pairwise_distances):
    activities = list(pm4py.get_event_attribute_values(log, "concept:name").keys())
    activities.sort()
    print(activities)
    
    distance_matrix = pd.DataFrame(columns=activities, index=activities)
    
    for u, v, distance in pairwise_distances:
        distance_matrix[u][v] = 1 - round(distance, 3)
        distance_matrix[v][u] = 1 - round(distance, 3)
    distance_matrix = distance_matrix.fillna(1)
    display(distance_matrix)

    
    return distance_matrix.to_numpy()

In [57]:
pairwise_distances = get_jaccard_distance_of_dfg(log)
table = convert_pairwise_distances_to_distance_matrix(log, pairwise_distances)
display(table)

['Admission IC', 'Admission NC', 'CRP', 'ER Registration', 'ER Sepsis Triage', 'ER Triage', 'IV Antibiotics', 'IV Liquid', 'LacticAcid', 'Leucocytes', 'Release A', 'Release B', 'Release C', 'Release D', 'Release E', 'Return ER']


Unnamed: 0,Admission IC,Admission NC,CRP,ER Registration,ER Sepsis Triage,ER Triage,IV Antibiotics,IV Liquid,LacticAcid,Leucocytes,Release A,Release B,Release C,Release D,Release E,Return ER
Admission IC,1.0,1.0,1,1.0,1.0,0.111,1.0,1.0,1.0,1,0.4,0.333,0.7,0.7,0.7,0.846
Admission NC,1.0,1.0,1,0.385,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,1.0,1.0,0.786,0.643
CRP,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0
ER Registration,1.0,0.385,1,1.0,1.0,1.0,1.0,1.0,1.0,1,0.5,0.444,0.8,0.8,0.667,0.833
ER Sepsis Triage,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,0.4,0.333,0.7,0.7,0.7,0.846
ER Triage,0.111,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,0.333,0.25,0.667,0.667,0.667,0.833
IV Antibiotics,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,0.75,0.75,0.75,0.786
IV Liquid,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,0.75,0.75,0.75,0.786
LacticAcid,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,0.786,0.786,1.0,0.733
Leucocytes,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0


array([[1.   , 1.   , 1.   , 1.   , 1.   , 0.111, 1.   , 1.   , 1.   ,
        1.   , 0.4  , 0.333, 0.7  , 0.7  , 0.7  , 0.846],
       [1.   , 1.   , 1.   , 0.385, 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 1.   , 1.   , 1.   , 1.   , 0.786, 0.643],
       [1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ],
       [1.   , 0.385, 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 0.5  , 0.444, 0.8  , 0.8  , 0.667, 0.833],
       [1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 0.4  , 0.333, 0.7  , 0.7  , 0.7  , 0.846],
       [0.111, 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 0.333, 0.25 , 0.667, 0.667, 0.667, 0.833],
       [1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 1.   , 1.   , 0.75 , 0.75 , 0.75 , 0.786],
       [1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   , 1.   , 1.  