# Inductive Miner Models
This notebook contains the computational pipeline for the Inductive Miner parts of the evaluation.
It discovers SBWF-nets with the Inductive Miner from pm4py and computes all optimal skip alignments in normal form as well as all optimal alignments (provided there are only finitely many).

## Instructions
1. Install all required libraries and download the event logs. Provide their system paths.
2. Select the log you want to align (`inspected_log`) and hit 'Run All'.

## Output
The output is written to disk and available in the following variables:
- `skip_dict`: variant string $\rightarrow$ list of optimal skip alignment states
- `skip_times`: variant string $\rightarrow$ computation time for all optimal skip alignments in normal form in _ns_ or -1 for a timeout

- `pm4py_dict`: variant string $\rightarrow$ list of optimal alignments or empty list if there are infinitely many optimal alignments
- `pm4py_times`: variant string $\rightarrow$ computation time for all optimal alignments in _ns_, or -1 if the computation timed out or there are infinitely many optimal alignments
- `pm4py_times_first`: variant string $\rightarrow$ computation time for the first optimal alignment in _ns_ or -1 for a timeout

In [15]:
from logs import Logs
############### ENTER THE LOG PATHS HERE ###############
# Placeholder values: replace each one with the location of the downloaded
# .xes file on this machine.
path_to_road_fines_log = 'xxxxx/xx/xxxx.xes'
path_to_request_for_payment_log = 'xxxxx/xx/xxxx.xes'
path_to_sepsis_log = 'xxxxx/xx/xxxx.xes'

# The log processed by the rest of the notebook. Later cells branch on this
# value to pick the file path, the noise threshold and the output folder.
inspected_log = Logs.ROAD_FINES

### Content

In [None]:
%load_ext autoreload
%autoreload 2
import pm4py
from processtree import *
from alignment import *
from alignall import *
import statistics
import random
from tqdm import tqdm
from pathlib import Path
import pickle

In [13]:
def get_variant_dict(log):
    """Return a dict mapping each variant of ``log`` to its number of traces.

    The entries are ordered by descending trace count; variants with the same
    count keep the order in which pm4py reported them.
    """
    trace_idx_by_variant = pm4py.statistics.variants.log.get.get_variants_from_log_trace_idx(log)
    counts = {
        variant: len(trace_indices)
        for variant, trace_indices in trace_idx_by_variant.items()
    }
    # reverse=True keeps the sort stable, so ties stay in their original order.
    ranked = sorted(counts.items(), key=lambda entry: entry[1], reverse=True)
    return dict(ranked)

In [14]:
def update_pair_taus(tree:ProcessTree):
    """Assign a name to every Tau leaf in ``tree`` (in place).

    A Tau whose parent has exactly two children is named after its sibling:
    ``"TAU_" + sibling.name`` if the sibling is an Activity, otherwise
    ``"TAU_" + sibling.id``. Any other Tau is named from its distance to the
    root followed by a random number. Activities are left untouched and all
    remaining nodes are traversed recursively.
    """
    # The Tau check has to come first, exactly as in the if/elif chain this
    # replaces.
    if not isinstance(tree, Tau):
        if not isinstance(tree, Activity):
            for child in tree.children:
                update_pair_taus(child)
        return

    has_single_sibling = tree.parent is not None and len(tree.parent.children) == 2
    if not has_single_sibling:
        tree.name = "TAU_" + str(tree.get_distance_to_root()) + str(random.random())
        return

    # Pick whichever of the two children is not this Tau.
    sibling = tree.parent.children[0]
    if sibling == tree:
        sibling = tree.parent.children[1]

    suffix = sibling.name if isinstance(sibling, Activity) else sibling.id
    tree.name = "TAU_" + suffix

In [None]:
# Resolve the XES file that belongs to the selected log and load it.
path = None
if inspected_log == Logs.ROAD_FINES:
    path = path_to_road_fines_log
elif inspected_log == Logs.REQUEST_FOR_PAYMENT:
    path = path_to_request_for_payment_log
elif inspected_log == Logs.SEPSIS:
    path = path_to_sepsis_log

if path is None:
    # Fail here with a clear message instead of handing None to pm4py.
    raise ValueError(f"No log path configured for inspected_log={inspected_log!r}")

log_rf = pm4py.read_xes(path)

In [None]:
# Noise threshold handed to the Inductive Miner, chosen per log.
if inspected_log in (Logs.ROAD_FINES, Logs.REQUEST_FOR_PAYMENT):
    threshold = 0.5
elif inspected_log == Logs.SEPSIS:
    threshold = 0.2
else:
    threshold = None

# Discover the process tree and render it.
process_tree_rf = pm4py.discover_process_tree_inductive(
    log_rf,
    activity_key='concept:name',
    case_id_key='case:concept:name',
    timestamp_key='time:timestamp',
    noise_threshold=threshold,
)
pm4py.view_process_tree(process_tree_rf, format='png')

In [None]:
# Convert the discovered pm4py tree into the project's ProcessTree type.
# NOTE(review): the meaning of the arguments (100000, 0, 0) is not visible in
# this notebook; 100000 is also used as a per-event factor in the bounds passed
# to the pm4py aligners further down - confirm against ProcessTree.from_pm4py.
tree_rf = ProcessTree.from_pm4py(process_tree_rf, 100000, 0, 0)
# Give every Tau leaf a name (in place).
update_pair_taus(tree_rf)
# Convert back so the pm4py tree used below carries the renamed taus.
process_tree_rf = tree_rf.to_pm4py()
# insert_cycle_checks returns a pair: tau_loops_rf (passed to the pm4py aligners
# later) and the updated tree. Presumably this is where the "TAU_entry" /
# "TAU_exit" transitions filtered out of the pm4py alignments come from - confirm.
tau_loops_rf, process_tree_rf = insert_cycle_checks(process_tree_rf)
pm4py.view_process_tree(process_tree_rf, format='png')
# Petri net, initial marking and final marking used for the pm4py alignments.
sublog_pt_rf_net, sublog_pt_rf_init, sublog_pt_rf_final = pm4py.convert.convert_to_petri_net(process_tree_rf)

In [None]:
# Compute all optimal skip alignments (normal form) for every variant.
skip_dict, skip_times = {}, {}
Aligner.set_level_incentive(0)
variant_strings = list(get_variant_dict(log_rf))
futures = align_sk_all(variant_strings, tree_rf, timeout=3600)

# futures[i] holds the (alignments, time) result for variant_strings[i].
for position, _ in enumerate(futures):
    alignments, elapsed = futures[position].result()
    variant_key = ", ".join(variant_strings[position])
    skip_dict[variant_key] = alignments
    skip_times[variant_key] = elapsed

In [None]:
# Re-measure the runtime for ALL optimal skip alignments of a variant whenever
# the recorded time is 0: that is not a timeout, the run was just too fast.
for variant_key, elapsed in tqdm(skip_times.items(), total=len(skip_times)):
    if elapsed != 0:
        continue
    trace = variant_key.split(", ")
    skip_times[variant_key] = align_sk_all_for_one(tree_rf, trace, 3600)

In [None]:
# Mark every measurement above the 3600 s limit (expressed in ns) as a timeout.
for variant_key, elapsed in tqdm(skip_times.items(), total=len(skip_times)):
    exceeded_limit = elapsed > 3600 * 10**9
    if exceeded_limit:
        skip_times[variant_key] = -1

In [None]:
# Summary statistics for the skip alignments; the median ignores timeouts (-1).
print("Max num agns:", max(map(len, skip_dict.values())))
print("Avg num agns:", statistics.mean(map(len, skip_dict.values())))
print("Num timeouts:", sum(1 for elapsed in skip_times.values() if elapsed == -1))
print("Max time:", max(skip_times.values()))
print("Med time:", statistics.median(elapsed for elapsed in skip_times.values() if elapsed != -1))

In [None]:
# Compute all optimal alignments on the Petri net for every variant.
pm4py_dict, pm4py_times, pm4py_times_first = {}, {}, {}

pm4py_res = align_pn_all_multi(
    log_rf,
    sublog_pt_rf_net,
    sublog_pt_rf_init,
    sublog_pt_rf_final,
    tau_loops_rf,
    tree_rf,
    timeout=3600,
)

for k, (process_time, (agns, has_timed_out, time_first_agn)) in pm4py_res.items():
    # Drop moves without a model label and the TAU_entry / TAU_exit moves,
    # then de-duplicate the alignments that remain.
    pm4py_dict[k] = list({
        tuple(
            move
            for move in agn['alignment']
            if move.label[1] is not None
            and not move.label[1].startswith(("TAU_entry", "TAU_exit"))
        )
        for agn in agns
    })

    # -1 marks a timeout (flagged by the aligner or more than 3600 s in ns).
    if has_timed_out == -1 or process_time > 36*10**11:
        pm4py_times[k] = -1
    else:
        pm4py_times[k] = process_time
    pm4py_times_first[k] = time_first_agn

In [None]:
# Re-measure the runtime for ALL optimal alignments of a variant whenever the
# recorded time is 0: that is not a timeout, the run was just too fast.
for variant_key, elapsed in tqdm(pm4py_times.items(), total=len(pm4py_times)):
    if elapsed != 0:
        continue
    trace = variant_key.split(", ")
    # Cheapest model execution plus 100000 per trace event plus 0.1.
    cost_limit = tree_rf.get_cheapest_execution(0)[0] + len(trace) * 100000 + 0.1
    pm4py_times[variant_key] = align_pn_all_for_one(
        trace,
        sublog_pt_rf_net,
        sublog_pt_rf_init,
        sublog_pt_rf_final,
        tau_loops_rf,
        cost_limit,
        timeout=3600,
    )

In [None]:
# Re-measure the runtime for ONE optimal alignment of a variant whenever the
# recorded time is 0 (too fast, not a timeout). Two passes are made: first with
# cnt=200, then with cnt=2000 for the entries that are still 0.
for repetitions in (200, 2000):
    for variant_key, elapsed in tqdm(pm4py_times_first.items(), total=len(pm4py_times_first)):
        if elapsed != 0:
            continue
        trace = variant_key.split(", ")
        # Cheapest model execution plus 100000 per trace event plus 0.1.
        cost_limit = tree_rf.get_cheapest_execution(0)[0] + len(trace) * 100000 + 0.1
        pm4py_times_first[variant_key] = align_pn_one_for_one(
            trace,
            sublog_pt_rf_net,
            sublog_pt_rf_init,
            sublog_pt_rf_final,
            tau_loops_rf,
            cost_limit,
            timeout=3600,
            cnt=repetitions,
        )

In [None]:
# Mark every first-alignment time above the 3600 s limit (in ns) as a timeout.
for variant_key, elapsed in tqdm(pm4py_times_first.items(), total=len(pm4py_times_first)):
    exceeded_limit = elapsed > 3600 * 10**9
    if exceeded_limit:
        pm4py_times_first[variant_key] = -1

In [None]:
# Summary statistics for the pm4py alignments; the median and the minimum
# ignore timeouts (-1).
print("Max num agns:", max(map(len, pm4py_dict.values())))
print("Avg num agns:", statistics.mean(map(len, pm4py_dict.values())))
print("Num timeouts:", sum(1 for elapsed in pm4py_times.values() if elapsed == -1))
print("Max time:", max(pm4py_times.values()))
print("Med. first agn time:", statistics.median(elapsed for elapsed in pm4py_times_first.values() if elapsed != -1))
print("Min first time:", min(elapsed for elapsed in pm4py_times_first.values() if elapsed != -1))

In [87]:
# Persist all result dictionaries of the inspected log with pickle.
# Every log gets its own sub-folder below "im_results".
output_folder = None
if inspected_log == Logs.ROAD_FINES:
    output_folder = Path("im_results/rf")
elif inspected_log == Logs.REQUEST_FOR_PAYMENT:
    output_folder = Path("im_results/payreq")
elif inspected_log == Logs.SEPSIS:
    output_folder = Path("im_results/sepsis")

if output_folder is None:
    raise ValueError(f"No output folder configured for inspected_log={inspected_log!r}")

# Path.mkdir returns None, so the folder path has to be kept separately from
# the call that creates it.
output_folder.mkdir(parents=True, exist_ok=True)

# File name -> result object; the files go into the folder of the selected log.
for file_name, result in (
    ("pm4py_dict", pm4py_dict),
    ("skip_dict", skip_dict),
    ("pm4py_times", pm4py_times),
    ("pm4py_times_first", pm4py_times_first),
    ("skip_times", skip_times),
):
    # The context manager closes the file even if pickling fails.
    with open(output_folder / file_name, "wb") as result_file:
        pickle.dump(result, result_file)