# Indulpet Miner Models
This notebook contains the computational pipeline for the Indulpet Miner parts of the evaluation.
It discovers SBWF-nets with the Indulpet Miner implemented in ProM and computes all optimal skip alignments in normal form as well as all optimal alignments (provided there are only finitely many).

## Instructions
1. Install all required libraries and download the event logs. Provide their system paths.
2. Select the log you want to align (`inspected_log`) and hit 'Run All'.

## Output
The output is written to disk and available in the following variables:
- `skip_dict`: variant string $\rightarrow$ list of optimal skip alignment states
- `skip_times`: variant string $\rightarrow$ computation time for all optimal skip alignments in normal form in _ns_ or -1 for a timeout

- `pm4py_dict`: variant string $\rightarrow$ list of optimal alignments or empty list if there are infinitely many optimal alignments
- `pm4py_times`: variant string $\rightarrow$ computation time for all optimal alignments in _ns_, or -1 for a timeout or if there are infinitely many optimal alignments
- `pm4py_times_first`: variant string $\rightarrow$ computation time for the first optimal alignment in _ns_ or -1 for a timeout

In [1]:
from logs import Logs
############### ENTER THE LOG PATHS HERE ###############
# File-system paths of the three XES event logs; replace the placeholders
# with the locations of the downloaded logs.
path_to_road_fines_log = 'xxxxx/xx/xxxx.xes'
path_to_request_for_payment_log = 'xxxxx/xx/xxxx.xes'
path_to_sepsis_log = 'xxxxx/xx/xxxx.xes'

# Log processed by the cells below; it is compared against Logs.ROAD_FINES,
# Logs.REQUEST_FOR_PAYMENT and Logs.SEPSIS to pick the path and the model.
inspected_log = Logs.ROAD_FINES

In [3]:
%load_ext autoreload
%autoreload 2
import pm4py
from processtree import *
from alignment import *
from alignall import *
import statistics
import random
from tqdm import tqdm
from pathlib import Path
import pickle

### Content

In [4]:
def get_variant_dict(log):
    """Return the variants of ``log`` with their frequencies, most frequent first.

    The keys are the variant keys produced by pm4py's
    ``get_variants_from_log_trace_idx``; each value is the length of the index
    list pm4py reports for that variant. Variants with equal frequency keep
    the order in which pm4py returned them.
    """
    idx_by_variant = pm4py.statistics.variants.log.get.get_variants_from_log_trace_idx(log)
    counts = {variant: len(indices) for variant, indices in idx_by_variant.items()}
    # Negated count as sort key: descending by frequency, stable for ties.
    ranked = sorted(counts.items(), key=lambda entry: -entry[1])
    return dict(ranked)

In [5]:
def update_pair_taus(tree:ProcessTree):
    """Rename every Tau below ``tree`` (in place) based on its sibling.

    A Tau whose parent has exactly two children is named ``"TAU_"`` plus the
    sibling's ``name`` (if the sibling is an Activity) or the sibling's ``id``
    (otherwise). Any other Tau gets ``"TAU_"`` followed by its distance to the
    root and a random number. Activities are left untouched; all other nodes
    are traversed recursively through ``children``.
    """
    if not isinstance(tree, Tau):
        # Inner operator nodes: descend. Activities: nothing to do.
        if not isinstance(tree, Activity):
            for child in tree.children:
                update_pair_taus(child)
        return

    parent = tree.parent
    if parent is None or len(parent.children) != 2:
        # No unique sibling to borrow a name from.
        tree.name = "TAU_" + str(tree.get_distance_to_root()) + str(random.random())
        return

    # Pick the child of the parent that is not this Tau.
    sibling = parent.children[0]
    if sibling == tree:
        sibling = parent.children[1]

    suffix = sibling.name if isinstance(sibling, Activity) else sibling.id
    tree.name = "TAU_" + suffix

In [6]:
def check_names(tree:ProcessTree, names:List[str]):
    """Assert that every Activity below ``tree`` has a name contained in ``names``.

    Tau nodes are ignored; every other node is traversed through its
    ``children``. Raises ``AssertionError`` on the first Activity whose
    ``name`` is not in ``names``.
    """
    if isinstance(tree, Activity):
        assert tree.name in names
        return
    if isinstance(tree, Tau):
        return
    for child in tree.children:
        check_names(child, names)

In [None]:
# Resolve the path of the selected event log and load it.
# NOTE: the variable is called `log_rf` for every log, not only road fines.
if inspected_log == Logs.ROAD_FINES:
    path = path_to_road_fines_log
elif inspected_log == Logs.REQUEST_FOR_PAYMENT:
    path = path_to_request_for_payment_log
elif inspected_log == Logs.SEPSIS:
    path = path_to_sepsis_log
else:
    # Fail early with a clear message instead of handing `None` to pm4py.
    raise ValueError(f"Unsupported value for inspected_log: {inspected_log!r}")
log_rf = pm4py.read_xes(path)

In [8]:
if inspected_log == Logs.ROAD_FINES:
    ## rf
    # Hand-built process tree for the road fines log. Every node is created
    # with `None` as its first argument and attached to its parent afterwards
    # via set_parent(). The third argument is 100000 for activities and 0 for
    # taus -- presumably the cost of skipping the node; TODO confirm.
    # The construction order is kept as is on purpose.
    createfine = Activity(None, 'Create Fine', 100000)
    appealtojudge = Activity(None, 'Appeal to Judge', 100000)
    insertdateappealtoprefecture = Activity(None, 'Insert Date Appeal to Prefecture', 100000)
    receiveresultappealfromprefecture = Activity(None, 'Receive Result Appeal from Prefecture', 100000)
    notifyresultappealtooffender = Activity(None, 'Notify Result Appeal to Offender', 100000)
    sendappealtoprefecture = Activity(None, 'Send Appeal to Prefecture', 100000)
    payment = Activity(None, 'Payment', 100000)
    sendfine = Activity(None, 'Send Fine', 100000)
    insertfinenotification = Activity(None, 'Insert Fine Notification', 100000)
    addpenality = Activity(None, 'Add penalty', 100000)
    sendforcreditcollection = Activity(None, 'Send for Credit Collection', 100000)

    # Taus that make the paired activity optional.
    atjtau = Tau(None, 'TAU_Appeal to Judge', 0)
    nratotau = Tau(None, 'TAU_Notify Result Appeal to Offender', 0)
    ptau = Tau(None, 'TAU_Payment', 0)
    # Fixed: this tau carried the plain activity name; it now uses the same
    # 'TAU_' prefix as every other tau in this notebook.
    sfcctau = Tau(None, 'TAU_Send for Credit Collection', 0)

    # Optional activities: Xor(tau, activity).
    atjchoice = Xor(None, [atjtau, appealtojudge])
    atjtau.set_parent(atjchoice)
    appealtojudge.set_parent(atjchoice)
    nratochoice = Xor(None, [nratotau, notifyresultappealtooffender])
    nratotau.set_parent(nratochoice)
    notifyresultappealtooffender.set_parent(nratochoice)
    pchoice = Xor(None, [ptau, payment])
    ptau.set_parent(pchoice)
    payment.set_parent(pchoice)
    sfccchoice = Xor(None, [sfcctau, sendforcreditcollection])
    sfcctau.set_parent(sfccchoice)
    sendforcreditcollection.set_parent(sfccchoice)

    # Insert Date Appeal to Prefecture, then optionally Notify Result Appeal to Offender.
    idatpnratoseq = Sequence(None, [insertdateappealtoprefecture, nratochoice])
    insertdateappealtoprefecture.set_parent(idatpnratoseq)
    nratochoice.set_parent(idatpnratoseq)
    # Insert Fine Notification, then Add penalty.
    ifnapseq = Sequence(None, [insertfinenotification, addpenality])
    insertfinenotification.set_parent(ifnapseq)
    addpenality.set_parent(ifnapseq)

    # Taus for skipping whole sub-blocks.
    tauand1 = Tau(None, 'TAU_AND1', 0)
    tauseq1 = Tau(None, 'TAU_SEQ1', 0)
    tauseq2 = Tau(None, 'TAU_SEQ2', 0)

    # Optional block: Send Fine followed by an optional (notification, penalty) sequence.
    ifnapchoice = Xor(None, [tauseq1, ifnapseq])
    tauseq1.set_parent(ifnapchoice)
    ifnapseq.set_parent(ifnapchoice)
    sfifnapseq = Sequence(None, [sendfine, ifnapchoice])
    sendfine.set_parent(sfifnapseq)
    ifnapchoice.set_parent(sfifnapseq)
    sfifnapchoice = Xor(None, [tauseq2, sfifnapseq])
    tauseq2.set_parent(sfifnapchoice)
    sfifnapseq.set_parent(sfifnapchoice)

    # Appeal block: four And-children.
    and1 = And(None, [atjchoice, receiveresultappealfromprefecture, idatpnratoseq, sendappealtoprefecture])
    atjchoice.set_parent(and1)
    receiveresultappealfromprefecture.set_parent(and1)
    idatpnratoseq.set_parent(and1)
    sendappealtoprefecture.set_parent(and1)
    # Optional payment combined (And) with the optional fine-sending block.
    and2 = And(None, [pchoice, sfifnapchoice])
    pchoice.set_parent(and2)
    sfifnapchoice.set_parent(and2)

    # The whole appeal block is optional.
    choice = Xor(None, [tauand1, and1])
    tauand1.set_parent(choice)
    and1.set_parent(choice)

    # Root: Create Fine, optional appeal block, payment/fine block, optional credit collection.
    tree_rf = Sequence(None, [createfine, choice, and2, sfccchoice])
    createfine.set_parent(tree_rf)
    choice.set_parent(tree_rf)
    and2.set_parent(tree_rf)
    sfccchoice.set_parent(tree_rf)

    # Every activity name used in the model must occur in the log.
    check_names(tree_rf, list(log_rf['concept:name'].unique()))
    tree_rf
elif inspected_log == Logs.REQUEST_FOR_PAYMENT:
    ## payreq
    # Hand-built process tree for the request-for-payment log. Every node is
    # created with `None` as its first argument and attached to its parent
    # afterwards via set_parent(). The third argument is 100000 for activities
    # and 0 for taus -- presumably the cost of skipping the node; TODO confirm.
    # Some activity labels occur several times in the tree, hence the numbered
    # variables (e.g. submittedbyemployee, submittedbyemployee2, submittedbyemployee3).
    submittedbyemployee = Activity(None, 'Request For Payment SUBMITTED by EMPLOYEE', 100000)
    submittedbyemployee2 = Activity(None, 'Request For Payment SUBMITTED by EMPLOYEE', 100000)
    rejectedbyadministration = Activity(None, 'Request For Payment REJECTED by ADMINISTRATION', 100000)
    rejectedbyadministration2 = Activity(None, 'Request For Payment REJECTED by ADMINISTRATION', 100000)
    approvedbyadministration = Activity(None, 'Request For Payment APPROVED by ADMINISTRATION', 100000)
    approvedbyadministration2 = Activity(None, 'Request For Payment APPROVED by ADMINISTRATION', 100000)
    rejectedbysupervisor = Activity(None, 'Request For Payment REJECTED by SUPERVISOR', 100000)
    submittedbyemployee3 = Activity(None, 'Request For Payment SUBMITTED by EMPLOYEE', 100000)
    approvedbybudgetowner = Activity(None, 'Request For Payment APPROVED by BUDGET OWNER', 100000)
    rejectedbyemployee = Activity(None, 'Request For Payment REJECTED by EMPLOYEE', 100000)
    finalapprovedbysupervisor = Activity(None, 'Request For Payment FINAL_APPROVED by SUPERVISOR', 100000)
    finalapprovedbydirector = Activity(None, 'Request For Payment FINAL_APPROVED by DIRECTOR', 100000)
    approvedbysupervisor = Activity(None, 'Request For Payment APPROVED by SUPERVISOR', 100000)
    requestpayment = Activity(None, 'Request Payment', 100000)
    finalapprovedbybudgetowner = Activity(None, 'Request For Payment FINAL_APPROVED by BUDGET OWNER', 100000)
    paymenthandled = Activity(None, 'Payment Handled', 100000)
    savedbyemployee = Activity(None, 'Request For Payment SAVED by EMPLOYEE', 100000)
    forapprovalbysupervisor = Activity(None, 'Request For Payment FOR_APPROVAL by SUPERVISOR', 100000)

    # Taus used as the "skip" alternative of the Xor blocks built below.
    tau1 = Tau(None, 'TAU_LOOPS', 0)
    tau2 = Tau(None, 'TAU_AND', 0)
    tau3 = Tau(None, 'TAU_FINAL_APPROVED by BUDGET OWNER', 0)
    tau4 = Tau(None, 'TAU_XOR', 0)

    # Choice between APPROVED and REJECTED by ADMINISTRATION.
    aarachoice = Xor(None, [approvedbyadministration, rejectedbyadministration2])
    approvedbyadministration.set_parent(aarachoice)
    rejectedbyadministration2.set_parent(aarachoice)

    # SUBMITTED by EMPLOYEE followed by the administration choice.
    seaaraseq = Sequence(None, [submittedbyemployee, aarachoice])
    submittedbyemployee.set_parent(seaaraseq)
    aarachoice.set_parent(seaaraseq)

    # Loop with children [SUBMITTED by EMPLOYEE, REJECTED by ADMINISTRATION]
    # (presumably body first, redo part second -- TODO confirm against Loop).
    seraloop = Loop(None, [submittedbyemployee2, rejectedbyadministration])
    submittedbyemployee2.set_parent(seraloop)
    rejectedbyadministration.set_parent(seraloop)

    # The loop above followed by APPROVED by ADMINISTRATION.
    seraaaseq = Sequence(None, [seraloop, approvedbyadministration2])
    seraloop.set_parent(seraaaseq)
    approvedbyadministration2.set_parent(seraaaseq)

    # Outer loop with children [sequence above, REJECTED by SUPERVISOR].
    seraaarsloop = Loop(None, [seraaaseq, rejectedbysupervisor])
    seraaaseq.set_parent(seraaarsloop)
    rejectedbysupervisor.set_parent(seraaarsloop)

    # Outer loop followed by APPROVED by BUDGET OWNER.
    seraaarsabseq = Sequence(None, [seraaarsloop, approvedbybudgetowner])
    seraaarsloop.set_parent(seraaarsabseq)
    approvedbybudgetowner.set_parent(seraaarsabseq)

    # Three alternative submission sub-processes.
    xor1 = Xor(None, [seaaraseq, seraaarsabseq, submittedbyemployee3])
    seaaraseq.set_parent(xor1)
    seraaarsabseq.set_parent(xor1)
    submittedbyemployee3.set_parent(xor1)

    # Loop with children [xor1, REJECTED by EMPLOYEE].
    loop = Loop(None, [xor1, rejectedbyemployee])
    xor1.set_parent(loop)
    rejectedbyemployee.set_parent(loop)

    # The whole looping block is optional.
    xor = Xor(None, [tau1, loop])
    tau1.set_parent(xor)
    loop.set_parent(xor)


    # Optional And block of FINAL_APPROVED by DIRECTOR and APPROVED by SUPERVISOR.
    and1 = And(None, [finalapprovedbydirector, approvedbysupervisor])
    finalapprovedbydirector.set_parent(and1)
    approvedbysupervisor.set_parent(and1)

    block3 = Xor(None, [tau2, and1])
    tau2.set_parent(block3)
    and1.set_parent(block3)

    # Optional FINAL_APPROVED by BUDGET OWNER.
    block5 = Xor(None, [tau3, finalapprovedbybudgetowner])
    tau3.set_parent(block5)
    finalapprovedbybudgetowner.set_parent(block5)

    # Optional trailing activity: SAVED by EMPLOYEE or FOR_APPROVAL by SUPERVISOR.
    block7 = Xor(None, [tau4, savedbyemployee, forapprovalbysupervisor])
    tau4.set_parent(block7)
    savedbyemployee.set_parent(block7)
    forapprovalbysupervisor.set_parent(block7)

    # Root sequence. NOTE: the variable is called `tree_rf` for every log.
    tree_rf = Sequence(None, [xor, finalapprovedbysupervisor, block3, requestpayment, block5, paymenthandled, block7])
    xor.set_parent(tree_rf)
    finalapprovedbysupervisor.set_parent(tree_rf)
    block3.set_parent(tree_rf)
    requestpayment.set_parent(tree_rf)
    block5.set_parent(tree_rf)
    paymenthandled.set_parent(tree_rf)
    block7.set_parent(tree_rf)

    # Every activity name used in the model must occur in the log.
    check_names(tree_rf, list(log_rf['concept:name'].unique()))
    tree_rf
elif inspected_log == Logs.SEPSIS:
    ## sepsis
    # Hand-built process tree for the sepsis log. Every node is created with
    # `None` as its first argument and attached to its parent afterwards via
    # set_parent(). The third argument is 100000 for activities and 0 for
    # taus -- presumably the cost of skipping the node; TODO confirm.
    ersepsistriage = Activity(None, 'ER Sepsis Triage', 100000)
    ertriage = Activity(None, 'ER Triage', 100000)
    erregistration = Activity(None, 'ER Registration', 100000)
    admissionnc = Activity(None, 'Admission NC', 100000)
    ivantibiotics = Activity(None, 'IV Antibiotics', 100000)
    ivliquid = Activity(None, 'IV Liquid', 100000)
    lacticacid = Activity(None, 'LacticAcid', 100000)
    crp = Activity(None, 'CRP', 100000)
    leucocytes = Activity(None, 'Leucocytes', 100000)
    releasea = Activity(None, 'Release A', 100000)
    releasec = Activity(None, 'Release C', 100000)
    released = Activity(None, 'Release D', 100000)
    releasee = Activity(None, 'Release E', 100000)
    returner = Activity(None, 'Return ER', 100000)

    # Taus used as the "skip" alternative of the Xor blocks built below.
    adnctau = Tau(None, 'TAU_Admission NC', 0)
    ivltau = Tau(None, 'TAU_IV Liquid', 0)
    crptau = Tau(None, 'TAU_CRP', 0)
    ltau = Tau(None, 'TAU_Leucocytes', 0)
    ratau = Tau(None, 'TAU_Release A', 0)
    rcdetau = Tau(None, 'TAU_Release CDE', 0)
    rertau = Tau(None, 'TAU_Return ER', 0)

    # Optional activities: Xor(tau, activity). The release choice offers
    # Release C, D or E besides the tau.
    adncchoice = Xor(None, [adnctau, admissionnc])
    adnctau.set_parent(adncchoice)
    admissionnc.set_parent(adncchoice)
    ivlchoice = Xor(None, [ivltau, ivliquid])
    ivltau.set_parent(ivlchoice)
    ivliquid.set_parent(ivlchoice)
    crpchoice = Xor(None, [crptau, crp])
    crptau.set_parent(crpchoice)
    crp.set_parent(crpchoice)
    lchoice = Xor(None, [ltau, leucocytes])
    ltau.set_parent(lchoice)
    leucocytes.set_parent(lchoice)
    rachoice = Xor(None, [ratau, releasea])
    ratau.set_parent(rachoice)
    releasea.set_parent(rachoice)
    rcdechoice = Xor(None, [rcdetau, releasec, released, releasee])
    rcdetau.set_parent(rcdechoice)
    releasec.set_parent(rcdechoice)
    released.set_parent(rcdechoice)
    releasee.set_parent(rcdechoice)
    rerchoice = Xor(None, [rertau, returner])
    rertau.set_parent(rerchoice)
    returner.set_parent(rerchoice)

    # And block of the three ER activities.
    and1 = And(None, [ersepsistriage, ertriage, erregistration])
    ersepsistriage.set_parent(and1)
    ertriage.set_parent(and1)
    erregistration.set_parent(and1)

    # And block of admission, IV treatment and lab measurements (some optional).
    and2 = And(None, [adncchoice, ivantibiotics, ivlchoice, lacticacid, crpchoice, lchoice])
    adncchoice.set_parent(and2)
    ivantibiotics.set_parent(and2)
    ivlchoice.set_parent(and2)
    lacticacid.set_parent(and2)
    crpchoice.set_parent(and2)
    lchoice.set_parent(and2)

    # Root sequence. NOTE: the variable is called `tree_rf` for every log.
    tree_rf = Sequence(None, [and1, and2, rachoice, rcdechoice, rerchoice])
    and1.set_parent(tree_rf)
    and2.set_parent(tree_rf)
    rachoice.set_parent(tree_rf)
    rcdechoice.set_parent(tree_rf)
    rerchoice.set_parent(tree_rf)

    # Every activity name used in the model must occur in the log.
    check_names(tree_rf, list(log_rf['concept:name'].unique()))
    tree_rf

In [None]:
# Convert the hand-built tree into a pm4py process tree.
process_tree_rf = tree_rf.to_pm4py()
# insert_cycle_checks returns a pair; the second element replaces the tree used
# from here on, the first (tau_loops_rf) is passed to the aligners in later cells.
tau_loops_rf, process_tree_rf = insert_cycle_checks(process_tree_rf)
# Render the resulting tree as PNG.
pm4py.view_process_tree(process_tree_rf, format='png')
# Petri net with initial and final marking for the Petri-net-based aligner.
sublog_pt_rf_net, sublog_pt_rf_init, sublog_pt_rf_final = pm4py.convert.convert_to_petri_net(process_tree_rf)

In [None]:
# Compute the optimal skip alignments for every variant of the log.
# skip_dict / skip_times are keyed by the variant joined with ", ".
skip_dict = {}
skip_times = {}
Aligner.set_level_incentive(0)
variant_strings = list(get_variant_dict(log_rf))
futures = align_sk_all(variant_strings, tree_rf, timeout=3600)
# futures[i] belongs to variant_strings[i]; each result is an (alignments, time) pair.
for index, future in enumerate(futures):
    variant_key = ", ".join(variant_strings[index])
    agns, t = future.result()
    skip_dict[variant_key] = agns
    skip_times[variant_key] = t

In [None]:
# Re-measure the runtime for ALL optimal skip alignments of a variant.
# A recorded time of exactly 0 means the run was too fast to measure,
# not that it timed out.
for k, v in tqdm(skip_times.items(), total=len(skip_times)):
    if v != 0:
        continue
    skip_times[k] = align_sk_all_for_one(tree_rf, k.split(", "), 3600)

In [None]:
# Times above 3600 s (expressed in ns) are recorded as timeouts (-1).
for k, v in tqdm(skip_times.items(), total=len(skip_times)):
    exceeded_limit = v > 3_600_000_000_000
    if exceeded_limit:
        skip_times[k] = -1

In [None]:
# Summary statistics of the skip alignment run; -1 in skip_times marks a timeout.
skip_agn_counts = [len(agn_list) for agn_list in skip_dict.values()]
skip_recorded_times = list(skip_times.values())
skip_finished_times = [elapsed for elapsed in skip_recorded_times if elapsed != -1]

print("Max num agns:", max(skip_agn_counts))
print("Avg num agns:", statistics.mean(skip_agn_counts))
print("Num timeouts:", sum(elapsed == -1 for elapsed in skip_recorded_times))
print("Min skip time:", min(skip_finished_times))
# The next two lines include timeouts, i.e. -1 entries.
print("Max time:", max(skip_recorded_times))
print("Min time:", min(skip_recorded_times))
print("Med time:", statistics.median(skip_finished_times))

In [None]:
# Compute all optimal alignments on the Petri net for every variant.
pm4py_dict = {}
pm4py_times = {}
pm4py_times_first = {}

pm4py_res = align_pn_all_multi(log_rf, sublog_pt_rf_net, sublog_pt_rf_init, sublog_pt_rf_final, [], tree_rf, timeout=3600)

for k, (process_time, (agns, has_timed_out, time_first_agn)) in pm4py_res.items():
    # Reduce every alignment to the moves whose model label is set and does not
    # start with "TAU_entry" / "TAU_exit", then drop duplicate alignments.
    distinct_agns = {
        tuple(
            t for t in agn['alignment']
            if t.label[1] is not None
            and not t.label[1].startswith(("TAU_entry", "TAU_exit"))
        )
        for agn in agns
    }
    pm4py_dict[k] = list(distinct_agns)
    # -1 marks a timeout: either flagged by the aligner or more than 3600 s (in ns).
    if has_timed_out == -1 or process_time > 36*10**11:
        pm4py_times[k] = -1
    else:
        pm4py_times[k] = process_time
    pm4py_times_first[k] = time_first_agn

In [None]:
# Re-measure the runtime for ALL optimal alignments of a variant.
# A recorded time of exactly 0 means the run was too fast to measure,
# not that it timed out.
for k, v in tqdm(pm4py_times.items(), total=len(pm4py_times)):
    if v != 0:
        continue
    trace = k.split(", ")
    # Bound passed to the aligner: cheapest model execution plus 100000 per trace event, plus 0.1.
    cost_bound = tree_rf.get_cheapest_execution(0)[0]+len(trace)*100000+0.1
    pm4py_times[k] = align_pn_all_for_one(trace, sublog_pt_rf_net, sublog_pt_rf_init, sublog_pt_rf_final, tau_loops_rf, cost_bound, timeout=3600)

In [None]:
# Re-measure the runtime for ONE optimal alignment of a variant.
# Two passes: first with cnt=200, then with cnt=2000 for the variants whose
# time is still exactly 0 (too fast to measure, not a timeout).
for cnt in (200, 2000):
    for k, v in tqdm(pm4py_times_first.items(), total=len(pm4py_times_first)):
        if v != 0:
            continue
        trace = k.split(", ")
        # Bound passed to the aligner: cheapest model execution plus 100000 per trace event, plus 0.1.
        cost_bound = tree_rf.get_cheapest_execution(0)[0]+len(trace)*100000+0.1
        pm4py_times_first[k] = align_pn_one_for_one(trace, sublog_pt_rf_net, sublog_pt_rf_init, sublog_pt_rf_final, tau_loops_rf, cost_bound, timeout=3600, cnt=cnt)

In [None]:
# Times above 3600 s (expressed in ns) are recorded as timeouts (-1).
for k, v in tqdm(pm4py_times_first.items(), total=len(pm4py_times_first)):
    exceeded_limit = v > 3_600_000_000_000
    if exceeded_limit:
        pm4py_times_first[k] = -1

In [None]:
# Summary statistics of the Petri net alignment run; -1 marks a timeout.
pm4py_agn_counts = [len(agn_list) for agn_list in pm4py_dict.values()]
pm4py_recorded_times = list(pm4py_times.values())
pm4py_first_finished = [elapsed for elapsed in pm4py_times_first.values() if elapsed != -1]

print("Max num agns:", max(pm4py_agn_counts))
print("Avg num agns:", statistics.mean(pm4py_agn_counts))
print("Num timeouts:", sum(elapsed == -1 for elapsed in pm4py_recorded_times))
print("Max time:", max(pm4py_recorded_times))
print("Med. first agn time:", statistics.median(pm4py_first_finished))

In [26]:
# Persist the results of this run as pickle files, one file per result dict,
# in a folder that depends on the inspected log.
#
# Fixes over the previous version:
# - Path.mkdir() returns None, so `output_folder` was always None; the Path is
#   now kept and the directory is created in a separate step.
# - All files were written to "indulpet_results/sepsis/" regardless of the
#   selected log; they now go to the folder of the inspected log.
# - Files are written inside `with` blocks so they are closed even on error.
output_folder = None
if inspected_log == Logs.ROAD_FINES:
    output_folder = Path("indulpet_results/rf")
elif inspected_log == Logs.REQUEST_FOR_PAYMENT:
    output_folder = Path("indulpet_results/payreq")
elif inspected_log == Logs.SEPSIS:
    output_folder = Path("indulpet_results/sepsis")

if output_folder is None:
    raise ValueError(f"Unsupported value for inspected_log: {inspected_log!r}")
output_folder.mkdir(parents=True, exist_ok=True)

# File name -> object to pickle; the file names are unchanged.
results_to_store = {
    "pm4py_dict": pm4py_dict,
    "skip_dict": skip_dict,
    "pm4py_times": pm4py_times,
    "pm4py_times_first": pm4py_times_first,
    "skip_times": skip_times,
}
for result_name, result in results_to_store.items():
    with open(output_folder / result_name, "wb") as result_file:
        pickle.dump(result, result_file)