In [4]:
import pm4py
from pm4py.objects.conversion.bpmn import converter as bpmn_converter
import pandas as pd

from pm4py.objects.log.util import sorting
from pm4py.objects.conversion.log import converter as log_converter

try:
    # Newer API (pm4py >= 2.2)
    from pm4py.algo.evaluation import algorithm as eval_alg
except ImportError:
    # Older API
    from pm4py.evaluation import algorithm as eval_alg


In [5]:
LOG_PATH = "bpi-chall.xes"

elog = pm4py.read_xes(LOG_PATH)

if isinstance(elog, pd.DataFrame):
    elog = log_converter.apply(elog, variant=log_converter.Variants.TO_EVENT_LOG)

elog = sorting.sort_timestamp(elog, timestamp_key="time:timestamp")

print(type(elog))
print(f"Number of cases: {len(elog)}")

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|██████████| 31509/31509 [00:32<00:00, 960.31it/s] 


<class 'pm4py.objects.log.obj.EventLog'>
Number of cases: 31509


In [15]:
import numpy as np

def compute_fast_metrics(event_log, net, im, fm):
    fitness = np.nan
    try:
        fit_res = fitness_eval.apply(
            event_log, net, im, fm,
            variant=fitness_eval.Variants.TOKEN_BASED
        )
        # fit_res is typically a dict
        if isinstance(fit_res, dict) and "log_fitness" in fit_res:
            fitness = float(fit_res["log_fitness"])
        else:
            fitness = float(fit_res)
    except Exception as e:
        print("Token-based fitness error:", type(e), e)

    try:
        from pm4py.algo.evaluation.simplicity import algorithm as simp_alg
        simp_res = simp_alg.apply(net)
        if isinstance(simp_res, dict) and "simplicity" in simp_res:
            simplicity_builtin = float(simp_res["simplicity"])
        else:
            simplicity_builtin = float(simp_res)
    except Exception:
        simplicity_builtin = np.nan

    num_places = len(net.places)
    num_transitions = len(net.transitions)
    num_arcs = len(net.arcs)
    size = num_places + num_transitions + num_arcs

    # size-based simplicity
    simplicity_size = 1.0 / (1.0 + np.log1p(size)) if size > 0 else np.nan

    # connectivity-based simplicity
    nodes = num_places + num_transitions
    connectivity_simplicity = 1.0 / (1.0 + (2.0 * num_arcs / nodes)) if nodes > 0 else np.nan

    return {
        "fitness": fitness,
        "precision": np.nan,
        "generalization": np.nan,
        "simplicity_builtin": simplicity_builtin,
        "simplicity_size": float(simplicity_size),
        "simplicity_connectivity": float(connectivity_simplicity),
        "places": int(num_places),
        "transitions": int(num_transitions),
        "arcs": int(num_arcs),
        "size": int(size),
    }

In [14]:

bpmn_graph = pm4py.read_bpmn("import/diagram-ver-1.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [02:38<00:00, 100.37it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [01:35<00:00, 2760.74it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.5971871590803725, 'log_fitness': 0.5589650927186534, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.4258705254315367, 'generalization': 0.95788509561983, 'simplicity': 0.7391304347826086, 'metricsAverageWeight': 0.6704627871381572, 'fscore': 0.4834243468592294}


In [16]:
print(compute_fast_metrics(elog, net, im, fm))

Token-based fitness error: <class 'NameError'> name 'fitness_eval' is not defined
{'fitness': nan, 'precision': nan, 'generalization': nan, 'simplicity_builtin': 0.7391304347826086, 'simplicity_size': 0.16655711910843396, 'simplicity_connectivity': 0.2982456140350877, 'places': 34, 'transitions': 34, 'arcs': 80, 'size': 148}


In [None]:

bpmn_graph = pm4py.read_bpmn("import/diagram-ver-2.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [02:05<00:00, 126.79it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [01:36<00:00, 2741.04it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.6089833433252885, 'log_fitness': 0.5677412511560701, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.4642573341959193, 'generalization': 0.9577263384648805, 'simplicity': 0.7303370786516854, 'metricsAverageWeight': 0.6800155006171389, 'fscore': 0.5108108548130869}


In [8]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-3.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [02:22<00:00, 111.64it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [01:06<00:00, 3959.29it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.5897998211363679, 'log_fitness': 0.5589778570099243, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.6342961392247884, 'generalization': 0.9367445416203679, 'simplicity': 0.7227722772277226, 'metricsAverageWeight': 0.7131977037707008, 'fscore': 0.5942599901318903}


In [6]:
bpmn_high = pm4py.read_bpmn("models/candidate_HM_dep0_1.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_high)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [00:57<00:00, 276.68it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [02:49<00:00, 1552.72it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.9465848441559679, 'log_fitness': 0.9484026400584812, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.7831687346649673, 'generalization': 0.8948434250535694, 'simplicity': 0.5254237288135593, 'metricsAverageWeight': 0.7879596321476443, 'fscore': 0.8579020263442991}


In [9]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-4.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [02:21<00:00, 112.58it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [00:57<00:00, 4610.15it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.5884702904596485, 'log_fitness': 0.5572426592489195, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.48755901118183886, 'generalization': 0.914150784507872, 'simplicity': 0.7009345794392523, 'metricsAverageWeight': 0.6649717585944707, 'fscore': 0.5200770397308568}


In [10]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-5.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [02:21<00:00, 112.57it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [00:57<00:00, 4558.09it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.594000496063338, 'log_fitness': 0.5623686303968428, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.48755901118183886, 'generalization': 0.9152437987811822, 'simplicity': 0.6851851851851851, 'metricsAverageWeight': 0.6625891563862623, 'fscore': 0.5222986470642835}


In [11]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-6.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [06:12<00:00, 42.74it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [01:26<00:00, 3064.78it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.6540925083216095, 'log_fitness': 0.6258973376965458, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.568378095517758, 'generalization': 0.9440476542894795, 'simplicity': 0.690909090909091, 'metricsAverageWeight': 0.7073080446032185, 'fscore': 0.5957525825213248}


In [8]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-7.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [06:32<00:00, 40.60it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [02:25<00:00, 1810.16it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.7258351235706894, 'log_fitness': 0.7210303088149499, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.3225723796732639, 'generalization': 0.915040286722749, 'simplicity': 0.6879432624113474, 'metricsAverageWeight': 0.6616465594055776, 'fscore': 0.4457337358298948}


In [11]:
print(compute_fast_metrics(elog, net, im, fm))

Token-based fitness error: <class 'NameError'> name 'fitness_eval' is not defined
{'fitness': nan, 'precision': nan, 'generalization': nan, 'simplicity_builtin': 0.6879432624113474, 'simplicity_size': 0.15674233370620924, 'simplicity_connectivity': 0.28955223880597014, 'places': 40, 'transitions': 57, 'arcs': 119, 'size': 216}


In [13]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-8.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [06:26<00:00, 41.25it/s]
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [02:55<00:00, 1507.56it/s]


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.6989051589375617, 'log_fitness': 0.7018699681216896, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.28191802039692837, 'generalization': 0.8431827232469645, 'simplicity': 0.6729559748427673, 'metricsAverageWeight': 0.6249816716520874, 'fscore': 0.40226104465225904}


In [14]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-9.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [04:01<00:00, 66.05it/s] 
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [04:43<00:00, 929.38it/s] 


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.795144194260723, 'log_fitness': 0.8520945489268186, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.1605281585750855, 'generalization': 0.8334228312373719, 'simplicity': 0.668639053254438, 'metricsAverageWeight': 0.6286711479984285, 'fscore': 0.27016018475140324}


In [12]:
bpmn_graph = pm4py.read_bpmn("import/diagram-ver-9-final.bpmn")
net, im, fm = bpmn_converter.apply(bpmn_graph)

metrics = eval_alg.apply(elog, net, im, fm)
print(metrics)

replaying log with TBR, completed traces :: 100%|██████████| 15930/15930 [04:03<00:00, 65.35it/s] 
replaying log with TBR, completed traces :: 100%|██████████| 263907/263907 [04:34<00:00, 960.81it/s] 


{'fitness': {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.7951972367964067, 'log_fitness': 0.8520777865368468, 'percentage_of_fitting_traces': 0.0}, 'precision': 0.1605281585750855, 'generalization': 0.8333896716119287, 'simplicity': 0.668639053254438, 'metricsAverageWeight': 0.6286586674945748, 'fscore': 0.2701593422313455}


In [13]:
print(compute_fast_metrics(elog, net, im, fm))

Token-based fitness error: <class 'NameError'> name 'fitness_eval' is not defined
{'fitness': nan, 'precision': nan, 'generalization': nan, 'simplicity_builtin': 0.668639053254438, 'simplicity_size': 0.1528756627976194, 'simplicity_connectivity': 0.28607594936708863, 'places': 45, 'transitions': 68, 'arcs': 141, 'size': 254}
