In [None]:
import pm4py
import importer
from main_repair import MainRepair
from helpers import sollmodell_helpers

In [None]:
# Location of the XES event log to analyse; empty by default, set it before running the cells below.
file_path = ""

In [None]:
# Load the event log from the XES file at `file_path` using the project-local `importer` module.
# The cells below index `log` by column name ('case:concept:name', 'concept:name'),
# so it is presumably a pandas DataFrame — confirm in `importer.read_from_xes`.
log = importer.read_from_xes(file_path)

In [None]:
# Number of distinct cases (traces) in the log.
# dropna=False keeps the count identical to len(<column>.unique()).
log['case:concept:name'].nunique(dropna=False)

In [None]:
# Number of distinct activity names ("event types") in the log.
# dropna=False keeps the count identical to len(<column>.unique()).
log['concept:name'].nunique(dropna=False)

In [None]:
# Discover a BPMN model from the full event log with pm4py's inductive discovery.
bpmn = pm4py.discover_bpmn_inductive(log)

In [None]:
# Render the discovered BPMN model for visual inspection.
pm4py.view_bpmn(bpmn)

In [None]:
# Per-case durations read from the same XES file (project-local helper).
durations = importer.read_durations(file_path=file_path)

# Lookup table: case id -> case duration. If a case id occurs more than once,
# the last occurrence wins.
durations_dict = {
    case_id: case_duration
    for case_id, case_duration in zip(
        durations['case:concept:name'], durations['case_durations']
    )
}

In [None]:
# Show the cases ordered from shortest to longest duration (returns a sorted copy; `durations` is unchanged).
durations.sort_values(by='case_durations')

In [None]:
# Central tendency of the case durations over all traces.
mean_all_traces = durations['case_durations'].mean()
median_all_traces = durations['case_durations'].median()

# Report both statistics; the messages treat the raw values as seconds and
# divide by 86400 (seconds per day) for the day figures.
print(f"mean of all traces in seconds: {mean_all_traces}")
print(f"mean in days: {mean_all_traces / 86400}")
print(f"median of all traces in seconds: {median_all_traces}")
print(f"median in days: {median_all_traces / 86400}")

In [None]:
# Use the median case duration as the KPI threshold.
satisfactory_threshold = median_all_traces

# (rows, columns) of the cases whose duration is at or below the threshold.
durations.loc[durations['case_durations'] <= satisfactory_threshold].shape

### Create original model (reference model)

In [None]:
# Build the reference ("Soll") model from the log. With return_filtered_log=True the
# helper also returns the filtered log alongside the model.
net_im_fm, filtered_log = sollmodell_helpers.create_soll_modell_by_variants(
    log=log,
    return_filtered_log=True,
)

# Unpack the model triple: Petri net, initial marking, final marking.
net, im, fm = net_im_fm

In [None]:
# Render the reference Petri net (markings are not passed, so only the net structure is drawn).
pm4py.view_petri_net(net)

#### Logs that are currently not represented by the model
...and might be considered based on the KPI values

In [None]:
# Toggle: when True, drop every case that appears in `filtered_log`
# (the log returned together with the reference model) before repairing.
exclude_cases_from_original_net = False

# Default to the full log; narrow it down only when the toggle is set.
log_to_use = log
if exclude_cases_from_original_net:
    log_to_use = log[
        ~log['case:concept:name'].isin(filtered_log['case:concept:name'])
    ]
    print(f"Number of cases when cases that are used for original net are excluded: {len(log_to_use['case:concept:name'].unique())}")

#### Train/test split
Default:
* 67 % Training
* 33 % Testing

`Training data` is used to identify log and model moves and repair the model based on the respective alignments. <br>
Traces in the `test data` are split into those that fit the original model and those that fit the repaired model. The KPI values of these two groups are then compared to see whether the repaired model is advantageous for future logs.

In [None]:
# Set up the model repair: the selected log, the reference model (net + markings),
# the per-case KPI values (case durations) and the threshold. Lower KPI values are
# treated as better.
repairer = MainRepair(
    log_to_use,
    net,
    im,
    fm,
    target_KPI_values_per_case=durations_dict,
    satisfactory_values=[satisfactory_threshold],
    lower_KPI_is_better=True,
)

#### Create alignments

In [None]:
# Run the repairer's main routine. Per the section heading this is where the alignments
# are created; the repaired model is read from `repairer.repaired_net_IM` etc. further below.
# NOTE(review): the exact steps live in MainRepair.main and are not visible here.
repairer.main()

In [None]:
# Print the KPI values of the conformant traces.
# NOTE(review): presumably compares traces fitting the original vs. the repaired model — confirm in MainRepair.
repairer.print_conformant_kpi_values()

Try to reduce invisible transitions with pm4py's built-in functions (apparently not very effective).

In [None]:
# Apply pm4py's invisible-transition reduction to the repaired net, then render the result.
net_reduced_invisibles = pm4py.analysis.reduce_petri_net_invisibles(repairer.repaired_net_IM)
pm4py.view_petri_net(net_reduced_invisibles)

In [None]:
# Apply pm4py's implicit-place reduction to the repaired model (net + markings).
implicit_places_reduction = pm4py.reduce_petri_net_implicit_places(
    repairer.repaired_net_IM,
    repairer.repaired_im_IM,
    repairer.repaired_fm_IM,
)

# Element 0 of the result is the reduced net; render it.
pm4py.view_petri_net(implicit_places_reduction[0])