In [None]:
import networkx as nx
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import pm4py as pm4
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
import pandas as pd
from statistics import mean

#### Process-mining model comparison: evaluate the generated Petri nets against the original event logs

In [None]:
# Load both exported event logs and parse their `start` column into datetimes.
elog_data_first, elog_data_second = (
    pd.read_csv(csv_path)
    for csv_path in ('data/export/cv19_event_log_wv1.csv', 'data/export/cv19_event_log_wv2.csv')
)
for _elog in (elog_data_first, elog_data_second):
    _elog['start'] = pd.to_datetime(_elog['start'])

In [None]:
# Empty results table, one row per evaluated model: log fitness (lf), precision (prec)
# and generalization (gen) for each log (wv1, wv2), followed by their means (m_*).
_result_columns = [
    'model_name',
    'wv1_lf', 'wv1_prec', 'wv1_gen',
    'wv2_lf', 'wv2_prec', 'wv2_gen',
    'm_lf', 'm_prec', 'm_gen',
]
results_df = pd.DataFrame(columns=_result_columns)

In [None]:
# Frequency of each provider-level event in the first log.
elog_data_first['provider_event'].value_counts()

#### Provider-level

In [None]:
# Provider-level logs: pid -> case id, provider_event -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "provider_event": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

##### Alpha Miner

In [None]:
# Mine an Alpha-miner Petri net from the first log, draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_alpha(el_f)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Show the first-log scores: fitness result, precision, generalization.
print(fitness, prec, gen)

In [None]:
# Mine an Alpha-miner Petri net from the second log, draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_alpha(el_s)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Show the second-log scores: fitness result, precision, generalization.
print(fitness2, prec2, gen2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Alpha_Miner_PL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

##### Inductive Miner

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.3) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_inductive(el_f, noise_threshold=0.3, multi_processing=True)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Print the first-log scores in the same order as the other print cells and the
# results_df columns (fitness, precision, generalization) so outputs compare directly.
print(fitness, prec, gen)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.3) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_inductive(el_s, noise_threshold=0.3, multi_processing=True)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Inductive_Miner_PL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

##### Heuristics Miner

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.7) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_heuristics(el_f, dependency_threshold=0.7)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.7) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_heuristics(el_s, dependency_threshold=0.7)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Heuristics_Miner_PL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

#### Activity-level

In [None]:
# Activity-level logs: pid -> case id, act_code -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "act_code": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Mine an Alpha-miner Petri net from the first log, draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_alpha(el_f)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine an Alpha-miner Petri net from the second log, draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_alpha(el_s)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Alpha_Miner_AL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.3) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_inductive(el_f, noise_threshold=0.3, multi_processing=True)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.3) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_inductive(el_s, noise_threshold=0.3, multi_processing=True)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Inductive_Miner_AL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.7) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_heuristics(el_f, dependency_threshold=0.7)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.7) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_heuristics(el_s, dependency_threshold=0.7)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Heuristics_Miner_AL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

#### Fuzzy Mined Petri Nets

In [None]:
# Load the four externally produced Petri nets from PNML; each read returns
# (net, initial marking, final marking). `p` = provider-level, `a` = activity-level;
# suffix 1/2 = first/second log.
pn_p_1, im_p_1, fm_p_1 = pm4.read_pnml('process_mining/wv1_p_pn.pnml')
pn_p_2, im_p_2, fm_p_2 = pm4.read_pnml('process_mining/wv2_p_pn.pnml')
pn_a_1, im_a_1, fm_a_1 = pm4.read_pnml('process_mining/wv1_a_pn.pnml')
pn_a_2, im_a_2, fm_a_2 = pm4.read_pnml('process_mining/wv2_a_pn.pnml')

In [None]:
# Render the provider-level net for the first log.
pm4.view_petri_net(pn_p_1, im_p_1, fm_p_1)

In [None]:
# Render the provider-level net for the second log.
pm4.view_petri_net(pn_p_2, im_p_2, fm_p_2)

In [None]:
# Render the activity-level net for the first log.
pm4.view_petri_net(pn_a_1, im_a_1, fm_a_1)

In [None]:
# Render the activity-level net for the second log.
pm4.view_petri_net(pn_a_2, im_a_2, fm_a_2)

In [None]:
# Score the imported activity-level net for the first log against `el_f`.
_net_a1 = (pn_a_1, im_a_1, fm_a_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_a1)
prec = pm4.precision_token_based_replay(el_f, *_net_a1)
gen = generalization_evaluator.apply(el_f, *_net_a1)

In [None]:
# Score the imported activity-level net for the second log against `el_s`.
_net_a2 = (pn_a_2, im_a_2, fm_a_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_a2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_a2)
gen2 = generalization_evaluator.apply(el_s, *_net_a2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Fuzzy_Miner_AL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Switch back to provider-level logs: pid -> case id, provider_event -> activity,
# start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "provider_event": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Score the imported provider-level net for the first log against `el_f`.
_net_p1 = (pn_p_1, im_p_1, fm_p_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_p1)
prec = pm4.precision_token_based_replay(el_f, *_net_p1)
gen = generalization_evaluator.apply(el_f, *_net_p1)

In [None]:
# Score the imported provider-level net for the second log against `el_s`.
_net_p2 = (pn_p_2, im_p_2, fm_p_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_p2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_p2)
gen2 = generalization_evaluator.apply(el_s, *_net_p2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Fuzzy_Miner_PL'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Persist the results table (without the index column) for later reuse.
results_df.to_csv('process_mining/cc_results.csv', index=False)

#### Add some more thresholds

In [None]:
# Reload the saved results table; this replaces the in-memory `results_df`.
results_df = pd.read_csv('process_mining/cc_results.csv')

In [None]:
# Give each model name its threshold suffix in a single pass.
#
# The mapping accepts two generations of names:
#   * the un-suffixed inductive/heuristics names this notebook writes to
#     cc_results.csv, so the cell still has an effect after Restart & Run All
#     (the original keys never matched those names and the cell was a no-op);
#   * the suffixed names found in older result files (the original replacements).
# The former 'Heuristics_Miner_*_70' -> same-name replacements were identity
# no-ops and are therefore omitted. Names not listed are left unchanged.
_model_renames = {
    'Fuzzy_Miner_AL_30': 'IDFG_Miner_AL_30',
    'Fuzzy_Miner_PL_30': 'IDFG_Miner_PL_30',
    'Inductive_Miner_PL': 'Inductive_Miner_PL_30',    # mined with noise_threshold=0.3
    'Inductive_Miner_PL_70': 'Inductive_Miner_PL_30',
    'Inductive_Miner_AL': 'Inductive_Miner_AL_30',    # mined with noise_threshold=0.3
    'Inductive_Miner_AL_70': 'Inductive_Miner_AL_30',
    'Heuristics_Miner_PL': 'Heuristics_Miner_PL_70',  # mined with dependency_threshold=0.7
    'Heuristics_Miner_AL': 'Heuristics_Miner_AL_70',  # mined with dependency_threshold=0.7
}
results_df.model_name = results_df.model_name.replace(_model_renames)

In [None]:
# Display the results table after renaming.
results_df

In [None]:
# Load the `_70` PNML files from the ProM folder; these rebind the pn_*/im_*/fm_*
# names used for the earlier nets. `p` = provider-level, `a` = activity-level.
pn_p_1, im_p_1, fm_p_1 = pm4.read_pnml('process_mining/ProM/wv1_p_pn_70.pnml')
pn_p_2, im_p_2, fm_p_2 = pm4.read_pnml('process_mining/ProM/wv2_p_pn_70.pnml')
pn_a_1, im_a_1, fm_a_1 = pm4.read_pnml('process_mining/ProM/wv1_a_pn_70.pnml')
pn_a_2, im_a_2, fm_a_2 = pm4.read_pnml('process_mining/ProM/wv2_a_pn_70.pnml')

In [None]:
# Provider-level logs: pid -> case id, provider_event -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "provider_event": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Score the imported provider-level net for the first log against `el_f`.
_net_p1 = (pn_p_1, im_p_1, fm_p_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_p1)
prec = pm4.precision_token_based_replay(el_f, *_net_p1)
gen = generalization_evaluator.apply(el_f, *_net_p1)

In [None]:
# Score the imported provider-level net for the second log against `el_s`.
_net_p2 = (pn_p_2, im_p_2, fm_p_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_p2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_p2)
gen2 = generalization_evaluator.apply(el_s, *_net_p2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['IDFG_Miner_PL_30'] + [round(score, 3) for score in _scores]

# Overwrite row 7 in place. In a top-to-bottom run that is the provider-level
# fuzzy-miner row reloaded from cc_results.csv; re-check the index if the row order changes.
results_df.loc[7] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Activity-level logs: pid -> case id, act_code -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "act_code": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Score the imported activity-level net for the first log against `el_f`.
_net_a1 = (pn_a_1, im_a_1, fm_a_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_a1)
prec = pm4.precision_token_based_replay(el_f, *_net_a1)
gen = generalization_evaluator.apply(el_f, *_net_a1)

In [None]:
# Score the imported activity-level net for the second log against `el_s`.
_net_a2 = (pn_a_2, im_a_2, fm_a_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_a2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_a2)
gen2 = generalization_evaluator.apply(el_s, *_net_a2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['IDFG_Miner_AL_30'] + [round(score, 3) for score in _scores]

# Overwrite row 6 in place. In a top-to-bottom run that is the activity-level
# fuzzy-miner row reloaded from cc_results.csv; re-check the index if the row order changes.
results_df.loc[6] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Load the `_99` PNML files from the ProM folder; these rebind the pn_*/im_*/fm_*
# names again. `p` = provider-level, `a` = activity-level.
pn_p_1, im_p_1, fm_p_1 = pm4.read_pnml('process_mining/ProM/wv1_p_pn_99.pnml')
pn_p_2, im_p_2, fm_p_2 = pm4.read_pnml('process_mining/ProM/wv2_p_pn_99.pnml')
pn_a_1, im_a_1, fm_a_1 = pm4.read_pnml('process_mining/ProM/wv1_a_pn_99.pnml')
pn_a_2, im_a_2, fm_a_2 = pm4.read_pnml('process_mining/ProM/wv2_a_pn_99.pnml')

In [None]:
# Score the imported activity-level net for the first log against `el_f`
# (`el_f` is still the activity-level log built above).
_net_a1 = (pn_a_1, im_a_1, fm_a_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_a1)
prec = pm4.precision_token_based_replay(el_f, *_net_a1)
gen = generalization_evaluator.apply(el_f, *_net_a1)

In [None]:
# Score the imported activity-level net for the second log against `el_s`.
_net_a2 = (pn_a_2, im_a_2, fm_a_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_a2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_a2)
gen2 = generalization_evaluator.apply(el_s, *_net_a2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['IDFG_Miner_AL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Provider-level logs: pid -> case id, provider_event -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "provider_event": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Score the imported provider-level net for the first log against `el_f`.
_net_p1 = (pn_p_1, im_p_1, fm_p_1)
fitness = pm4.fitness_token_based_replay(el_f, *_net_p1)
prec = pm4.precision_token_based_replay(el_f, *_net_p1)
gen = generalization_evaluator.apply(el_f, *_net_p1)

In [None]:
# Score the imported provider-level net for the second log against `el_s`.
_net_p2 = (pn_p_2, im_p_2, fm_p_2)
fitness2 = pm4.fitness_token_based_replay(el_s, *_net_p2)
prec2 = pm4.precision_token_based_replay(el_s, *_net_p2)
gen2 = generalization_evaluator.apply(el_s, *_net_p2)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['IDFG_Miner_PL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the results collected so far.
results_df

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_inductive(el_f, noise_threshold=0.01, multi_processing=True)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_inductive(el_s, noise_threshold=0.01, multi_processing=True)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Inductive_Miner_PL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.99) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_heuristics(el_f, dependency_threshold=0.99)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.99) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_heuristics(el_s, dependency_threshold=0.99)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Heuristics_Miner_PL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Provider-level logs: pid -> case id, provider_event -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "provider_event": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_inductive(el_f, noise_threshold=0.01, multi_processing=True)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_inductive(el_s, noise_threshold=0.01, multi_processing=True)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
res_l = ['Inductive_Miner_PL_1', round(fitness['log_fitness'], 3), round(prec, 3), round(gen, 3),
         round(fitness2['log_fitness'], 3), round(prec2, 3), round(gen2, 3),
         round(mean([fitness['log_fitness'], fitness2['log_fitness']]), 3),
         round(mean([prec, prec2]), 3),
         round(mean([gen, gen2]), 3)]

# In a top-to-bottom run an earlier cell has already appended a row named
# 'Inductive_Miner_PL_1'. Overwrite that row rather than appending a second one,
# so the saved table holds each model name once; append only if it is absent.
_same_model = results_df.index[results_df['model_name'] == res_l[0]]
_target_row = _same_model[0] if len(_same_model) else len(results_df)
results_df.loc[_target_row] = res_l

In [None]:
# Activity-level logs: pid -> case id, act_code -> activity, start -> timestamp.
_pm4py_columns = {"pid": "case:concept:name", "act_code": "concept:name",
                  "start": "time:timestamp"}
_format_args = dict(case_id='case:concept:name', activity_key='concept:name',
                    timestamp_key='time:timestamp', timest_format='%Y-%m-%d %H:%M:%S')

elog_data_fp = pm4.format_dataframe(elog_data_first.rename(columns=_pm4py_columns), **_format_args)
el_f = pm4.convert_to_event_log(elog_data_fp)

elog_data_sp = pm4.format_dataframe(elog_data_second.rename(columns=_pm4py_columns), **_format_args)
el_s = pm4.convert_to_event_log(elog_data_sp)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_inductive(el_f, noise_threshold=0.01, multi_processing=True)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine an inductive-miner Petri net (noise threshold 0.01) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_inductive(el_s, noise_threshold=0.01, multi_processing=True)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Inductive_Miner_AL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.99) from the first log,
# draw it, and score it on that same log.
_mined_first = pm4.discover_petri_net_heuristics(el_f, dependency_threshold=0.99)
net, initial_marking, final_marking = _mined_first
pm4.view_petri_net(*_mined_first)
fitness = pm4.fitness_token_based_replay(el_f, *_mined_first)
prec = pm4.precision_token_based_replay(el_f, *_mined_first)
gen = generalization_evaluator.apply(el_f, *_mined_first)

In [None]:
# Mine a heuristics-miner Petri net (dependency threshold 0.99) from the second log,
# draw it, and score it on that same log.
_mined_second = pm4.discover_petri_net_heuristics(el_s, dependency_threshold=0.99)
net2, initial_marking2, final_marking2 = _mined_second
pm4.view_petri_net(*_mined_second)
fitness2 = pm4.fitness_token_based_replay(el_s, *_mined_second)
prec2 = pm4.precision_token_based_replay(el_s, *_mined_second)
gen2 = generalization_evaluator.apply(el_s, *_mined_second)

In [None]:
# Row = model name, then rounded (fitness, precision, generalization) for the
# first log, the second log, and the mean of the two.
_lf_1, _lf_2 = fitness['log_fitness'], fitness2['log_fitness']
_scores = [_lf_1, prec, gen,
           _lf_2, prec2, gen2,
           mean([_lf_1, _lf_2]), mean([prec, prec2]), mean([gen, gen2])]
res_l = ['Heuristics_Miner_AL_1'] + [round(score, 3) for score in _scores]

# Append as the next row of the results table.
results_df.loc[len(results_df)] = res_l

In [None]:
# Display the final results table.
results_df

In [None]:
# Persist the extended results table (without the index column) to a second file.
results_df.to_csv('process_mining/cc_results_v2.csv', index=False)