In [1]:
from IPython.display import clear_output
%load_ext autoreload
%autoreload 2
import os
# The working directory should be the pm4py-dcr checkout (the directory above `notebooks`).
# The location defaults to the original author's checkout; set the PM4PY_DCR_ROOT
# environment variable to run the notebook from another checkout without editing this cell.
project_root = os.environ.get('PM4PY_DCR_ROOT', '/home/vco/Projects/pm4py-dcr')
print(os.getcwd())
os.chdir(project_root)
print(os.getcwd())

/home/vco/Projects/pm4py-dcr/notebooks
/home/vco/Projects/pm4py-dcr


In [2]:
import pm4py
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np

from copy import deepcopy
from scipy import stats
from pm4py.algo.discovery.dcr_discover import algorithm as alg
from pm4py.objects.dcr.exporter import exporter as dcr_exporter
from pm4py.util.benchmarking import benchmark_event_log

# Mine with SpT-DisCoveR

# Sepsis

In [None]:
# Sepsis Cases benchmark: log location plus the labels handed to the benchmark helper.
result_file_prefix = 'sepsis'
dcr_title = 'Sepsis Cases'
event_log_file = '/home/vco/Datasets/Sepsis Cases - Event Log.xes'

# Option flags passed through unchanged as the fourth argument of benchmark_event_log.
config = {
    'findAdditionalConditions': True,
    'inBetweenRels': True,
    'timed': False,
    'discardSelfInPredecessors': True,
    'usePredecessors': False,
}

# The helper hands back four values; the names below mirror their unpacking order.
(
    sepsis_reference_dcr,
    sepsis_event_log,
    sepsis_sp_dcr,
    sepsis_sp_log,
) = benchmark_event_log(event_log_file, result_file_prefix, dcr_title, config)

# BPIC 2017

In [None]:
# BPIC 2017 (offer log) benchmark: log location plus the labels handed to the benchmark helper.
result_file_prefix = 'bpic2017'
dcr_title = 'BPIC2017'
event_log_file = '/home/vco/Datasets/BPI Challenge 2017 - Offer log.xes'

# Option flags passed through unchanged as the fourth argument of benchmark_event_log.
config = {
    'findAdditionalConditions': True,
    'inBetweenRels': True,
    'timed': False,
    'discardSelfInPredecessors': True,
    'usePredecessors': False,
}

# The helper hands back four values; the names below mirror their unpacking order.
(
    bpic17_reference_dcr,
    bpic17_event_log,
    bpic17_sp_dcr,
    bpic17_sp_log,
) = benchmark_event_log(event_log_file, result_file_prefix, dcr_title, config)

# Road Traffic

In [None]:
# Road Traffic Fine benchmark: log location plus the labels handed to the benchmark helper.
result_file_prefix = 'traffic_fine'
dcr_title = 'Traffic Fine'
event_log_file = '/home/vco/Datasets/12683249/Road_Traffic_Fine_Management_Process.xes'

# Option flags passed through unchanged as the fourth argument of benchmark_event_log.
config = {
    'findAdditionalConditions': True,
    'inBetweenRels': True,
    'timed': False,
    'discardSelfInPredecessors': True,
    'usePredecessors': False,
}

# The helper hands back four values; the names below mirror their unpacking order.
(
    rtfmp_reference_dcr,
    rtfmp_event_log,
    rtfmp_sp_dcr,
    rtfmp_sp_log,
) = benchmark_event_log(event_log_file, result_file_prefix, dcr_title, config)

# BPIC2017: inspecting the `0_Created` events (a question from Thomas)

In [None]:
# Turn the BPIC 2017 log into a DataFrame and display only the rows whose activity is '0_Created'.
df = pm4py.convert_to_dataframe(bpic17_event_log)
#TODO: check if Created exists but afterwards there is no Refused/Accepted/Cancelled (OR)
df.loc[df['concept:name'].eq('0_Created')]

# Visualize as histograms

In [None]:
from pm4py.algo.discovery.dcr_discover import time_mining
from pm4py.visualization.timings import visualizer as t_vis

# Collect timing values from the Road Traffic reference model and event log (the rtfmp_* results of the benchmark cell).
timings = time_mining.get_timing_values(rtfmp_reference_dcr, rtfmp_event_log)
# Clear the cell's earlier output; wait=True postpones the clearing until new output is ready to replace it.
clear_output(wait=True)
# Hand the timings to the visualizer with 0 and 500 as its xmin / xmax arguments.
t_vis.apply(timings, xmin=0, xmax=500)

# Playground stuff

In [None]:
# Hand-written log for exercising the timing code: four cases holding 6, 5, 10 and 15 events.
cids = [1] * 6 + [2] * 5 + [3] * 10 + [4] * 15

# Activity labels, one list per case, concatenated in case order.
events = (
    ['B', 'B', 'C', 'D', 'E', 'F']
    + ['A', 'B', 'E', 'F', 'C']
    + ['A', 'B', 'F', 'C', 'B', 'C', 'B', 'E', 'F', 'C']
    + ['B', 'B', 'B', 'Z', 'B', 'C', 'B', 'A', 'C', 'Z', 'C', 'C', 'B', 'B', 'C']
)

# One timestamp per event, parsed individually so the list holds pandas Timestamp objects.
timestamps = [
    pd.to_datetime(day)
    for day in (
        # case 1
        '2021-8-02', '2021-10-02', '2021-10-03',
        '2021-10-04', '2021-10-05', '2021-10-06',
        # case 2
        '2021-10-01', '2021-10-02', '2021-10-03',
        '2021-10-04', '2021-10-05',
        # case 3
        '2021-10-01', '2021-10-02', '2021-10-03', '2021-10-04', '2021-10-05',
        '2021-10-06', '2021-10-07', '2021-10-08', '2021-10-09', '2021-10-10',
        # case 4
        '2021-9-01', '2021-10-01', '2021-10-03', '2021-10-04', '2021-10-05',
        '2021-10-06', '2021-10-07', '2021-10-08', '2021-10-09', '2021-10-10',
        '2021-10-11', '2025-10-11', '2025-10-12', '2025-10-13', '2025-10-14',
    )
]

# Column name -> values, using the case / activity / timestamp column names.
test_log_dict = {
    'case:concept:name': cids,
    'concept:name': events,
    'time:timestamp': timestamps,
}

# A B -> C pair listed under both relation names; not referenced again in this cell.
test_tid = {relation: [['B', 'C']] for relation in ('CONDITION', 'RESPONSE')}

test_log_df = pd.DataFrame(data=test_log_dict)
# Independent copy, so changes to one frame do not show up in the other.
test_event_log = deepcopy(test_log_df)

In [None]:
# Test traces for subprocesses.
dict_keys = ['case:concept:name','concept:name','time:timestamp']

def create_test_traces(ordered_event_matrix):
    """Build a pm4py event log from a list of traces.

    Each inner sequence of ``ordered_event_matrix`` becomes one case. Cases are
    numbered from 1 in input order and the number is stored as a string.
    Events are stored as ``str(event)``. Within a case the first event is dated
    2023-01-01 and every following event one day later than its predecessor.

    Returns whatever ``pm4py.convert_to_event_log`` produces for the assembled
    DataFrame.
    """
    first_day = dt.date(2023, 1, 1)
    # TODO: there could be a random sample from a distribution here (instead of the fixed one-day step)
    rows = [
        {
            dict_keys[0]: str(case_number),
            dict_keys[1]: str(activity),
            dict_keys[2]: first_day + dt.timedelta(days=day_offset),
        }
        for case_number, trace in enumerate(ordered_event_matrix, start=1)
        for day_offset, activity in enumerate(trace)
    ]
    # Everything is loaded as strings first; the timestamp column is parsed back into datetimes right after.
    frame = pd.DataFrame(
        rows,
        columns=['case:concept:name', 'concept:name', 'time:timestamp'],
        dtype=str,
    )
    frame['time:timestamp'] = pd.to_datetime(frame['time:timestamp'])
    return pm4py.convert_to_event_log(frame)
# Bind each activity label to a same-named variable so the traces below read as plain sequences.
A, B, C, D, E, F = 'A', 'B', 'C', 'D', 'E', 'F'
# Six sample traces, one inner list per case.
event_log = create_test_traces([
    [A, C, C, B, D],
    [A, B, C, C, D],
    [A, E, F, D],
    [A, F, E, D],
    [A, B, C, D, A, E, F, D],
    [A, C, B, C, D, A, F, E, D],
])

In [None]:
A, B, C, D = 'A', 'B', 'C', 'D'
# Four two-event traces: each pairing of a first event from {A, B} with a second from {C, D}.
tijs_trace = create_test_traces([
    [A, C],
    [A, D],
    [B, C],
    [B, D],
])
# NOTE(review): create_test_traces already returns the result of pm4py.convert_to_event_log,
# so this second conversion looks redundant — confirm before removing it.
tt = pm4py.convert_to_event_log(tijs_trace)
# Export the log as XES to the author's Downloads folder.
pm4py.write_xes(tt, file_path='/home/vco/Downloads/trace_for_tijs.xes')