In [1]:
import numpy as np
import pandas as pd

from ordinor.io import read_disco_csv

In [2]:
# Load the event log to analyse. Other prepared logs that can be used instead:
#   'data/processed/bpic15.csv'
#   'data/processed/bpic17.csv'
log_path = 'data/processed/wabo.csv'
el = read_disco_csv(log_path)

# Print the number of distinct values in the resource column, then in the
# activity-name column.
for column in ('org:resource', 'concept:name'):
    print(len(pd.unique(el[column])))

Importing from CSV file data/processed/wabo.csv
Scanned 6641 events from "data/processed/wabo.csv".
--------------------------------------------------------------------------------
Number of events:		6641
Number of cases:		1348
--------------------------------------------------------------------------------
44
27


In [3]:
# specification
#
# Candidate attributes handed to the miner, kept as one list per event log so
# that switching datasets is a one-line change instead of (un)commenting code.
# Each record names a column (`attr`), its value type (`attr_type`:
# 'categorical' or 'numeric') and a dimension tag (`attr_dim`: 'CT', 'AT' or
# 'TT' -- presumably case / activity / time type; confirm against ordinor).
#
# NOTE: DATASET must correspond to the event log file loaded earlier in the
# notebook.

DATASET = 'wabo'  # one of: 'wabo', 'bpic15', 'bpic17'


def _cand_attr(attr, attr_type, attr_dim):
    """Return one candidate-attribute record with the keys the spec uses."""
    return {'attr': attr, 'attr_type': attr_type, 'attr_dim': attr_dim}


CAND_ATTRS_BY_DATASET = {
    'wabo': [
        _cand_attr('ct:channel', 'categorical', 'CT'),
        _cand_attr('tt:weekday', 'categorical', 'TT'),
        _cand_attr('tt:ampm', 'categorical', 'TT'),
    ],
    'bpic15': [
        _cand_attr('ct:permit_type', 'categorical', 'CT'),
        _cand_attr('tt:weekday', 'categorical', 'TT'),
        _cand_attr('tt:ampm', 'categorical', 'TT'),
    ],
    'bpic17': [
        _cand_attr('ct:loan_goal', 'categorical', 'CT'),
        _cand_attr('ct:application_type', 'categorical', 'CT'),
        _cand_attr('ct:requested_amount', 'numeric', 'CT'),
        _cand_attr('at:event_origin', 'categorical', 'AT'),
        _cand_attr('tt:weekday', 'categorical', 'TT'),
        _cand_attr('tt:ampm', 'categorical', 'TT'),
    ],
}

# Raises KeyError early if DATASET is misspelled.
all_cand_attrs = CAND_ATTRS_BY_DATASET[DATASET]

spec = dict()
spec['cand_attrs'] = all_cand_attrs

In [4]:
from ordinor.execution_context import ODTMiner, ODTSAMiner
from sklearn.model_selection import KFold
from copy import deepcopy
# 5-fold splitter with shuffling. The seed is fixed so that the folds (and the
# per-fold CSV files written from them) are the same on every run.
CV_SEED = 42
kf = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)

In [5]:
# Toggle: True runs cross-validation and writes one CSV per fold;
# False fits a single miner on the whole event log.
RUN_CV = False

if RUN_CV:
    for fold, (train_index, test_index) in enumerate(kf.split(el), start=1):
        # KFold.split yields *positional* row indices, so rows must be selected
        # by position (`iloc`); label-based `loc` is only correct when the
        # index happens to be exactly 0..n-1.
        el_train = el.iloc[train_index]
        el_test = el.iloc[test_index]

        # Fit on the training events only.
        miner = ODTMiner(el_train, spec, eps=1e-2)

        # For each leaf of the fitted tree, take the test events selected by
        # the leaf's rule and tag them with that leaf's type labels.
        el_test_agg = []
        for _, node in miner._leaves.items():
            # Copy before adding columns (presumably so `el_test` itself is
            # never modified -- depends on what `apply` returns).
            el_test_par = deepcopy(node.composite_rule.apply(el_test))
            el_test_par['case_type'] = node.ct_label
            el_test_par['activity_type'] = node.at_label
            el_test_par['time_type'] = node.tt_label

            el_test_agg.append(el_test_par[['org:resource', 'case_type', 'activity_type', 'time_type']])
        # One output file per fold, numbered from 1.
        pd.concat(el_test_agg).to_csv(f"5fold_{fold}.csv")
else:
    miner = ODTMiner(el, spec, eps=1e-2)

Decision tree initialized with an empty root node
	Dis. = 0.000000, Imp. = 1.000000, Target = 1.000000
Start to fit decision tree with epsilon = 0.01
Tree grows by splitting all current leaf nodes on `tt:weekday`
	Dis. = 0.257169, Imp. = 0.979428, Target = 1.020572
Tree grows by splitting all current leaf nodes on `tt:ampm`
	Dis. = 0.403828, Imp. = 0.965586, Target = 0.584413
Tree grows by splitting all current leaf nodes on `ct:channel`
	Dis. = 0.292868, Imp. = 0.951931, Target = 0.288912
Tree grows by splitting all current leaf nodes on `ct:channel`
	Dis. = 0.305735, Imp. = 0.945681, Target = 0.050500
Tree grows by splitting all current leaf nodes on `tt:weekday`
	Dis. = 0.305797, Imp. = 0.945004, Target = 0.000917
Tree grows by splitting all current leaf nodes on `tt:weekday`
	Dis. = 0.309197, Imp. = 0.938643, Target = 0.017850
Tree grows by splitting all current leaf nodes on `tt:weekday`
	Dis. = 0.406302, Imp. = 0.920794, Target = 0.333071
Tree grows by splitting all current leaf 

In [None]:
if not RUN_CV:
    # Derive the resource log from the full event log using the fitted miner.
    rl = miner.derive_resource_log(el)

    # Number of distinct labels in each of the three type columns.
    for template, column in (
        ('CT types: {}', 'case_type'),
        ('AT types: {}', 'activity_type'),
        ('TT types: {}', 'time_type'),
    ):
        print(template.format(len(pd.unique(rl[column]))))

    print(rl)

    # For every observed (case_type, activity_type, time_type) combination,
    # print the combination followed by a tab-indented event count.
    type_columns = ['case_type', 'activity_type', 'time_type']
    for type_combo, group in rl.groupby(type_columns):
        print(type_combo)
        print('\t', f"{len(group)} events", sep='')

In [None]:
# Persist the derived resource log; it only exists when cross-validation
# was not run.
if not RUN_CV:
    rl.to_csv(path_or_buf='test_rl.csv')