### Make top-level imports

* PM4Py
* OrdinoR (*note: from local repo*)

In [10]:
from os.path import join as path_join

import pandas as pd
import pm4py

import ordinor.constants as const
from ordinor.io import read_csv
from ordinor.utils.log_preprocessing import append_case_duration

### Import original event log data file

In [None]:
# Location and name of the source event log (CSV export)
DIRPATH = './data/DATA_csv'
LOGNAME = 'wabo'

fn = path_join(DIRPATH, f'{LOGNAME}.csv')
print(f'Import source event log {fn}')

# Let an import failure stop the cell with its real traceback. Catching and
# printing it here would leave `log` undefined (or, worse, bound to a stale
# value from an earlier run), so every later cell would fail or silently use
# the wrong data.
log = read_csv(fn)

# Quick sanity check of the attributes available in the imported log
print(sorted(log.columns))

Import source event log ./data/DATA_csv/wabo.csv
Importing from CSV file ./data/DATA_csv/wabo.csv
Scanned 8577 events from "./data/DATA_csv/wabo.csv".
--------------------------------------------------------------------------------
Number of events:		8577
Number of cases:		1434
--------------------------------------------------------------------------------
['@@case_index', '@@index', 'Unnamed: 0', 'case:channel', 'case:concept:name', 'case:deadline', 'case:department', 'case:enddate', 'case:enddate_planned', 'case:group', 'case:responsible', 'case:startdate', 'concept:instance', 'concept:name', 'lifecycle:transition', 'org:group', 'org:resource', 'time:timestamp']


Unnamed: 0.1,Unnamed: 0,org:group,concept:instance,org:resource,concept:name,time:timestamp,lifecycle:transition,case:startdate,case:responsible,case:enddate_planned,case:department,case:group,case:concept:name,case:deadline,case:channel,case:enddate,@@index,@@case_index
0,0,Group 1,task-42933,Resource21,Confirmation of receipt,2011-10-11 13:45:40.276000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,0,0
1,1,Group 4,task-42935,Resource10,T02 Check confirmation of receipt,2011-10-12 08:26:25.398000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,1,0
2,2,Group 1,task-42957,Resource21,T03 Adjust confirmation of receipt,2011-11-24 15:36:51.302000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,2,0
3,3,Group 4,task-47958,Resource21,T02 Check confirmation of receipt,2011-11-24 15:37:16.553000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,3,0
4,4,EMPTY,task-43021,Resource30,Confirmation of receipt,2011-10-18 13:46:39.679000+00:00,complete,2011-10-11 01:06:40.020000+00:00,Resource04,2011-12-06 01:06:40.010000+00:00,General,Group 5,case-10017,2011-12-06 01:06:40+00:00,Internet,2011-10-18 13:56:55.943000+00:00,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8572,8572,Group 4,task-43560,Resource06,T02 Check confirmation of receipt,2011-10-18 09:04:48.732000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8572,1433
8573,8573,Group 3,task-43562,Resource06,T04 Determine confirmation of receipt,2011-10-18 09:05:12.359000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8573,1433
8574,8574,Group 2,task-43563,Resource06,T05 Print and send confirmation of receipt,2011-10-18 09:05:30.196000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8574,1433
8575,8575,Group 1,task-43561,Resource06,T06 Determine necessity of stop advice,2011-10-18 09:06:01.468000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8575,1433


### Data preprocessing (generic)

##### Preprocessing
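- Derive a case-level attribute `case:days_remained` (duration in whole days,
  calculated by `case:deadline - case:startdate`)
- Derive an event-level attribute `task_group` (the three-character `Txx` prefix
  of `concept:name`; "Confirmation of receipt" keeps its full name)
- Filter out meaningless resource labels: `test`, `TEST`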

In [12]:
# Derive case-level attribute `case:days_remained`:
# parse both case-level timestamps, then keep the whole-day part of their difference
log['case:startdate'] = pd.to_datetime(log['case:startdate'])
log['case:deadline'] = pd.to_datetime(log['case:deadline'])
time_to_deadline = log['case:deadline'] - log['case:startdate']
# `.dt.days` is the days component of each timedelta, so the value is floored
# to whole days rather than rounded (e.g. 55 days 23:59:09 -> 55)
log['case:days_remained'] = time_to_deadline.dt.days

# Derive additional event attribute `task_group`
def extract_task_group(act):
    """Map an activity label to its task group.

    Labels that start with an uppercase 'T' (e.g. "T02 Check confirmation of
    receipt") are reduced to their first three characters ("T02"). Any other
    label, such as the special case "Confirmation of receipt", is returned
    unchanged and forms its own group.

    Parameters
    ----------
    act : str
        Activity label.

    Returns
    -------
    str
        The task group derived from `act`.
    """
    return act[:3] if act.startswith('T') else act
# Attach the task group of every event's activity label
log['task_group'] = log[const.ACTIVITY].apply(extract_task_group)

# Drop events recorded under the placeholder resource labels
log = log[~log[const.RESOURCE].isin({'test', 'TEST'})]

# Persist the preprocessed log. The row index is only positional bookkeeping
# (and may have gaps after the filter above), so it is kept out of the file;
# written with the default `index=True` it would come back as an extra
# unnamed column when the CSV is read again with pandas defaults.
log.to_csv(f'data/{LOGNAME}.preprocessed.csv', index=False)
log

Unnamed: 0.1,Unnamed: 0,org:group,concept:instance,org:resource,concept:name,time:timestamp,lifecycle:transition,case:startdate,case:responsible,case:enddate_planned,case:department,case:group,case:concept:name,case:deadline,case:channel,case:enddate,@@index,@@case_index,case:days_remained,task_group
0,0,Group 1,task-42933,Resource21,Confirmation of receipt,2011-10-11 13:45:40.276000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,0,0,55,Confirmation of receipt
1,1,Group 4,task-42935,Resource10,T02 Check confirmation of receipt,2011-10-12 08:26:25.398000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,1,0,55,T02
2,2,Group 1,task-42957,Resource21,T03 Adjust confirmation of receipt,2011-11-24 15:36:51.302000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,2,0,55,T03
3,3,Group 4,task-47958,Resource21,T02 Check confirmation of receipt,2011-11-24 15:37:16.553000+00:00,complete,2011-10-11 13:42:22.688000+00:00,Resource21,2011-12-06 13:41:31.788000+00:00,General,Group 2,case-10011,2011-12-06 13:41:31.788000+00:00,Internet,NaT,3,0,55,T02
4,4,EMPTY,task-43021,Resource30,Confirmation of receipt,2011-10-18 13:46:39.679000+00:00,complete,2011-10-11 01:06:40.020000+00:00,Resource04,2011-12-06 01:06:40.010000+00:00,General,Group 5,case-10017,2011-12-06 01:06:40+00:00,Internet,2011-10-18 13:56:55.943000+00:00,4,1,55,Confirmation of receipt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8572,8572,Group 4,task-43560,Resource06,T02 Check confirmation of receipt,2011-10-18 09:04:48.732000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8572,1433,55,T02
8573,8573,Group 3,task-43562,Resource06,T04 Determine confirmation of receipt,2011-10-18 09:05:12.359000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8573,1433,55,T04
8574,8574,Group 2,task-43563,Resource06,T05 Print and send confirmation of receipt,2011-10-18 09:05:30.196000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8574,1433,55,T05
8575,8575,Group 1,task-43561,Resource06,T06 Determine necessity of stop advice,2011-10-18 09:06:01.468000+00:00,complete,2011-10-06 01:06:40.020000+00:00,Resource06,2011-12-01 01:06:40.010000+00:00,General,Group 5,case-9997,2011-12-01 01:06:40+00:00,Internet,2011-10-20 14:19:44.448000+00:00,8575,1433,55,T06


### Experiments

##### Preprocessing
- Filter out meaningless resource labels: `test`, `TEST`

##### Config
- Determine CT by
    
    case:channel (previously used), case:department, case:responsible, case:days_remained

- Determine AT by
    
    concept:name, prefix of concept:name (see `Description.docx` in the `Receipt_WABO` dataset folder)
    
- Determine TT by 
    
    year, month, weekday (previously used)

- Discover resource groups using AHC, number of groups automatically determined (via CV)

- Apply OverallScore, parameters automatically determined (via Grid Search)
