In [1]:
%load_ext autoreload
import datetime
# import ete3
import itertools
import json
import logging
import math
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import typing
# import re
import xml.etree.ElementTree as ET
# import xml.dom.minidom
# import xmlschema

In [2]:
%autoreload
from maxes.xes_loader2 import XesLoader, XesLog
# from maxes.xes_file import XesFile
# from maxes.analyze_xes import AnalyzeXes
# import maxes.analyze_sequence
# import maxes.serialization.serialize
# import maxes.graphs
import maxes.notebooks.utils
import maxes.data.load_files
import maxes.utils
from maxes.generators.xes_generator.xes_generator3 import XesGenerator3 as XesGenerator

In [None]:
# One-time notebook initialisation.
# NOTE(review): what init_notebook() changes is not visible from this notebook;
# the inline marker says it must not be executed twice in one kernel session.
maxes.notebooks.utils.init_notebook()  # RUN ONLY ONCE

In [4]:
import logging

# Bare Formatter: records are rendered with the logging module's default format.
formatter = logging.Formatter()

logger = logging.getLogger()

# Re-running this cell must not stack another handler on the root logger
# (every extra handler would print each message one more time), so the handler
# is tagged with a name and reused when it is already attached.
consoleHandler = next(
    (
        existing
        for existing in logger.handlers
        if getattr(existing, "name", None) == "notebook-console"
    ),
    None,
)
if consoleHandler is None:
    consoleHandler = logging.StreamHandler()
    consoleHandler.set_name("notebook-console")
    logger.addHandler(consoleHandler)

consoleHandler.setLevel(logging.DEBUG)
consoleHandler.setFormatter(formatter)

In [5]:
def prepare(file_path, seed: int = 1):
    """Load an XES log and fit a generator on it.

    Parameters
    ----------
    file_path
        Location of the XES file; passed unchanged to ``XesLoader.load``.
    seed : int, default 1
        Seed of the NumPy random generator handed to ``XesGenerator``.
        The default reproduces the seed that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(loader, log, generator)``: the loader that read the file, the log
        it returned, and the generator after ``fit(log)``.
    """
    loader = XesLoader()
    log = loader.load(file_path)

    # A fresh, explicitly seeded RNG per call keeps repeated runs reproducible.
    generator = XesGenerator(rng=np.random.default_rng(seed))
    generator.fit(log)

    return loader, log, generator

In [14]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold


def evaluate_accuracy_score(
    log: XesLog,
    attribute_generator,
    random_state: int,
    kfold_splits: int = 5,
):
    """Return the mean k-fold accuracy of ``attribute_generator.model`` on ``log``.

    Features and target are obtained from
    ``attribute_generator.split_xy(log.df)``. The rows are shuffled into
    ``kfold_splits`` folds (seeded with ``random_state``); for each fold the
    model is fitted on the remaining rows, predicts the held-out rows and is
    scored with ``accuracy_score``.

    ``attribute_generator.model`` itself is fitted on every fold, so after the
    call it is left in whatever state the last ``fit`` produced.

    Returns the arithmetic mean of the per-fold accuracies.
    """
    splitter = KFold(n_splits=kfold_splits, shuffle=True, random_state=random_state)
    features, target = attribute_generator.split_xy(log.df)

    def take(frame, positions):
        # Positional selection, re-indexed from 0 so each split is self-contained.
        return frame.iloc[positions].reset_index(drop=True)

    fold_accuracies = []
    for fit_positions, holdout_positions in splitter.split(features):
        features_fit = take(features, fit_positions)
        target_fit = take(target, fit_positions)
        features_holdout = take(features, holdout_positions)
        target_holdout = take(target, holdout_positions)

        attribute_generator.model.fit(features_fit, target_fit)
        predicted = attribute_generator.model.predict(features_holdout)
        fold_accuracies.append(accuracy_score(target_holdout, predicted))

    return np.mean(fold_accuracies)


def evaluate_attribute_accuracy_score(
    attribute: str,
    log: XesLog,
    generator: XesGenerator,
    random_state: int = 1,
    kfold_splits: int = 5,
):
    """Cross-validate the generator's predictor for one attribute of ``log``.

    Parameters
    ----------
    attribute
        Column of ``log.df``; also the key into
        ``generator.event_attributes_info``.
    log
        Log whose ``df`` supplies the data for the evaluation.
    generator
        Fitted generator holding the predictor for ``attribute``.
    random_state : int, default 1
        Seed for the k-fold shuffle (previously fixed to 1).
    kfold_splits : int, default 5
        Number of folds (previously fixed to 5).

    Returns
    -------
    Mean accuracy over the folds, as returned by ``evaluate_accuracy_score``.

    Notes
    -----
    The predictor's ``model.inner_model_kwargs`` is overwritten and stays
    overwritten after the call, and the model is refitted during evaluation.
    """
    attribute_generator = generator.event_attributes_info[attribute].predictor

    # Number of distinct (non-null) values of the attribute in the whole log.
    # NOTE(review): presumably this lets the inner model account for categories
    # that are missing from an individual training fold — confirm.
    min_categories = log.df[attribute].nunique()
    attribute_generator.model.inner_model_kwargs = {"min_categories": min_categories}

    return evaluate_accuracy_score(
        log=log,
        attribute_generator=attribute_generator,
        random_state=random_state,
        kfold_splits=kfold_splits,
    )

# bpic2020

In [23]:
# Load the bpic2020 "request for payment" log and fit a generator on it.
# `loader`, `log` and `generator` are rebound by every dataset section, so the
# cells of a section have to be run together and in order.
file_path = maxes.data.load_files.get_path__bpic2020__request_for_payment()

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,time:timestamp,id,org:resource,org:role,case:Rfp_id,case:Project,case:Task,case:OrganizationalEntity,case:Cost Type,case:RequestedAmount,case:Activity,case:RfpNumber
0,1,Request For Payment SUBMITTED by EMPLOYEE,2017-01-09 08:17:18+00:00,st_step 148220_0,STAFF MEMBER,EMPLOYEE,request for payment 148214,project 148216,UNKNOWN,organizational unit 65463,0,34.336343,UNKNOWN,request for payment number 148215
1,1,Request For Payment FINAL_APPROVED by SUPERVISOR,2017-01-09 08:18:00+00:00,st_step 148221_0,STAFF MEMBER,SUPERVISOR,request for payment 148214,project 148216,UNKNOWN,organizational unit 65463,0,34.336343,UNKNOWN,request for payment number 148215
2,1,Request For Payment REJECTED by MISSING,2017-01-10 11:42:32+00:00,st_step 148222_0,STAFF MEMBER,MISSING,request for payment 148214,project 148216,UNKNOWN,organizational unit 65463,0,34.336343,UNKNOWN,request for payment number 148215
3,1,Request For Payment SUBMITTED by EMPLOYEE,2017-03-03 08:51:13+00:00,st_step 148219_0,STAFF MEMBER,EMPLOYEE,request for payment 148214,project 148216,UNKNOWN,organizational unit 65463,0,34.336343,UNKNOWN,request for payment number 148215
4,1,Request For Payment APPROVED by PRE_APPROVER,2017-03-03 08:51:42+00:00,st_step 148218_0,STAFF MEMBER,PRE_APPROVER,request for payment 148214,project 148216,UNKNOWN,organizational unit 65463,0,34.336343,UNKNOWN,request for payment number 148215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,6886,Request For Payment APPROVED by ADMINISTRATION,2018-12-29 11:35:02+00:00,st_step 185004_0,STAFF MEMBER,ADMINISTRATION,request for payment 185000,project 147860,task 152704,organizational unit 65468,0,15.409660,activity 505,request for payment number 185001
2,6886,Request For Payment APPROVED by BUDGET OWNER,2019-01-03 08:27:20+00:00,st_step 185003_0,STAFF MEMBER,BUDGET OWNER,request for payment 185000,project 147860,task 152704,organizational unit 65468,0,15.409660,activity 505,request for payment number 185001
3,6886,Request For Payment FINAL_APPROVED by SUPERVISOR,2019-01-08 08:00:39+00:00,st_step 185005_0,STAFF MEMBER,SUPERVISOR,request for payment 185000,project 147860,task 152704,organizational unit 65468,0,15.409660,activity 505,request for payment number 185001
4,6886,Request Payment,2019-01-08 08:29:14+00:00,rp_request for payment 185000_15,SYSTEM,UNDEFINED,request for payment 185000,project 147860,task 152704,organizational unit 65468,0,15.409660,activity 505,request for payment number 185001


In [24]:
# Attribute metadata gathered by the fitted generator; used to pick the
# attributes scored in the next cell.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,id,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
1,org:resource,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
2,concept:name,,XesTypeEnum.STRING,,,
3,time:timestamp,,XesTypeEnum.DATE,,,
4,org:role,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False


In [25]:
# Cross-validated accuracy per attribute, keyed by attribute name.
score__bpic2020 = {
    attribute_name: evaluate_attribute_accuracy_score(attribute_name, log, generator)
    for attribute_name in ("org:resource", "org:role")
}

score__bpic2020

{'org:resource': np.float64(1.0), 'org:role': np.float64(0.9999184708756506)}

# env_permit_application_process

In [36]:
# Load the env_permit_application_process log and fit a generator on it.
# This rebinds `loader`, `log` and `generator`; the scoring cells below read
# whatever these names currently point to.
file_path = maxes.data.load_files.get_path__env_permit_application_process__data()

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,lifecycle:transition,time:timestamp,org:group,concept:instance,org:resource,case:startdate,case:responsible,case:enddate_planned,case:department,case:group,case:deadline,case:channel,case:enddate
0,1,Confirmation of receipt,complete,2011-10-11 11:45:40.276000+00:00,Group 1,task-42933,Resource21,2011-10-11 11:42:22.688000+00:00,Resource21,2011-12-06 12:41:31.788000+00:00,General,Group 2,2011-12-06 12:41:31.788000+00:00,Internet,NaT
1,1,T02 Check confirmation of receipt,complete,2011-10-12 06:26:25.398000+00:00,Group 4,task-42935,Resource10,2011-10-11 11:42:22.688000+00:00,Resource21,2011-12-06 12:41:31.788000+00:00,General,Group 2,2011-12-06 12:41:31.788000+00:00,Internet,NaT
2,1,T03 Adjust confirmation of receipt,complete,2011-11-24 14:36:51.302000+00:00,Group 1,task-42957,Resource21,2011-10-11 11:42:22.688000+00:00,Resource21,2011-12-06 12:41:31.788000+00:00,General,Group 2,2011-12-06 12:41:31.788000+00:00,Internet,NaT
3,1,T02 Check confirmation of receipt,complete,2011-11-24 14:37:16.553000+00:00,Group 4,task-47958,Resource21,2011-10-11 11:42:22.688000+00:00,Resource21,2011-12-06 12:41:31.788000+00:00,General,Group 2,2011-12-06 12:41:31.788000+00:00,Internet,NaT
0,2,Confirmation of receipt,complete,2011-10-18 11:46:39.679000+00:00,EMPTY,task-43021,Resource30,2011-10-10 23:06:40.020000+00:00,Resource04,2011-12-06 00:06:40.010000+00:00,General,Group 5,2011-12-06 00:06:40+00:00,Internet,2011-10-18 11:56:55.943000+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1434,T02 Check confirmation of receipt,complete,2011-10-18 07:04:48.732000+00:00,Group 4,task-43560,Resource06,2011-10-05 23:06:40.020000+00:00,Resource06,2011-12-01 00:06:40.010000+00:00,General,Group 5,2011-12-01 00:06:40+00:00,Internet,2011-10-20 12:19:44.448000+00:00
2,1434,T04 Determine confirmation of receipt,complete,2011-10-18 07:05:12.359000+00:00,Group 3,task-43562,Resource06,2011-10-05 23:06:40.020000+00:00,Resource06,2011-12-01 00:06:40.010000+00:00,General,Group 5,2011-12-01 00:06:40+00:00,Internet,2011-10-20 12:19:44.448000+00:00
3,1434,T05 Print and send confirmation of receipt,complete,2011-10-18 07:05:30.196000+00:00,Group 2,task-43563,Resource06,2011-10-05 23:06:40.020000+00:00,Resource06,2011-12-01 00:06:40.010000+00:00,General,Group 5,2011-12-01 00:06:40+00:00,Internet,2011-10-20 12:19:44.448000+00:00
4,1434,T06 Determine necessity of stop advice,complete,2011-10-18 07:06:01.468000+00:00,Group 1,task-43561,Resource06,2011-10-05 23:06:40.020000+00:00,Resource06,2011-12-01 00:06:40.010000+00:00,General,Group 5,2011-12-01 00:06:40+00:00,Internet,2011-10-20 12:19:44.448000+00:00


In [37]:
# Attribute metadata gathered by the generator fitted on the current `log`.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,org:group,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
1,concept:instance,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
2,org:resource,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
3,concept:name,,XesTypeEnum.STRING,,,
4,time:timestamp,,XesTypeEnum.DATE,,,
5,lifecycle:transition,,XesTypeEnum.STRING,,,


In [16]:
# Accuracy for each attribute of the currently loaded log, one value per name.
# The shared helper does the predictor lookup and the `min_categories` setup
# and forwards `log` to `evaluate_accuracy_score`, which requires it; calling
# `evaluate_accuracy_score` without `log` raises a TypeError.
score__org_resource = evaluate_attribute_accuracy_score("org:resource", log, generator)
print(score__org_resource)

score__org_group = evaluate_attribute_accuracy_score("org:group", log, generator)
print(score__org_group)

score__concept_instance = evaluate_attribute_accuracy_score(
    "concept:instance", log, generator
)
print(score__concept_instance)

0.1449233759437841
0.857410276798032
0.0


In [None]:
# Collect the three individually computed scores in one mapping.
# The final summary cell prints `score__env_permit_application_process`
# instead, so this mapping is only displayed here.
env_permit_application_process_accuracy = {
    "org:resource": score__org_resource,
    "org:group": score__org_group,
    "concept:instance": score__concept_instance,
}
env_permit_application_process_accuracy

{'org:resource': np.float64(0.1449233759437841),
 'org:group': np.float64(0.857410276798032),
 'concept:instance': np.float64(0.0)}

In [39]:
# Cross-validated accuracy per attribute, keyed by attribute name.
# "concept:instance" is left out of this run; uncomment it in the tuple to
# include it again.
score__env_permit_application_process = {
    attribute_name: evaluate_attribute_accuracy_score(attribute_name, log, generator)
    for attribute_name in (
        "org:resource",
        "org:group",
        # "concept:instance",
    )
}

score__env_permit_application_process

{'org:resource': np.float64(0.1449233759437841),
 'org:group': np.float64(0.857410276798032)}

# simple

In [7]:
# Load the "simple" example log (other/simple.xes under the data directory
# resolved by get_data_path) and fit a generator on it.
# Rebinds `loader`, `log` and `generator` for the cells of this section.
file_path = maxes.notebooks.utils.get_data_path("other/simple.xes")

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,lifecycle:transition,time:timestamp,call centre,org:resource,location,duration,outcome,case:description
0,1,incoming claim,complete,1970-01-01 00:00:00+00:00,Brisbane,customer,,,,Simulated process instance
1,1,B check if sufficient information is available,start,1970-01-01 00:00:00+00:00,,Call Centre Agent Brisbane,Brisbane,,,Simulated process instance
2,1,B check if sufficient information is available,complete,1970-01-01 00:00:41+00:00,,Call Centre Agent Brisbane,Brisbane,,,Simulated process instance
3,1,B register claim,start,1970-01-01 00:00:41+00:00,,Call Centre Agent Brisbane,,,,Simulated process instance
4,1,B register claim,complete,1970-01-01 00:12:59+00:00,,Call Centre Agent Brisbane,,,,Simulated process instance
...,...,...,...,...,...,...,...,...,...,...
4,3512,S register claim,complete,1970-01-01 02:39:31+00:00,,Call Centre Agent Sydney,,,,Simulated process instance
5,3512,determine likelihood of claim,start,1970-01-01 06:56:04+00:00,,Claims handler,,,,Simulated process instance
6,3512,determine likelihood of claim,complete,1970-01-01 06:56:35+00:00,,Claims handler,,,,Simulated process instance
7,3512,end,start,1970-01-01 06:56:35+00:00,,Claims handler,,17306,not liable,Simulated process instance


In [8]:
# Attribute metadata gathered by the generator fitted on the current `log`.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,call centre,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,0.076119,,False
1,org:resource,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
2,time:timestamp,,XesTypeEnum.DATE,,,
3,lifecycle:transition,,XesTypeEnum.STRING,,,
4,concept:name,,XesTypeEnum.STRING,,,
5,location,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,0.152239,,False
6,duration,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,0.152239,,False
7,outcome,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,0.152239,,False


In [15]:
# Cross-validated accuracy per attribute, keyed by attribute name.
score__simple = {
    attribute_name: evaluate_attribute_accuracy_score(attribute_name, log, generator)
    for attribute_name in ("org:resource",)
}

score__simple

{'org:resource': np.float64(0.9756383155689455)}

# photo_copier

In [17]:
# Load the photo_copier log and fit a generator on it.
# Rebinds `loader`, `log` and `generator` for the cells of this section.
file_path = maxes.data.load_files.get_path__photo_copier()

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,lifecycle:transition,time:timestamp,org:resource,case:description,case:Class
0,1,Job,start,1970-01-01 00:00:00+00:00,PRN1,Simulated process instance,Print
1,1,Remote Print,complete,1970-01-01 00:15:00+00:00,PRN1,Simulated process instance,Print
2,1,Read Print Options,complete,1970-01-01 00:26:00+00:00,PRN1,Simulated process instance,Print
3,1,Rasterization,start,1970-01-01 00:38:00+00:00,PRN1,Simulated process instance,Print
4,1,Interpretation,start,1970-01-01 00:51:00+00:00,PRN1,Simulated process instance,Print
...,...,...,...,...,...,...,...
120,100,Compression,complete,1970-12-18 18:46:00+00:00,PRN1,Simulated process instance,Copy/Scan
121,100,Store Image,complete,1970-12-18 18:53:00+00:00,PRN1,Simulated process instance,Copy/Scan
122,100,Transfer Image,complete,1970-12-18 19:11:00+00:00,PRN1,Simulated process instance,Copy/Scan
123,100,Send SMTP,complete,1970-12-18 19:18:00+00:00,PRN1,Simulated process instance,Copy/Scan


In [18]:
# Attribute metadata gathered by the generator fitted on the current `log`.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,org:resource,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,1.0,,False
1,time:timestamp,,XesTypeEnum.DATE,,,
2,concept:name,,XesTypeEnum.STRING,,,
3,lifecycle:transition,,XesTypeEnum.STRING,,,


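~ no event-level attributes to evaluate: the only extra attribute, `org:resource`, is reported as trace-level for this log ~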

# activitylog_uci_detailed_labour

In [27]:
# Load the activitylog_uci_detailed_labour log (daily living activities) and
# fit a generator on it.
# Rebinds `loader`, `log` and `generator` for the cells of this section.
file_path = (
    maxes.data.load_files.get_path__daily_living_activities__activitylog_uci_detailed_labour()
)

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,lifecycle:transition,time:timestamp,Column_4,case:creator
0,1,Start,start,2012-11-11 23:48:38+00:00,Start,Fluxicon Disco
1,1,Start,complete,2012-11-11 23:48:38+00:00,Start,Fluxicon Disco
2,1,washing,start,2012-11-11 23:48:38+00:00,washing,Fluxicon Disco
3,1,washing,complete,2012-11-11 23:50:12+00:00,washing,Fluxicon Disco
4,1,watchingtv,start,2012-11-11 23:50:29+00:00,watchingtv,Fluxicon Disco
...,...,...,...,...,...,...
37,25,washing,complete,2011-12-09 20:04:50+00:00,washing,Fluxicon Disco
38,25,watchingtv,start,2011-12-09 20:04:59+00:00,watchingtv,Fluxicon Disco
39,25,watchingtv,complete,2011-12-09 23:16:21+00:00,watchingtv,Fluxicon Disco
40,25,End,start,2011-12-09 23:16:21+00:00,End,Fluxicon Disco


In [28]:
# Attribute metadata gathered by the generator fitted on the current `log`.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,concept:name,,XesTypeEnum.STRING,,,
1,lifecycle:transition,,XesTypeEnum.STRING,,,
2,time:timestamp,,XesTypeEnum.DATE,,,
3,Column_4,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False


In [29]:
# Cross-validated accuracy per attribute, keyed by attribute name.
score__activitylog_uci_detailed_labour = {
    attribute_name: evaluate_attribute_accuracy_score(attribute_name, log, generator)
    for attribute_name in ("Column_4",)
}

score__activitylog_uci_detailed_labour

{'Column_4': np.float64(0.9784482091745959)}

# ccc19

In [31]:
# Load the ccc19 log and fit a generator on it.
# Rebinds `loader`, `log` and `generator` for the cells of this section.
file_path = maxes.data.load_files.get_path__ccc19__data()

loader, log, generator = prepare(file_path)
# Last expression of the cell: rendered as the cell output.
log.df

  start;    Loading XML
  complete;    Loading XML
  start;    Removing namespaces from XML file
  complete;    Removing namespaces from XML file
  start;    Collecting XML traces
  complete;    Collecting XML traces
  start;    Collecting XML events
  complete;    Collecting XML events
  start;    Validating
  complete;    Validating
  start;    Gathering meta data
  complete;    Gathering meta data
  start;    Fitting models for traces beginning timestamps
  complete;    Fitting models for traces beginning timestamps
  start;    Analysing sequence graph
  complete;    Analysing sequence graph
  start;    Fitting graph traverser
  complete;    Fitting graph traverser
  start;    Fittig models for time:timestamp attribute
  complete;    Fittig models for time:timestamp attribute
  start;    Collecting stats
  complete;    Collecting stats
  start;    Collecting attributes info
  complete;    Collecting attributes info
  start;    Fitting presence models
  complete;    Fitting presence 

Unnamed: 0,case:concept:name,concept:name,lifecycle:transition,time:timestamp,org:resource,RESOURCE,ROUND,EVENTID,ACTIVITY,STAGE,VIDEOSTART,VIDEOEND,case:variant,case:variant-index,case:creator
0,1,Hand washing,start,2018-10-11 12:00:00+00:00,R_13_1C,R_13_1C,Pre,1539301115461,Hand washing,Operator and Patient Preparation,55,67,Variant 1,1,Fluxicon Disco
1,1,Hand washing,complete,2018-10-11 12:01:00+00:00,R_13_1C,R_13_1C,Pre,1539301115461,Hand washing,Operator and Patient Preparation,55,67,Variant 1,1,Fluxicon Disco
2,1,Ultrasound configuration,start,2018-10-11 12:01:00+00:00,R_13_1C,R_13_1C,Pre,1539301145312,Ultrasound configuration,Ultrasound Preparation,84,137,Variant 1,1,Fluxicon Disco
3,1,Ultrasound configuration,complete,2018-10-11 12:02:00+00:00,R_13_1C,R_13_1C,Pre,1539301145312,Ultrasound configuration,Ultrasound Preparation,84,137,Variant 1,1,Fluxicon Disco
4,1,Anatomic identification,start,2018-10-11 12:02:00+00:00,R_13_1C,R_13_1C,Pre,1539301204591,Anatomic identification,Locate Structures,143,143,Variant 1,1,Fluxicon Disco
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,20,Remove guidewire,complete,2018-10-17 16:16:00+00:00,R_48_2D,R_48_2D,Pre,1539832239808,Remove guidewire,Install Catheter,963,966,Variant 20,20,Fluxicon Disco
68,20,Check flow and reflow,start,2018-10-17 16:16:00+00:00,R_48_2D,R_48_2D,Pre,1539832243182,Check flow and reflow,Install Catheter,968,994,Variant 20,20,Fluxicon Disco
69,20,Check flow and reflow,complete,2018-10-17 16:16:00+00:00,R_48_2D,R_48_2D,Pre,1539832243182,Check flow and reflow,Install Catheter,968,994,Variant 20,20,Fluxicon Disco
70,20,Check catheter position,start,2018-10-17 16:16:00+00:00,R_48_2D,R_48_2D,Pre,1539832264783,Check catheter position,Install Catheter,1001,1001,Variant 20,20,Fluxicon Disco


In [32]:
# Attribute metadata gathered by the generator fitted on the current `log`.
generator.event_attributes_info_df()

Unnamed: 0,name,level,xes_type,presence,numeralicity,is_trace_identifier
0,concept:name,,XesTypeEnum.STRING,,,
1,lifecycle:transition,,XesTypeEnum.STRING,,,
2,org:resource,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,1.0,,False
3,time:timestamp,,XesTypeEnum.DATE,,,
4,RESOURCE,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,1.0,,False
5,ROUND,AttributeLevelEnum.TRACE,XesTypeEnum.STRING,1.0,,False
6,EVENTID,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
7,ACTIVITY,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
8,STAGE,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False
9,VIDEOSTART,AttributeLevelEnum.EVENT,XesTypeEnum.STRING,1.0,,False


In [33]:
# Cross-validated accuracy per attribute, keyed by attribute name.
score__ccc19 = {
    attribute_name: evaluate_attribute_accuracy_score(attribute_name, log, generator)
    for attribute_name in (
        "EVENTID",
        "ACTIVITY",
        "STAGE",
        "VIDEOSTART",
        "VIDEOEND",
    )
}

score__ccc19

{'EVENTID': np.float64(0.0),
 'ACTIVITY': np.float64(0.5495397230602614),
 'STAGE': np.float64(1.0),
 'VIDEOSTART': np.float64(0.0),
 'VIDEOEND': np.float64(0.008607307702225318)}

# all

In [40]:
# Summary of every dataset: a blank line, the dataset name, then its score
# mapping — emitted in the same order and layout as separate print() calls.
print(
    "\n".join(
        f"\n{dataset_name}\n{dataset_scores}"
        for dataset_name, dataset_scores in (
            ("simple", score__simple),
            ("activitylog_uci_detailed_labour", score__activitylog_uci_detailed_labour),
            ("bpic2020", score__bpic2020),
            ("ccc19", score__ccc19),
            ("env_permit_application_process", score__env_permit_application_process),
        )
    )
)


simple
{'org:resource': np.float64(0.9756383155689455)}

activitylog_uci_detailed_labour
{'Column_4': np.float64(0.9784482091745959)}

bpic2020
{'org:resource': np.float64(1.0), 'org:role': np.float64(0.9999184708756506)}

ccc19
{'EVENTID': np.float64(0.0), 'ACTIVITY': np.float64(0.5495397230602614), 'STAGE': np.float64(1.0), 'VIDEOSTART': np.float64(0.0), 'VIDEOEND': np.float64(0.008607307702225318)}

env_permit_application_process
{'org:resource': np.float64(0.1449233759437841), 'org:group': np.float64(0.857410276798032)}
