In [28]:
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
from copy import deepcopy

import numpy as np
import pandas as pd
from ordinor.utils.validation import check_convert_input_log

from ordinor.io import read_disco_csv
from ordinor.execution_context.rule_based import ODTMiner


In [29]:
# Name of the event log to analyse. It selects the CSV file below and is
# compared again by the column-renaming cell further down.
log = 'bpic15'
# Path of the preprocessed log, relative to the notebook's working directory.
fn = f'data/processed/{log}.csv'
# Event log as a DataFrame; this is the frame handed to the miner later on.
df = pd.read_csv(fn)
# Disabled alternative: load the same file under the name `el` and preview it.
#el = pd.read_csv("./data/processed/bpic15.csv")
#el.head()

In [30]:
#test = check_convert_input_log(el) --> use this to debug when the column names do not match the required naming

In [33]:
# Only the "bpic15_amended_typed" log needs its columns renamed; every other
# log is left untouched.
if log == "bpic15_amended_typed":
    df = df.rename(
        columns={
            # Resource-related
            "activityNameEN": "activity label",
            # CT-related
            'case:last_phase': 'ct:last_phase',
            # AT-related
            "action_code": "concept:name",
            # Additional
            'case:parts': 'case_parts',
        }
    )

In [32]:
# specification (BPIC15): every type-defining attribute is categorical and is
# assigned to one dimension, listed below as (attribute, dimension) pairs.
spec = {
    'type_def_attrs': {
        attr_name: {'attr_type': 'categorical', 'attr_dim': dimension}
        for attr_name, dimension in (
            ('ct:permit_type', 'CT'),
            ('at:phase', 'AT'),
            ('tt:weekday', 'TT'),
            ('tt:ampm', 'TT'),
        )
    }
}

# Build the miner on the loaded event log with the specification above.
miner = ODTMiner(df, spec, max_height=12, trace_history=True)

DataMissingError: 
            One or more of the required event attributes (case id,
            activity label, timestamp, resource) are not found:
            expected dataframe columns ["case:concept:name",
            "concept:name", "time:timestamp", "org:resource"].

            Are there missing or duplicate data attributes in the
            original data?
            

In [None]:
def addDimensions_1(df, miner, type_def_attrs=None):
    """Add one ``RES_<attr>`` column per rule attribute plus a combined ``RES_CO``.

    The atomic rules of all leaves in ``miner._leaves`` (AT, CT and TT rules
    alike; dimensions are not distinguished) are collected, de-duplicated and
    grouped by attribute. For every attribute, each event is labelled with the
    value set of the first rule that contains the event's value; the set is
    rendered as its values sorted and joined by ``"-"``. Events whose value is
    covered by no rule are left without a label.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log to label. It is copied, so the caller's frame is not
        modified. Every attribute that carries a rule must be a column of it.
    miner : object
        Must expose ``_leaves``, a mapping whose values have ``at_rule``,
        ``ct_rule`` and ``tt_rule``, each with an ``ars`` list of atomic rules
        (``attr``, ``attr_vals``, ``is_same_attr`` and ``==`` are used).
    type_def_attrs : mapping, optional
        Attribute names that always get a ``RES_`` column, even when no rule
        was learned on them. Defaults to the notebook-level
        ``spec["type_def_attrs"]``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the ``RES_<attr>`` columns and ``RES_CO``, the
        ``" | "``-joined string form of every column whose name starts with
        ``RES_`` (in column order).

    Raises
    ------
    ValueError
        If a rule refers to an attribute that is not a column of ``df``.
    """
    prefix = "RES_"
    combined_col = prefix + "CO"

    if type_def_attrs is None:
        # Backward-compatible default: fall back to the notebook's global spec.
        type_def_attrs = spec["type_def_attrs"]

    def values_as_label(attr_vals):
        # attr_vals is an unordered set: sort so the label is reproducible,
        # and stringify so non-string values can be joined as well.
        return "-".join(sorted(str(value) for value in attr_vals))

    # Collect every distinct atomic rule from all leaves. A list (not a set)
    # is used because only equality of atomic rules is relied upon.
    unique_rules = []
    for node in miner._leaves.values():
        for typed_rule in (node.at_rule, node.ct_rule, node.tt_rule):
            for atomic_rule in typed_rule.ars:
                if atomic_rule not in unique_rules:
                    unique_rules.append(atomic_rule)

    # Group the rules by attribute. The grouping goes through a list of groups
    # first, because a dict key only holds the attribute name and would not
    # allow using AtomicRule.is_same_attr; this reduces problems in the
    # unlikely case that two columns share a name.
    groups = []
    for atomic_rule in unique_rules:
        for group in groups:
            # All rules of a group share the attribute, so compare with the first.
            if atomic_rule.is_same_attr(group[0]):
                group.append(atomic_rule)
                break
        else:
            groups.append([atomic_rule])
    rules_by_attr = {group[0].attr: group for group in groups}

    out = df.copy()

    # Initialise all result columns with None so that attributes without any
    # learned rule still get a (missing-valued) column.
    for attr_name in type_def_attrs:
        out[prefix + attr_name] = None

    # Add the columns that represent the learned rules.
    for key, rules in rules_by_attr.items():
        if not key:
            # Rules without an attribute name have no column to label.
            continue
        if key not in out.columns:
            raise ValueError(f"{key=} not found in dataframe")
        # value -> label of the first rule containing that value
        label_of_value = {}
        for atomic_rule in rules:
            label = values_as_label(atomic_rule.attr_vals)
            for value in atomic_rule.attr_vals:
                label_of_value.setdefault(value, label)
        out[prefix + str(key)] = out[key].apply(lambda value: label_of_value.get(value))

    # Combine all result columns into the CO column. An already present
    # RES_CO column is replaced instead of being appended to itself.
    result_cols = [
        col for col in out.columns
        if isinstance(col, str) and col.startswith(prefix) and col != combined_col
    ]
    if result_cols:
        combined = out[result_cols[0]].astype(str)
        for col in result_cols[1:]:
            combined = combined + " | " + out[col].astype(str)
        out[combined_col] = combined

    return out

In [None]:
def addDimensions_2(df,miner):
    """Label events with the AT, CT and TT rule they satisfy, plus a combined rule.

    The rules come from ``miner._parse_rules_from_leaves(miner._leaves)``. For
    each dimension in turn (AT, then CT, then TT) every rule is applied to the
    current frame, the returned rows are tagged with that rule, and the tagged
    pieces are concatenated; the result feeds the next dimension. ``CO_rule``
    is a ``Rule`` built from the concatenated ``ars`` of the three tags.

    Returns the labelled frame with the added columns ``AT_rule``, ``CT_rule``,
    ``TT_rule`` and ``CO_rule``.
    """
    # this method is recommended over 1 and 3

    # get different rules
    ct_rules, at_rules, tt_rules = miner._parse_rules_from_leaves(miner._leaves)

    # label ATs, then CTs, then TTs
    labelled = df
    for column, rules in (
        ("AT_rule", at_rules),
        ("CT_rule", ct_rules),
        ("TT_rule", tt_rules),
    ):
        pieces = []
        for rule in rules:
            matched = rule.apply(labelled, index_only=False)
            pieces.append(matched.assign(**{column: rule}))
        labelled = pd.concat(pieces)

    # add CO_rule as combination of AT CT and TT
    from ordinor.execution_context.rule_based.Rule import Rule

    def combine(row):
        return Rule(row['AT_rule'].ars + row['CT_rule'].ars + row['TT_rule'].ars)

    labelled['CO_rule'] = labelled.apply(combine, axis=1)

    return labelled

In [None]:
def addDimensions_3(df,miner):
    """Label events with the leaf node that contains them and that leaf's rules.

    Each leaf in ``miner._leaves`` lists its events in ``event_ids``; these are
    joined against the index of ``df``. The leaf's ``composite_rule`` becomes
    ``CO_rule`` and the items of ``composite_rule.to_types()`` become, in that
    order, ``CT_rule``, ``AT_rule`` and ``TT_rule``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log to label; it is not modified. Because matching is done on
        the index, this only works on the frame the miner was trained on.
    miner : object
        Must expose ``_leaves``, a mapping whose values have ``event_ids`` and
        ``composite_rule``.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the added columns ``Node``, ``CO_rule``, ``CT_rule``,
        ``AT_rule`` and ``TT_rule``. Events that belong to no leaf have
        missing values (NaN) in all five columns.
    """
    # this method only works on the df where the miner was trained on (works by joining on index)
    label_columns = ["Node", "CO_rule", "CT_rule", "AT_rule", "TT_rule"]

    per_leaf = []
    for node in miner._leaves.values():
        event_ids = list(node.event_ids)
        if not event_ids:
            # A leaf without events contributes no rows.
            continue
        # Evaluate the rules once per leaf instead of once per event row.
        composite = node.composite_rule
        labels = (node, composite, *composite.to_types())
        leaf_frame = {"ind": event_ids}
        for column, value in zip(label_columns, labels):
            leaf_frame[column] = [value] * len(event_ids)
        per_leaf.append(pd.DataFrame(leaf_frame))

    if per_leaf:
        labels_by_event = pd.concat(per_leaf).set_index("ind")
    else:
        # No leaf holds any event: join against an empty frame so the label
        # columns still exist (all missing).
        labels_by_event = pd.DataFrame(columns=label_columns)

    return df.join(labels_by_event)

In [None]:
# Reload the event log and rename its columns to the names the miner expects
# ("case:concept:name", "concept:name", "time:timestamp", "org:resource").
df = pd.read_csv("./data/processed/bpic15.csv")
# Rename the frame that was just loaded. The previous `el.rename(...)` referred
# to a name that is never defined in this notebook and discarded the loaded frame.
df = df.rename(columns={
            # Resource-related
            'Case ID' : 'case:concept:name',
            'Complete Timestamp': 'time:timestamp',
            "Resource": "org:resource",
            "action_code": "concept:name",
        })
#df = df[df['case:concept:name'] != 4020737]
# Label every event with its AT/CT/TT/CO rule and persist the result.
df = addDimensions_2(df,miner)
df.to_csv('output.csv', index=False)
df.head()



Unnamed: 0.1,Unnamed: 0,case:concept:name,activityNameEN,org:resource,time:timestamp,ct:last_phase,case_parts,concept:name,r:municipality,ct:permit_type,at:phase,tt:month,tt:day,tt:weekday,tt:ampm,AT_rule,CT_rule,TT_rule,CO_rule
13044,16631,3462821,enter senddate continuation,560589,2011-06-21 22:00:00+00:00,Procedure afgebroken,Bouw,01_HOOFD_600_2,muni-1,Bouw,01_HOOFD_6,Jun,Day_21,Tue,PM,(`at:phase` ELEMENT OF {'01_HOOFD_6'}),T(NULL),"(`tt:weekday` ELEMENT OF {'Mon', 'Sun', 'Thu',...",(T(NULL)) AND (`at:phase` ELEMENT OF {'01_HOOF...
13055,16648,3462821,send continuation letter,560589,2011-06-22 09:52:04+00:00,Procedure afgebroken,Bouw,01_HOOFD_600_1,muni-1,Bouw,01_HOOFD_6,Jun,Day_22,Wed,AM,(`at:phase` ELEMENT OF {'01_HOOFD_6'}),T(NULL),"(`tt:weekday` ELEMENT OF {'Mon', 'Sun', 'Thu',...",(T(NULL)) AND (`at:phase` ELEMENT OF {'01_HOOF...
60981,81947,3657921,enter senddate continuation,560458,2011-05-02 22:00:00+00:00,Zaak afgehandeld,Bouw,01_HOOFD_600_2,muni-2,Bouw,01_HOOFD_6,May,Day_2,Mon,PM,(`at:phase` ELEMENT OF {'01_HOOFD_6'}),T(NULL),"(`tt:weekday` ELEMENT OF {'Mon', 'Sun', 'Thu',...",(T(NULL)) AND (`at:phase` ELEMENT OF {'01_HOOF...
60984,81950,3657921,send continuation letter,560458,2011-05-03 15:34:09+00:00,Zaak afgehandeld,Bouw,01_HOOFD_600_1,muni-2,Bouw,01_HOOFD_6,May,Day_3,Tue,PM,(`at:phase` ELEMENT OF {'01_HOOFD_6'}),T(NULL),"(`tt:weekday` ELEMENT OF {'Mon', 'Sun', 'Thu',...",(T(NULL)) AND (`at:phase` ELEMENT OF {'01_HOOF...
73727,99138,3197901,enter senddate continuation,2013365,2011-03-15 23:00:00+00:00,Advies bekend,Bouw,01_HOOFD_600_2,muni-3,Bouw,01_HOOFD_6,Mar,Day_15,Tue,PM,(`at:phase` ELEMENT OF {'01_HOOFD_6'}),T(NULL),"(`tt:weekday` ELEMENT OF {'Mon', 'Sun', 'Thu',...",(T(NULL)) AND (`at:phase` ELEMENT OF {'01_HOOF...
