## Impact of Declarative Constraint Violations on the Outcome

### Importing a log (P2P Process BPIC'19)

In [1]:
import time
from tqdm import tqdm
import pm4py

import matplotlib.pyplot as plt
from collections import Counter
import warnings
warnings.filterwarnings("ignore")
from process_atoms.mine.declare.enums.mp_constants import Template
from process_atoms.processatoms import ProcessAtoms
from process_atoms.mine.declare.regexchecker import RegexChecker
import pandas as pd
import seaborn as sns

from process_atoms.models.event_log import EventLog, EventLogSchemaTypes
from process_atoms.models.column_types import (
    CaseID,
    Categorical,
    EventType,
    EventTime,
    Continuous,
)

def penalty(duration, slack, amount, percentage):
    penalty = (duration / slack) * percentage * amount
    return penalty

schema = EventLogSchemaTypes(
    # schema for case-level attributes
    cases={
        "Case ID": CaseID,
        "(case) Company": Categorical,
        "(case) Document Type": Categorical,
        "(case) GR-Based Inv. Verif.": Categorical,
        "(case) Goods Receipt": Categorical,
        "(case) Item": Categorical,
        "(case) Item Category": Categorical,
        "(case) Item Type": Categorical,
        "(case) Name": Categorical,
        "(case) Purch. Doc. Category name": Categorical,
        "(case) Purchasing Document": Categorical,
        "(case) Source": Categorical,
        "(case) Spend area text": Categorical,
        "(case) Spend classification text": Categorical,
        "(case) Sub spend area text": Categorical,
        "(case) Vendor": Categorical,
        "Cumulative net worth (EUR)": Continuous,
    },
    events={
        "Case ID": CaseID,
        "Activity": EventType,
        "Complete Timestamp": EventTime,
        "Resource": Categorical,
    },
)

LOG_PATH = "data/BPI_Challenge_2019.csv"
PROCESS = "BPIC_19"
# read the full log
log = pd.read_csv(LOG_PATH, parse_dates=["Complete Timestamp"])
# split into case and event attributes
df_cases = log[list(schema.cases.keys())].drop_duplicates(subset="Case ID")
df_events = log[list(schema.events.keys())]

# create event log object
event_log = EventLog(df_cases, df_events, schema)

### Create the process model by mining declarative constraints from the log 

In [2]:
considered_templates = [Template.RESPONSE.templ_str, Template.PRECEDENCE.templ_str]
api = ProcessAtoms()

start_time = time.time()
atoms = api.mine_atoms_from_log(
    PROCESS,
    event_log,
    considered_templates,
    min_support=(0.1 * len(event_log)) / len(event_log),
    local=True,
    consider_vacuity=False,
)
end_time = time.time()

delta = end_time - start_time

print(f"Atom mining took {delta} seconds.")
def atoms_to_df(atoms):
    records = [
        {
            "type": atom.atom_type,
            "op_0": atom.operands[0],
            "op_1": atom.operands[1] if len(atom.operands) > 0 else "",
            "support": atom.support,
            "confidence": atom.attributes["confidence"],
        }
        for atom in atoms
    ]
    return pd.DataFrame.from_records(records).sort_values(
        by="confidence", ascending=False
    )
atoms_df = atoms_to_df(atoms)


100%|██████████| 27/27 [00:01<00:00, 17.29it/s]

Atom mining took 1.843095064163208 seconds.





In [4]:
atoms_df

Unnamed: 0,type,op_0,op_1,support,confidence
0,Precedence,Vendor creates invoice,Remove Payment Block,0.146884,1.0
1,Precedence,Record Goods Receipt,Remove Payment Block,0.146884,1.0
2,Precedence,Record Goods Receipt,Record Invoice Receipt,0.732973,1.0
3,Precedence,Create Purchase Order Item,Clear Invoice,0.637334,1.0
4,Precedence,Create Purchase Order Item,Remove Payment Block,0.146884,1.0
5,Precedence,Create Purchase Order Item,Record Invoice Receipt,0.732973,1.0
6,Precedence,Record Goods Receipt,Clear Invoice,0.637268,0.999897
7,Precedence,Vendor creates invoice,Clear Invoice,0.637202,0.999793
8,Precedence,Record Invoice Receipt,Clear Invoice,0.636741,0.99907
9,Precedence,Record Invoice Receipt,Remove Payment Block,0.146687,0.998655


In [5]:
dev=[]
for i in atoms_df.index:
    dev.append(atoms_df["type"][i]+'_'+atoms_df["op_0"][i]+'_'+atoms_df["op_1"][i])
dev

['Precedence_Vendor creates invoice_Remove Payment Block',
 'Precedence_Record Goods Receipt_Remove Payment Block',
 'Precedence_Record Goods Receipt_Record Invoice Receipt',
 'Precedence_Create Purchase Order Item_Clear Invoice',
 'Precedence_Create Purchase Order Item_Remove Payment Block',
 'Precedence_Create Purchase Order Item_Record Invoice Receipt',
 'Precedence_Record Goods Receipt_Clear Invoice',
 'Precedence_Vendor creates invoice_Clear Invoice',
 'Precedence_Record Invoice Receipt_Clear Invoice',
 'Precedence_Record Invoice Receipt_Remove Payment Block',
 'Response_Vendor creates invoice_Record Invoice Receipt',
 'Precedence_Create Purchase Order Item_Vendor creates invoice',
 'Precedence_Vendor creates invoice_Record Invoice Receipt',
 'Response_Create Purchase Order Item_Record Goods Receipt',
 'Response_Vendor creates invoice_Clear Invoice',
 'Response_Remove Payment Block_Clear Invoice',
 'Response_Record Invoice Receipt_Clear Invoice',
 'Precedence_Create Purchase Order

In [6]:
len(dev)

46

In [10]:
con = 1
the_atom = None
for atom in atoms:
    if atom.atom_type == atoms_df['type'][con] and atom.operands == [
        atoms_df['op_0'][con],
        atoms_df['op_1'][con],
    ]:
        the_atom = atom
checker = RegexChecker(PROCESS, event_log)
activities = checker.log.unique_activities()
activity_map = checker._map_activities_to_letters(activities)
variant_frame = checker.create_variant_frame_from_log(activity_map)
variant_frame["sat"] = checker.compute_satisfaction(
    the_atom, variant_frame, activity_map, consider_vacuity=False
)
case_ids = set(val for cases in variant_frame["case_ids"].values for val in cases)

In [11]:

collect_data=pd.DataFrame(data=0, index=range(len(event_log)), columns=dev)
collect_data['case_id']=None
collect_data['outcome']=None
for i, d in enumerate(dev):
    the_atom = None
    for atom in atoms:
        if atom.atom_type == atoms_df['type'][i] and atom.operands == [
            atoms_df['op_0'][i],
            atoms_df['op_1'][i],
        ]:
            the_atom = atom
    checker = RegexChecker(PROCESS, event_log)
    activities = checker.log.unique_activities()
    activity_map = checker._map_activities_to_letters(activities)
    variant_frame = checker.create_variant_frame_from_log(activity_map)
    variant_frame["sat"] = checker.compute_satisfaction(
        the_atom, variant_frame, activity_map, consider_vacuity=False
    )
    if i==0:
        collect_data['case_id'] = list(val for cases in variant_frame["case_ids"].values for val in cases)
    for j in range(len(variant_frame)):
        for case_id in variant_frame["case_ids"][j]:
            ids = collect_data.index[collect_data['case_id'] == case_id]
            if variant_frame["sat"][j] == 1:
                collect_data[d][ids] = 0
            else:
                collect_data[d][ids] = 1

In [12]:
collect_data['outcome']=None
for case in range(len(event_log)):
    ids = collect_data.index[collect_data['case_id'] == event_log[case].attributes["Case ID"]]
    checker_1=False
    checker_2=False
    events=event_log[case].get_activity_sequence()
    for event in events:
        if event=='Record Goods Receipt':
            checker_1=True
        if event == 'Clear Invoice' and checker_1:
            checker_2=True
    collect_data['outcome'][case] = 1 if checker_2 else 0
collect_data['outcome']=collect_data['outcome'].astype(float)

In [11]:
dev

['Precedence_Vendor creates invoice_Remove Payment Block',
 'Precedence_Record Goods Receipt_Remove Payment Block',
 'Precedence_Record Goods Receipt_Record Invoice Receipt',
 'Precedence_Create Purchase Order Item_Clear Invoice',
 'Precedence_Create Purchase Order Item_Remove Payment Block',
 'Precedence_Create Purchase Order Item_Record Invoice Receipt',
 'Precedence_Record Goods Receipt_Clear Invoice',
 'Precedence_Vendor creates invoice_Clear Invoice',
 'Precedence_Record Invoice Receipt_Clear Invoice',
 'Precedence_Record Invoice Receipt_Remove Payment Block',
 'Response_Vendor creates invoice_Record Invoice Receipt',
 'Precedence_Create Purchase Order Item_Vendor creates invoice',
 'Precedence_Vendor creates invoice_Record Invoice Receipt',
 'Response_Create Purchase Order Item_Record Goods Receipt',
 'Response_Vendor creates invoice_Clear Invoice',
 'Response_Remove Payment Block_Clear Invoice',
 'Response_Record Invoice Receipt_Clear Invoice',
 'Precedence_Create Purchase Order

### Quantify the impact of each deviation based on cost function 

In [12]:
from dowhy import CausalModel as dowhymodel
from dowhy import gcm
import networkx as nx

cates={}
for d in dev:
    model=dowhymodel(
            data = collect_data,
            treatment=d,
            outcome='outcome',
            graph=gcm.StructuralCausalModel(nx.DiGraph([(d, 'outcome')])),
            instruments=list(collect_data.drop(columns=d).columns)
            )
    identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
    cates[d]=model.estimate_effect(identified_estimand,
                            method_name="backdoor.linear_regression",
                            test_significance=True).value

In [13]:
cates

{'Precedence_Vendor creates invoice_Remove Payment Block': -0.020443541797643405,
 'Precedence_Record Goods Receipt_Remove Payment Block': -0.020443541797643405,
 'Precedence_Record Goods Receipt_Record Invoice Receipt': -0.19501560883501162,
 'Precedence_Create Purchase Order Item_Clear Invoice': -0.22962668257573965,
 'Precedence_Create Purchase Order Item_Remove Payment Block': -0.020443541797643405,
 'Precedence_Create Purchase Order Item_Record Invoice Receipt': -0.19501560883501162,
 'Precedence_Record Goods Receipt_Clear Invoice': -0.2297903021242802,
 'Precedence_Vendor creates invoice_Clear Invoice': -0.2296689807401675,
 'Precedence_Record Invoice Receipt_Clear Invoice': -0.2293900391654939,
 'Precedence_Record Invoice Receipt_Remove Payment Block': -0.020419929648364255,
 'Response_Vendor creates invoice_Record Invoice Receipt': -0.16053151522974607,
 'Precedence_Create Purchase Order Item_Vendor creates invoice': -0.1628659025797241,
 'Precedence_Vendor creates invoice_Reco

### Defining the severity function



In [19]:
def severity(value):
    if value == 0:
        return "neutral"
    elif -0.05 <= value < 0:
        return "low negative"
    elif -0.25 <= value < -0.05:
        return "medium negative"
    elif -0.5 <= value < -0.25:
        return "high negative"
    elif value < -0.5:
        return "critical negative"
    elif 0 < value <= 0.05:
        return "low positive"
    elif 0.05 < value <= 0.25:
        return "medium positive"
    elif 0.25 < value <= 0.5:
        return "high positive"
    elif value > 0.5:
        return "critical positive"
    else:
        return "undefined"


severities = {}
for deviation, penalty in cates.items():
    severities[deviation] = severity(penalty)

In [20]:
counter = Counter(severities.values())

print(counter)

Counter({'medium negative': 21, 'low negative': 10, 'medium positive': 8, 'low positive': 6, 'high positive': 1})


In [15]:
severities

{'Precedence_Vendor creates invoice_Remove Payment Block': 'low positive',
 'Precedence_Record Goods Receipt_Remove Payment Block': 'low positive',
 'Precedence_Record Goods Receipt_Record Invoice Receipt': 'medium positive',
 'Precedence_Create Purchase Order Item_Clear Invoice': 'medium positive',
 'Precedence_Create Purchase Order Item_Remove Payment Block': 'low positive',
 'Precedence_Create Purchase Order Item_Record Invoice Receipt': 'medium positive',
 'Precedence_Record Goods Receipt_Clear Invoice': 'medium positive',
 'Precedence_Vendor creates invoice_Clear Invoice': 'medium positive',
 'Precedence_Record Invoice Receipt_Clear Invoice': 'medium positive',
 'Precedence_Record Invoice Receipt_Remove Payment Block': 'low positive',
 'Response_Vendor creates invoice_Record Invoice Receipt': 'medium positive',
 'Precedence_Create Purchase Order Item_Vendor creates invoice': 'medium positive',
 'Precedence_Vendor creates invoice_Record Invoice Receipt': 'medium positive',
 'Respon