In [1]:
import numpy as np
import pandas as pd

from deepalign import Dataset
from deepalign import fs
from deepalign.alignments import ALIGNERS

from IPython.display import display
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 999)

## Setup

Let us first define a `get_model` helper method that retrieves an `Aligner` model from disk and the corresponding `Dataset` instance.

In [2]:
def get_model(aligner, dataset_name, case_attributes=False, event_attributes=False):
    """Load a stored aligner from disk together with the matching `Dataset`.

    Parameters
    ----------
    aligner : str
        Key into `ALIGNERS` (for example `'confnet'` or `'hm'`).
    dataset_name : str
        Name of the event log. It is used to build the `Dataset` and as the
        prefix of the model file name that is searched for.
    case_attributes, event_attributes : bool, optional
        Only honoured when `aligner == 'confnet'`: they become part of the
        model name and are forwarded to both the `Dataset` and the aligner
        constructor. For every other aligner both are treated as disabled.

    Returns
    -------
    tuple
        `(aligner_instance, dataset)`.

    Raises
    ------
    FileNotFoundError
        If none of the files returned by `fs.get_model_files()` contains the
        expected model name.
    """
    is_confnet = aligner == 'confnet'

    # Attribute flags are only meaningful for ConfNet; everything else uses 0/0.
    ea = int(event_attributes) if is_confnet else 0
    ca = int(case_attributes) if is_confnet else 0
    if is_confnet:
        model_name = f'{dataset_name}_{aligner}{ea}{ca}'
    else:
        model_name = f'{dataset_name}_{aligner}'

    dataset = Dataset(dataset_name, use_case_attributes=ca, use_event_attributes=ea)

    # Strip the `_forward` / `_backward` suffixes so that such file pairs collapse
    # into a single entry. Sorting makes the choice below deterministic when more
    # than one stored model matches (a plain set has no defined order).
    models = sorted({f.name.replace('_forward', '').replace('_backward', '')
                     for f in fs.get_model_files()
                     if model_name in f.name})
    if not models:
        raise FileNotFoundError(
            f'No stored model matching "{model_name}" was found in {fs.MODEL_DIR}'
        )

    # Use a separate name for the instance instead of rebinding the `aligner` key.
    if is_confnet:
        model = ALIGNERS[aligner](dataset, use_case_attributes=ca, use_event_attributes=ea)
    else:
        model = ALIGNERS[aligner]()
    model.load(str(fs.MODEL_DIR / models[0]))

    return model, dataset

## Computing alignments with DeepAlign

We will use the `paper-0.3-3` event log as our main example here.

In [3]:
dataset_name = 'paper-0.3-3'

The available aligner key strings are listed below. Among them are `confnet` (DeepAlign), `optimal` (Reference Model), `hm` (Heuristics Miner), and `im` (Inductive Miner).

In [4]:
ALIGNERS.keys()

dict_keys(['alpha', 'alphaplus', 'confnet', 'dfg', 'hm', 'im', 'optimal', 'sm'])

Let us load a ConfNet first.

In [5]:
confnet, dataset = get_model('confnet', dataset_name, True, True)

Now, we can run the DeepAlign algorithm with `confnet.align`. We can control the number of beams with the parameter `k`, the maximum number of steps with `steps`, the maximum deletion size for one step with `delete_max`, and a hot start mode with `hot_start`. The hot start will use the BINet anomaly detection method to automatically finish all beams where no anomaly was found.

In [6]:
alignments, corrected_cases, costs = confnet.align(dataset, k=5, steps=10, delete_max=3, hot_start=True)

Step 1 → 5.525381803512573s (25000, 28) finished=3768
Step 2 ← 2.2669196128845215s (25000, 28) finished=3768
Step 3 → 2.2092976570129395s (25000, 28) finished=3888
Step 4 ← 1.9833369255065918s (25000, 28) finished=4336
Step 5 → 1.2932274341583252s (25000, 28) finished=4830
Step 6 ← 0.3943462371826172s (25000, 28) finished=4864
Step 7 → 0.33541369438171387s (25000, 28) finished=4871
Step 8 ← 0.31231045722961426s (25000, 28) finished=4897
Step 9 → 0.26900696754455566s (25000, 28) finished=4911
Step 10 ← 0.24869203567504883s (25000, 28) finished=4912


Okay, done. Now let us take a look at the found alignments. First, we define a display method for alignments.

In [7]:
def display_alignment(alignment, decode=None):
    """Render a single alignment as a two-row table (log row above model row).

    Parameters
    ----------
    alignment : np.ndarray
        Integer-encoded alignment. Entries equal to `-1` are dropped before
        display; the remaining entries must split evenly into two rows.
    decode : callable, optional
        If given, it is applied to the `(2, n)` integer array and its result
        is displayed instead of the raw integers.

    Returns
    -------
    Whatever `display` returns for the styled table.
    """
    moves = alignment[alignment != -1]
    moves = moves.reshape(2, moves.shape[0] // 2)
    if decode is not None:
        moves = decode(moves)

    styler = pd.DataFrame(moves, index=['Log', 'Model']).style
    # `Styler.hide_index()` was deprecated in pandas 1.4 and removed in 2.0;
    # `Styler.hide(axis='index')` is the replacement. Keep the old call as a
    # fallback so the notebook still runs on older pandas versions.
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()
    return display(styler)

We will need a decode method for the integer encoded event sequences of the `Dataset`. We can create a `decode` method from the `sklearn.preprocessing.LabelEncoder` instances saved in `dataset.encoders`.

In [8]:
# Integer code -> activity label, taken from the fitted encoder of the `name` attribute.
decode = {code: label for code, label in enumerate(dataset.encoders['name'].classes_)}
# -1 (padding) takes over the label originally stored under 0; 0 itself is shown as '»'.
decode.update({-1: decode[0], 0: '»'})
# Wrap the lookup so that it can be applied element-wise to whole arrays.
decode = np.vectorize(decode.get)

Now, we can display alignments from the `alignments` object. `alignments` is a NumPy array with shape `(num_cases, k, 2, max_sequence_len)`.

In [9]:
alignments.shape

(5000, 5, 2, 28)

To display the top-1 alignment for the first case we can use the following.

In [10]:
display_alignment(alignments[0][0], decode=decode)

0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Seems like this was not an anomalous case. Let us check.

In [11]:
dataset.text_labels[0]

'Normal'

Remember, the different anomaly types are as follows.

In [12]:
set(dataset.text_labels)

{'Attribute', 'Early', 'Insert', 'Late', 'Normal', 'Rework', 'SkipSequence'}

Let us take a look at a *Skip* anomaly. This is also the example given in the Evaluation section of the paper.

In [13]:
# Indices of all cases labelled 'SkipSequence'; show the top-1 alignment of the first one.
skip_anomalies = np.flatnonzero(dataset.text_labels == 'SkipSequence')
display_alignment(alignments[skip_anomalies[0], 0], decode=decode)

0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,»,»,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


We can do the same for all anomaly types.

In [14]:
# Iterate in sorted order: a plain set of strings has no stable iteration order
# across kernel sessions, which would shuffle the sections on every re-run.
for anomaly_type in sorted(set(dataset.text_labels)):
    index = np.where(dataset.text_labels == anomaly_type)[0][0] # Select the first one
    print(anomaly_type)
    display_alignment(alignments[index][0], decode=decode)

Early


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Submit,Evaluate,Conclude,»,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Normal


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Late


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,»,»,Experiment,Research Related Work,Develop Method,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,»,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Attribute


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Insert


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Random activity 10,Develop Method,Experiment,Evaluate,Conclude,Random activity 12,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,»,Develop Method,Experiment,Evaluate,Conclude,»,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Rework


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,»,»,■


SkipSequence


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,»,»,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


## Computing Alignments with the Heuristics Miner

Now, let us take a look at the Heuristics Miner using the shorthand `'hm'`.

In [15]:
heuristics_miner, dataset = get_model('hm', dataset_name)

In [16]:
alignments, x, costs = heuristics_miner.align(dataset)

100%|██████████| 651/651 [00:02<00:00, 257.73it/s]


Let us look at the same examples as for the DeepAlign algorithm. We can see that the Heuristics Miner does not always produce the correct alignments.

In [17]:
# Iterate in sorted order: a plain set of strings has no stable iteration order
# across kernel sessions, which would shuffle the sections on every re-run.
for anomaly_type in sorted(set(dataset.text_labels)):
    index = np.where(dataset.text_labels == anomaly_type)[0][0] # Select the first one
    print(anomaly_type)
    display_alignment(alignments[index][0], decode=decode)

Early


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Submit,Evaluate,Conclude,»,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,»,■


Normal


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,»,■


Late


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
▶,Identify Problem,Experiment,Research Related Work,Develop Method,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,»,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,»,■


Attribute


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,»,■


Insert


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Random activity 10,Develop Method,Experiment,Evaluate,Conclude,Random activity 12,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,»,Develop Method,Experiment,Evaluate,Conclude,»,Submit,Review,Minor Revision,Revise,Submit,»,■


Rework


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,»,»,Submit,»,■


SkipSequence


0,1,2,3,4,5,6,7,8,9,10,11
▶,Identify Problem,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,»,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,»,■


## Ground Truth Alignments

We can obtain the ground truth alignments from the `dataset` with the following method.

In [18]:
ground_truth_alignments, ground_truth_costs = dataset.alignments

Let us check the ground truth for the example cases from above.

In [19]:
# Iterate in sorted order: a plain set of strings has no stable iteration order
# across kernel sessions, which would shuffle the sections on every re-run.
for anomaly_type in sorted(set(dataset.text_labels)):
    index = np.where(dataset.text_labels == anomaly_type)[0][0] # Select the first one
    print(anomaly_type)
    display_alignment(ground_truth_alignments[index], decode=decode)

Early


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Submit,Evaluate,Conclude,»,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Normal


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Late


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
▶,Identify Problem,Experiment,Research Related Work,Develop Method,»,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,»,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Attribute


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Insert


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Random activity 10,Develop Method,Experiment,Evaluate,Conclude,Random activity 12,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,»,Develop Method,Experiment,Evaluate,Conclude,»,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Rework


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,»,»,Submit,Final Decision,■


SkipSequence


0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,Identify Problem,»,»,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


By comparing the ground truth to the alignments from the algorithms, we can evaluate the accuracy. This will be shown in the next notebook.

## Alignments from Nothing with ConfNet (Examples from Sec. 5)

As described in the paper, we can also compute alignments from an empty case using the DeepAlign algorithm. To demonstrate this let us load a ConfNet that uses only the case attributes.

In [20]:
confnet, dataset = get_model('confnet', dataset_name, True, False)

In the dataset, we have two case attributes, `decision` and `topic`, based on which the resulting sequence will differ.

In [21]:
dataset.attribute_keys

array(['name', '[Case]_decision', '[Case]_topic'], dtype=object)

In [22]:
print(dataset.encoders['[Case]_decision'].classes_.tolist())
print(dataset.encoders['[Case]_topic'].classes_.tolist())

['Accept', 'Borderline', 'Reject', 'Weak accept', 'Weak reject']
['Engineering', 'Theory']


Let us create an empty case sequence using the start and end symbols and the encode functionality `transform` of the `dataset.encoders`.

In [23]:
def get_empty_case(decision, topic):
    """Build the input for a case that consists only of the start and end symbols.

    Uses the encoders of the notebook-level `dataset`.

    Parameters
    ----------
    decision : str
        Value of the `[Case]_decision` attribute.
    topic : str
        Value of the `[Case]_topic` attribute.

    Returns
    -------
    list
        `[events, decision, topic]`, where `events` is the encoded
        `['▶', '■']` sequence with an extra leading axis and the other two
        entries are the encoded case attribute values.
    """
    encoders = dataset.encoders

    # Encoded start/end markers; the extra leading axis is required by the caller.
    boundary_events = encoders['name'].transform(['▶', '■'])
    case_features = [boundary_events[None, :]]

    # One encoded value per case attribute, in the order decision, topic.
    for key, value in (('[Case]_decision', decision), ('[Case]_topic', topic)):
        case_features.append(encoders[key].transform([value]))

    return case_features

Now, we can use this method to create empty sequences, while setting the case attributes as we wish.

In [24]:
empty_case = get_empty_case('Accept', 'Engineering')
empty_case

[array([[27, 26]]), array([0]), array([0])]

Let us see what the DeepAlign algorithm does. We have to run it for some more steps to create enough events for a complete sequence.

In [25]:
alignments, corrected_cases, costs = confnet.align(empty_case, k=5, steps=50)

Step 1 → 3.2658004760742188s (5, 53) finished=0
Step 2 ← 0.06090092658996582s (5, 53) finished=0
Step 3 → 0.061464548110961914s (5, 53) finished=0
Step 4 ← 0.060236215591430664s (5, 53) finished=0
Step 5 → 0.05981302261352539s (5, 53) finished=0
Step 6 ← 0.06018400192260742s (5, 53) finished=0
Step 7 → 0.06013178825378418s (5, 53) finished=0
Step 8 ← 0.060292959213256836s (5, 53) finished=0
Step 9 → 0.0600590705871582s (5, 53) finished=0
Step 10 ← 0.060143470764160156s (5, 53) finished=0
Step 11 → 0.05980658531188965s (5, 53) finished=0
Step 12 ← 0.05988907814025879s (5, 53) finished=0
Step 13 → 0.06007075309753418s (5, 53) finished=0
Step 14 ← 0.059142112731933594s (5, 53) finished=1
Converged


In [26]:
display_alignment(alignments[0][0], decode=decode)

0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,»,»,»,»,»,»,»,»,»,»,»,»,■
▶,Identify Problem,Research Related Work,Develop Method,Experiment,Evaluate,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Interestingly, DeepAlign creates the correct case given the two case attributes. *Minor Revision* only occurs for `decision == 'Accept'` and `decision == 'Weak accept'`. Similarly, *Develop Method* is related to `topic == 'Engineering'`.

Let us see what happens if we change this around.

In [27]:
empty_case = get_empty_case('Accept', 'Theory')

In [28]:
alignments, corrected_cases, costs = confnet.align(empty_case, k=5, steps=50)

Step 1 → 0.06078147888183594s (5, 53) finished=0
Step 2 ← 0.06042075157165527s (5, 53) finished=0
Step 3 → 0.2031574249267578s (5, 53) finished=0
Step 4 ← 0.06001996994018555s (5, 53) finished=0
Step 5 → 0.05976581573486328s (5, 53) finished=0
Step 6 ← 0.05966496467590332s (5, 53) finished=0
Step 7 → 0.06041312217712402s (5, 53) finished=0
Step 8 ← 0.060509443283081055s (5, 53) finished=0
Step 9 → 0.05970597267150879s (5, 53) finished=0
Step 10 ← 0.06000518798828125s (5, 53) finished=0
Step 11 → 0.06087923049926758s (5, 53) finished=0
Step 12 ← 0.05998420715332031s (5, 53) finished=0
Step 13 → 0.058971405029296875s (5, 53) finished=1
Converged


In [29]:
display_alignment(alignments[0][0], decode=decode)

0,1,2,3,4,5,6,7,8,9,10,11,12,13
▶,»,»,»,»,»,»,»,»,»,»,»,»,■
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Minor Revision,Revise,Submit,Final Decision,■


Now, it correctly generates the *Develop Hypothesis* activity.

What happens if we change the `decision` to something else? Let us find out.

In [30]:
empty_case = get_empty_case('Weak reject', 'Theory')

In [31]:
alignments, corrected_cases, costs = confnet.align(empty_case, k=5, steps=50)

Step 1 → 0.06032443046569824s (5, 53) finished=0
Step 2 ← 0.060350656509399414s (5, 53) finished=0
Step 3 → 0.05985522270202637s (5, 53) finished=0
Step 4 ← 0.05911874771118164s (5, 53) finished=0
Step 5 → 0.06026053428649902s (5, 53) finished=0
Step 6 ← 0.06024932861328125s (5, 53) finished=0
Step 7 → 0.059778451919555664s (5, 53) finished=0
Step 8 ← 0.059566497802734375s (5, 53) finished=0
Step 9 → 0.06102705001831055s (5, 53) finished=0
Step 10 ← 0.06002545356750488s (5, 53) finished=1
Converged


In [32]:
display_alignment(alignments[0][0], decode=decode)

0,1,2,3,4,5,6,7,8,9,10
▶,»,»,»,»,»,»,»,»,»,■
▶,Identify Problem,Research Related Work,Develop Hypothesis,Experiment,Conduct Study,Conclude,Submit,Review,Final Decision,■


The *Minor Revision* is gone.