In [None]:
import sys
sys.path.append('../src')
from utils import EventStream
from cache_policy_buffers import CachePolicy
from priority_policy_buffers import PrioPolicyOrder, PPBObjectsPerObjectType
from model_buffers import TotemBuffer
from model_builder_totem import TotemModel, get_totem_accuracy, visualize_totem_overlap, get_totem_avg_scores

import matplotlib as mpl
import matplotlib.cm as cm
import numpy as np
from pathlib import Path

### Loading `SmallExample.json` stream and preparing visualization

In [9]:
# Parse the example event stream from disk
event_stream = EventStream('../data/SmallExample.json', o2o_has_time=True)

# Give every object type its own color, evenly spaced over the jet colormap.
# Sorting the types first keeps the type-to-color assignment in a fixed order.
ots = sorted(event_stream.object_types)
ot_rgb_colors = cm.jet(np.linspace(0, 1, num=len(ots)))
ot_to_rgb_color = {ot: rgb for ot, rgb in zip(ots, ot_rgb_colors)}
ot_to_hex_color = {ot: mpl.colors.rgb2hex(ot_to_rgb_color[ot]) for ot in ots}

Parsing ../data/SmallExample.json...
# of (removed) E2O target objects w/o type: 0
# of (removed) O2O target objects w/o type: 0
# events:			5
# object updates:		0
# O2O relations:		1
# E2O-derived O2O relations:	11
Enriching enabled: False
Finished parsing ../data/SmallExample.json in 0.00 min.


### Discovery, visualization and evaluation of TOTeM

In [None]:
# Discover an online TOTeM model whose buffers (size 50 each) fit the example log fully
totem_buf = TotemBuffer(50, 50, 50, CachePolicy.FIFO, pp_buf=PPBObjectsPerObjectType(PrioPolicyOrder.MAX))
totem_buf.process_stream(event_stream.stream)

# Show the TOTeM buffers as they are once the whole stream has been processed.
# The priority policy is maintained as an extra buffer, but it is not needed here
# because the model buffers are never full.
print(totem_buf)

# Build the online model from the buffers, then load the offline model from XML and draw it
totem_model = TotemModel(totem_buf)
totem_model_offl = TotemModel('../data/SmallExample.xml')
totem_model_offl.visualize(
    Path('../figures/SmallExample'),
    'totem_offl.pdf',
    ot_to_hex_color
)

../data/SmallExample.xml
Coupled removal for buffered TOTeM model: False
FIFO TOTeM TR buffer characteristics:
 - buffer size: 50
 - max counter: 10000
 - object type: --
+----+-------+----------------+---------------------------+---------------------------+
|    | oid   | object type    | first seen                | last seen                 |
|----+-------+----------------+---------------------------+---------------------------|
|  0 | c_1   | Customer       | 2025-01-01 00:00:00+00:00 | 2025-01-01 00:00:00+00:00 |
|  1 | o_1   | Order          | 2025-01-01 00:00:00+00:00 | 2025-01-04 00:00:00+00:00 |
|  2 | i_1   | Item           | 2025-01-01 00:00:00+00:00 | 2025-01-03 00:00:00+00:00 |
|  3 | i_2   | Item           | 2025-01-01 00:00:00+00:00 | 2025-01-02 00:00:00+00:00 |
|  4 | p_1   | Parcel         | 2025-01-04 00:00:00+00:00 | 2025-01-04 00:00:00+00:00 |
|  5 | l_1   | Shipping label | 2025-01-05 00:00:00+00:00 | 2025-01-05 00:00:00+00:00 |
+----+-------+----------------+------

In [11]:
# Print evaluation metrics for discovered TOTeM model
def print_totem_scores(onl_model : TotemModel, offl_model : TotemModel) -> None:
    """Print the total and the average evaluation scores of an online TOTeM model.

    Both score dictionaries are obtained by comparing the two models via
    `get_totem_accuracy` and `get_totem_avg_scores` (offline model passed first),
    and are written to stdout as one `key:<TAB>value` line per entry.

    Args:
        onl_model: Online TOTeM model to evaluate.
        offl_model: Offline TOTeM model the online model is compared against.
    """
    def _print_section(header: str, scores: dict) -> None:
        # One titled section: header line(s) followed by one line per score
        print(header)
        for name, score in scores.items():
            print(f'{name}:\t{score}')

    # Each score dict is computed right before its section is printed, so the
    # order of calls and output matches a plain sequential implementation.
    _print_section(
        'Total evaluation scores\n-----------------------',
        get_totem_accuracy(offl_model, onl_model)
    )
    _print_section(
        '\nAverage evaluation scores\n-------------------------',
        get_totem_avg_scores(offl_model, onl_model)
    )

In [12]:
# Evaluate the online model whose buffers are large enough to fit the entire SmallExample log
# (compared against the offline model; the recorded output below shows every score at 1.0)
print_totem_scores(totem_model, totem_model_offl)

Total evaluation scores
-----------------------
node recall:	1.0
node accuracy:	1.0
node precision:	1.0
arc recall:	1.0
arc accuracy:	1.0
arc precision:	1.0
TR accuracy:	1.0
EC accuracy:	1.0
LC accuracy:	1.0

Average evaluation scores
-------------------------
recall:	1.0
precision:	1.0
accuracy:	1.0


In [16]:
# Visualize the effect of model buffers that are too small to hold the whole log
totem_buf_too_small = TotemBuffer(3, 3, 3, CachePolicy.FIFO)
totem_buf_too_small.process_stream(event_stream.stream)
totem_model_too_small = TotemModel(totem_buf_too_small)
visualize_totem_overlap(
    totem_model_offl,
    totem_model_too_small,
    Path('../figures/SmallExample'),
    'totem_too_small_overlap.pdf',
    ot_to_hex_color
)

# Evaluate the online model whose buffers are too small to fully capture SmallExample
print_totem_scores(totem_model_too_small, totem_model_offl)


Total evaluation scores
-----------------------
node recall:	0.6
node accuracy:	0.6
node precision:	1.0
arc recall:	0.4
arc accuracy:	0.76
arc precision:	1.0
TR accuracy:	0.65
EC accuracy:	0.95
LC accuracy:	1.0

Average evaluation scores
-------------------------
recall:	0.5
precision:	1.0
accuracy:	0.6799999999999999
