In [None]:
import pandas as pd
from pathlib import Path
from pprint import pprint
from ocpa.algo.util.process_executions.factory import CONN_COMP, LEAD_TYPE
from ocpa.algo.util.variants.factory import ONE_PHASE, TWO_PHASE

In [None]:
# Example logs published at https://ocel-standard.org
# Each entry names the log file and, where needed, the object type used as
# "leading type" when the log is imported.
DATASET_GITHUB = dict(dataset="github_pm4py.jsonocel", leading_type="case:concept:name")
DATASET_O2C = dict(dataset="o2c.jsonocel", leading_type="BELNR")  # SAP
DATASET_P2P = dict(dataset="p2p.jsonocel", leading_type="BELNR")  # SAP
DATASET_TRANSFER = dict(dataset="transfer_order.jsonocel", leading_type="MATNR")  # SAP
DATASET_RECRUITING = dict(dataset="recruiting.jsonocel", leading_type="applications")
# NOTE(review): "xxx" looks like a placeholder leading type -- confirm before use.
DATASET_ORDER = dict(dataset="running-example.jsonocel", leading_type="xxx")
DATASET_WINDOWS = dict(dataset="windows_events.jsonocel", leading_type="eventIdentifier")

# This log is imported with connected-components extraction, so no leading type is set.
DATASET_OCPA_P2P = dict(dataset="p2p-normal.jsonocel", execution_extraction=CONN_COMP)

# Example dataset from Celonis
# NOTE(review): "xxx" looks like a placeholder leading type -- confirm before use.
DATASET_CELONIS = dict(dataset="celonis.jsonocel", leading_type="xxx")

In [None]:
import ocpa
from importlib import reload
# import ocpa.objects.log.importer.ocel.factory as ocel_import_factory

In [None]:
# Re-execute the top-level `ocpa` package (useful while editing ocpa locally).
# importlib.reload is not recursive: already-imported submodules are left as they are.
reload(ocpa)
# Import the importer submodule explicitly. Plain attribute access on `ocpa`
# only resolves if some other import happened to load this submodule first.
import ocpa.objects.log.importer.ocel.factory
ocel_import_factory = ocpa.objects.log.importer.ocel.factory

In [None]:
# Pick the dataset to analyse and import it with ocpa.
dataset = DATASET_OCPA_P2P
filename = Path("../data/datasets") / dataset["dataset"]

# Import parameters, see https://ocpa.readthedocs.io/en/latest/eventlogmanagement.html
# Anything the dataset entry does not specify falls back to the defaults below.
import_parameters = {
    "execution_extraction": dataset.get("execution_extraction", LEAD_TYPE),
    "leading_type": dataset.get("leading_type"),
    "variant_calculation": dataset.get("variant_calculation", TWO_PHASE),
    "exact_variant_calculation": dataset.get("exact_variant_calculation", False),
}
ocel = ocel_import_factory.apply(filename, parameters=import_parameters)

In [None]:
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout, to_agraph

In [None]:
# A.nodes()[0].attr

In [None]:
# Render the graph of the first variant with Graphviz: nodes are labelled with
# the event activity, edges with the number of shared objects per object type.
G, objects = ocel.variant_graphs[ocel.variants[0]]
# Annotate a copy so the graph stored inside `ocel` is not modified and the
# cell stays idempotent when re-run.
G = G.copy()
eids = list(G.nodes)
log = ocel.log.log
variant_log = log[log.event_id.isin(eids)]

# For every event: the (object type, object id) pairs of this variant that the
# event refers to. Assumes the event table is indexed by event id (`.loc[eid, ot]`).
event_objects = {
    eid: {(ot, oid) for ot, oid in objects if oid in variant_log.loc[eid, ot]}
    for eid in eids
}
# For every edge: the objects shared by its two endpoint events.
edge_objects = {(i, j): event_objects[i] & event_objects[j] for i, j in G.edges}

for i, node in G.nodes.items():
    node["label"] = log.loc[i, "event_activity"]
for (i, j), edge in G.edges.items():
    # Count the shared objects per object type.
    obj_counts = {}
    for ot, _ in edge_objects[(i, j)]:
        obj_counts[ot] = obj_counts.get(ot, 0) + 1
    # Sort by object type so the label text is deterministic across runs
    # (set iteration order is not).
    edge["label"] = ", ".join(f"{count}x {ot}" for ot, count in sorted(obj_counts.items()))

A = to_agraph(G)
A.graph_attr["rankdir"] = "TB"
A.node_attr["shape"] = "box"
A.layout('dot')
# To export instead of displaying inline: A.draw("G.png")
A

In [None]:
import pm4py

dataset = DATASET_RECRUITING
filename = str(Path("../data/datasets") / dataset["dataset"])
ocel = pm4py.read_ocel(filename)

In [None]:
pm4py.ocel.ocel_object_type_activities(ocel)

In [None]:
# Show the event table: the DataFrame other cells query via `event_id` /
# `event_activity` and the per-object-type columns.
# NOTE(review): this requires `ocel` to be the ocpa log, not a pm4py OCEL.
ocel.log.log

In [None]:
# Object types of the log (also used below as column names of the event table).
ocel.object_types

## Object types per activity

In [None]:
# For every activity, count how many objects of each object type are referenced
# by the events of that activity (summed over all of its events).
act_ot_counts = {}
for act in ocel.obj.activities:
    per_type_counts = {}
    for ot in ocel.object_types:
        n_related = 0
        for eid in ocel.obj.act_events(act):
            for oid in ocel.obj.eve_objects(eid):
                if ocel.obj.raw.objects[oid].type == ot:
                    n_related += 1
        per_type_counts[f"num_{ot}"] = n_related
    act_ot_counts[act] = per_type_counts

# One row per activity: its event frequency plus the per-object-type counts.
act_rows = []
for act in ocel.obj.activities:
    act_rows.append({
        "activity": act,
        "frequency": len(ocel.obj.act_events(act)),
        **act_ot_counts[act],
    })
act_stats = pd.DataFrame(act_rows)
act_stats

## Object type statistics (per event)

In [None]:
# Number of related objects per event, for every object type.
# `DataFrame.applymap` is deprecated since pandas 2.1 (renamed to `DataFrame.map`);
# column-wise `Series.map` gives the same result on old and new pandas versions.
numobjs_ev = ocel.log.log[ocel.object_types].apply(lambda col: col.map(len))
# `describe()` counts events; replace that with the number of objects per type.
stats = numobjs_ev.describe().transpose().drop(columns=["count"])
stats["count"] = [len(ocel.obj.ot_objects(ot)) for ot in stats.index]
# An object type with exactly one object in every event could act as a case id.
stats["caseID"] = (stats["min"] == 1) & (stats["max"] == 1)
stats

In [None]:
# Report how many process executions the log contains.
print(f"{len(ocel.process_executions)} Process executions")

In [None]:
# Size (number of events) of every process execution, followed by summary statistics.
pex_event_counts = pd.Series(list(map(len, ocel.process_executions)))
pex_event_counts.describe()

In [None]:
import matplotlib.pyplot as plt

# Distribution of process execution sizes. Uses the explicit Axes interface and
# labels both axes so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(pex_event_counts, bins=100)
ax.set_title("Number of events per process execution")
ax.set_xlabel("Events per process execution")
ax.set_ylabel("Number of process executions")
plt.show()

# Process Execution Graphs

In [None]:
# Graph of the process execution at index 0; the inspection cells below work on this `G`.
G = ocel.get_process_execution_graph(0)

In [None]:
import networkx as nx

In [None]:
# One process execution graph per process execution, in execution order.
graphs = list(map(ocel.get_process_execution_graph, range(len(ocel.process_executions))))

In [None]:
# Size of every process execution graph: execution index, node count, edge count.
gstats = pd.DataFrame(
    [
        {"i": idx, "nodes": len(graph.nodes), "edges": len(graph.edges)}
        for idx, graph in enumerate(graphs)
    ]
)

In [None]:
# Summary statistics of node and edge counts over all process execution graphs.
gstats.describe()

In [None]:
# Executions ordered by graph size (ascending number of nodes).
gstats.sort_values(by="nodes")

In [None]:
# Node ids of `G`; they are used as event ids when indexing the event table below.
G.nodes

In [None]:
# Check whether the process execution graph contains a cycle.
# `nx.find_cycle` raises `NetworkXNoCycle` when the graph is acyclic, which
# would abort "Run All"; report "no cycle" as an empty edge list instead.
try:
    cycle_edges = nx.find_cycle(G)
except nx.NetworkXNoCycle:
    cycle_edges = []
cycle_edges

In [None]:
# Activity of the event table row labelled 0 (assumes the table is indexed by event id -- TODO confirm).
ocel.log.log.loc[0, "event_activity"]

In [None]:
# Attach the activity name as "label" attribute to every node of `G`
# (node ids are looked up as row labels of the event table).
activity_by_event = ocel.log.log["event_activity"]
for event_id in G.nodes:
    G.nodes[event_id]["label"] = activity_by_event.loc[event_id]

In [None]:
# Attribute dict of node 1 (contains the "label" set by the labelling cell, if that cell was run).
G.nodes[1]

In [None]:
# Draw the subgraph induced by four hand-picked nodes.
# NOTE(review): the node ids are hard-coded and dataset-specific -- confirm they exist in `G`.
nx.draw_networkx(G.subgraph([1, 16, 21, 30]))

## Object type statistics (per process execution)

In [None]:
# Per process execution: number of objects of each object type, then summary
# statistics with one row per object type.
numobjs_ex_rows = [
    {t: sum(1 for ot, _ in objs if ot == t) for t in ocel.object_types}
    for objs in ocel.process_execution_objects
]
numobjs_ex = pd.DataFrame(numobjs_ex_rows)
numobjs_ex.describe().transpose()

In [None]:
# Objects ((object type, object id) pairs) of the process execution at index 200.
# NOTE(review): hard-coded index -- presumably fails on logs with fewer than 201 executions.
ocel.process_execution_objects[200]

In [None]:
# Events of the process execution at index 1.
ocel.process_executions[1]

In [None]:
# Overview of the extracted process executions,
# see https://ocpa.readthedocs.io/en/latest/eventlogmanagement.html
num_exec = len(ocel.process_executions)

print(f"Object types: {ocel.object_types}")
print(f"Number of process executions: {num_exec}")
print(f"Events of the first process execution: {ocel.process_executions[0]}")
print(f"Objects of the first process execution: {ocel.process_execution_objects[0]}")
print(f"Process execution graph of the first execution: {ocel.get_process_execution_graph(0)}")
print(f"Process execution of the first event with event id 0: {ocel.process_execution_mappings[0]}")

# One row per process execution: its index, its number of events and its
# number of objects per object type.
exec_info_data = [
    {
        "index": idx,
        "num_events": len(exec_events),
        **{f"num_{ot}": sum(1 for t, _ in exec_objects if t == ot) for ot in ocel.object_types},
    }
    for idx, (exec_events, exec_objects) in enumerate(
        zip(ocel.process_executions, ocel.process_execution_objects)
    )
]

exec_info = pd.DataFrame(exec_info_data)
display(exec_info)
display(exec_info.describe())

In [None]:
import networkx as nx

# Quick plain drawing of the first process execution graph via `nx.draw`.
nx.draw(ocel.get_process_execution_graph(0))