In [1]:
import os
from itertools import combinations

import networkx as nx
import pandas as pd
import pm4py

# Location of the OCEL 2.0 SQLite file. Set the OCEL_PATH environment variable to load a
# different file instead of editing this machine-specific default.
OCEL_PATH = os.environ.get("OCEL_PATH", "/home/grkmr/data/ocels/ContainerLogistics.sqlite")
ocel = pm4py.read_ocel2_sqlite(OCEL_PATH)

def compute_event_object_graph(ocel: pm4py.OCEL) -> nx.DiGraph:
    """
    Build a directed event-object graph (EOG) from an OCEL via per-object directly-follows.

    Every event becomes a node. For each object, the events related to it are ordered by
    timestamp and an edge is added from each event to the next event of that same object.

    :param ocel: PM4Py OCEL object
    :return: networkx.DiGraph whose nodes are event ids and whose edges are the
        per-object directly-follows pairs
    """
    event_id_col = ocel.event_id_column
    object_id_col = ocel.object_id_column

    event_object_graph = nx.DiGraph()

    # Step 1: every event is a node, even if no object relation mentions it.
    event_object_graph.add_nodes_from(ocel.events[event_id_col].to_list())

    # Step 2: order the relations by time. A stable sort keeps the original row order for
    # rows sharing a timestamp, so the resulting edges are deterministic. Rows repeating
    # the same (event, object) pair are collapsed; left in, they would make an event
    # "directly follow" itself and add a self-loop.
    ordered_relations = (
        ocel.relations
        .sort_values(ocel.event_timestamp, kind="mergesort")
        .drop_duplicates(subset=[event_id_col, object_id_col])
    )

    # Step 3: per object, pair each event with its direct successor.
    directly_follows_pairs = (
        ordered_relations
        .groupby(object_id_col)[event_id_col]
        .apply(lambda event_ids: list(zip(event_ids.iloc[:-1], event_ids.iloc[1:])))
        .explode()
        .dropna()  # objects with a single event yield an empty list -> NaN after explode
        .to_list()
    )

    # Step 4: add the event-to-event edges.
    event_object_graph.add_edges_from(directly_follows_pairs)

    return event_object_graph

def compute_process_executions_connected_components(ocel: pm4py.OCEL):
    """
    Split the event-object graph of ``ocel`` into weakly connected components.

    :param ocel: PM4Py OCEL object
    :return: list of sets of event ids, one set per weakly connected component,
        ordered from the largest component to the smallest
    """
    event_graph = compute_event_object_graph(ocel)
    components = list(nx.weakly_connected_components(event_graph))
    # list.sort is stable, so equally sized components keep their discovery order.
    components.sort(key=len, reverse=True)
    return components


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pandas_utils.dataframe_column_string_to_datetime(df[col], format=timest_format, utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = strpfromiso.fix_dataframe_column(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  object_changes[internal_index] = object_changes.index

In [2]:
def leading_type_process_executions(ocel: pm4py.OCEL, leading_type: str):
    """
    Extract one process execution per object of the leading type.

    An undirected object graph is built in which two objects are connected when they
    share an event. Starting from each leading object, the graph is explored level by
    level. Every object type is bound to the first level at which it is encountered;
    objects of that type met at any other level are ignored and not expanded.

    :param ocel: PM4Py OCEL object
    :param leading_type: object type whose objects seed the executions
    :return: list with one ``(event_ids, object_ids)`` tuple of sets per leading object;
        a leading object that has no event relation yields ``(set(), {object_id})``
    """
    # NOTE(review): a later cell defines a function with this same name; once that cell
    # has run, it shadows this definition.
    event_id_col = ocel.event_id_column
    object_id_col = ocel.object_id_column
    object_type_col = ocel.object_type_column
    relations = ocel.relations

    # Nodes: one per related object, carrying its list of events and its type.
    object_attributes = (
        relations.groupby(object_id_col)
        .agg({event_id_col: list, object_type_col: "first"})
        .to_dict("index")
    )
    # Edges: every pair of objects that appear together in an event.
    co_occurrences = (
        relations.groupby(event_id_col)[object_id_col]
        .apply(lambda object_ids: list(combinations(object_ids, 2)))
        .explode()
        .dropna()  # events with fewer than two objects contribute no pair
        .to_list()
    )

    object_graph = nx.Graph()
    object_graph.add_nodes_from(object_attributes.items())
    object_graph.add_edges_from(co_occurrences)

    cases = []
    # Each level binds at least one new object type, so the number of object types
    # bounds how deep the exploration can usefully go.
    search_depth = len(pm4py.ocel_get_object_types(ocel))

    all_objects = ocel.objects
    leading_object_ids = all_objects[all_objects[object_type_col] == leading_type][object_id_col]

    for object_id in leading_object_ids:
        relevant_objects = {object_id}

        # The graph is built from the relations only, so a leading object without any
        # event is not a node; looking it up would raise KeyError.
        if object_id not in object_graph:
            cases.append((set(), relevant_objects))
            continue

        events = set(object_graph.nodes[object_id][event_id_col])
        next_level_objects = list(object_graph.neighbors(object_id))
        visited_object_types = {leading_type: 0}

        for level in range(1, search_depth):
            to_be_next_level_objects = []

            for current_object_id in next_level_objects:
                current_object_type = object_graph.nodes[current_object_id][object_type_col]

                if current_object_type not in visited_object_types:
                    visited_object_types[current_object_type] = level
                elif visited_object_types[current_object_type] != level:
                    # This type belongs to another level: skip without expanding.
                    continue

                to_be_next_level_objects.extend(object_graph.neighbors(current_object_id))
                relevant_objects.add(current_object_id)
                events.update(object_graph.nodes[current_object_id][event_id_col])

            # Deduplicate so each candidate is examined once per level.
            next_level_objects = list(set(to_be_next_level_objects))

        cases.append((events, relevant_objects))
    return cases

In [3]:
from collections import deque

def leading_type_process_executions(ocel: pm4py.OCEL, leading_type: str):
    """
    Extract one process execution per object of the leading type (queue-based traversal).

    An undirected object graph is built in which two objects are connected when they
    share an event. From each leading object a breadth-first traversal collects objects
    and their events. Every object type is bound to the first level at which it is
    encountered; objects of that type reached at any other level are neither collected
    nor expanded.

    :param ocel: PM4Py OCEL object
    :param leading_type: object type whose objects seed the executions
    :return: list with one ``(event_ids, object_ids)`` tuple of sets per leading object;
        a leading object that has no event relation yields ``(set(), {object_id})``
    """
    object_id_col = ocel.object_id_column
    event_id_col = ocel.event_id_column
    object_type_col = ocel.object_type_column

    # Build object graph
    relations = ocel.relations

    # Nodes: one per related object, carrying its list of events and its type.
    object_info = (
        relations.groupby(object_id_col)
        .agg({event_id_col: list, object_type_col: "first"})
        .to_dict("index")
    )
    object_graph = nx.Graph()
    object_graph.add_nodes_from(object_info.items())

    # Edges: every pair of distinct objects that appear together in an event.
    for _, object_ids in relations.groupby(event_id_col)[object_id_col]:
        object_graph.add_edges_from(combinations(object_ids.unique(), 2))

    # Prepare for traversal
    cases = []
    all_object_types = pm4py.ocel_get_object_types(ocel)
    max_level = len(all_object_types)

    leading_objects = ocel.objects[ocel.objects[object_type_col] == leading_type][object_id_col]

    for object_id in leading_objects:
        visited_objects = {object_id}

        # The graph is built from the relations only, so a leading object without any
        # event is not a node; looking it up would raise KeyError.
        if object_id not in object_graph:
            cases.append((set(), visited_objects))
            continue

        events = set(object_graph.nodes[object_id][event_id_col])
        visited_object_types = {leading_type: 0}
        queue = deque([(object_id, 0)])

        while queue:
            current_object_id, level = queue.popleft()
            if level >= max_level:
                continue

            next_level = level + 1
            for neighbor in object_graph.neighbors(current_object_id):
                if neighbor in visited_objects:
                    continue
                neighbor_type = object_graph.nodes[neighbor][object_type_col]
                # A type already bound to a different level is skipped entirely.
                if visited_object_types.get(neighbor_type, next_level) != next_level:
                    continue

                visited_objects.add(neighbor)
                visited_object_types.setdefault(neighbor_type, next_level)
                queue.append((neighbor, next_level))
                events.update(object_graph.nodes[neighbor][event_id_col])

        cases.append((events, visited_objects))

    return cases


In [4]:
# Column-name aliases, reused by the following cell.
event_id_col = ocel.event_id_column
object_id_col = ocel.object_id_column
object_type_col = ocel.object_type_column

# One row per event: the set of object types and the list of object ids related to it.
test = (
    ocel.relations
    .groupby(event_id_col)[[object_type_col, object_id_col]]
    .agg({object_type_col: set, object_id_col: list})
    .reset_index()
)

In [5]:
# Map each event id to a (list of object ids, set of object types) pair.
event_object_map = {
    event_id: (object_ids, object_types)
    for event_id, object_ids, object_types in zip(
        test[event_id_col], test[object_id_col], test[object_type_col]
    )
}

In [8]:
# Show the object types of the log; one of them is used as the leading type below.
pm4py.ocel_get_object_types(ocel)

['Container', 'Customer Order', 'Transport Document', 'Vehicle']

In [9]:
# Build one process execution per 'Customer Order' object.
process_executions = leading_type_process_executions(ocel,'Customer Order')

In [12]:
# Inspect the first execution: a tuple of (set of event ids, set of object ids).
process_executions[0]

({'book_vehs_td1',
  'book_vehs_td6',
  'book_vehs_td9',
  'create_td6',
  'depart_vh2',
  'depart_vh4',
  'drive_term_cr1',
  'drive_term_cr21',
  'drive_term_cr22',
  'drive_term_cr23',
  'drive_term_cr24',
  'drive_term_cr33',
  'drive_term_cr4',
  'drive_term_cr5',
  'load_truck_hu128',
  'load_truck_hu129',
  'load_truck_hu13',
  'load_truck_hu130',
  'load_truck_hu131',
  'load_truck_hu132',
  'load_truck_hu133',
  'load_truck_hu14',
  'load_truck_hu140',
  'load_truck_hu141',
  'load_truck_hu142',
  'load_truck_hu143',
  'load_truck_hu144',
  'load_truck_hu145',
  'load_truck_hu15',
  'load_truck_hu16',
  'load_truck_hu17',
  'load_truck_hu18',
  'load_truck_hu56',
  'load_truck_hu57',
  'load_truck_hu58',
  'load_truck_hu59',
  'load_truck_hu60',
  'load_truck_hu61',
  'load_truck_hu62',
  'load_truck_hu63',
  'load_truck_hu64',
  'load_truck_hu65',
  'load_truck_hu66',
  'load_truck_hu67',
  'load_truck_hu68',
  'load_truck_hu69',
  'load_truck_hu70',
  'load_truck_hu71',
  'l