In [29]:
from pathlib import Path

# Resolve the event log relative to the current user's home directory instead
# of one specific account's absolute path, so the cell can run on other
# machines that keep the file in ~/Downloads.
OCEL_PATH = Path.home() / "Downloads" / "age_of_empires_ocel2.json"
ocel = pm4py.read_ocel2_json(str(OCEL_PATH))



In [30]:
from math import ceil
from typing import Literal, Optional, Tuple

import pandas as pd
from pandas.core.frame import DataFrame

from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel


class OcelEntity(BaseModel):
    # A single entity returned by get_paginated_dataframe: one table row
    # together with its attribute values and the ids of related entities.
    id: str  # value of the row's `from_field` column
    timestamp: Optional[datetime] = None  # row's "ocel:timestamp" if present, else None
    attributes: Dict[str, Any]  # column name -> value for the row's attribute columns
    relations: Dict[str, List[str]]  # relation key -> ids taken from the relation table's `to_field`


class PaginatedResponse(BaseModel):
    # One page of entities plus the paging metadata describing the full table.
    page: int  # requested page number (get_paginated_dataframe computes offsets as 1-based)
    page_size: int  # maximum number of items per page
    total_pages: int  # ceil(total_items / page_size)
    total_items: int  # row count of the full, unpaginated table
    items: List[OcelEntity]  # entities that fall on this page


def get_sorted_table(
    dataframe: DataFrame,
    type_field: str,
    type_value: str,
    sort_by: Optional[Tuple[str, Literal["asc", "desc"]]] = None,
):
    """Select the rows of one type and optionally sort them.

    Args:
        dataframe: Source table; it is not modified.
        type_field: Name of the column that holds each row's type.
        type_value: Only rows whose ``type_field`` equals this value are kept.
        sort_by: Optional ``(column, direction)`` pair. The result is sorted
            ascending when ``direction`` is ``"asc"`` and descending for any
            other value. When omitted, rows keep their original order.

    Returns:
        A new DataFrame (a copy) containing the matching rows.
    """
    matches_type = dataframe[type_field] == type_value
    subset = dataframe.loc[matches_type].copy()

    # Nothing to sort by: hand back the filtered copy in its original order.
    if not sort_by:
        return subset

    sort_column = sort_by[0]
    direction = sort_by[1]
    return subset.sort_values(by=sort_column, ascending=(direction == "asc"))


def get_paginated_dataframe(
    df: DataFrame,
    non_attribute_fields: list[str],
    page: int,
    page_size: int,
    relation_table: DataFrame,
    from_field: str,
    to_field: str,
) -> PaginatedResponse:
    """Return one page of ``df`` as ``OcelEntity`` items with their relations.

    Args:
        df: Table to paginate; it must contain the ``from_field`` column.
        non_attribute_fields: Columns that are excluded from each item's
            ``attributes`` dict.
        page: 1-based page number.
        page_size: Number of rows per page.
        relation_table: Table containing ``from_field`` and, for pages that
            have relations, ``to_field``, ``"ocel:qualifier"`` and
            ``"ocel:type"``.
        from_field: Column holding the entity id; used as the item id and as
            the key that links ``df`` to ``relation_table``.
        to_field: Column of ``relation_table`` holding the related ids.

    Returns:
        A ``PaginatedResponse`` with the paging metadata and the entities on
        the requested page. A page past the end of the table yields an empty
        ``items`` list.

    Raises:
        ValueError: If ``page`` or ``page_size`` is smaller than 1.
    """
    # Reject values that would otherwise divide by zero or produce negative
    # offsets and page counts.
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    start = (page - 1) * page_size
    end = start + page_size
    paginated_df = df.iloc[start:end].copy()
    total_items = len(df)
    total_pages = ceil(total_items / page_size)

    # A page past the end (or an empty table) has nothing to convert. Returning
    # here also keeps the all-NaN column drop below from removing every column,
    # `from_field` included, of a zero-row frame.
    if len(paginated_df) == 0:
        return PaginatedResponse(
            page=page,
            page_size=page_size,
            total_pages=total_pages,
            total_items=total_items,
            items=[],
        )

    # Only consider relations for this page
    related = relation_table[relation_table[from_field].isin(paginated_df[from_field])]

    # Drop non-informative columns
    paginated_df = paginated_df.dropna(axis=1, how="all")

    # Build attribute dict excluding non-attribute fields
    columns_to_drop = [
        col for col in non_attribute_fields if col in paginated_df.columns
    ]
    attribute_data = paginated_df.drop(columns=columns_to_drop)

    if attribute_data.shape[1] == 0:
        paginated_df["attributes"] = [{} for _ in range(len(paginated_df))]
    else:
        paginated_df["attributes"] = attribute_data.to_dict(orient="records")

    if len(related) == 0:
        # No entity on this page has a relation: skip the pivot and give every
        # row an empty relations dict.
        merged = paginated_df
        merged["relations"] = [{} for _ in range(len(merged))]
    else:
        # Pivot relation data: one row per entity, one column per
        # (qualifier, type) combination, each cell holding the related ids.
        # NOTE(review): pivoting on two columns gives two-level column labels,
        # so the per-row dicts built below are keyed by (qualifier, type)
        # tuples and `relations` has more column levels than `paginated_df`
        # when merged. `OcelEntity.relations` declares `str` keys, and recent
        # pandas releases appear to reject merges across differing column
        # levels — confirm the intended key format before relying on this path.
        relations = related.pivot_table(
            index=from_field,
            columns=["ocel:qualifier", "ocel:type"],
            values=to_field,
            aggfunc=lambda x: list(x),
        ).reset_index()

        # Bundle relation columns into one 'relations' dict
        relations["relations"] = relations.drop(columns=[from_field]).to_dict(
            orient="records"
        )

        relations = relations[[from_field, "relations"]]

        # Merge with relation info
        merged = pd.merge(paginated_df, relations, on=from_field, how="left")

        # Rows without any relation come back as NaN after the left merge, and
        # combinations an entity lacks are NaN inside its dict; normalise both
        # to empty containers.
        merged["relations"] = merged["relations"].apply(
            lambda r: {
                k: v if isinstance(v, list) else []
                for k, v in (r if isinstance(r, dict) else {}).items()
            }
        )

    # Convert rows to OcelEntity objects
    items = [
        OcelEntity(
            id=row[from_field],  # type:ignore
            timestamp=row.get("ocel:timestamp"),
            attributes=row["attributes"],  # type:ignore
            relations=row["relations"],  # type:ignore
        )
        for _, row in merged.iterrows()
    ]

    return PaginatedResponse(
        page=page,
        page_size=page_size,
        total_pages=total_pages,
        total_items=total_items,
        items=items,
    )


In [40]:
import numpy as np
# Work on a copy so the qualifier back-fill in the next cell does not modify
# the relations table stored on `ocel`.
relation = ocel.relations.copy() 

In [41]:
# Back-fill the qualifier column: wherever it is an empty string or null, use
# the value of the object-type column instead; other rows keep their qualifier.
relation[ocel.qualifier] = np.where(
    (relation[ocel.qualifier] == "") | relation[ocel.qualifier].isna(),
    relation[ocel.object_type_column],
    relation[ocel.qualifier],
)

In [42]:
# Inspect the relation table after the qualifier back-fill.
relation

Unnamed: 0,ocel:eid,ocel:activity,ocel:timestamp,ocel:oid,ocel:type,ocel:qualifier
0,e_M227028371_4,Create Initial Scout Cavalry,2023-04-17 00:32:49+00:00,P767395,Player,Player
1,e_M227028371_4,Create Initial Scout Cavalry,2023-04-17 00:32:49+00:00,S227028371_767395,Session,Session
2,e_M227028371_4,Create Initial Scout Cavalry,2023-04-17 00:32:49+00:00,M227028371,Match,Match
3,e_M227028371_9,Create Initial Scout Cavalry,2023-04-17 00:32:49+00:00,S227028371_1472826,Session,Session
4,e_M227028371_9,Create Initial Scout Cavalry,2023-04-17 00:32:49+00:00,M227028371,Match,Match
...,...,...,...,...,...,...
10387379,e_M278253992_1642,Set Gather Point None,2023-12-14 06:42:42+00:00,Stable_M278253992_2,Stable,Stable
10387380,e_M278253992_1642,Set Gather Point None,2023-12-14 06:42:42+00:00,M278253992,Match,Match
10387381,e_M278253992_1642,Set Gather Point None,2023-12-14 06:42:42+00:00,Stable_M278253992_7,Stable,Stable
10387382,e_M278253992_1642,Set Gather Point None,2023-12-14 06:42:42+00:00,P10917498,Player,Player


In [43]:
# Inspect the object-to-object table: source id, target id and qualifier.
ocel.o2o

Unnamed: 0,ocel:oid,ocel:oid_2,ocel:qualifier
0,Archery_Range_M271214755_1,M271214755,
1,Archery_Range_M271214755_1,S271214755_1872133,
2,Archery_Range_M271214755_2,M271214755,
3,Archery_Range_M271214755_2,S271214755_10531210,
4,Archery_Range_M276726136_1,M276726136,
...,...,...,...
2541710,Watch_Tower_M276357728_1,S276357728_2145654,
2541711,Watch_Tower_M272745911_1,M272745911,
2541712,Watch_Tower_M272745911_1,S272745911_2867633,
2541713,Watch_Tower_M277676020_1,M277676020,


In [44]:
# Look up the type of each o2o target by left-joining the objects table on the
# target id. Both frames have an "ocel:oid" column, so the result carries
# "ocel:oid_x" (o2o source) and "ocel:oid_y" (matched object id).
pd.merge(ocel.o2o, ocel.objects, left_on="ocel:oid_2" , right_on=ocel.object_id_column, how="left")

Unnamed: 0,ocel:oid_x,ocel:oid_2,ocel:qualifier,ocel:oid_y,ocel:type
0,Archery_Range_M271214755_1,M271214755,,M271214755,Match
1,Archery_Range_M271214755_1,S271214755_1872133,,S271214755_1872133,Session
2,Archery_Range_M271214755_2,M271214755,,M271214755,Match
3,Archery_Range_M271214755_2,S271214755_10531210,,S271214755_10531210,Session
4,Archery_Range_M276726136_1,M276726136,,M276726136,Match
...,...,...,...,...,...
2541710,Watch_Tower_M276357728_1,S276357728_2145654,,S276357728_2145654,Session
2541711,Watch_Tower_M272745911_1,M272745911,,M272745911,Match
2541712,Watch_Tower_M272745911_1,S272745911_2867633,,S272745911_2867633,Session
2541713,Watch_Tower_M277676020_1,M277676020,,M277676020,Match
