In [None]:
"""Provide Funktion for extraction of an jsonocel data from SAP tables dataframes."""
import pandas as pd

# Readable names for the single-letter document category codes that
# add_event_activity_column() looks up (by default from the VBTYP_N column).
VBTYP_DESCRIPTIONS = {
    "A": "Inquiry",
    "B": "Quotation",
    "C": "Order",
    "Q": "WMS transfer order",
    "R": "Goods movement",
    "J": "Delivery",
    "M": "Invoice",
}

def add_event_timestamp_column(df, date_column="ERDAT", time_column="ERZET", replace_columns=True) -> pd.DataFrame:
    """Return a copy of ``df`` with an added ``event_timestamp`` datetime column.

    The values of ``date_column`` and ``time_column`` are cast to ``str``,
    concatenated, and parsed with the format ``%Y%m%d%H%M%S``.

    Args:
        df: Input dataframe; it is copied, not modified.
        date_column: Name of the column holding the date part.
        time_column: Name of the column holding the time part.
        replace_columns: If true, ``date_column`` and ``time_column`` are
            dropped from the returned dataframe.

    Returns:
        A new dataframe containing the ``event_timestamp`` column.

    Raises:
        KeyError: If ``date_column`` or ``time_column`` is not a column of ``df``.
        ValueError: Propagated from ``pd.to_datetime`` when a concatenated
            value does not match the expected format.
    """
    missing = [name for name in (date_column, time_column) if name not in df.columns]
    if missing:
        raise KeyError(f"Column(s) required for the event timestamp not found: {missing}")

    result = df.copy()
    raw_timestamp = result[date_column].astype(str) + result[time_column].astype(str)
    result["event_timestamp"] = pd.to_datetime(raw_timestamp, format="%Y%m%d%H%M%S")
    if replace_columns:
        # Drop the columns that were actually used as input. The previous
        # version always dropped "ERDAT"/"ERZET", which failed (or removed the
        # wrong columns) when custom column names were passed.
        result = result.drop(columns=[date_column, time_column])
    return result
    
def add_event_activity_column(df, activity_column="VBTYP_N", activity_value_prefix="Create ", replace_columns=True) -> pd.DataFrame:
    """Return a copy of ``df`` with an added ``event_activity`` column.

    The values of ``activity_column`` are stripped of every character that is
    not an ASCII letter or digit, looked up in ``VBTYP_DESCRIPTIONS`` and
    prefixed with ``activity_value_prefix``.

    Args:
        df: Input dataframe; it is copied, not modified.
        activity_column: Name of the string column holding the codes to map.
        activity_value_prefix: Text put in front of every mapped description.
        replace_columns: If true, ``activity_column`` is dropped from the
            result. If false, the column is kept in its cleaned-up form.

    Returns:
        A new dataframe containing the ``event_activity`` column.

    Note:
        Codes without an entry in ``VBTYP_DESCRIPTIONS`` map to a missing
        value, which the string cast typically turns into the text ``"nan"``.
    """
    result = df.copy()
    # regex=True must be explicit: since pandas 2.0 the default is a literal
    # match, which would leave the character class pattern without any effect.
    result[activity_column] = result[activity_column].str.replace("[^a-zA-Z0-9]", "", regex=True)
    descriptions = result[activity_column].map(VBTYP_DESCRIPTIONS).astype(str)
    result["event_activity"] = activity_value_prefix + descriptions
    if replace_columns:
        result = result.drop(columns=[activity_column])
    return result

def add_event_id_column(df, event_id_column="event_id") -> pd.DataFrame:
    """Return a copy of ``df`` whose index labels, cast to ``str``, are stored in ``event_id_column``.

    The input dataframe itself is left untouched.
    """
    index_as_text = df.index.astype(str)
    with_ids = df.copy()
    with_ids[event_id_column] = index_as_text
    return with_ids


def get_tables(directory="."):
    """Load every ``*.pkl`` file found in ``directory``.

    Args:
        directory: Folder to scan for pickle files. Defaults to the current
            working directory.

    Returns:
        A dict mapping the table name (the file name up to its first ``"."``)
        to the object returned by ``pd.read_pickle`` for that file
        (expected to be a dataframe). If several files share a table name,
        the one sorted last wins.
    """
    import os

    dfs = {}
    # sorted() makes the load order independent of the file system's listing order.
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".pkl"):
            continue
        table = file.split(".")[0]
        # SECURITY: unpickling can execute arbitrary code; only load trusted files.
        dfs[table] = pd.read_pickle(os.path.join(directory, file))

    return dfs


def extract_jsonocel_data():
    """Build an event log from the SAP tables and export it to ``log.jsonocel``.

    The tables ``VBFA`` and ``VBAK`` are taken from ``get_tables()``. Only the
    events derived from ``VBFA`` are exported; the ``VBFA``/``VBAK`` join is
    computed but not yet part of the exported log. The function writes the
    file through the pm4pymdl OCEL exporter and returns nothing.
    """
    sap_tables = get_tables()

    # 1. One "Create <document type>" event per VBFA row, typed by VBTYP_N.
    vbfa_columns = ['ERDAT', 'ERZET', 'VBELN', 'VBELV', 'VBTYP_N', 'VBTYP_V']  # todo: remove/move to constants
    vbfa_events = add_event_timestamp_column(sap_tables["VBFA"][vbfa_columns])
    vbfa_events = add_event_activity_column(
        vbfa_events,
        activity_column="VBTYP_N",
        activity_value_prefix="Create ",
        replace_columns=False,
    )

    # 2. Edge-case events with additional information from VBAK.
    vbak_columns = ['VBELN', 'ERDAT', 'ERZET']  # todo: remove/move to constants
    vbak_events = add_event_timestamp_column(sap_tables["VBAK"][vbak_columns])
    # Join each VBFA row to the VBAK row whose VBELN equals the VBFA row's
    # VBELV, then use the VBAK timestamp and the VBTYP_V code for the event.
    vbfa_with_vbak = vbfa_events.merge(
        vbak_events,
        left_on="VBELV",
        right_on="VBELN",
        how="inner",
        suffixes=("_VBFA", "_VBAK"),
    )
    vbfa_with_vbak = vbfa_with_vbak.rename(columns={"event_timestamp_VBAK": "event_timestamp"})
    # Computed for the planned combined log; not exported yet (see step 5).
    vbak_log = add_event_activity_column(
        vbfa_with_vbak,
        activity_column="VBTYP_V",
        activity_value_prefix="Create ",
        replace_columns=False,
    )

    # 3. Add relations between events from 2. (tbd)

    # 4. Optional: process changes made on documents, recorded in CDHDR and CDPOS (tbd)

    # 5. Generate jsonocel. Only the VBFA events are exported for now;
    #    concatenating them with the VBAK-based events is not enabled yet.
    event_log = add_event_id_column(vbfa_events.sort_values("event_timestamp"))

    from pm4pymdl.objects.ocel.exporter import exporter as ocel_exporter
    ocel_exporter.apply(event_log, "log.jsonocel")
    

In [None]:
# Run the extraction; this writes "log.jsonocel", which a later cell reads back.
extract_jsonocel_data()

In [None]:
# Discover and display an object-centric directly-follows graph for an existing OCEL file.
import pm4py


# NOTE(review): this file is not written by any code visible here; confirm where it comes from.
path = "p2p-normal.jsonocel"
ocel = pm4py.read_ocel(path)
ocdfg = pm4py.discover_ocdfg(ocel)
# View the graph as PNG with the performance annotation, aggregated by median.
pm4py.view_ocdfg(ocdfg, format="png", annotation="performance", performance_aggregation="median")


In [None]:
# Discover an object-centric Petri net from `ocel` (bound in an earlier cell) and view it as PNG.
model = pm4py.discover_oc_petri_net(ocel)
pm4py.view_ocpn(model, format="png")

In [None]:
# Same graph discovery as for the earlier file, now for the log written by extract_jsonocel_data().
import pm4py


path = "log.jsonocel"
# Rebinds `ocel` and `ocdfg`; cells run afterwards see the extracted log.
ocel = pm4py.read_ocel(path)
ocdfg = pm4py.discover_ocdfg(ocel)
# View the graph as PNG with the performance annotation, aggregated by median.
pm4py.view_ocdfg(ocdfg, format="png", annotation="performance", performance_aggregation="median")


In [None]:
# Discover an object-centric Petri net from the current `ocel` (bound in an earlier cell) and view it as PNG.
model = pm4py.discover_oc_petri_net(ocel)
pm4py.view_ocpn(model, format="png")