# Model Comparison

This file contains all comparisons on the model outputs and was developed by Marc C. Hennig (mhennig@hm.edu).

# Environment

## Dependency installation

### A. PIP Dependencies

In [None]:
# Install the ipdb debugger that is imported further below in this notebook
!pip install ipdb
# Record the package versions installed in the runtime in requirements.txt
!pip freeze > requirements.txt

## Dependency Imports

In [None]:
# Python dependencies
import os
import sys
import re
import math
import datetime
import random
import json
import time
import shutil
import warnings
import functools
from pathlib import Path
from typing import List, Tuple, Union, Optional, Literal, Callable, Dict
from collections import namedtuple
from enum import Enum

# Debugging
import ipdb

# Colab dependencies
from google.colab import files, drive, output

# Basic dependencies
import pandas as pd
import numpy as np
import scipy as sp
import statsmodels as sm
import statsmodels.api
import statsmodels.stats
import statsmodels.stats.contingency_tables

# Plotting dependencies
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

# Machine learning dependencies
import sklearn as sl
import sklearn.metrics

## Variables & Global Settings

In [None]:
# Assign a random seed for reproducibility
RANDOM_STATE = 1337

# Seed Python's hash randomisation setting, the stdlib RNG and NumPy's global RNG with the same value
# NOTE(review): PYTHONHASHSEED is presumably only honoured by interpreters started after this point - confirm
os.environ["PYTHONHASHSEED"] = str(RANDOM_STATE)
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

# Show all Pandas columns
pd.set_option("display.max_columns", None)

# Set Matplotlib and Seaborn color scheme
plt.rcParams["image.cmap"] = "Blues"
sns.set_palette("Blues")

# Colab settings
output.enable_custom_widget_manager()

In [None]:
# Google Drive folders
GDRIVE_INPUT_DIR = "/content/drive/My Drive/Colab Notebooks/TGN-AST/Eventlogs"
GDRIVE_OUTPUT_DIR = "/content/drive/My Drive/Colab Notebooks/TGN-AST/Results"

# Local Colab folders
UTIL_DIR = os.path.join(".", "Util")
DATA_DIR = os.path.join(".", "Data")
# Input data, with one sub-folder per BPIC event log plus one for result files
INPUT_DATA_DIR = os.path.join(DATA_DIR, "Input")
INPUT_DATA_BPIC2013_DIR = os.path.join(INPUT_DATA_DIR, "BPIC 2013")
INPUT_DATA_BPIC2014_DIR = os.path.join(INPUT_DATA_DIR, "BPIC 2014")
INPUT_DATA_BPIC2015_DIR = os.path.join(INPUT_DATA_DIR, "BPIC 2015")
INPUT_DATA_RESULT_DIR = os.path.join(INPUT_DATA_DIR, "Results")
INTERIM_DATA_DIR = os.path.join(DATA_DIR, "Interim")
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, "Output")

GRAPHIC_DIR = os.path.join(".", "Graphics")
MODEL_DIR = os.path.join(".", "Models")

# Create the working folders if they do not exist yet. DATA_DIR must be created first because
# mkdir is called without parents=True. The Util and Input folders are not created here.
Path(DATA_DIR).mkdir(exist_ok=True)
Path(INTERIM_DATA_DIR).mkdir(exist_ok=True)
Path(OUTPUT_DATA_DIR).mkdir(exist_ok=True)
Path(GRAPHIC_DIR).mkdir(exist_ok=True)
Path(MODEL_DIR).mkdir(exist_ok=True)

## Common Functions

### Cleaning & Formatting Functions

In [None]:
# Canonical column names of an event log DataFrame
# NOTE(review): these appear to follow the XES standard attribute keys - confirm
EVENTLOG_CASE = "case:concept:name"
EVENTLOG_ACTIVITY = "concept:name"
EVENTLOG_TIMESTAMP = "time:timestamp"
EVENTLOG_GROUP = "org:group"
EVENTLOG_RESOURCE = "org:resource"
EVENTLOG_ROLE = "org:role"
# Column name prefixes marking case-level attributes and prediction labels
EVENTLOG_CASE_PREFIX = "case:"
EVENTLOG_LABEL_PREFIX = "label:"

# Label column names, built from the label prefix
EVENTLOG_LABEL_REM_TIME = f"{EVENTLOG_LABEL_PREFIX}time:timestamp:last"
EVENTLOG_LABEL_NEXT_ACT = f"{EVENTLOG_LABEL_PREFIX}concept:name:next"
EVENTLOG_LABEL_NEXT_TIME = f"{EVENTLOG_LABEL_PREFIX}time:timestamp:next"

# Suffixes for time-derived feature column names
EVENTLOG_FEAT_TIME_OF_YEAR_SUFFIX = ":timeofyear"
EVENTLOG_FEAT_TIME_OF_MONTH_SUFFIX = ":timeofmonth"
EVENTLOG_FEAT_TIME_OF_WEEK_SUFFIX = ":timeofweek"
EVENTLOG_FEAT_TIME_OF_DAY_SUFFIX = ":timeofday"
EVENTLOG_FEAT_TIME_ELAPSED_CYCLE_SUFFIX = ":elapsedcycle"
EVENTLOG_FEAT_TIME_ELAPSED_PREV_SUFFIX = ":elapsedprev"

# Special tokens for padding, missing values and end of case
TOKEN_PADDING = "[PAD]"
TOKEN_PADDING_NUM = 0
TOKEN_NA = "[NA]"
TOKEN_EOC = "[EOC]"

def df_find_case_attributes(df: pd.DataFrame, case_col: str = EVENTLOG_CASE, label_prefix: str = EVENTLOG_LABEL_PREFIX, exclude_labels: bool = False) -> List[str]:
  """
  Identifies and returns the columns of the DataFrame that hold exactly one distinct value within every case ('case attributes'). Missing values count as a distinct value.

  Parameters:
    df (pd.DataFrame): A pandas DataFrame containing the event log data.
    case_col (str, optional): The name of the column in df that represents the case identifier. Defaults to `EVENTLOG_CASE`.
    label_prefix (str, optional): The prefix used to identify label columns within df. Defaults to `EVENTLOG_LABEL_PREFIX`.
    exclude_labels (bool, optional): If True, columns that start with `label_prefix` are excluded from the result. Defaults to False.

  Returns:
    List[str]: The column names that are constant within each case. The case column itself is never part of the result because it is the grouping key.
  """
  # For every column: the largest number of distinct values found in any single case
  max_unique_per_case = df.groupby(case_col).agg('nunique', dropna=False).agg('max', axis='rows')
  attrs = max_unique_per_case[max_unique_per_case == 1].index.to_list()
  if exclude_labels:
    # Resolve the label columns once instead of once per candidate attribute
    labels = set(df_find_labels(df, label_prefix))
    attrs = [attr for attr in attrs if attr not in labels]
  return attrs

def df_find_event_attributes(df: pd.DataFrame, case_col: str = EVENTLOG_CASE, label_prefix: str = EVENTLOG_LABEL_PREFIX, exclude_labels: bool = False) -> List[str]:
  """
  Identifies and returns the columns of the DataFrame whose value varies across the events of at least one case ('event attributes'). Missing values count as a distinct value.

  Parameters:
    df (pd.DataFrame): A pandas DataFrame containing the event log data.
    case_col (str, optional): The name of the column in df that represents the case identifier. Defaults to `EVENTLOG_CASE`.
    label_prefix (str, optional): The prefix used to identify label columns within df. Defaults to `EVENTLOG_LABEL_PREFIX`.
    exclude_labels (bool, optional): If True, columns that start with `label_prefix` are excluded from the result. Defaults to False.

  Returns:
    List[str]: The column names that take more than one distinct value in at least one case.
  """
  # For every column: the largest number of distinct values found in any single case
  max_unique_per_case = df.groupby(case_col).agg('nunique', dropna=False).agg('max', axis='rows')
  attrs = max_unique_per_case[max_unique_per_case > 1].index.to_list()
  if exclude_labels:
    # Resolve the label columns once instead of once per candidate attribute
    labels = set(df_find_labels(df, label_prefix))
    attrs = [attr for attr in attrs if attr not in labels]
  return attrs



def df_find_labels(df: pd.DataFrame, label_prefix: str = EVENTLOG_LABEL_PREFIX) -> List[str]:
  """
  Collects the names of all columns that start with `label_prefix`, in column order.

  Parameters:
    df (pd.DataFrame): The DataFrame whose column names are inspected.
    label_prefix (str, optional): The prefix that marks a column as a label. Defaults to `EVENTLOG_LABEL_PREFIX`.

  Returns:
    List[str]: The names of the label columns.
  """
  label_cols = []
  for name in df.columns:
    if name.startswith(label_prefix):
      label_cols.append(name)
  return label_cols

def df_separate_categoricals(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
  """
  Splits the categorical columns of the DataFrame into ordered and unordered ones.

  Parameters:
    df (pd.DataFrame): The DataFrame whose 'category' dtype columns are inspected.

  Returns:
    Tuple[List[str], List[str]]: The names of the ordered categorical columns followed by the names of the unordered ones, each in column order.
  """
  categorical_cols = df.select_dtypes(include='category').columns
  ordered_cols = [name for name in categorical_cols if df[name].cat.ordered]
  unordered_cols = [name for name in categorical_cols if not df[name].cat.ordered]
  return ordered_cols, unordered_cols

def df_convert_datetimes(df: pd.DataFrame, cols: List[str] = [], dayfirst: bool = False, yearfirst: bool = False, tz: Optional[Union[str, datetime.tzinfo]] = None) -> pd.DataFrame:
  """
  Converts the specified columns of a DataFrame to datetime format in place and applies the requested timezone. Each column is parsed with `pd.to_datetime`; if that raises a ValueError, parsing is retried with `utc=True`.

  Timezone handling after parsing:
    - tz-naive values are localized to `tz` (left naive if `tz` is None).
    - tz-aware values are converted to `tz`; if `tz` is None the timezone information is removed.

  Parameters:
    df (pd.DataFrame): The DataFrame containing columns to be converted to datetime. It is modified in place.
    cols (List[str], optional): The list of column names to convert to datetime. Defaults to an empty list.
    dayfirst (bool, optional): Boolean indicating if the day is the first number in the date string. Defaults to False.
    yearfirst (bool, optional): Boolean indicating if the year is the first number in the date string. Defaults to False.
    tz (Optional[Union[str, datetime.tzinfo]], optional): Target timezone of the converted columns. Defaults to None.

  Returns:
    pd.DataFrame: The same DataFrame with the specified columns converted to datetime format.

  Raises:
    ValueError: If parsing the dates fails even after assuming UTC.
  """
  for col in cols:
    try:
      parsed = pd.to_datetime(df[col], dayfirst=dayfirst, yearfirst=yearfirst)
    except ValueError:
      parsed = pd.to_datetime(df[col], dayfirst=dayfirst, yearfirst=yearfirst, utc=True)

    if tz is not None and parsed.dt.tz is not None:
      # tz_localize raises a TypeError on tz-aware data, so convert instead
      parsed = parsed.dt.tz_convert(tz)
    else:
      parsed = parsed.dt.tz_localize(tz=tz)
    df[col] = parsed
  return df

def df_convert_timedeltas(df: pd.DataFrame, cols: List[str] = [], unit: str = 'nanoseconds') -> pd.DataFrame:
  """
  Replaces the given columns, in place, with their timedelta representation. The existing values are interpreted as amounts of `unit`.

  Parameters:
    df (pd.DataFrame): The DataFrame holding the columns to convert. It is modified in place.
    cols (List[str], optional): Names of the columns to convert. Defaults to an empty list (nothing is converted).
    unit (str, optional): The time unit of the existing values. Defaults to 'nanoseconds'.

  Returns:
    pd.DataFrame: The same DataFrame with the listed columns converted to timedelta.
  """
  for name in cols:
    raw_values = df[name]
    df[name] = pd.to_timedelta(raw_values, unit=unit)
  return df

def df_convert_bools(df: pd.DataFrame, cols: list[str] = [], true_vals: Union[str, List[str]] = [], false_vals: Union[str, List[str]] = []) -> pd.DataFrame:
  """
  Converts the specified columns of a DataFrame, in place, to the nullable 'boolean' dtype. Values from `true_vals` are mapped to `True` and values from `false_vals` to `False`.

  Values that are in neither list (including missing values) are handled as follows:
    - only `true_vals` given: they become `False`
    - only `false_vals` given: they become `True`
    - both or neither given: they become `<NA>`

  Parameters:
    df (pd.DataFrame): The DataFrame containing columns to be converted to boolean. It is modified in place.
    cols (List[str], optional): The list of column names to convert to boolean. Defaults to an empty list.
    true_vals (Union[str, List[str]], optional): Values to be mapped to `True`. Can be a single string or a list of strings. Defaults to an empty list.
    false_vals (Union[str, List[str]], optional): Values to be mapped to `False`. Can be a single string or a list of strings. Defaults to an empty list.

  Returns:
    pd.DataFrame: The same DataFrame with the specified columns converted to boolean format.
  """
  if isinstance(true_vals, str):
    true_vals = [true_vals]
  if isinstance(false_vals, str):
    false_vals = [false_vals]

  value_map = {true_val: True for true_val in true_vals} | {false_val: False for false_val in false_vals}

  # A '__missing__' key in a plain dict is not a default for Series.map (only a dict subclass
  # implementing the __missing__ method is), so the fallback is applied explicitly after mapping.
  if len(true_vals) == 0 and len(false_vals) > 0:
    fallback = True
  elif len(false_vals) == 0 and len(true_vals) > 0:
    fallback = False
  else:
    fallback = None

  for col in cols:
    converted = df[col].map(value_map).astype('boolean')
    if fallback is not None:
      converted = converted.fillna(fallback)
    df[col] = converted

  return df

def df_convert_bool_to_int(df: pd.DataFrame, cols: Optional[Union[str, List[str]]] = None) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which the given columns are cast to the nullable 'Int8' dtype.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    cols (Optional[Union[str, List[str]]], optional): A column name or list of column names to cast. If None, the columns returned by `select_dtypes('boolean')` are used. Defaults to None.

  Returns:
    pd.DataFrame: A copy of `df` with the selected columns cast to 'Int8'.
  """
  result = df.copy()
  if isinstance(cols, str):
    targets = [cols]
  elif cols is None:
    targets = result.select_dtypes('boolean').columns
  else:
    targets = cols

  for name in targets:
    result[name] = result[name].astype('Int8')
  return result

def df_convert_ordered_cat_to_int(df: pd.DataFrame, cols: Optional[Union[str, List[str]]] = None, relative: bool = False) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which categorical columns are replaced by their integer category codes. Missing values stay missing (`<NA>`) instead of receiving the internal code -1.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    cols (Optional[Union[str, List[str]]], optional): A column name or list of column names to convert. If None, all ordered categorical columns are converted. Defaults to None.
    relative (bool, optional): If True, the codes are divided by the highest code present in the column. Defaults to False.

  Returns:
    pd.DataFrame: A copy of `df` with the selected columns converted to nullable integer codes ('Int16'), or to the result of the division if `relative` is True.
  """
  df = df.copy()
  if cols is None:
    cols, _ = df_separate_categoricals(df)
  elif isinstance(cols, str):
    cols = [cols]

  for col in cols:
    codes = df[col].cat.codes.astype('Int16')
    max_code = codes.max()
    # Mask by position rather than by index label, so that valid rows sharing an index
    # label with a missing row are not blanked out on a non-unique index
    codes = codes.mask(df[col].isna().to_numpy())
    if relative:
      codes = codes / max_code
    df[col] = codes

  return df

def df_fillna_str(df: pd.DataFrame, val: str, cols: Optional[Union[str, List[str]]] = None) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which the given columns are cast to the 'string' dtype and their missing values are replaced by `val`.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    val (str): The replacement for missing values.
    cols (Optional[Union[str, List[str]]], optional): A column name or list of column names to fill. If None, all 'object' dtype columns are used. Defaults to None.

  Returns:
    pd.DataFrame: A copy of `df` with the selected columns filled.
  """
  filled = df.copy()
  if cols is None:
    targets = filled.select_dtypes('object').columns
  else:
    targets = [cols] if isinstance(cols, str) else cols

  for name in targets:
    as_string = filled[name].astype('string')
    filled[name] = as_string.fillna(val)
  return filled

def df_fillna_num(df: pd.DataFrame, val: Union[int, float], cols: Optional[Union[str, List[str]]] = None) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which missing values of the given columns are replaced by `val`.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    val (Union[int, float]): The replacement for missing values.
    cols (Optional[Union[str, List[str]]], optional): A column name or list of column names to fill. If None, all numeric columns are used. Defaults to None.

  Returns:
    pd.DataFrame: A copy of `df` with the selected columns filled.
  """
  filled = df.copy()
  if cols is None:
    targets = filled.select_dtypes('number').columns
  else:
    targets = [cols] if isinstance(cols, str) else cols

  for name in targets:
    filled[name] = filled[name].fillna(val)
  return filled

def df_fillna_cat(df: pd.DataFrame, val: str, cols: Optional[Union[str, List[str]]] = None) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which missing values of the given categorical columns are replaced by `val`. The category `val` is added to a column first if it does not exist there yet.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    val (str): The category used as replacement for missing values.
    cols (Optional[Union[str, List[str]]], optional): A column name or list of column names to fill. If None, all 'category' dtype columns are used. Defaults to None.

  Returns:
    pd.DataFrame: A copy of `df` with the selected columns filled.
  """
  filled = df.copy()
  if cols is None:
    targets = filled.select_dtypes('category').columns
  else:
    targets = [cols] if isinstance(cols, str) else cols

  for name in targets:
    category_known = val in filled[name].cat.categories.array
    if not category_known:
      filled[name] = filled[name].cat.add_categories(val)
    filled[name] = filled[name].fillna(val)
  return filled

def df_rename_cat_values(df: pd.DataFrame, cols: Union[str, List[str]], from_cats: Union[str, List[str]], to_cat: str) -> pd.DataFrame:
  """
  Changes one or more categorical values to another specified value within the provided columns, in place, and removes any categories that are no longer used. `to_cat` is added as a new category when the column does not know it yet. Values that were missing before the call stay missing.

  Parameters:
    df (pd.DataFrame): The DataFrame containing categorical columns where values will be renamed. It is modified in place.
    cols (Union[str, List[str]]): A column name or list of column names to be modified.
    from_cats (Union[str, List[str]]): The category or list of categories to be changed. They must exist as categories of every column.
    to_cat (str): The new category value that replaces the `from_cats`.

  Returns:
    pd.DataFrame: The same DataFrame with renamed categorical values and cleaned categories.

  Raises:
    AttributeError: If a specified column is not of categorical dtype (raised by the `.cat` accessor).
    ValueError: If one of `from_cats` is not a category of a column (raised by `remove_categories`).
  """
  if isinstance(cols, str):
    cols = [cols]
  for col in cols:
    values = df[col]
    # Remember the rows that were missing up front; the fillna below must not overwrite them
    originally_missing = values.isna().to_numpy()
    if to_cat not in values.cat.categories:
      # fillna on a categorical only accepts values that are already categories
      values = values.cat.add_categories(to_cat)
    # Removing the source categories turns their rows into missing values, which are then filled
    values = values.cat.remove_categories(from_cats).fillna(to_cat)
    values = values.mask(originally_missing)
    df[col] = values.cat.remove_unused_categories()

  return df

def df_drop_duplicate_rows(df: pd.DataFrame, inplace: bool = True, ignore_index: bool = True, keep: str = 'first') -> Union[pd.DataFrame, None]:
  """
  Drops duplicated rows by forwarding all options to `DataFrame.drop_duplicates`.

  Parameters:
    df (pd.DataFrame): The DataFrame from which duplicate rows are removed.
    inplace (bool, optional): If True, `df` itself is modified and None is returned. Otherwise a new DataFrame is returned. Defaults to True.
    ignore_index (bool, optional): If True, the resulting rows are relabelled 0..n-1. Defaults to True.
    keep (str, optional): Which occurrence of a duplicate is retained.
        - 'first': Keep the first occurrence.
        - 'last': Keep the last occurrence.
        - False: Keep none of the duplicated rows.
        Defaults to 'first'.

  Returns:
    Union[pd.DataFrame, None]: The de-duplicated DataFrame if `inplace` is False, otherwise None.
  """
  options = {"keep": keep, "ignore_index": ignore_index, "inplace": inplace}
  return df.drop_duplicates(**options)

def df_drop_duplicate_cols(df: pd.DataFrame, keep: str = 'first') -> pd.DataFrame:
  """
  Returns a new DataFrame without columns whose content duplicates another column. The remaining columns are cast back to their original dtypes.

  Parameters:
    df (pd.DataFrame): The DataFrame from which duplicate columns are removed.
    keep (str, optional): Which occurrence of a duplicate is retained.
        - 'first': Keep the first occurrence.
        - 'last': Keep the last occurrence.
        - False: Keep none of the duplicated columns.
        Defaults to 'first'.

  Returns:
    pd.DataFrame: A new DataFrame with duplicate columns removed and the original dtypes restored.
  """
  original_dtypes = df.dtypes
  # Columns are compared by transposing, de-duplicating the rows and transposing back
  deduplicated = df.T.drop_duplicates(keep=keep).T
  surviving = original_dtypes.index.isin(deduplicated.columns)
  # The round trip through the transpose may change dtypes, so restore them
  return deduplicated.astype(original_dtypes[surviving])

# Remove rows and columns that are completely empty
def df_drop_na_rows_and_cols(df: pd.DataFrame, inplace: bool = True) -> Union[pd.DataFrame, None]:
  """
  Drops every row and then every column that consists solely of missing values.

  Parameters:
    df (pd.DataFrame): The DataFrame to clean.
    inplace (bool, optional): If True, `df` itself is modified and None is returned.
                              If False, `df` is left untouched and a cleaned DataFrame is returned.
                              Defaults to True.

  Returns:
    Union[pd.DataFrame, None]: None if `inplace` is True; otherwise the cleaned DataFrame.
  """
  if not inplace:
    without_empty_rows = df.dropna(how="all", axis='index', inplace=inplace)
    return without_empty_rows.dropna(how="all", axis='columns', inplace=inplace)

  # In-place variant: both calls mutate df and the second one yields the None that is returned
  df.dropna(how="all", axis='index', inplace=inplace)
  return df.dropna(how="all", axis='columns', inplace=inplace)

def df_drop_single_val_cols(df: pd.DataFrame, inplace: bool = True) -> Union[pd.DataFrame, None]:
  """
  Drops the columns that contain exactly one distinct non-missing value.

  Parameters:
    df (pd.DataFrame): The DataFrame to clean.
    inplace (bool, optional): If True, `df` itself is modified and None is returned.
                              If False, a new DataFrame without those columns is returned.
                              Defaults to True.

  Returns:
    Union[pd.DataFrame, None]: None if `inplace` is True; otherwise the reduced DataFrame.
  """
  is_constant = df.nunique(dropna=True) == 1
  constant_cols = df.columns[is_constant]
  return df.drop(columns=constant_cols, inplace=inplace)

def df_drop_threshold_cols(df: pd.DataFrame, gte: float = sys.float_info.min, lt: float = sys.float_info.max, cols: List[str] = [], absolute: bool = False) -> Union[pd.DataFrame, None]:
  """
  Returns a copy of the DataFrame that keeps, among the checked columns, only those whose non-missing values ALL lie within the threshold range. Every checked column with at least one value outside the range is dropped; columns that are not checked are always kept.

  The range is `gte <= value < lt`. With `absolute=True` the absolute values are compared against `abs(gte)` and `abs(lt)`, and a threshold left at its default is not applied at all.

  NOTE(review): an earlier docstring described the opposite direction ("removes columns where all values meet the condition"). The filtering direction of the code is kept unchanged here - confirm which one callers expect.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    gte (float, optional): The inclusive lower bound. Defaults to `sys.float_info.min`, the smallest positive normalized float, so zero and negative values fail the default non-absolute check.
    lt (float, optional): The exclusive upper bound. Defaults to `sys.float_info.max`.
    cols (List[str], optional): The columns to check. If empty or None, all numeric columns are checked. Defaults to an empty list.
    absolute (bool, optional): If True, absolute values are compared. Defaults to False.

  Returns:
    Union[pd.DataFrame, None]: Always a new DataFrame without the dropped columns.
  """
  df = df.copy()
  if cols is None or len(cols) == 0:
    cols = df.select_dtypes('number').columns.to_list()

  values = df[cols]
  if absolute:
    magnitude = values.abs()
    # Start from "everything is within range" so that calling without thresholds drops nothing
    # (this combination previously raised an UnboundLocalError)
    within = pd.DataFrame(True, index=values.index, columns=values.columns)
    if gte != sys.float_info.min:
      within = within & (magnitude >= abs(gte))
    if lt != sys.float_info.max:
      # Applied in addition to gte; previously lt was silently ignored when gte was set
      within = within & (magnitude < abs(lt))
  else:
    within = (values >= gte) & (values < lt)

  # Blank out the in-range values: columns that still hold a value afterwards contain
  # at least one out-of-range value and are the ones to drop
  drop_cols = values.mask(within).dropna(axis='columns', how='all').columns.to_list()

  return df.drop(columns=drop_cols)

def df_drop_threshold_na_cols(df: pd.DataFrame, threshold: Union[float, int], inplace: bool = True) -> pd.DataFrame:
  """
  Drops the columns whose number of missing values reaches or exceeds a threshold.

  Parameters:
    df (pd.DataFrame): The DataFrame to clean.
    threshold (Union[float, int]): The number of missing values from which on a column is dropped. A float is
                                   interpreted as a fraction of the number of rows, an int as an absolute count.
    inplace (bool, optional): If True, `df` itself is modified and None is returned.
                              If False, a new DataFrame without those columns is returned.
                              Defaults to True.

  Returns:
    pd.DataFrame: The reduced DataFrame if `inplace` is False; None if `inplace` is True.

  Raises:
    ValueError: If the (scaled) threshold is greater than the number of rows.
  """
  row_count = len(df)
  # Floats are fractions of the row count and are scaled to an absolute count first
  threshold = threshold * row_count if isinstance(threshold, float) else threshold

  if threshold > row_count:
    raise ValueError(f"Threshold {threshold} must be lower than or equal to the number of rows in the DataFrame {len(df)}")

  na_counts = df.isna().sum()
  sparse_cols = na_counts[na_counts >= threshold].index
  return df.drop(columns=sparse_cols.array, inplace=inplace)

def df_drop_threshold_rows(df: pd.DataFrame, gte: float = sys.float_info.min, lt: float = sys.float_info.max, cols: List[str] = [], absolute: bool = False) -> Union[pd.DataFrame, None]:
  """
  Returns a copy of the DataFrame that keeps only the rows whose non-missing values in the checked columns ALL lie within the threshold range. Every row with at least one checked value outside the range is dropped.

  The range is `gte <= value < lt`. With `absolute=True` the absolute values are compared against `abs(gte)` and `abs(lt)`, and a threshold left at its default is not applied at all.

  NOTE(review): an earlier docstring described the opposite direction ("drops rows where all values meet the condition"). The filtering direction of the code is kept unchanged here - confirm which one callers expect.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    gte (float, optional): The inclusive lower bound. Defaults to `sys.float_info.min`, the smallest positive normalized float, so zero and negative values fail the default non-absolute check.
    lt (float, optional): The exclusive upper bound. Defaults to `sys.float_info.max`.
    cols (List[str], optional): The columns to check. If empty or None, all numeric columns are checked. Defaults to an empty list.
    absolute (bool, optional): If True, absolute values are compared. Defaults to False.

  Returns:
    pd.DataFrame: A new DataFrame without the dropped rows. Rows are dropped by index label.
  """
  df = df.copy()
  if cols is None or len(cols) == 0:
    cols = df.select_dtypes('number').columns.to_list()

  values = df[cols]
  if absolute:
    magnitude = values.abs()
    # Start from "everything is within range" so that calling without thresholds drops nothing
    # (this combination previously raised an UnboundLocalError)
    within = pd.DataFrame(True, index=values.index, columns=values.columns)
    if gte != sys.float_info.min:
      within = within & (magnitude >= abs(gte))
    if lt != sys.float_info.max:
      # Applied in addition to gte; previously lt was silently ignored when gte was set
      within = within & (magnitude < abs(lt))
  else:
    within = (values >= gte) & (values < lt)

  # Blank out the in-range values: rows that still hold a value afterwards contain
  # at least one out-of-range value and are the ones to drop
  drop_rows = values.mask(within).dropna(axis='index', how='all').index.to_list()

  return df.drop(index=drop_rows)

def df_format_as_eventlog(df: pd.DataFrame, case_col: str = EVENTLOG_CASE, activity_col: str = EVENTLOG_ACTIVITY, time_col: str = EVENTLOG_TIMESTAMP, resource_col: Optional[str] = None, group_col: Optional[str] = None, role_col: Optional[str] = None, inplace: Optional[bool] = True, sort: Union[bool, str] = True):
  """
  Renames the columns of a DataFrame to the event log naming convention and optionally sorts the rows.

  Renaming rules:
    - The explicitly passed columns are renamed to the corresponding `EVENTLOG_*` names. These mappings always win over the derived ones below.
    - Columns that are constant within each case receive the `EVENTLOG_CASE_PREFIX`, unless they already carry it.
    - Columns that vary within a case but carry the `EVENTLOG_CASE_PREFIX` lose that prefix.

  Parameters:
    df (pd.DataFrame): The DataFrame to format.
    case_col (str, optional): Name of the case identifier column. Defaults to `EVENTLOG_CASE`.
    activity_col (str, optional): Name of the activity column. Defaults to `EVENTLOG_ACTIVITY`.
    time_col (str, optional): Name of the timestamp column. Defaults to `EVENTLOG_TIMESTAMP`.
    resource_col (Optional[str], optional): Name of the resource column, if any. Defaults to None.
    group_col (Optional[str], optional): Name of the group column, if any. Defaults to None.
    role_col (Optional[str], optional): Name of the role column, if any. Defaults to None.
    inplace (Optional[bool], optional): If True, `df` is sorted and renamed in place and None is returned. Defaults to True.
    sort (Union[bool, str], optional): If True, rows are sorted by case, timestamp and activity. If a column name is given, it is used as additional sort key right after the case column. If False, rows are not sorted. Defaults to True.

  Returns:
    The formatted DataFrame if `inplace` is False, otherwise None.
  """
  explicit_map = {
    case_col: EVENTLOG_CASE,
    activity_col: EVENTLOG_ACTIVITY,
    time_col: EVENTLOG_TIMESTAMP
  }
  if resource_col is not None:
    explicit_map[resource_col] = EVENTLOG_RESOURCE
  if group_col is not None:
    explicit_map[group_col] = EVENTLOG_GROUP
  if role_col is not None:
    explicit_map[role_col] = EVENTLOG_ROLE

  # Sorting happens before renaming, hence the original column names are used
  if sort and isinstance(sort, str):
    sort_cols = [case_col, sort, time_col, activity_col]
  else:
    sort_cols = [case_col, time_col, activity_col]

  case_attrs = df_find_case_attributes(df, case_col)
  case_map = {attr: f"{EVENTLOG_CASE_PREFIX}{attr}" for attr in case_attrs if not attr.startswith(EVENTLOG_CASE_PREFIX)}

  event_attrs = df_find_event_attributes(df, case_col)
  event_map = {attr: f"{attr.replace(EVENTLOG_CASE_PREFIX, '', 1)}" for attr in event_attrs if attr.startswith(EVENTLOG_CASE_PREFIX)}

  # The explicit mappings go last so they are not overridden when, for example, the activity or
  # timestamp column happens to be constant within every case (all cases have a single event)
  col_map = case_map | event_map | explicit_map

  if sort and inplace:
    df.sort_values(by=sort_cols, inplace=inplace, ignore_index=True)
  elif sort and not inplace:
    df = df.sort_values(by=sort_cols, inplace=inplace, ignore_index=True)

  return df.rename(columns=col_map, inplace=inplace)

def df_write_files(df: pd.DataFrame, filename: str, index: bool = False, skip_xes: bool = True) -> None:
  """
  Writes the DataFrame to `<filename>.csv`, `<filename>.pkl.gz` and `<filename>.feather`, and optionally to `<filename>.xes`.

  The feather export is best effort: any error is printed and the export is skipped. MultiIndex columns are flattened to tuples for it, and that flattened copy is also what the XES export receives.

  Parameters:
    df (pd.DataFrame): The DataFrame to write.
    filename (str): The target path without file extension.
    index (bool, optional): Whether the index is written to the CSV file. Defaults to False.
    skip_xes (bool, optional): If False, the DataFrame is additionally written with `pm4py.write_xes`. Defaults to True.
  """
  df.to_csv(f"{filename}.csv", index=index)
  df.to_pickle(f"{filename}.pkl.gz")

  try:
    has_multiindex_columns = isinstance(df.columns, pd.MultiIndex)
    if has_multiindex_columns:
      # Work on a copy so the caller's column index stays untouched
      df = df.copy()
      df.columns = df.columns.to_flat_index()
    feather_frame = df.reset_index()
    feather_frame.to_feather(f"{filename}.feather")
  except Exception as e:
    print(f"Skipping feather: {e}")

  if skip_xes:
    return
  pm4py.write_xes(df, f"{filename}.xes")

def df_datetime_to_numeric(df: pd.DataFrame, cols: Optional[Union[List[str], str]] = None, convert_datetime: Optional[bool] = True, convert_timedelta: Optional[bool] = True) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame in which datetime and timedelta columns are converted to floats. Datetime columns (tz-naive and tz-aware) become seconds since the UNIX epoch, with tz-naive values treated as UTC. Timedelta columns become total seconds. Missing values become NaN.

  Parameters:
    df (pd.DataFrame): The DataFrame with columns to be converted. It is not modified.
    cols (Optional[Union[List[str], str]], optional): Columns to be converted. If None, all columns are considered. It can be a single column name or a list of column names. Defaults to None.
    convert_datetime (Optional[bool], optional): Flag indicating whether to convert datetime columns. Defaults to True.
    convert_timedelta (Optional[bool], optional): Flag indicating whether to convert timedelta columns. Defaults to True.

  Returns:
    pd.DataFrame: A DataFrame with the specified datetime and timedelta columns converted to numeric values.
  """
  df = df.copy()
  if cols is None:
    cols = df.columns.to_list()
  elif isinstance(cols, str):
    cols = [cols]

  if convert_datetime:
    # 'datetimetz' is required to also pick up tz-aware columns
    for col in df.select_dtypes(include=['datetime', 'datetimetz']).columns.to_list():
      if col in cols:
        # Vectorized conversion: the epoch carries the timezone of the column so the subtraction is valid
        epoch = pd.Timestamp(0, tz=df[col].dt.tz)
        df[col] = (df[col] - epoch) / pd.Timedelta(seconds=1)

  if convert_timedelta:
    for col in df.select_dtypes(include='timedelta').columns.to_list():
      if col in cols:
        df[col] = df[col].dt.total_seconds()

  return df

def df_timedelta_to_unit(df: pd.DataFrame, timedelta_col: str, unit: Optional[Literal['days', "day", "d", "hours", "hour", "hr", "h", "m", "minute", "min", "minutes", "t", "s", "seconds", "sec", "second"]], floor: bool = False, na_token: Optional[pd.Timedelta] = pd.NA) -> pd.DataFrame:
  """
  Converts a timedelta column, in place, to a number expressed in the given unit.

  Parameters:
    df (pd.DataFrame): The DataFrame holding the timedelta column. It is modified in place.
    timedelta_col (str): The name of the timedelta column to convert.
    unit (Optional[Literal[...]]): The target unit: days, hours, minutes or seconds (several spellings are accepted).
    floor (bool, optional): If True, the result is rounded down to whole units and stored as nullable 'Int64'. Defaults to False.
    na_token (Optional[pd.Timedelta], optional): A replacement for missing values, applied before the conversion. Missing values are kept if it is NA. Defaults to `pd.NA`.

  Returns:
    pd.DataFrame: The same DataFrame with the converted column.

  Raises:
    ValueError: If `unit` is not one of the supported spellings.
  """
  if not pd.isna(na_token):
    na_token = pd.Timedelta(na_token)
    # Assign the result back: an in-place fillna on df[col] acts on an intermediate object
    # and is not guaranteed to reach df (notably with copy-on-write)
    df[timedelta_col] = df[timedelta_col].fillna(na_token)

  if unit in ["days", "day", "d"]:
    df[timedelta_col] = df[timedelta_col].dt.total_seconds() / 60 / 60 / 24
  elif unit in ["hours", "hour", "hr", "h"]:
    df[timedelta_col] = df[timedelta_col].dt.total_seconds() / 60 / 60
  elif unit in ["m", "minute", "min", "minutes", "t"]:
    df[timedelta_col] = df[timedelta_col].dt.total_seconds() / 60
  elif unit in ["s", "seconds", "sec", "second"]:
    df[timedelta_col] = df[timedelta_col].dt.total_seconds()
  else:
    raise ValueError(f"Invalid timedelta unit {unit}")

  if floor:
    # Round down first: casting fractional floats to 'Int64' directly raises a TypeError
    df[timedelta_col] = np.floor(df[timedelta_col]).astype('Int64')
  return df

def df_to_multiindex(df: pd.DataFrame, case_col: str = EVENTLOG_CASE) -> pd.DataFrame:
  """
  Returns a copy of the DataFrame that is grouped by case, with the rows of every case renumbered from 0, and with a second column level consisting of zeros.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    case_col (str, optional): The name of the case identifier column. Defaults to `EVENTLOG_CASE`.

  Returns:
    pd.DataFrame: The regrouped DataFrame with two-level columns.
  """
  def _renumber(case_rows: pd.DataFrame) -> pd.DataFrame:
    # Restart the row numbering inside each case
    return case_rows.reset_index(drop=True)

  regrouped = df.copy().groupby(case_col).apply(_renumber)
  zero_level = np.zeros(len(regrouped.columns), dtype=int)
  regrouped.columns = pd.MultiIndex.from_arrays([regrouped.columns, zero_level])
  return regrouped

def df_to_flatindex(df: pd.DataFrame):
  """
  Returns a copy of the DataFrame with single-level columns and the index moved into regular columns. The parts of a multi-level column name are joined with "_".

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.

  Returns:
    pd.DataFrame: The flattened DataFrame.
  """
  flat = df.copy()
  if flat.columns.nlevels > 1:
    flat.columns = ["_".join(str(part) for part in col) for col in flat.columns.to_flat_index()]
  return flat.reset_index()

def np_filter_na(arr: np.ndarray, unsqueeze: bool = False) -> np.ndarray:
  """
  Removes the missing values from every element of `arr` and collects the results in an object array.

  Parameters:
    arr (np.ndarray): An array (or other iterable) of arrays.
    unsqueeze (bool, optional): If True, a trailing axis of length 1 is appended to every filtered element. Defaults to False.

  Returns:
    np.ndarray: An array of dtype 'object' built from the filtered elements.
  """
  def _without_na(el):
    kept = el[~pd.isna(el)]
    return np.expand_dims(kept, -1) if unsqueeze else kept

  return np.array([_without_na(el) for el in arr], dtype='object')


### Statistic & Visualization Functions

In [None]:
def df_naive_regression_metrics(df_train: pd.DataFrame, df_test: pd.DataFrame, label_col: str, method: Literal['median', 'mean', 'mode'] = 'median', return_df: bool = False) -> Union[Dict[str, float], pd.DataFrame]:
  """
  Evaluates a naive regression baseline that predicts one constant value, taken from the training labels, for every test row.

  Parameters:
    df_train (pd.DataFrame): The training data from which the constant is computed.
    df_test (pd.DataFrame): The test data providing the true labels.
    label_col (str): The name of the label column.
    method (Literal['median', 'mean', 'mode'], optional): The statistic used as constant prediction. For 'mode' the first mode is used if there are several. Defaults to 'median'.
    return_df (bool, optional): Passed through to `evaluate_regression`. Defaults to False.

  Returns:
    Union[Dict[str, float], pd.DataFrame]: The result of `evaluate_regression` for the constant prediction.

  Raises:
    ValueError: If `method` is unknown.
  """
  if 'median' == method:
    y_pred = df_train[label_col].median()
  elif 'mean' == method:
    y_pred = df_train[label_col].mean()
  elif 'mode' == method:
    # mode() returns a Series (there may be several modes); np.full needs a scalar
    y_pred = df_train[label_col].mode().iloc[0]
  else:
    raise ValueError(f"Unknown method {method}")

  y_pred = np.full(df_test[label_col].size, y_pred)
  y_true = df_test[label_col].to_numpy()

  return evaluate_regression(y_true, y_pred, return_df=return_df)

def df_naive_classification_metrics(df_train: pd.DataFrame, df_test: pd.DataFrame, label_col: str, return_df: bool = False) -> Union[Dict[str, float], pd.DataFrame]:
  """
  Evaluates a naive classification baseline that predicts the most frequent training label for every test row.

  Parameters:
    df_train (pd.DataFrame): The training data from which the most frequent label is taken.
    df_test (pd.DataFrame): The test data providing the true labels.
    label_col (str): The name of the label column.
    return_df (bool, optional): Passed through to `evaluate_classification`. Defaults to False.

  Returns:
    Union[Dict[str, float], pd.DataFrame]: The result of `evaluate_classification` for the constant prediction.
  """
  # Use the requested label column (previously EVENTLOG_LABEL_NEXT_ACT was hard-coded here)
  y_pred = df_train[label_col].mode().iloc[0]

  # Fit the encoder on the labels of both splits so every class receives a code
  label_enc = sl.preprocessing.LabelEncoder()
  label_enc.fit(np.concatenate((df_train[label_col], df_test[label_col]), axis=None))

  y_pred = label_enc.transform(np.full(df_test[label_col].size, y_pred))
  y_true = label_enc.transform(df_test[label_col])

  return evaluate_classification(y_true, y_pred, return_df=return_df)

def df_predictive_power_scores(df: pd.DataFrame, label_col: str, variable_cols: Optional[Union[str, List[str]]] = None, datetime_is_numeric: Optional[bool] = False, exclude_labels: Optional[bool] = True, threshold: float = 0.0) -> Tuple[pd.DataFrame, plt.Axes]:
  """
  Computes the predictive power score of the variables towards `label_col` and draws the valid scores at or above `threshold` as a horizontal bar plot.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    label_col (str): The column to be predicted.
    variable_cols (Optional[Union[str, List[str]]], optional): The predictor column(s). `label_col` is added if it is missing. If None, all columns are used, subject to `exclude_labels`. Defaults to None.
    datetime_is_numeric (Optional[bool], optional): If True, datetime and timedelta columns are converted to numbers first. Defaults to False.
    exclude_labels (Optional[bool], optional): Only used when `variable_cols` is None: if True, columns starting with `EVENTLOG_LABEL_PREFIX` other than `label_col` are left out. Defaults to True.
    threshold (float, optional): The minimum score of a predictor to be plotted, within [0, 1]. Defaults to 0.0.

  Returns:
    Tuple[pd.DataFrame, plt.Axes]: The unfiltered predictor table and the Axes holding the bar plot.

  Raises:
    ValueError: If `threshold` is None or outside [0, 1].
  """
  if threshold is None or threshold < 0 or threshold > 1:
    raise ValueError(f"threshold must be in the interval [0, 1] but was {threshold}")

  df = df.copy()
  if variable_cols is None:
    if exclude_labels:
      variable_cols = df.columns[~df.columns.str.startswith(EVENTLOG_LABEL_PREFIX) | (df.columns == label_col)].to_list()
    else:
      variable_cols = df.columns.to_list()
  elif isinstance(variable_cols, str):
    variable_cols = [variable_cols, label_col]
  elif label_col not in variable_cols:
    # The target must be part of the frame handed to pps; build a new list so the caller's list stays untouched
    variable_cols = [*variable_cols, label_col]

  df = df[variable_cols]

  if datetime_is_numeric:
    df = df_datetime_to_numeric(df)

  # np.nan instead of the alias np.NaN, which was removed in NumPy 2.0
  df_predictors = pps.predictors(df, label_col, output="df", sorted=True, catch_errors=False, random_seed=RANDOM_STATE, invalid_score=np.nan, cross_validation=2)

  # Parentheses are required: `&` binds tighter than `>=`
  is_shown = df_predictors["is_valid_score"] & (df_predictors["ppscore"] >= threshold)

  fig, ax = plt.subplots(figsize=(10,10))
  sns.barplot(data=df_predictors[is_shown], x="ppscore", y="x", orient="h", ax=ax)
  ax.set_title(f"Predictive power score {label_col}")
  # Return the Axes as annotated (set_title returns a Text object, which was returned before)
  return df_predictors, ax

def df_predictive_power_matrix(df: pd.DataFrame, datetime_is_numeric: Optional[bool] = False, threshold: float = 0.0) -> Tuple[pd.DataFrame, plt.Axes]:
  """
  Computes the predictive power score between all pairs of columns and draws the valid scores at or above `threshold` as an annotated heatmap.

  Parameters:
    df (pd.DataFrame): The source DataFrame. It is not modified.
    datetime_is_numeric (Optional[bool], optional): If True, datetime and timedelta columns are converted to numbers first. Defaults to False.
    threshold (float, optional): The minimum score of a pair to be shown, within [0, 1]. Defaults to 0.0.

  Returns:
    Tuple[pd.DataFrame, plt.Axes]: The unfiltered score table and the Axes holding the heatmap.

  Raises:
    ValueError: If `threshold` is None or outside [0, 1].
  """
  if threshold is None or threshold < 0 or threshold > 1:
    raise ValueError(f"threshold must be in the interval [0, 1] but was {threshold}")

  df = df.copy()
  if datetime_is_numeric:
    df = df_datetime_to_numeric(df)

  # np.nan instead of the alias np.NaN, which was removed in NumPy 2.0
  df_matrix = pps.matrix(df, output="df", sorted=True, catch_errors=False, random_seed=RANDOM_STATE, invalid_score=np.nan, cross_validation=2)

  # Parentheses are required: `&` binds tighter than `>=`
  is_shown = df_matrix["is_valid_score"] & (df_matrix["ppscore"] >= threshold)
  df_heatmap = df_matrix[is_shown][['x', 'y', 'ppscore']].pivot(columns='y', index='x', values='ppscore')

  fig, ax = plt.subplots(figsize=(30,30))
  sns.heatmap(df_heatmap, vmin=0, vmax=1, linewidths=0.5, annot=True, ax=ax)
  ax.set_title("Predictive power matrix")
  # Return the Axes as annotated (set_title returns a Text object, which was returned before)
  return df_matrix, ax

def df_correlation_matrix(df: pd.DataFrame, datetime_is_numeric: Optional[bool] = False, method: str = "pearson", threshold: float = 0.0) -> Tuple[pd.DataFrame, plt.Axes]:
  """Compute the pairwise correlation matrix of a frame and draw it as a heatmap.

  Categorical columns are made numeric first: the columns reported as ordered
  by ``df_separate_categoricals`` are replaced by their category codes, the
  unordered ones are one-hot encoded with ``pd.get_dummies``.

  Args:
    df: Input data; it is copied and never modified.
    datetime_is_numeric: If true, the frame is passed through
      ``df_datetime_to_numeric`` before correlating.
    method: Correlation method forwarded to ``DataFrame.corr``.
    threshold: Validated to lie in [0, 1] but otherwise unused here.
      NOTE(review): no filtering is applied to the heatmap - confirm whether
      that is intended.

  Returns:
    The correlation matrix and the Axes holding the heatmap.

  Raises:
    ValueError: If ``threshold`` is ``None`` or outside [0, 1].
  """
  if threshold is None or threshold < 0 or threshold > 1:
    raise ValueError(f"threshold must be in the interval [0, 1] but was {threshold}")

  df = df.copy()
  if datetime_is_numeric:
    df = df_datetime_to_numeric(df)

  ordered_cols, unordered_cols = df_separate_categoricals(df)

  # Ordered categories keep their rank information as integer codes.
  for ordinal_col in ordered_cols:
    df[ordinal_col] = df[ordinal_col].cat.codes

  # Unordered categories have no rank, so each level becomes its own indicator column.
  if len(unordered_cols) > 0:
    df = df.join(pd.get_dummies(df[unordered_cols], sparse=True)).drop(columns=unordered_cols)

  df_corr = df.corr(method=method, numeric_only=True)
  _, ax = plt.subplots(figsize=(50,50))
  sns.heatmap(df_corr, vmin=-1, vmax=1, annot=True, ax=ax)
  ax.set_title(f"{method} correlation")

  # Return the Axes itself; `.set_title()` returns a matplotlib Text object.
  return df_corr, ax

def df_case_length_stats(df: pd.DataFrame, case_col: str = EVENTLOG_CASE, result_col: str = "Case Length", percentiles: List[float] = np.arange(.05, 1, .05)) -> pd.DataFrame:
  """Describe the distribution of the number of rows per case.

  Rows are grouped by ``case_col`` and counted; the counts are summarised with
  ``Series.describe`` using the given ``percentiles``. The summary is returned
  as a one-column frame named ``result_col``.
  """
  events_per_case = df.groupby(case_col).size()
  summary = events_per_case.describe(percentiles=percentiles)
  return pd.DataFrame({result_col: summary})

def df_case_duration_stats(df: pd.DataFrame, case_col: str = EVENTLOG_CASE, time_col: str = EVENTLOG_TIMESTAMP, result_col: str = "Case Duration", percentiles: List[float] = np.arange(.05, 1, .05)) -> pd.DataFrame:
  """Describe the distribution of per-case durations.

  The duration of a case is the difference between the largest and the
  smallest ``time_col`` value among its rows. The durations are summarised
  with ``Series.describe`` using the given ``percentiles`` and returned as a
  one-column frame named ``result_col``.
  """
  timestamps_by_case = df.groupby(case_col)[time_col]
  case_durations = timestamps_by_case.max() - timestamps_by_case.min()
  summary = case_durations.describe(percentiles=percentiles)
  return pd.DataFrame({result_col: summary})

def df_correlation_scores(df: pd.DataFrame, label_col: str, variable_cols: Optional[Union[str, List[str]]] = None, datetime_is_numeric: Optional[bool] = False, exclude_labels: Optional[bool] = True, method: str = "pearson", threshold: float = 0.0) -> Tuple[pd.DataFrame, plt.Axes]:
  """Correlate every variable with one label column and plot the scores.

  Categorical columns are made numeric first: the columns reported as ordered
  by ``df_separate_categoricals`` are replaced by their category codes, the
  unordered ones are one-hot encoded with ``pd.get_dummies``.

  Args:
    df: Input data; it is copied and never modified.
    label_col: Column every other column is correlated with.
      NOTE(review): if this column is an unordered categorical it is replaced
      by dummy columns and the lookup below fails - confirm callers only pass
      numeric or ordered labels.
    variable_cols: Column(s) to score. ``None`` or an empty list selects all
      numeric and categorical columns. ``label_col`` is added when missing.
    datetime_is_numeric: If true, the frame is passed through
      ``df_datetime_to_numeric`` before correlating.
    exclude_labels: Accepted but not used by this function.
    method: Correlation method forwarded to ``Series.corr``; it is also used
      to name the result column.
    threshold: Validated to lie in [0, 1] but otherwise unused here.

  Returns:
    A frame indexed by variable with one ``"<method> correlation"`` column,
    sorted ascending (the label's own row stays NaN), and the Axes holding
    the horizontal bar plot.

  Raises:
    ValueError: If ``threshold`` is ``None`` or outside [0, 1].
  """
  if threshold is None or threshold < 0 or threshold > 1:
    raise ValueError(f"threshold must be in the interval [0, 1] but was {threshold}")

  df = df.copy()
  if datetime_is_numeric:
    df = df_datetime_to_numeric(df)

  if variable_cols is None or len(variable_cols) == 0:
    variable_cols = df.select_dtypes(include=["number", "category"]).columns.to_list()
  elif isinstance(variable_cols, str):
    # A bare string would otherwise select a Series instead of a DataFrame.
    variable_cols = [variable_cols]
  else:
    variable_cols = list(variable_cols)
  if label_col not in variable_cols:
    variable_cols.append(label_col)

  # Explicit copy so the column assignments below do not write into a slice.
  df = df[variable_cols].copy()
  ordered_cols, unordered_cols = df_separate_categoricals(df)

  for ordinal_col in ordered_cols:
    df[ordinal_col] = df[ordinal_col].cat.codes

  if len(unordered_cols) > 0:
    df = df.join(pd.get_dummies(df[unordered_cols], sparse=True)).drop(columns=unordered_cols)
  variable_cols = df.columns.to_list()

  score_col = f"{method} correlation"
  df_corr = pd.DataFrame(index=variable_cols, columns=[score_col], dtype="float")

  for col in variable_cols:
    if col == label_col:
      continue
    # Single .loc[row, col] write: chained `df.loc[row][col] = ...` assigns to a
    # temporary under copy-on-write. `method` must be forwarded, otherwise
    # Series.corr silently falls back to Pearson.
    df_corr.loc[col, score_col] = df[col].dropna().astype("float").corr(df[label_col], method=method)

  df_corr.sort_values(by=[score_col], inplace=True)
  _, ax = plt.subplots(figsize=(100,100))
  sns.barplot(data=df_corr, x=score_col, y=df_corr.index, orient="h", ax=ax)
  ax.set_title(label_col)

  # Return the Axes itself; `.set_title()` returns a matplotlib Text object.
  return df_corr, ax

def df_visualize_strict_temporal_splitting(df_train: pd.DataFrame, df_test: pd.DataFrame, time_col: str = EVENTLOG_TIMESTAMP) -> plt.Axes:
  """Plot, per calendar month, how many rows fall into each part of the split.

  Three stacked bars are drawn per month: all rows of ``df_train`` (green),
  the rows of ``df_test`` with a missing value in any of the columns returned
  by ``df_find_labels`` (red), and the remaining rows of ``df_test`` (grey).
  """
  def rows_per_month(frame: pd.DataFrame) -> pd.Series:
    # Bucket the timestamps by calendar month and count the rows per bucket.
    return frame[time_col].dt.to_period('M').value_counts()

  train_counts = rows_per_month(df_train)

  label_missing = df_test[df_find_labels(df_test)].isna().any(axis=1)
  incomplete_test_counts = rows_per_month(df_test[label_missing])
  complete_test_counts = rows_per_month(df_test[~label_missing])

  series_by_legend = [
    train_counts.rename("Training Set Correct"),
    incomplete_test_counts.rename("Training Set Wrong"),
    complete_test_counts.rename("Test Set"),
  ]
  monthly = pd.concat(series_by_legend, axis=1).sort_index().fillna(0)
  return monthly.plot(kind='bar', stacked=True, color=['green', 'red', 'grey'])

### Model Comparison

In [None]:
def evaluate_regression(y_true: np.ndarray, y_pred: np.ndarray, return_df: bool = False) -> Union[Dict[str, float], pd.DataFrame]:
  """Compute a set of regression error metrics.

  Args:
    y_true: Ground-truth values.
    y_pred: Predicted values.
    return_df: If true, return a one-column DataFrame (column ``"Value"``,
      one row per metric) instead of a dict.

  Returns:
    The metrics ``mae``, ``mse``, ``rmse``, ``mape``, ``medae``, ``logcosh``
    and ``max_error``; ``msle`` and ``rmsle`` are added only when neither
    array contains a negative value.
  """
  def as_vector(y: np.ndarray) -> np.ndarray:
    # A single output column, shape (n, 1), is flattened to (n,). Taking the
    # argmax of a one-column array would turn every value into 0.
    if y.ndim == 2 and y.shape[1] == 1:
      return y[:, 0]
    # Any other multi-column input keeps the previous argmax reduction.
    if y.ndim > 1:
      return np.argmax(y, axis=1)
    return y

  def logcosh_error(y_true, y_pred):
    error = np.subtract(y_pred, y_true)
    # log(cosh(x)) == logaddexp(x, -x) - log(2); this form does not overflow,
    # whereas np.cosh(x) becomes inf once |x| exceeds roughly 710.
    return np.mean(np.logaddexp(error, -error) - np.log(2.0), axis=-1)

  y_true = as_vector(y_true)
  y_pred = as_vector(y_pred)

  metrics = {
    'mae': sl.metrics.mean_absolute_error(y_true, y_pred),
    'mse': sl.metrics.mean_squared_error(y_true, y_pred),
    'rmse': sl.metrics.root_mean_squared_error(y_true, y_pred),
    'mape': sl.metrics.mean_absolute_percentage_error(y_true, y_pred),
    'medae': sl.metrics.median_absolute_error(y_true, y_pred),
    'logcosh': logcosh_error(y_true, y_pred),
    'max_error': sl.metrics.max_error(y_true, y_pred),
  }

  # The logarithmic errors are only requested for non-negative inputs.
  if y_true.min() >= 0 and y_pred.min() >= 0:
    metrics.update({
      'msle': float(sl.metrics.mean_squared_log_error(y_true, y_pred)),
      'rmsle': float(sl.metrics.root_mean_squared_log_error(y_true, y_pred)),
    })

  if return_df:
    return pd.DataFrame.from_dict(metrics, orient='index', columns=["Value"])
  return metrics

def evaluate_classification(y_true: np.ndarray, y_pred: np.ndarray, return_df: bool = False) -> Union[Dict[str, float], pd.DataFrame]:
  """Compute accuracy, F1, precision and recall for a classification result.

  Inputs with more than one dimension are reduced to class indices with an
  argmax over axis 1. F1, precision and recall are reported with micro, macro
  and weighted averaging. With ``return_df`` the metrics are returned as a
  one-column DataFrame (column ``"Value"``) instead of a dict.
  """
  labels_true = np.argmax(y_true, axis=1) if y_true.ndim > 1 else y_true
  labels_pred = np.argmax(y_pred, axis=1) if y_pred.ndim > 1 else y_pred

  scores = {
    'accuracy': sl.metrics.accuracy_score(labels_true, labels_pred),
    'accuracy_balanced': sl.metrics.balanced_accuracy_score(labels_true, labels_pred),
    'accuracy_balanced_adjusted': sl.metrics.balanced_accuracy_score(labels_true, labels_pred, adjusted=True),
  }

  # (key prefix, scorer, extra keyword arguments) for the averaged metrics.
  averaged_scorers = (
    ('f1', sl.metrics.f1_score, {'zero_division': 'warn'}),
    ('precision', sl.metrics.precision_score, {'zero_division': 'warn'}),
    ('recall', sl.metrics.recall_score, {}),
  )
  for prefix, scorer, extra_kwargs in averaged_scorers:
    for averaging in ('micro', 'macro', 'weighted'):
      scores[f"{prefix}_{averaging}"] = scorer(labels_true, labels_pred, average=averaging, **extra_kwargs)

  if not return_df:
    return scores
  return pd.DataFrame.from_dict(scores, orient='index', columns=["Value"])

In [None]:
def regression_paired_differences(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray, error_type: Literal['absolute', 'squared'] = 'absolute') -> np.ndarray:
  """Return the per-sample error of model 1 minus the per-sample error of model 2.

  Args:
    y_true: Ground-truth values.
    y_pred_model_1: Predictions of the first model.
    y_pred_model_2: Predictions of the second model.
    error_type: ``'absolute'`` for absolute errors, ``'squared'`` for squared
      errors.

  Returns:
    Element-wise error differences; a positive value means model 2 has the
    smaller error for that sample.

  Raises:
    ValueError: If ``error_type`` is neither ``'absolute'`` nor ``'squared'``.
  """
  error_functions = {'absolute': np.abs, 'squared': np.square}
  if error_type not in error_functions:
    # Fail with a clear message instead of an UnboundLocalError further down.
    raise ValueError(f"error_type must be 'absolute' or 'squared' but was {error_type!r}")
  error_of = error_functions[error_type]

  model_1_errors = error_of(y_true - y_pred_model_1)
  model_2_errors = error_of(y_true - y_pred_model_2)

  return model_1_errors - model_2_errors # Positive value means model 2 is better

def classification_paired_differences(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray) -> np.ndarray:
  """Return, per sample, how much more score model 2 gives the true class than model 1.

  ``y_true`` may hold class indices or, when it has more than one dimension,
  rows that are reduced to indices with an argmax over axis 1. Both prediction
  arrays are indexed as ``[sample, class]``. A positive value means model 2
  assigned the higher score to the true class.
  """
  true_classes = np.argmax(y_true, axis=1) if y_true.ndim > 1 else y_true
  true_classes = true_classes.astype(int)

  # Pair every row number with its true class to pick one entry per sample.
  rows = np.arange(len(true_classes))
  score_model_1 = y_pred_model_1[rows, true_classes]
  score_model_2 = y_pred_model_2[rows, true_classes]

  return score_model_2 - score_model_1

def regression_bootstrap_twosided_confidence_intervals(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray, metric: Callable[[np.ndarray, np.ndarray], np.ndarray] = sl.metrics.mean_absolute_error, n_iterations: int = 10000, alpha: float = 0.05) -> Tuple[float, float]:
  """Bootstrap a two-sided confidence interval for the metric difference of two regression models.

  Thin wrapper around ``bootstrap_twosided_confidence_intervals`` whose only
  addition is the default metric (mean absolute error). All arguments are
  forwarded unchanged and the callee's ``(lower, upper)`` result is returned.
  """
  return bootstrap_twosided_confidence_intervals(y_true, y_pred_model_1, y_pred_model_2, metric, n_iterations, alpha)

def classification_bootstrap_twosided_confidence_intervals(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray, metric: Callable[[np.ndarray, np.ndarray], np.ndarray] = sl.metrics.accuracy_score, n_iterations: int = 10000, alpha: float = 0.05) -> Tuple[float, float]:
  """Bootstrap a two-sided confidence interval for the metric difference of two classifiers.

  Thin wrapper around ``bootstrap_twosided_confidence_intervals`` whose only
  addition is the default metric (accuracy). All arguments are forwarded
  unchanged and the callee's ``(lower, upper)`` result is returned.
  """
  return bootstrap_twosided_confidence_intervals(y_true, y_pred_model_1, y_pred_model_2, metric, n_iterations, alpha)

def bootstrap_twosided_confidence_intervals(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray, metric: Callable[[np.ndarray, np.ndarray], np.ndarray], n_iterations: int = 10000, alpha: float = 0.05) -> Tuple[float, float]:
  """Percentile-bootstrap interval for ``metric(model 1) - metric(model 2)``.

  In each of ``n_iterations`` rounds the samples are redrawn with replacement
  (same indices for the ground truth and both prediction arrays), the metric
  is evaluated for both models and their difference is recorded. The
  ``alpha / 2`` and ``1 - alpha / 2`` percentiles of the recorded differences
  are returned as ``(lower, upper)``. Sampling uses NumPy's global random
  state.
  """
  n_samples = len(y_true)

  def resampled_difference():
    # One bootstrap round: shared indices keep the three arrays aligned.
    picks = np.random.choice(range(n_samples), size=n_samples, replace=True)
    truth = y_true[picks]
    score_model_1 = metric(truth, y_pred_model_1[picks])
    score_model_2 = metric(truth, y_pred_model_2[picks])
    return score_model_1 - score_model_2

  differences = [resampled_difference() for _ in range(n_iterations)]

  lower_bound = np.percentile(differences, 100 * alpha / 2)
  upper_bound = np.percentile(differences, 100 * (1 - alpha / 2))
  return lower_bound, upper_bound

def bootstrap_paired_differences_twosided_confidence_interval(paired_differences: np.ndarray, n_iterations: int = 10000, alpha: float = 0.05) -> Tuple[float, float]:
  """Percentile-bootstrap confidence interval for the mean paired difference.

  Args:
    paired_differences: One difference value per sample.
    n_iterations: Number of bootstrap resamples.
    alpha: Significance level; the interval covers ``1 - alpha``.

  Returns:
    ``(lower, upper)``: the ``alpha / 2`` and ``1 - alpha / 2`` percentiles of
    the resampled means. Sampling uses NumPy's global random state.
  """
  sample_size = len(paired_differences)

  # Exactly one resample (with replacement) per iteration. An extra, unused
  # draw would double the sampling cost and advance the random state.
  boot_means = [
    np.mean(np.random.choice(paired_differences, size=sample_size, replace=True))
    for _ in range(n_iterations)
  ]

  lower = np.percentile(boot_means, 100 * alpha / 2)
  upper = np.percentile(boot_means, 100 * (1 - alpha / 2))

  return lower, upper

def test_mcnemar(y_true: np.ndarray, y_pred_model_1: np.ndarray, y_pred_model_2: np.ndarray, exact: bool = True, return_df: bool = False) -> Tuple[float, float]:
  """Run McNemar's test on the correct/incorrect decisions of two classifiers.

  Inputs with more than one dimension are reduced to class indices with an
  argmax over axis 1. Despite the annotation, the result is a dict (or, with
  ``return_df``, a one-column DataFrame) holding the test statistic, the
  p-value, the 2x2 contingency table, the proportion difference ``(b - c) / n``
  and the odds ratio ``b / c`` (NaN when ``c`` is 0).
  """
  def to_class_indices(values: np.ndarray) -> np.ndarray:
    return np.argmax(values, axis=1) if values.ndim > 1 else values

  y_true = to_class_indices(y_true)
  y_pred_model_1 = to_class_indices(y_pred_model_1)
  y_pred_model_2 = to_class_indices(y_pred_model_2)

  model_1_right = y_true == y_pred_model_1
  model_1_wrong = y_true != y_pred_model_1
  model_2_right = y_true == y_pred_model_2
  model_2_wrong = y_true != y_pred_model_2

  # Cells of the 2x2 table: rows = model 1 right/wrong, columns = model 2 right/wrong.
  a = np.sum(model_1_right & model_2_right)
  b = np.sum(model_1_right & model_2_wrong)
  c = np.sum(model_1_wrong & model_2_right)
  d = np.sum(model_1_wrong & model_2_wrong)

  contingency_table = np.array([[a, b], [c, d]], dtype=int)
  outcome = sm.stats.contingency_tables.mcnemar(contingency_table, exact=exact)

  # Effect sizes are based on the discordant cells b and c.
  total = a + b + c + d
  if c != 0:
    odds_ratio = b / c
  else:
    odds_ratio = np.nan
  proportion_difference = (b - c) / total

  summary = {
    "test_statistic": outcome.statistic,
    "p_value": outcome.pvalue,
    "contingency_table": contingency_table.tolist(),
    "proportion_difference": proportion_difference,
    "odds_ratio": odds_ratio,
  }

  if not return_df:
    return summary
  return pd.DataFrame.from_dict(summary, orient='index', columns=["Value"])

def test_wilcoxon_signed_rank(paired_differences: np.ndarray, return_df: bool = False) -> Tuple[float, float]:
  """Run a two-sided Wilcoxon signed-rank test on paired differences.

  The normal approximation with continuity correction is requested from
  SciPy. Despite the annotation, the result is a dict (or, with ``return_df``,
  a one-column DataFrame) holding the test statistic, the p-value, the z-score
  and Rosenthal's r, computed as ``|z| / sqrt(n)`` with ``n`` the number of
  differences passed in.
  """
  # Large sample size (n > 50) is expected, hence the asymptotic approximation.
  outcome = sp.stats.wilcoxon(paired_differences, alternative='two-sided', method='approx', correction=True)

  n_pairs = len(paired_differences)
  effect_size_r = abs(outcome.zstatistic) / np.sqrt(n_pairs)

  summary = {
    "test_statistic": outcome.statistic,
    "p_value": outcome.pvalue,
    "z_score": outcome.zstatistic,
    "rosenthals_r": effect_size_r,
  }

  if not return_df:
    return summary
  return pd.DataFrame.from_dict(summary, orient='index', columns=["Value"])

def interpret_effect_size(effect_size: float) -> Literal['negligible', 'small', 'medium', 'large']:
  """Map an effect size to a verbal category.

  Values below 0.1 are "negligible", below 0.3 "small", below 0.5 "medium";
  everything else is "large".
  """
  # (exclusive upper bound, category), checked in ascending order.
  bands = ((0.1, "negligible"), (0.3, "small"), (0.5, "medium"))
  for upper_bound, category in bands:
    if effect_size < upper_bound:
      return category
  return "large"

def visualize_paired_differences(
    paired_differences: np.ndarray,
    confidence_level: float = 0.95,
    ci_lower: Optional[float] = None,
    ci_upper: Optional[float] = None,
    name_model_1: str = "Model 1",
    name_model_2: str = "Model 2"
  ):
  """Plot the distribution of paired differences on a new 12x6 figure.

  Draws a density histogram, a Gaussian KDE curve, vertical reference lines at
  zero, the mean and the median and, when both ``ci_lower`` and ``ci_upper``
  are given, a shaded band labelled with ``confidence_level``. Nothing is
  returned; the plot is left on the current pyplot figure.
  """
  # Keyword arguments of the vertical reference lines, in drawing order.
  reference_lines = (
    dict(x=0, color='gray', linestyle='--', alpha=0.5, label="No difference"),
    dict(x=np.mean(paired_differences), color='green', linestyle='-', label="Mean"),
    dict(x=np.median(paired_differences), color='blue', linestyle='-', label="Median"),
  )

  plt.figure(figsize=(12, 6))

  # Histogram normalised to a density so it shares the scale of the KDE curve.
  plt.hist(paired_differences, bins='auto', density=True, alpha=0.7, color='skyblue', edgecolor='black')

  density_estimate = sp.stats.gaussian_kde(paired_differences)
  grid = np.linspace(min(paired_differences), max(paired_differences), 200)
  plt.plot(grid, density_estimate(grid), 'r-', lw=2, label='KDE')

  for line_kwargs in reference_lines:
    plt.axvline(**line_kwargs)

  # The band is only drawn when both interval bounds were supplied.
  interval_given = not (ci_lower is None or ci_upper is None)
  if interval_given:
    plt.axvspan(ci_lower, ci_upper, alpha=0.2, color='green', label=f'{confidence_level*100:.0f}% CI')

  plt.xlabel(f"Performance Difference ({name_model_1} - {name_model_2})")
  plt.ylabel("Density")
  plt.title("Distribution of Paired Differences")
  plt.legend()
  plt.grid(True, alpha=0.3)


## Data Import

### A: Import from Google Drive

In [None]:
# Mount Google Drive into the Colab file system.
drive.mount("/content/drive")

# Recursively copy the Drive input folder to the local input data directory.
!cp -r "$GDRIVE_INPUT_DIR" "$INPUT_DATA_DIR"

# Flush pending writes and unmount Drive again once the copy is done.
drive.flush_and_unmount()

### B: Upload from Local Machine

In [None]:
#uploaded = files.upload()

#for filename in uploaded.keys():
#  target = os.path.join(INPUT_DATA_DIR, filename)
#  !mv "$filename" "$target"

# del uploaded

# Dataset: Incident management process enriched event log

This event log is of an incident management process extracted from an instance of the ServiceNow platform used by an IT company. See also https://doi.org/10.24432/C57S4H and http://processmining.each.webhostusp.sti.usp.br/index.php/event-logs/.

- **Control Attributes**:
    - *number*: incident identifier with the same number as total cases;
    
    - *incident state*: attribute with eight levels controlling incident management process transitions from opening until closing the case;
    
    - *active*: boolean attribute indicating if record is active or closed/canceled;
    
    - *reassignment_count*: number of times incident has changed group or support analysts;
    
    - *reopen_count*: number of times incident resolution was rejected by caller;
    
    - *sys_mod_count*: number of incident updates until that moment;
    
    - *made_sla*: boolean attribute indicating whether the incident exceeded the target SLA or not;

- **Identification and Classification Attributes**:
    - *caller_id*: user identifier affected;
    
    - *opened_by*: user identifier that reported the incident;
    
    - *opened_at*: incident opening date and time;
    
    - *sys_created_by*: user identifier that registered the incident;
    
    - *sys_created_at*: incident creation date and time;
    
    - *sys_updated_by*: user identifier that made update and generated current log record;
    
    - *sys_updated_at*: log update date and time;
    
    - *contact_type*: categorical field with values indicating how incident was reported;
    
    - *location*: location identifier of place being affected;
    
    - *category*: description of the first level of service being affected;
    
    - *subcategory*: description of the second level of service being affected related to first level;
    
    - *u_symptom*: description about user perception of service availability;
    
    - *cmdb_ci*: (configuration item) identifier (not mandatory) referencing homonyms relation and used to report item being affected;
    
    - *impact*: description of the impact caused by incident. Values are: 1-High; 2-Medium; 3-Low;
    
    - *urgency*: description of the urgency requested by the user for incident resolution. Values are the same as for impact;
    
    - *priority*: priority calculated by system based on Impact and urgency;

- **Support, Diagnosis and Other Attributes**:
    - *assignment_group*: identifier referencing the relation Group (database relational model in ServiceNowTM) describing support group in charge of incident;
    
    - *assigned_to*: user identifier in charge of incident;
    
    - *knowledge*: boolean attribute indicating whether a knowledge base document was used to resolve incident;
    
    - *u_priority_confirmation*: boolean attribute indicating whether priority field was double checked;
    
    - *notify*: categorical attribute indicating whether notifications were generated for this incident;
    
    - *problem_id*: identifier referencing homonyms relation describing problem identifier associated with this incident;
    
    - *rfc*: (change request) identifier referencing homonyms relation describing change request identifier associated with incident;
    
    - *vendor*: identifier referencing homonyms relation describing vendor in charge of incident;
    
    - *caused_by*: relation with the RFC code responsible for the incident;
    
    - *close_code*: resolution code of the incident;
    
    - *resolved_by*: user identifier who resolved the incident;
    
    - *resolved_at*: incident resolution date and time;
    
    - *closed_at*: incident close date and time;

## Read Incident management process enriched event log V1

In [None]:
df_servicenow = pd.read_feather(os.path.join(INPUT_DATA_DIR, "incident_event_log_cleaned.feather"))
df_servicenow

In [None]:
df_servicenow_train = pd.read_feather(os.path.join(INPUT_DATA_DIR, "incident_event_log_train.feather"))
df_servicenow_train

In [None]:
df_servicenow_test = pd.read_feather(os.path.join(INPUT_DATA_DIR, "incident_event_log_test.feather"))
df_servicenow_test

## Result Evaluation for Incident management process enriched event log V1

### Next Activity

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Activity", "Servicenow")

y_true_servicenow_na = np.load(os.path.join(in_dir, "servicenow_next_activity_groundtruth.npy"))

#### Naive

In [None]:
df_naive_classification_metrics(df_servicenow_train, df_servicenow_test, EVENTLOG_LABEL_NEXT_ACT, return_df=True)

#### LSTM

In [None]:
y_pred_servicenow_lstm_na = np.load(os.path.join(in_dir, "lstm_servicenow_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_servicenow_na, y_pred_servicenow_lstm_na, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_servicenow_trans_na = np.load(os.path.join(in_dir, "processtransformer_servicenow_next_activity_predictions.npy"))

evaluate_classification(y_true_servicenow_na, y_pred_servicenow_trans_na, return_df=True)

#### PROPHET

In [None]:
y_pred_servicenow_prophet_na = np.load(os.path.join(in_dir, "prophet_servicenow_next_activity_predictions.npy"))

evaluate_classification(y_true_servicenow_na, y_pred_servicenow_prophet_na, return_df=True)

#### AST

In [None]:
y_pred_servicenow_ast_na = np.load(os.path.join(in_dir, "ast_servicenow_next_activity_predictions.npy"))

evaluate_classification(y_true_servicenow_na, y_pred_servicenow_ast_na, return_df=True)

#### TGN-AST

In [None]:
y_pred_servicenow_tgnast_na = np.load(os.path.join(in_dir, "tgnast_servicenow_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_servicenow_na, y_pred_servicenow_tgnast_na, return_df=True)

#### Comparison: AST vs. TGN-AST

In [None]:
# Per-sample difference in the score given to the true next activity
# (TGN-AST minus AST), followed by a two-sided Wilcoxon signed-rank test.
paired_diff = classification_paired_differences(y_true_servicenow_na, y_pred_servicenow_ast_na, y_pred_servicenow_tgnast_na)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

### Next Time

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Time", "Servicenow")

y_true_servicenow_nt = np.load(os.path.join(in_dir, "servicenow_next_time_groundtruth.npy"))

#### Naive

In [None]:
df_naive_regression_metrics(df_servicenow_train, df_servicenow_test, EVENTLOG_LABEL_NEXT_TIME, return_df=True)

#### LSTM

In [None]:
y_pred_servicenow_lstm_nt = np.load(os.path.join(in_dir, "lstm_servicenow_multi_next_time_predictions.npy"))

evaluate_regression(y_true_servicenow_nt, y_pred_servicenow_lstm_nt, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_servicenow_trans_nt = np.load(os.path.join(in_dir, "processtransformer_servicenow_next_time_predictions.npy"))

evaluate_regression(y_true_servicenow_nt, y_pred_servicenow_trans_nt, return_df=True)

#### PROPHET

In [None]:
y_pred_servicenow_prophet_nt = np.load(os.path.join(in_dir, "prophet_servicenow_next_time_predictions.npy"))

evaluate_regression(y_true_servicenow_nt, y_pred_servicenow_prophet_nt, return_df=True)

#### AST

In [None]:
y_pred_servicenow_ast_nt = np.load(os.path.join(in_dir, "ast_servicenow_next_time_predictions.npy"))

evaluate_regression(y_true_servicenow_nt, y_pred_servicenow_ast_nt, return_df=True)

#### TGN-AST

In [None]:
y_pred_servicenow_tgnast_nt = np.load(os.path.join(in_dir, "tgnast_servicenow_multi_next_time_predictions.npy"))

evaluate_regression(y_true_servicenow_nt, y_pred_servicenow_tgnast_nt, return_df=True)

### Remaining Time

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Remaining Time", "Servicenow")

y_true_servicenow_rt = np.load(os.path.join(in_dir, "servicenow_remaining_time_groundtruth.npy"))

#### Naive

In [None]:
df_naive_regression_metrics(df_servicenow_train, df_servicenow_test, EVENTLOG_LABEL_REM_TIME, return_df=True)

#### LSTM

In [None]:
y_pred_servicenow_lstm_rt = np.load(os.path.join(in_dir, "lstm_servicenow_multi_remaining_time_predictions.npy"))

evaluate_regression(y_true_servicenow_rt, y_pred_servicenow_lstm_rt, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_servicenow_trans_rt = np.load(os.path.join(in_dir, "processtransformer_servicenow_remaining_time_predictions.npy"))

evaluate_regression(y_true_servicenow_rt, y_pred_servicenow_trans_rt, return_df=True)

#### PROPHET

In [None]:
y_pred_servicenow_prophet_rt = np.load(os.path.join(in_dir, "prophet_servicenow_remaining_time_predictions.npy"))

evaluate_regression(y_true_servicenow_rt, y_pred_servicenow_prophet_rt, return_df=True)

#### AST

In [None]:
y_pred_servicenow_ast_rt = np.load(os.path.join(in_dir, "ast_servicenow_remaining_time_predictions.npy"))

evaluate_regression(y_true_servicenow_rt, y_pred_servicenow_ast_rt, return_df=True)

#### TGN-AST

In [None]:
y_pred_servicenow_tgnast_rt = np.load(os.path.join(in_dir, "tgnast_servicenow_multi_remaining_time_predictions.npy"))

evaluate_regression(y_true_servicenow_rt, y_pred_servicenow_tgnast_rt, return_df=True)

#### Comparison: PROPHET vs TGN-AST

In [None]:
# Per-sample absolute error of PROPHET minus that of TGN-AST (positive means
# TGN-AST is closer), followed by a two-sided Wilcoxon signed-rank test.
paired_diff = regression_paired_differences(y_true_servicenow_rt, y_pred_servicenow_prophet_rt, y_pred_servicenow_tgnast_rt)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

# Dataset: Dataset belonging to the help desk log of an Italian Company

The event log concerns the ticketing management process of the Help desk of an Italian software company. See also https://doi.org/10.4121/uuid:0c60edf1-6f83-4e75-9367-4c63b3e9d5bb.

- *Case ID*: the case identifier

- *Activity*: the activity name

- *Resource*: the resource who performed the action

- *Complete Timestamp*: the timestamp of the event. Format: YYYY/MM/DD hh:mm:ss.

- *Variant*: case variant

- *Variant index*: case variant in integer format

- *seriousness*: a seriousness level for the ticket

- *customer*: name of the customer

- *product*: name of the product

- *responsible_section*: name of the responsible section

- *seriousness_2*: a sub-seriousness level

- *service_level*: level of the service

- *service_type*: type of the service

- *support_section*: name of the support section

- *workgroup*: name of the workgroup

## Read Dataset belonging to the help desk log of an Italian Company

In [None]:
df_italy = pd.read_feather(os.path.join(INPUT_DATA_DIR, "finale_cleaned.feather"))
df_italy

In [None]:
df_italy_train = pd.read_feather(os.path.join(INPUT_DATA_DIR, "finale_train.feather"))
df_italy_train

In [None]:
df_italy_test = pd.read_feather(os.path.join(INPUT_DATA_DIR, "finale_test.feather"))
df_italy_test

## Result Evaluation for Dataset belonging to the help desk log of an Italian Company

### Next Activity

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Activity", "Italy")

y_true_italy_na = np.load(os.path.join(in_dir, "italy_next_activity_groundtruth.npy"))

#### Naive

In [None]:
df_naive_classification_metrics(df_italy_train, df_italy_test, EVENTLOG_LABEL_NEXT_ACT, return_df=True)

#### LSTM

In [None]:
y_pred_italy_lstm_na = np.load(os.path.join(in_dir, "lstm_italy_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_italy_na, y_pred_italy_lstm_na, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_italy_trans_na = np.load(os.path.join(in_dir, "processtransformer_italy_next_activity_predictions.npy"))

evaluate_classification(y_true_italy_na, y_pred_italy_trans_na, return_df=True)

#### PROPHET

In [None]:
y_pred_italy_prophet_na = np.load(os.path.join(in_dir, "prophet_italy_next_activity_predictions.npy"))

evaluate_classification(y_true_italy_na, y_pred_italy_prophet_na, return_df=True)

#### AST

In [None]:
y_pred_italy_ast_na = np.load(os.path.join(in_dir, "ast_italy_next_activity_predictions.npy"))

evaluate_classification(y_true_italy_na, y_pred_italy_ast_na, return_df=True)

#### TGN-AST

In [None]:
y_pred_italy_tgnast_na = np.load(os.path.join(in_dir, "tgnast_italy_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_italy_na, y_pred_italy_tgnast_na, return_df=True)

### Next Time

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Time", "Italy")

y_true_italy_nt = np.load(os.path.join(in_dir, "italy_next_time_groundtruth.npy"))

#### Naive

In [None]:
df_naive_regression_metrics(df_italy_train, df_italy_test, EVENTLOG_LABEL_NEXT_TIME, return_df=True)

#### LSTM

In [None]:
y_pred_italy_lstm_nt = np.load(os.path.join(in_dir, "lstm_italy_multi_next_time_predictions.npy"))

evaluate_regression(y_true_italy_nt, y_pred_italy_lstm_nt, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_italy_trans_nt = np.load(os.path.join(in_dir, "processtransformer_italy_next_time_predictions.npy"))

evaluate_regression(y_true_italy_nt, y_pred_italy_trans_nt, return_df=True)

#### PROPHET

In [None]:
y_pred_italy_prophet_nt = np.load(os.path.join(in_dir, "prophet_italy_next_time_predictions.npy"))

evaluate_regression(y_true_italy_nt, y_pred_italy_prophet_nt, return_df=True)

#### AST

In [None]:
y_pred_italy_ast_nt = np.load(os.path.join(in_dir, "ast_italy_next_time_predictions.npy"))

evaluate_regression(y_true_italy_nt, y_pred_italy_ast_nt, return_df=True)

#### TGN-AST

In [None]:
y_pred_italy_tgnast_nt = np.load(os.path.join(in_dir, "tgnast_italy_multi_next_time_predictions.npy"))

evaluate_regression(y_true_italy_nt, y_pred_italy_tgnast_nt, return_df=True)

### Remaining Time

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Remaining Time", "Italy")

y_true_italy_rt = np.load(os.path.join(in_dir, "italy_remaining_time_groundtruth.npy"))

#### Naive

In [None]:
df_naive_regression_metrics(df_italy_train, df_italy_test, EVENTLOG_LABEL_REM_TIME, return_df=True)

#### LSTM

In [None]:
y_pred_italy_lstm_rt = np.load(os.path.join(in_dir, "lstm_italy_multi_remaining_time_predictions.npy"))

evaluate_regression(y_true_italy_rt, y_pred_italy_lstm_rt, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_italy_trans_rt = np.load(os.path.join(in_dir, "processtransformer_italy_remaining_time_predictions.npy"))

evaluate_regression(y_true_italy_rt, y_pred_italy_trans_rt, return_df=True)

#### PROPHET

In [None]:
y_pred_italy_prophet_rt = np.load(os.path.join(in_dir, "prophet_italy_remaining_time_predictions.npy"))

evaluate_regression(y_true_italy_rt, y_pred_italy_prophet_rt, return_df=True)

#### AST

In [None]:
y_pred_italy_ast_rt = np.load(os.path.join(in_dir, "ast_italy_remaining_time_predictions.npy"))

evaluate_regression(y_true_italy_rt, y_pred_italy_ast_rt, return_df=True)

#### TGN-AST

In [None]:
y_pred_italy_tgnast_rt = np.load(os.path.join(in_dir, "tgnast_italy_multi_remaining_time_predictions.npy"))

evaluate_regression(y_true_italy_rt, y_pred_italy_tgnast_rt, return_df=True)

# Dataset: BPIC 2014

This dataset is provided by Rabobank ICT and contains information about the employed ITIL processes. See https://www.win.tue.nl/bpi/2014/challenge.html and https://data.4tu.nl/collections/dff0e630-9c91-4b8e-806d-ec9a3a0f2206

## Read BPIC 2014

In [None]:
df_bpic14 = pd.read_feather(os.path.join(INPUT_DATA_DIR, "Detail_Incident_Activity_cleaned.feather"))
df_bpic14

In [None]:
df_bpic14_train = pd.read_feather(os.path.join(INPUT_DATA_DIR, "Detail_Incident_Activity_train.feather"))
df_bpic14_train

In [None]:
df_bpic14_test = pd.read_feather(os.path.join(INPUT_DATA_DIR, "Detail_Incident_Activity_test.feather"))
df_bpic14_test

## Result Evaluation for BPIC 2014

### Next Activity

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Activity", "BPIC 2014")

y_true_bpic14_na = np.load(os.path.join(in_dir, "bpic14_next_activity_groundtruth.npy"))

#### Naive

In [None]:
df_naive_classification_metrics(df_bpic14_train, df_bpic14_test, EVENTLOG_LABEL_NEXT_ACT, return_df=True)

#### LSTM

In [None]:
y_pred_bpic14_lstm_na = np.load(os.path.join(in_dir, "lstm_bpic14_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_bpic14_na, y_pred_bpic14_lstm_na, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_bpic14_trans_na = np.load(os.path.join(in_dir, "processtransformer_bpic14_next_activity_predictions.npy"))

evaluate_classification(y_true_bpic14_na, y_pred_bpic14_trans_na, return_df=True)

#### PROPHET

In [None]:
y_pred_bpic14_prophet_na = np.load(os.path.join(in_dir, "prophet_bpic14_next_activity_predictions.npy"))

evaluate_classification(y_true_bpic14_na, y_pred_bpic14_prophet_na, return_df=True)

#### AST

In [None]:
y_pred_bpic14_ast_na = np.load(os.path.join(in_dir, "ast_bpic14_next_activity_predictions.npy"))

evaluate_classification(y_true_bpic14_na, y_pred_bpic14_ast_na, return_df=True)

#### TGN-AST

In [None]:
y_pred_bpic14_tgnast_na = np.load(os.path.join(in_dir, "tgnast_bpic14_multi_next_activity_predictions.npy"))

evaluate_classification(y_true_bpic14_na, y_pred_bpic14_tgnast_na, return_df=True)

#### Comparison: AST vs TGN-AST

In [None]:
# Per-sample difference in the score given to the true next activity
# (TGN-AST minus AST), followed by a two-sided Wilcoxon signed-rank test.
paired_diff = classification_paired_differences(y_true_bpic14_na, y_pred_bpic14_ast_na, y_pred_bpic14_tgnast_na)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

### Next Time

In [None]:
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Time", "BPIC 2014")

y_true_bpic14_nt = np.load(os.path.join(in_dir, "bpic14_next_time_groundtruth.npy"))

#### Naive

In [None]:
df_naive_regression_metrics(df_bpic14_train, df_bpic14_test, EVENTLOG_LABEL_NEXT_TIME, return_df=True)

#### LSTM

In [None]:
y_pred_bpic14_lstm_nt = np.load(os.path.join(in_dir, "lstm_bpic14_multi_next_time_predictions.npy"))

evaluate_regression(y_true_bpic14_nt, y_pred_bpic14_lstm_nt, return_df=True)

#### ProcessTransformer

In [None]:
y_pred_bpic14_trans_nt = np.load(os.path.join(in_dir, "processtransformer_bpic14_next_time_predictions.npy"))

evaluate_regression(y_true_bpic14_nt, y_pred_bpic14_trans_nt, return_df=True)

#### PROPHET

In [None]:
y_pred_bpic14_prophet_nt = np.load(os.path.join(in_dir, "prophet_bpic14_next_time_predictions.npy"))

evaluate_regression(y_true_bpic14_nt, y_pred_bpic14_prophet_nt, return_df=True)

#### AST

In [None]:
# AST, BPIC 2014, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_ast_nt = np.load(Path(in_dir) / "ast_bpic14_next_time_predictions.npy")
evaluate_regression(y_true_bpic14_nt, y_pred_bpic14_ast_nt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, BPIC 2014, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_tgnast_nt = np.load(Path(in_dir) / "tgnast_bpic14_multi_next_time_predictions.npy")
evaluate_regression(y_true_bpic14_nt, y_pred_bpic14_tgnast_nt, return_df=True)

#### Comparison: AST vs. TGN-AST

In [None]:
# AST vs. TGN-AST on BPIC 2014 next time: build the paired differences
# and feed them to the Wilcoxon signed-rank helper (cell output).
paired_diff = regression_paired_differences(
    y_true_bpic14_nt,
    y_pred_bpic14_ast_nt,
    y_pred_bpic14_tgnast_nt,
)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

### Remaining Time

In [None]:
# Result folder for BPIC 2014 remaining-time predictions; `in_dir` is reused by
# the model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Remaining Time", "BPIC 2014")
y_true_bpic14_rt = np.load(Path(in_dir) / "bpic14_remaining_time_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the BPIC 2014 train/test frames for the remaining-time label.
df_naive_regression_metrics(
    df_bpic14_train,
    df_bpic14_test,
    EVENTLOG_LABEL_REM_TIME,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, BPIC 2014, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_lstm_rt = np.load(Path(in_dir) / "lstm_bpic14_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic14_rt, y_pred_bpic14_lstm_rt, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, BPIC 2014, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_trans_rt = np.load(Path(in_dir) / "processtransformer_bpic14_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic14_rt, y_pred_bpic14_trans_rt, return_df=True)

#### PROPHET

In [None]:
# PROPHET, BPIC 2014, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_prophet_rt = np.load(Path(in_dir) / "prophet_bpic14_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic14_rt, y_pred_bpic14_prophet_rt, return_df=True)

#### AST

In [None]:
# AST, BPIC 2014, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_ast_rt = np.load(Path(in_dir) / "ast_bpic14_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic14_rt, y_pred_bpic14_ast_rt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, BPIC 2014, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic14_tgnast_rt = np.load(Path(in_dir) / "tgnast_bpic14_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic14_rt, y_pred_bpic14_tgnast_rt, return_df=True)

# Dataset: Helpdesk

## Read Helpdesk

In [None]:
# Read the cleaned Helpdesk event log from the input folder and display it.
df_helpdesk = pd.read_feather(Path(INPUT_DATA_DIR) / "helpdesk_cleaned.feather")
df_helpdesk

In [None]:
# Read the Helpdesk training split from the input folder and display it.
df_helpdesk_train = pd.read_feather(Path(INPUT_DATA_DIR) / "helpdesk_train.feather")
df_helpdesk_train

In [None]:
# Read the Helpdesk test split from the input folder and display it.
df_helpdesk_test = pd.read_feather(Path(INPUT_DATA_DIR) / "helpdesk_test.feather")
df_helpdesk_test

## Result Evaluation for Helpdesk

### Next Activity

In [None]:
# Result folder for Helpdesk next-activity predictions; `in_dir` is reused by
# the model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Activity", "Helpdesk")
y_true_helpdesk_na = np.load(Path(in_dir) / "helpdesk_next_activity_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the Helpdesk train/test frames for the next-activity label.
df_naive_classification_metrics(
    df_helpdesk_train,
    df_helpdesk_test,
    EVENTLOG_LABEL_NEXT_ACT,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, Helpdesk, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_helpdesk_lstm_na = np.load(Path(in_dir) / "lstm_helpdesk_multi_next_activity_predictions.npy")
evaluate_classification(y_true_helpdesk_na, y_pred_helpdesk_lstm_na, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, Helpdesk, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_helpdesk_trans_na = np.load(Path(in_dir) / "processtransformer_helpdesk_next_activity_predictions.npy")
evaluate_classification(y_true_helpdesk_na, y_pred_helpdesk_trans_na, return_df=True)

#### PROPHET

In [None]:
# PROPHET, Helpdesk, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_helpdesk_prophet_na = np.load(Path(in_dir) / "prophet_helpdesk_next_activity_predictions.npy")
evaluate_classification(y_true_helpdesk_na, y_pred_helpdesk_prophet_na, return_df=True)

#### AST

In [None]:
# AST, Helpdesk, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_helpdesk_ast_na = np.load(Path(in_dir) / "ast_helpdesk_next_activity_predictions.npy")
evaluate_classification(y_true_helpdesk_na, y_pred_helpdesk_ast_na, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, Helpdesk, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_helpdesk_tgnast_na = np.load(Path(in_dir) / "tgnast_helpdesk_multi_next_activity_predictions.npy")
evaluate_classification(y_true_helpdesk_na, y_pred_helpdesk_tgnast_na, return_df=True)

### Next Time

In [None]:
# Result folder for Helpdesk next-time predictions; `in_dir` is reused by the
# model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Time", "Helpdesk")
y_true_helpdesk_nt = np.load(Path(in_dir) / "helpdesk_next_time_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the Helpdesk train/test frames for the next-time label.
df_naive_regression_metrics(
    df_helpdesk_train,
    df_helpdesk_test,
    EVENTLOG_LABEL_NEXT_TIME,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, Helpdesk, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_lstm_nt = np.load(Path(in_dir) / "lstm_helpdesk_multi_next_time_predictions.npy")
evaluate_regression(y_true_helpdesk_nt, y_pred_helpdesk_lstm_nt, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, Helpdesk, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_trans_nt = np.load(Path(in_dir) / "processtransformer_helpdesk_next_time_predictions.npy")
evaluate_regression(y_true_helpdesk_nt, y_pred_helpdesk_trans_nt, return_df=True)

#### PROPHET

In [None]:
# PROPHET, Helpdesk, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_prophet_nt = np.load(Path(in_dir) / "prophet_helpdesk_next_time_predictions.npy")
evaluate_regression(y_true_helpdesk_nt, y_pred_helpdesk_prophet_nt, return_df=True)

#### AST

In [None]:
# AST, Helpdesk, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_ast_nt = np.load(Path(in_dir) / "ast_helpdesk_next_time_predictions.npy")
evaluate_regression(y_true_helpdesk_nt, y_pred_helpdesk_ast_nt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, Helpdesk, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_tgnast_nt = np.load(Path(in_dir) / "tgnast_helpdesk_multi_next_time_predictions.npy")
evaluate_regression(y_true_helpdesk_nt, y_pred_helpdesk_tgnast_nt, return_df=True)

#### Comparison: AST vs. TGN-AST

In [None]:
# AST vs. TGN-AST on Helpdesk next time: build the paired differences
# and feed them to the Wilcoxon signed-rank helper (cell output).
paired_diff = regression_paired_differences(
    y_true_helpdesk_nt,
    y_pred_helpdesk_ast_nt,
    y_pred_helpdesk_tgnast_nt,
)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

### Remaining Time

In [None]:
# Result folder for Helpdesk remaining-time predictions; `in_dir` is reused by
# the model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Remaining Time", "Helpdesk")
y_true_helpdesk_rt = np.load(Path(in_dir) / "helpdesk_remaining_time_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the Helpdesk train/test frames for the remaining-time label.
df_naive_regression_metrics(
    df_helpdesk_train,
    df_helpdesk_test,
    EVENTLOG_LABEL_REM_TIME,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, Helpdesk, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_lstm_rt = np.load(Path(in_dir) / "lstm_helpdesk_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_helpdesk_rt, y_pred_helpdesk_lstm_rt, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, Helpdesk, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_trans_rt = np.load(Path(in_dir) / "processtransformer_helpdesk_remaining_time_predictions.npy")
evaluate_regression(y_true_helpdesk_rt, y_pred_helpdesk_trans_rt, return_df=True)

#### PROPHET

In [None]:
# PROPHET, Helpdesk, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_prophet_rt = np.load(Path(in_dir) / "prophet_helpdesk_remaining_time_predictions.npy")
evaluate_regression(y_true_helpdesk_rt, y_pred_helpdesk_prophet_rt, return_df=True)

#### AST

In [None]:
# AST, Helpdesk, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_ast_rt = np.load(Path(in_dir) / "ast_helpdesk_remaining_time_predictions.npy")
evaluate_regression(y_true_helpdesk_rt, y_pred_helpdesk_ast_rt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, Helpdesk, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_helpdesk_tgnast_rt = np.load(Path(in_dir) / "tgnast_helpdesk_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_helpdesk_rt, y_pred_helpdesk_tgnast_rt, return_df=True)

# Dataset: BPIC 2013

## Read BPIC 2013

In [None]:
# Read the cleaned BPIC 2013 incidents event log from the input folder and display it.
df_bpic13 = pd.read_feather(Path(INPUT_DATA_DIR) / "BPI_Challenge_2013_incidents_cleaned.feather")
df_bpic13

In [None]:
# Read the BPIC 2013 incidents training split from the input folder and display it.
df_bpic13_train = pd.read_feather(Path(INPUT_DATA_DIR) / "BPI_Challenge_2013_incidents_train.feather")
df_bpic13_train

In [None]:
# Read the BPIC 2013 incidents test split from the input folder and display it.
df_bpic13_test = pd.read_feather(Path(INPUT_DATA_DIR) / "BPI_Challenge_2013_incidents_test.feather")
df_bpic13_test

## Result Evaluation for BPIC 2013

### Next Activity

In [None]:
# Result folder for BPIC 2013 next-activity predictions; `in_dir` is reused by
# the model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Activity", "BPIC 2013")
y_true_bpic13_na = np.load(Path(in_dir) / "bpic13_next_activity_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the BPIC 2013 train/test frames for the next-activity label.
df_naive_classification_metrics(
    df_bpic13_train,
    df_bpic13_test,
    EVENTLOG_LABEL_NEXT_ACT,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, BPIC 2013, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_bpic13_lstm_na = np.load(Path(in_dir) / "lstm_bpic13_multi_next_activity_predictions.npy")
evaluate_classification(y_true_bpic13_na, y_pred_bpic13_lstm_na, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, BPIC 2013, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_bpic13_trans_na = np.load(Path(in_dir) / "processtransformer_bpic13_next_activity_predictions.npy")
evaluate_classification(y_true_bpic13_na, y_pred_bpic13_trans_na, return_df=True)

#### PROPHET

In [None]:
# PROPHET, BPIC 2013, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_bpic13_prophet_na = np.load(Path(in_dir) / "prophet_bpic13_next_activity_predictions.npy")
evaluate_classification(y_true_bpic13_na, y_pred_bpic13_prophet_na, return_df=True)

#### AST

In [None]:
# AST, BPIC 2013, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_bpic13_ast_na = np.load(Path(in_dir) / "ast_bpic13_next_activity_predictions.npy")
evaluate_classification(y_true_bpic13_na, y_pred_bpic13_ast_na, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, BPIC 2013, next activity: load the stored predictions and
# pass them with the ground truth to `evaluate_classification` (cell output).
y_pred_bpic13_tgnast_na = np.load(Path(in_dir) / "tgnast_bpic13_multi_next_activity_predictions.npy")
evaluate_classification(y_true_bpic13_na, y_pred_bpic13_tgnast_na, return_df=True)

#### Comparison: PROPHET vs. TGN-AST

In [None]:
# PROPHET vs. TGN-AST on BPIC 2013 next activity: build the paired differences
# and feed them to the Wilcoxon signed-rank helper (cell output).
paired_diff = classification_paired_differences(
    y_true_bpic13_na,
    y_pred_bpic13_prophet_na,
    y_pred_bpic13_tgnast_na,
)
test_wilcoxon_signed_rank(paired_diff, return_df=True)

### Next Time

In [None]:
# Result folder for BPIC 2013 next-time predictions; `in_dir` is reused by the
# model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Next Time", "BPIC 2013")
y_true_bpic13_nt = np.load(Path(in_dir) / "bpic13_next_time_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the BPIC 2013 train/test frames for the next-time label.
df_naive_regression_metrics(
    df_bpic13_train,
    df_bpic13_test,
    EVENTLOG_LABEL_NEXT_TIME,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, BPIC 2013, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_lstm_nt = np.load(Path(in_dir) / "lstm_bpic13_multi_next_time_predictions.npy")
evaluate_regression(y_true_bpic13_nt, y_pred_bpic13_lstm_nt, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, BPIC 2013, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_trans_nt = np.load(Path(in_dir) / "processtransformer_bpic13_next_time_predictions.npy")
evaluate_regression(y_true_bpic13_nt, y_pred_bpic13_trans_nt, return_df=True)

#### PROPHET

In [None]:
# PROPHET, BPIC 2013, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_prophet_nt = np.load(Path(in_dir) / "prophet_bpic13_next_time_predictions.npy")
evaluate_regression(y_true_bpic13_nt, y_pred_bpic13_prophet_nt, return_df=True)

#### AST

In [None]:
# AST, BPIC 2013, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_ast_nt = np.load(Path(in_dir) / "ast_bpic13_next_time_predictions.npy")
evaluate_regression(y_true_bpic13_nt, y_pred_bpic13_ast_nt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, BPIC 2013, next time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_tgnast_nt = np.load(Path(in_dir) / "tgnast_bpic13_multi_next_time_predictions.npy")
evaluate_regression(y_true_bpic13_nt, y_pred_bpic13_tgnast_nt, return_df=True)

### Remaining Time

In [None]:
# Result folder for BPIC 2013 remaining-time predictions; `in_dir` is reused by
# the model cells of this section. Then load the matching ground truth.
in_dir = os.path.join(INPUT_DATA_RESULT_DIR, "Remaining Time", "BPIC 2013")
y_true_bpic13_rt = np.load(Path(in_dir) / "bpic13_remaining_time_groundtruth.npy")

#### Naive

In [None]:
# Naive baseline on the BPIC 2013 train/test frames for the remaining-time label.
df_naive_regression_metrics(
    df_bpic13_train,
    df_bpic13_test,
    EVENTLOG_LABEL_REM_TIME,
    return_df=True,
)

#### LSTM

In [None]:
# LSTM, BPIC 2013, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_lstm_rt = np.load(Path(in_dir) / "lstm_bpic13_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic13_rt, y_pred_bpic13_lstm_rt, return_df=True)

#### ProcessTransformer

In [None]:
# ProcessTransformer, BPIC 2013, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_trans_rt = np.load(Path(in_dir) / "processtransformer_bpic13_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic13_rt, y_pred_bpic13_trans_rt, return_df=True)

#### PROPHET

In [None]:
# PROPHET, BPIC 2013, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_prophet_rt = np.load(Path(in_dir) / "prophet_bpic13_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic13_rt, y_pred_bpic13_prophet_rt, return_df=True)

#### AST

In [None]:
# AST, BPIC 2013, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_ast_rt = np.load(Path(in_dir) / "ast_bpic13_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic13_rt, y_pred_bpic13_ast_rt, return_df=True)

#### TGN-AST

In [None]:
# TGN-AST, BPIC 2013, remaining time: load the stored predictions and
# pass them with the ground truth to `evaluate_regression` (cell output).
y_pred_bpic13_tgnast_rt = np.load(Path(in_dir) / "tgnast_bpic13_multi_remaining_time_predictions.npy")
evaluate_regression(y_true_bpic13_rt, y_pred_bpic13_tgnast_rt, return_df=True)

# Data Export

In [None]:
output_file = f"results_{datetime.datetime.now().strftime('%Y-%m-%d_%H.%M.%S%z')}.zip"

!zip -r "$output_file" "$DATA_DIR" "$GRAPHIC_DIR" "$MODEL_DIR"

## A: Export to Google Drive

In [None]:
drive.mount("/content/drive")

Path(GDRIVE_OUTPUT_DIR).mkdir(exist_ok=True)

!cp "$output_file" "$GDRIVE_OUTPUT_DIR"

drive.flush_and_unmount()

## B: Download to Local Machine

In [None]:
# Hand the generated archive to the Colab `files` helper for download to the
# local machine.
files.download(output_file)