In [None]:
import pandas as pd
import numpy as np
import pm4py
from pm4py.objects.log.util import sorting
from scipy.stats import wasserstein_distance
from sklearn import preprocessing

from log_distance_measures.config import EventLogIDs, AbsoluteTimestampType, discretize_to_hour
from log_distance_measures.control_flow_log_distance import control_flow_log_distance
from log_distance_measures.n_gram_distribution import n_gram_distribution_distance
from log_distance_measures.absolute_event_distribution import absolute_event_distribution_distance
from log_distance_measures.case_arrival_distribution import case_arrival_distribution_distance
from log_distance_measures.circadian_event_distribution import circadian_event_distribution_distance
from log_distance_measures.relative_event_distribution import relative_event_distribution_distance
from log_distance_measures.work_in_progress import work_in_progress_distance
from log_distance_measures.cycle_time_distribution import cycle_time_distribution_distance

import warnings
# Globally silence every warning category for the rest of the kernel session
# (presumably to keep the cell outputs readable). NOTE(review): this also hides
# numerical warnings raised by the statistics/metric libraries — confirm that
# is intended before relying on the results.
warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'log_distance_measures'

In [None]:
import re
def extract_first_float(cell):
    """
    Split a formatted "mean (std)" cell into its two parts.

    Args:
        cell: Either a string such as ``"0.15 (0.017)"`` / ``"0.15"`` or any
            non-string value.

    Returns:
        A 2-tuple:
          * non-string input      -> ``(cell, '')`` (value passed through untouched)
          * string starting with a decimal number -> ``(float(number), bracket)``
            where ``bracket`` is the decimal found in ``" (…)"`` right after the
            number, kept as a *string*, or ``''`` when there is none
          * any other string      -> ``(float('inf'), '')``
    """
    # Non-strings are assumed to be numeric already; nothing to parse.
    if not isinstance(cell, str):
        return cell, ''

    # Anchored at the start of the string: "<digits>.<digits>" optionally
    # followed by " (<digits>.<digits>)".
    parsed = re.match(r'(\d+\.\d+)(?: \((\d+\.\d+)\))?', cell)
    if parsed is None:
        # Unparseable text sorts last when values are compared (smaller is better).
        return float('inf'), ''

    leading, bracketed = parsed.groups()
    return float(leading), bracketed or ''

In [3]:
def align_column_names(df):
    """
    Rename the column names used by the different simulators to one schema:
    ``case_id``, ``activity``, ``resource``, ``start_time``, ``end_time``.

    Args:
        df: pandas DataFrame holding an event log.

    Returns:
        A renamed DataFrame; the input frame is not modified. Columns that
        match no known alias are left unchanged.

    Notes:
        * For ``case_id``, ``activity`` and ``resource`` only the FIRST alias
          found (in the order listed below) is renamed; later aliases are kept
          under their original name.
        * When ``agent`` is the alias chosen for ``resource`` and a ``resource``
          column already exists, the existing ``resource`` column is dropped so
          the agent column replaces it.
        * Timestamp aliases are independent: every alias that is present gets
          renamed. If a frame contains several aliases of the same target (or
          an alias next to an already-canonical column) the result contains
          duplicate column names — callers are expected to pass logs with a
          single naming scheme.
    """
    # Ordered aliases; the first one present wins for each canonical name.
    exclusive_aliases = {
        'case_id': ('case:concept:name', 'caseid'),
        'activity': ('Activity', 'activity_name', 'task', 'concept:name'),
        'resource': ('Resource', 'user', 'agent', 'org:resource'),
    }
    # Independent aliases; each one present is renamed. Columns already called
    # 'start_time' / 'end_time' (e.g. SIMOD simulated logs) need no renaming.
    timestamp_aliases = {
        'start_timestamp': 'start_time',
        'end_timestamp': 'end_time',
        'start:timestamp': 'start_time',
        'time:timestamp': 'end_time',
    }

    columns = df.columns
    renames = {}
    for target, aliases in exclusive_aliases.items():
        source = next((alias for alias in aliases if alias in columns), None)
        if source is not None:
            renames[source] = target

    # The agent column supersedes a pre-existing 'resource' column.
    if renames.get('agent') == 'resource' and 'resource' in columns:
        df = df.drop(columns=['resource'])

    renames.update(
        {source: target for source, target in timestamp_aliases.items() if source in columns}
    )

    # One rename call instead of one DataFrame copy per matched alias.
    return df.rename(columns=renames)

In [4]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel

# Relies on names from earlier cells: align_column_names, EventLogIDs,
# AbsoluteTimestampType, discretize_to_hour and the log_distance_measures metric functions.

def get_rankings(metric_results, alpha=0.05):
    """
    Rank experiments on a single metric (smaller is better) using paired t-tests.

    Args:
        metric_results: dict mapping experiment name -> sequence of metric values
            (e.g. 10 evaluations). The sequences are compared pairwise with a
            paired t-test, so they must all have the same length.
        alpha: significance level; a difference counts as significant when the
            p-value is strictly below ``alpha``.

    Returns:
        dict mapping experiment name -> integer rank (1 is best). Experiments
        that are not significantly different from a better-ranked one share
        its rank. An empty input yields an empty dict.

    Procedure:
      1. Sort experiments by their mean (ascending).
      2. Compare each experiment with every experiment that has a lower mean.
      3. If at least one of those comparisons is NOT significant, the
         experiment takes the smallest rank among the experiments it is tied
         with.
      4. Otherwise it receives a new rank equal to (max(current ranks) + 1).

    A NaN p-value (the test is undefined, e.g. the two experiments produced
    identical values so all paired differences are zero) is treated as "not
    significantly different": there is no evidence to separate the two.
    """
    means = {exp: np.mean(values) for exp, values in metric_results.items()}
    sorted_exps = sorted(means, key=means.get)
    ranks = {}

    for position, exp in enumerate(sorted_exps):
        # Ranks of all better-mean experiments this one cannot be separated from.
        tied_ranks = []
        for lower_exp in sorted_exps[:position]:
            _, p_val = ttest_rel(metric_results[exp], metric_results[lower_exp])
            # `NaN >= alpha` is False, so NaN must be checked explicitly;
            # otherwise identical experiments would be ranked apart.
            if np.isnan(p_val) or p_val >= alpha:
                tied_ranks.append(ranks[lower_exp])

        if tied_ranks:
            ranks[exp] = min(tied_ranks)
        else:
            # Significantly worse than everything before it (or the very first
            # experiment, for which `ranks` is still empty -> rank 1).
            ranks[exp] = max(ranks.values(), default=0) + 1
    return ranks


def main_(log_paths, name_experiments, n_simulations=10, alpha=0.05):
    """
    Evaluate several simulation approaches against their test logs and rank them.

    For every experiment the test log is read once and compared with
    ``n_simulations`` simulated logs named ``simulated_log_<i>.csv``
    (``i = 0 .. n_simulations - 1``) found in the experiment's simulated-log
    directory. Five distances are computed per simulated log: n-gram (n=3),
    absolute event, circadian event, relative event and cycle-time
    distribution distance.

    Args:
        log_paths: sequence with one ``[test_log_csv_path, simulated_log_dir]``
            entry per experiment.
        name_experiments: experiment names, positionally aligned with
            ``log_paths``; used as the index of all returned frames.
        n_simulations: number of simulated logs evaluated per experiment.
        alpha: significance level forwarded to ``get_rankings``.

    Returns:
        Tuple ``(mean_results, results_df, ranking_df)``, all indexed by
        experiment name with one column per metric:
          * ``mean_results`` – mean distance rounded to 3 decimals,
          * ``results_df``   – the string ``"<mean> (<std>)"`` (both rounded to 3),
          * ``ranking_df``   – rank from ``get_rankings`` (1 is best).

    Raises:
        ValueError: if ``log_paths`` and ``name_experiments`` differ in length.
    """
    # Fail before any (expensive) evaluation instead of with a late
    # IndexError/KeyError once the lists run out of sync.
    if len(log_paths) != len(name_experiments):
        raise ValueError(
            f"log_paths has {len(log_paths)} entries but name_experiments has "
            f"{len(name_experiments)}; they must be aligned one-to-one."
        )

    # Short metric key -> column title used in the returned DataFrames.
    metric_columns = {
        'NGD': 'N-Gram Distribution Distance',
        'AEDD': 'Absolute Event Distribution Distance',
        'CEDD': 'Circadian Event Distribution Distance',
        'REDD': 'Relative Event Distribution Distance',
        'CTDD': 'Cycle Time Distribution Distance',
    }

    # Set event log column ID mapping (names produced by align_column_names).
    event_log_ids = EventLogIDs(
        case="case_id",
        activity="activity",
        start_time="start_time",
        end_time="end_time",
        resource='resource'
    )

    def _load_log(path):
        """Read one CSV event log, align its column names and parse timestamps as UTC."""
        log = align_column_names(pd.read_csv(path))
        for column in (event_log_ids.start_time, event_log_ids.end_time):
            log[column] = pd.to_datetime(log[column], utc=True, format='mixed')
        return log

    def _compute_metrics(test_log, simulated_log):
        """Return {metric key: distance} between the test log and one simulated log."""
        return {
            'NGD': n_gram_distribution_distance(
                test_log, event_log_ids, simulated_log, event_log_ids, n=3
            ),
            'AEDD': absolute_event_distribution_distance(
                test_log, event_log_ids,
                simulated_log, event_log_ids,
                discretize_type=AbsoluteTimestampType.BOTH,
                discretize_event=discretize_to_hour
            ),
            'CEDD': circadian_event_distribution_distance(
                test_log, event_log_ids,
                simulated_log, event_log_ids,
                discretize_type=AbsoluteTimestampType.BOTH
            ),
            'REDD': relative_event_distribution_distance(
                test_log, event_log_ids,
                simulated_log, event_log_ids,
                discretize_type=AbsoluteTimestampType.BOTH,
                discretize_event=discretize_to_hour
            ),
            'CTDD': cycle_time_distribution_distance(
                test_log, event_log_ids,
                simulated_log, event_log_ids,
                bin_size=pd.Timedelta(hours=1)
            ),
        }

    results_df = pd.DataFrame(index=name_experiments)
    mean_results = pd.DataFrame(index=name_experiments)

    # Raw per-simulation values, kept for the paired t-tests:
    # {metric key: {experiment name: [value per simulated log]}}
    per_experiment_metrics = {key: {} for key in metric_columns}

    for paths, experiment_name in zip(log_paths, name_experiments):
        test_log = _load_log(paths[0])
        simulated_dir = paths[1]

        all_metrics = {key: [] for key in metric_columns}
        for run in range(n_simulations):
            simulated_log = _load_log(f"{simulated_dir}/simulated_log_{run}.csv")
            for key, value in _compute_metrics(test_log, simulated_log).items():
                all_metrics[key].append(value)

        for key, column in metric_columns.items():
            mean_value = round(np.mean(all_metrics[key]), 3)
            std_value = round(np.std(all_metrics[key]), 3)
            mean_results.loc[experiment_name, column] = mean_value
            results_df.loc[experiment_name, column] = f"{mean_value} ({std_value})"
            per_experiment_metrics[key][experiment_name] = all_metrics[key]

    # --- Compute ranking for each metric using paired t-tests ---
    ranking_df = pd.DataFrame(index=name_experiments, columns=list(metric_columns.values()))
    for key, column in metric_columns.items():
        rankings = get_rankings(per_experiment_metrics[key], alpha=alpha)
        for exp_name in name_experiments:
            ranking_df.loc[exp_name, column] = rankings[exp_name]

    # Optionally, sort ranking_df by one metric to see the ordering for that metric:
    # print(ranking_df.sort_values(by='N-Gram Distribution Distance'))

    return mean_results, results_df, ranking_df


## Loan Application

In [10]:
# Loan Application: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second. Only three approaches are compared for this dataset.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Loan_Application_extraneous/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Loan_Application_extraneous/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Loan_Application/tst_LoanApp.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Loan_Application'

# AgentSim (variables carry the MAS suffix)
# NOTE(review): 'LoanApp.csv' is a directory component in these paths — confirm
# it matches the on-disk layout.
PATH_TEST_LOG_MAS = '../simulated_data/LoanApp.csv/orchestrated/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/LoanApp.csv/orchestrated'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.15 (0.017),13.551 (1.838),0.402 (0.04),9.228 (1.667),20.425 (2.78)
DGEN,0.219 (0.014),212.278 (0.01),13.403 (0.001),5.262 (0.004),9.385 (0.005)
AgentSim,0.069 (0.013),2.971 (0.768),0.271 (0.042),1.656 (0.716),2.711 (0.993)


In [11]:
# Per-metric ranks for Loan Application (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,2,2,2,3,3
DGEN,3,3,3,2,2
AgentSim,1,1,1,1,1


## P2P

In [16]:
# P2P: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/P2P_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/P2P_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/P2P/tst_PurchasingExample.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/P2P'

# DSIM
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/P2P/tst_PurchasingExample.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/P2P'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/P2P/tst_PurchasingExample.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/P2P'

# AgentSim (variables carry the MAS suffix); uses the 'autonomous' output directory
PATH_TEST_LOG_MAS = '../simulated_data/P2P/autonomous/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/P2P/autonomous'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.415 (0.004),1044.251 (57.186),2.213 (0.181),840.194 (1.229),677.053 (1.782)
DGEN,0.2 (0.022),1481.46 (2.117),2.558 (0.083),828.093 (2.067),670.053 (4.139)
DSIM,0.222 (0.013),1310.037 (14.221),1.157 (0.103),722.334 (6.329),566.637 (8.125)
RIMS,0.22 (0.011),1266.295 (11.238),0.832 (0.123),727.771 (6.4),581.407 (13.848)
AgentSim,0.24 (0.022),1214.542 (12.513),1.216 (0.128),725.374 (11.773),558.359 (8.538)


In [17]:
# Per-metric ranks for P2P (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,3,1,3,3,4
DGEN,1,5,4,2,3
DSIM,2,4,2,1,1
RIMS,2,3,1,1,2
AgentSim,2,2,2,1,1


## CVS

In [20]:
# CVS pharmacy: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/cvs_pharmacy_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/cvs_pharmacy_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/CVS/tst_cvs_pharmacy.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/CVS'

# DSIM
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/CVS/tst_cvs_pharmacy.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/CVS'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/cvs_pharmacy/tst_cvs_pharmacy.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/cvs_pharmacy'

# AgentSim (variables carry the MAS suffix); uses the 'orchestrated' output directory
PATH_TEST_LOG_MAS = '../simulated_data/cvs_pharmacy/orchestrated/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/cvs_pharmacy/orchestrated'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.44 (0.002),52.947 (1.7),0.444 (0.061),39.432 (3.534),54.594 (4.119)
DGEN,0.219 (0.002),310.394 (0.012),11.699 (0.001),176.652 (0.012),294.214 (0.066)
DSIM,0.201 (0.003),36.237 (2.942),8.982 (0.051),19.743 (0.299),52.43 (0.744)
RIMS,0.443 (0.002),58.73 (1.719),8.879 (0.035),40.05 (0.747),28.262 (1.235)
AgentSim,0.101 (0.006),93.429 (1.436),7.627 (0.036),87.238 (1.388),107.488 (2.245)


In [21]:
# Per-metric ranks for CVS (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,4,2,1,2,2
DGEN,3,5,5,4,4
DSIM,2,1,4,1,2
RIMS,5,3,3,2,1
AgentSim,1,4,2,3,3


## Confidential 1000

In [22]:
# Confidential 1000: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Confidential_1000_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Confidential_1000_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Confidential_1000/tst_confidential_1000.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Confidential_1000'

# DSIM (directory name is lower-case here, unlike the DGEN/RIMS directories)
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/confidential_1000/tst_confidential_1000.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/confidential_1000'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Confidential_1000/tst_confidential_1000.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Confidential_1000'

# AgentSim (variables carry the MAS suffix); uses the 'autonomous' output directory
PATH_TEST_LOG_MAS = '../simulated_data/Confidential_1000/autonomous/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/Confidential_1000/autonomous'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.247 (0.006),344.486 (21.051),3.01 (0.138),468.818 (33.931),804.074 (56.599)
DGEN,0.581 (0.004),462.847 (0.111),18.934 (0.046),8.114 (0.102),13.929 (0.109)
DSIM,0.203 (0.005),246.41 (8.501),2.28 (0.302),5.342 (0.327),7.297 (0.307)
RIMS,0.251 (0.01),242.8 (9.335),2.097 (0.3),5.06 (0.858),7.385 (0.953)
AgentSim,0.252 (0.011),106.507 (11.425),3.126 (0.95),16.701 (7.696),28.126 (13.076)


In [23]:
# Per-metric ranks for Confidential 1000 (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,2,3,2,4,4
DGEN,3,4,3,2,2
DSIM,1,2,1,1,1
RIMS,2,2,1,1,1
AgentSim,2,1,2,3,3


## Confidential 2000

In [24]:
# Confidential 2000: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Confidential_2000_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Confidential_2000_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Confidential_2000/tst_confidential_2000.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Confidential_2000'

# DSIM (directory name is lower-case here, unlike the DGEN/RIMS directories)
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/confidential_2000/tst_confidential_2000.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/confidential_2000'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Confidential_2000/tst_confidential_2000.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Confidential_2000'

# AgentSim (variables carry the MAS suffix); uses the 'autonomous' output directory
PATH_TEST_LOG_MAS = '../simulated_data/Confidential_2000/autonomous/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/Confidential_2000/autonomous'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.246 (0.004),820.454 (48.137),2.968 (0.056),952.378 (85.529),1614.918 (135.639)
DGEN,0.161 (0.006),857.683 (0.279),18.092 (0.484),4.588 (0.228),8.12 (0.252)
DSIM,0.186 (0.008),591.136 (5.504),2.845 (0.14),1.701 (0.31),2.262 (0.429)
RIMS,0.277 (0.008),620.611 (5.44),2.815 (0.144),2.294 (0.822),2.329 (0.677)
AgentSim,0.246 (0.009),221.482 (9.555),2.84 (0.921),31.744 (9.264),52.424 (13.099)


In [25]:
# Per-metric ranks for Confidential 2000 (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,3,4,1,4,4
DGEN,1,5,2,2,2
DSIM,2,2,1,1,1
RIMS,4,3,1,1,1
AgentSim,3,1,1,3,3


## ACR

In [26]:
# ACR (ConsultaDataMining log): one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/ConsultaDataMining_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/ConsultaDataMining_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/ConsultaDataMining/tst_ConsultaDataMining201618.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/ConsultaDataMining'

# DSIM
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/ConsultaDataMining/tst_ConsultaDataMining201618.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/ConsultaDataMining'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/ConsultaDataMining/tst_ConsultaDataMining201618.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/ConsultaDataMining'

# AgentSim (variables carry the MAS suffix); uses the 'autonomous' output directory
PATH_TEST_LOG_MAS = '../simulated_data/ConsultaDataMining/autonomous/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/ConsultaDataMining/autonomous'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.239 (0.021),287.279 (29.414),2.602 (0.095),32.461 (0.821),93.512 (1.026)
DGEN,0.314 (0.029),559.675 (0.18),17.84 (0.756),30.875 (0.146),95.113 (0.235)
DSIM,0.26 (0.023),273.468 (8.714),4.644 (0.242),15.621 (2.787),48.246 (4.14)
RIMS,0.259 (0.022),241.947 (9.014),3.068 (0.075),18.549 (8.673),41.577 (10.298)
AgentSim,0.362 (0.014),328.714 (1.934),7.609 (0.218),22.911 (0.615),62.482 (1.995)


In [27]:
# Per-metric ranks for ACR (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,1,2,1,3,3
DGEN,2,4,5,2,4
DSIM,1,2,3,1,1
RIMS,1,1,2,1,1
AgentSim,3,3,4,1,2


## Production

In [29]:
# Production: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Production_simod/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/Production_simod/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Production/tst_Productions.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/Production'

# DSIM
# NOTE(review): this test log is 'tst_Production.csv' while DGEN and RIMS use
# 'tst_Productions.csv' — confirm the file name is not a typo.
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/Production/tst_Production.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/Production'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Production/tst_Productions.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/Production'

# AgentSim (variables carry the MAS suffix); uses the 'orchestrated' output directory
PATH_TEST_LOG_MAS = '../simulated_data/Production/orchestrated/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/Production/orchestrated'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.928 (0.008),146.381 (86.724),2.82 (0.178),83.88 (2.531),89.15 (5.52)
DGEN,0.528 (0.026),224.455 (10.539),9.3 (3.417),70.113 (10.478),90.82 (4.219)
DSIM,0.868 (0.011),154.319 (9.471),2.669 (0.172),33.308 (8.035),43.267 (8.669)
RIMS,0.872 (0.013),132.817 (12.977),2.73 (0.231),18.694 (5.413),24.711 (7.938)
AgentSim,0.598 (0.033),61.132 (8.601),5.703 (0.321),31.724 (6.581),29.953 (7.956)


In [30]:
# Per-metric ranks for Production (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,4,2,1,4,3
DGEN,1,3,3,3,3
DSIM,3,2,1,2,2
RIMS,3,2,1,1,1
AgentSim,2,1,2,2,1


## BPIC12W

In [32]:
# BPIC 2012 W: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/BPIC_2012W_extraneous/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/BPIC_2012W_extraneous/best_result/evaluation'

# DGEN
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/BPIC_2012W/tst_BPI_Challenge_2012_W_Two_TS.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/BPIC_2012W'

# DSIM
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/BPI_Challenge_2012_W_Two_TS/tst_BPI_Challenge_2012_W_Two_TS.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/BPI_Challenge_2012_W_Two_TS'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/BPIC_2012_W/tst_BPI_Challenge_2012_W_Two_TS.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/BPIC_2012_W'

# AgentSim (variables carry the MAS suffix); uses the 'orchestrated' output directory
PATH_TEST_LOG_MAS = '../simulated_data/BPIC_2012_W/orchestrated/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/BPIC_2012_W/orchestrated'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.72 (0.003),71.977 (9.781),1.716 (0.089),95.72 (1.692),155.467 (1.737)
DGEN,0.435 (0.008),306.28 (0.925),4.534 (0.223),116.188 (0.797),176.794 (0.732)
DSIM,0.655 (0.005),78.625 (6.76),2.887 (0.05),119.126 (1.019),173.491 (1.028)
RIMS,0.558 (0.004),73.821 (7.045),3.008 (0.143),99.124 (1.092),150.927 (1.67)
AgentSim,0.151 (0.008),115.445 (9.301),1.847 (0.039),54.853 (6.129),89.069 (3.72)


In [33]:
# Per-metric ranks for BPIC 2012 W (1 = best; statistically tied approaches share a rank).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,5,1,1,2,3
DGEN,2,3,4,4,5
DSIM,4,1,3,5,4
RIMS,3,1,3,3,2
AgentSim,1,2,2,1,1


## BPIC17W

In [34]:
# BPIC 2017 W: one [test-log CSV, simulated-log directory] pair per approach.
# main_ reads the test log from the first path and the simulated_log_<i>.csv
# files from the second.

# Simod
PATH_TEST_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/BPIC_2017_W_extraneous/best_result/evaluation/test_log.csv'
PATH_SIMULATED_LOG_SIMOD = '../simulated_data/process_science_data/Evaluation_1/simod/BPIC_2017_W_extraneous/best_result/evaluation'

# DGEN
# NOTE(review): the DGEN test log is 'test_log.csv' here, whereas the other
# datasets use a 'tst_*.csv' file for DGEN — confirm this is the intended file.
PATH_TEST_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/BPIC_2017W/test_log.csv'
PATH_SIMULATED_LOG_DGEN = '../simulated_data/process_science_data/Evaluation_1/deep_generator/BPIC_2017W'

# DSIM
PATH_TEST_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/BPI_Challenge_2017_W_Two_TS/tst_BPI_Challenge_2017_W_Two_TS.csv'
PATH_SIMULATED_LOG_DSIM = '../simulated_data/process_science_data/Evaluation_1/deep_simulator/BPI_Challenge_2017_W_Two_TS'

# RIMS
PATH_TEST_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/BPIC_2017_W/tst_BPI_Challenge_2017_W_Two_TS.csv'
PATH_SIMULATED_LOG_RIMS = '../simulated_data/process_science_data/Evaluation_1/rims/BPIC_2017_W'

# AgentSim (variables carry the MAS suffix); uses the 'orchestrated' output directory
PATH_TEST_LOG_MAS = '../simulated_data/BPIC_2017_W/orchestrated/test_preprocessed.csv'
PATH_SIMULATED_LOG_MAS = '../simulated_data/BPIC_2017_W/orchestrated'

# Entry order must match name_experiments below; main_ pairs them by position.
log_paths = [
    [PATH_TEST_LOG_SIMOD, PATH_SIMULATED_LOG_SIMOD],
    [PATH_TEST_LOG_DGEN, PATH_SIMULATED_LOG_DGEN],
    [PATH_TEST_LOG_DSIM, PATH_SIMULATED_LOG_DSIM],
    [PATH_TEST_LOG_RIMS, PATH_SIMULATED_LOG_RIMS],
    [PATH_TEST_LOG_MAS, PATH_SIMULATED_LOG_MAS],
    
]

name_experiments = ['Simod', 'DGEN', 'DSIM', 'RIMS', 'AgentSim']

# Run the evaluation; the last expression displays the "mean (std)" table.
mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,0.591 (0.002),300.28 (9.387),3.342 (0.018),136.63 (0.892),148.403 (1.765)
DGEN,0.671 (0.0),4557.193 (0.0),3.396 (0.0),118.848 (0.0),172.94 (0.0)
DSIM,0.536 (0.006),54.613 (4.043),3.346 (0.078),33.106 (2.571),30.266 (2.436)
RIMS,0.688 (0.002),122.315 (11.531),3.726 (0.151),54.431 (1.54),108.655 (1.575)
AgentSim,0.195 (0.003),218.429 (2.578),2.39 (0.024),40.69 (1.273),41.06 (2.178)


In [35]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
Simod,3,4,2,5,4
DGEN,4,5,2,4,5
DSIM,2,1,2,1,1
RIMS,5,2,3,3,3
AgentSim,1,3,1,2,2


In [37]:
# Hand-entered tallies per approach; the notebook does not show how they were derived.
# NOTE(review): presumably counted from the ranking tables — confirm, and ideally compute them from ranking_df.
agent_sim = sum((5, 2, 1, 1, 2, 1, 2, 3, 2))
simod = 8
dgen = 3
dsim = 19
rims = 17

# Only the first two tallies are echoed.
print(agent_sim)
print(simod)

19
8


# Evaluation 2

## LoanApp

In [40]:
# Evaluation 2 / LoanApp: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_LoanApp = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/LoanApp/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/LoanApp/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/LoanApp/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/LoanApp/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/LoanApp/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/LoanApp/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/LoanApp/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_LoanApp, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.069 (0.013),2.971 (0.768),0.271 (0.042),1.656 (0.716),2.711 (0.993)
FP Auto,0.067 (0.015),3.113 (0.994),0.224 (0.036),2.557 (0.588),3.752 (0.799)
LSTM Orch,0.08 (0.014),3.043 (0.876),0.243 (0.048),2.057 (0.651),2.894 (0.869)
LSTM Auto,0.139 (0.016),3.478 (0.999),0.234 (0.032),2.719 (0.614),3.455 (0.978)
PN Orch,0.057 (0.011),2.484 (0.351),0.233 (0.034),1.326 (0.316),1.795 (0.561)
PN Auto,0.649 (0.012),9.056 (2.3),0.239 (0.059),5.196 (1.492),2.856 (0.597)


In [41]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,1,1,1,2
FP Auto,1,1,1,1,2
LSTM Orch,1,1,1,1,2
LSTM Auto,2,1,1,1,2
PN Orch,1,1,1,1,1
PN Auto,3,2,1,2,2


## P2P

In [42]:
# Evaluation 2 / P2P: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_P2P = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/P2P/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/P2P/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/P2P/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/P2P/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/P2P/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/P2P/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/P2P/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_P2P, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.246 (0.036),1227.25 (16.525),1.25 (0.086),733.358 (15.427),566.291 (11.556)
FP Auto,0.24 (0.022),1214.542 (12.513),1.216 (0.128),725.374 (11.773),558.359 (8.538)
LSTM Orch,0.306 (0.019),1222.752 (5.22),1.422 (0.181),730.29 (5.599),574.817 (6.279)
LSTM Auto,0.299 (0.013),1193.583 (6.319),1.134 (0.134),727.042 (6.706),559.332 (6.263)
PN Orch,0.537 (0.02),1118.416 (14.504),1.597 (0.163),661.06 (12.76),490.121 (14.374)
PN Auto,0.899 (0.013),1295.878 (12.109),1.796 (0.277),827.481 (9.202),664.956 (7.958)


In [43]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,3,1,2,2
FP Auto,1,3,1,2,2
LSTM Orch,2,3,1,2,2
LSTM Auto,2,2,1,2,2
PN Orch,3,1,1,1,1
PN Auto,4,4,1,3,3


## CVS

In [55]:
# Evaluation 2 / CVS: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder (named cvs_pharmacy there).
PATH_TEST_LOG_CVS = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/cvs_pharmacy/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/CVS/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/CVS/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/CVS/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/CVS/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/CVS/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/CVS/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_CVS, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.101 (0.006),93.429 (1.436),7.627 (0.036),87.238 (1.388),107.488 (2.245)
FP Auto,0.097 (0.002),97.755 (1.236),7.58 (0.036),94.679 (1.294),120.111 (2.242)
LSTM Orch,0.101 (0.006),100.745 (1.361),7.588 (0.037),94.482 (1.5),119.236 (1.991)
LSTM Auto,0.113 (0.004),98.176 (0.972),7.564 (0.028),90.465 (0.981),113.823 (2.225)
PN Orch,0.28 (0.004),95.039 (1.061),7.543 (0.032),89.6 (1.001),110.82 (1.728)
PN Auto,0.852 (0.002),117.977 (1.384),7.571 (0.026),110.996 (1.241),196.818 (2.618)


In [None]:
# Show the ranking table (third value returned by the preceding main_ call).
# NOTE(review): this cell is saved without an execution count or output — re-run it to record the CVS ranking.
ranking_df

## Confidential 1000

In [44]:
# Evaluation 2 / Confidential 1000: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_CONFIDENTIAL_1000 = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/Confidential_1000/test_preprocessed.csv'

# Simulated-log location per variant
# NOTE(review): FP_orch uses 'Confidential_1000' while the other five use 'CONFIDENTIAL_1000'.
# These only point into the same folder on a case-insensitive filesystem — confirm the on-disk
# name and make the six paths consistent.
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/Confidential_1000/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_1000/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_1000/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_1000/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_1000/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_1000/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_CONFIDENTIAL_1000, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.242 (0.007),115.069 (13.918),2.963 (0.843),30.351 (14.941),46.371 (21.127)
FP Auto,0.252 (0.011),106.507 (11.425),3.126 (0.95),16.701 (7.696),28.126 (13.076)
LSTM Orch,0.241 (0.01),118.163 (9.407),2.509 (0.284),25.427 (8.917),41.335 (14.375)
LSTM Auto,0.295 (0.006),116.068 (14.531),3.018 (0.267),26.378 (11.589),42.484 (17.143)
PN Orch,0.565 (0.004),126.402 (5.279),1.925 (0.088),8.0 (2.089),12.343 (2.445)
PN Auto,0.804 (0.004),152.964 (8.996),6.032 (1.136),9.485 (0.517),7.563 (0.637)


In [45]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,1,2,3,4
FP Auto,2,1,2,2,3
LSTM Orch,1,1,2,3,4
LSTM Auto,3,1,2,3,4
PN Orch,4,1,1,1,2
PN Auto,5,2,3,1,1


## Confidential 2000

In [46]:
# Evaluation 2 / Confidential 2000: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_CONFIDENTIAL_2000 = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/Confidential_2000/test_preprocessed.csv'

# Simulated-log location per variant
# NOTE(review): FP_orch uses 'Confidential_2000' while the other five use 'CONFIDENTIAL_2000'.
# These only point into the same folder on a case-insensitive filesystem — confirm the on-disk
# name and make the six paths consistent.
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/Confidential_2000/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_2000/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_2000/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_2000/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_2000/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/CONFIDENTIAL_2000/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_CONFIDENTIAL_2000, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.242 (0.007),182.629 (10.207),3.176 (1.093),32.014 (5.981),52.764 (8.492)
FP Auto,0.246 (0.009),221.482 (9.555),2.84 (0.921),31.744 (9.264),52.424 (13.099)
LSTM Orch,0.289 (0.011),195.661 (5.46),2.775 (1.006),14.751 (5.681),26.185 (8.915)
LSTM Auto,0.279 (0.007),202.001 (17.626),2.909 (1.046),46.142 (18.69),78.79 (26.707)
PN Orch,0.546 (0.005),236.134 (7.541),1.606 (0.067),6.404 (0.838),10.225 (1.163)
PN Auto,0.815 (0.005),176.742 (10.985),2.163 (0.124),11.252 (1.434),13.737 (0.884)


In [47]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,1,2,3,4
FP Auto,1,3,2,3,4
LSTM Orch,3,2,2,2,3
LSTM Auto,2,2,2,3,5
PN Orch,4,4,1,1,1
PN Auto,5,1,2,2,2


## ACR

In [38]:
# Evaluation 2 / ACR: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_ACR = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/ACR/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/ACR/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/ACR/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/ACR/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/ACR/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/ACR/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/ACR/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_ACR, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.388 (0.015),320.554 (1.487),6.363 (0.174),22.878 (0.975),69.671 (1.0)
FP Auto,0.362 (0.014),328.714 (1.934),7.609 (0.218),22.911 (0.615),62.482 (1.995)
LSTM Orch,0.404 (0.015),278.781 (1.978),6.048 (0.28),24.514 (0.981),71.675 (2.101)
LSTM Auto,0.471 (0.025),315.989 (3.832),6.942 (0.29),23.014 (1.584),67.894 (2.221)
PN Orch,0.746 (0.01),346.588 (3.61),5.863 (0.338),26.501 (0.388),76.632 (0.503)
PN Auto,0.76 (0.014),328.956 (2.196),5.085 (0.219),25.444 (1.324),74.638 (1.198)


In [39]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,2,3,3,1,3
FP Auto,1,4,5,1,1
LSTM Orch,2,1,2,1,3
LSTM Auto,3,2,4,1,2
PN Orch,4,5,2,2,5
PN Auto,4,4,1,1,4


## Production

In [48]:
# Evaluation 2 / Production: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_Production = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/Production/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/Production/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/Production/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/Production/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/Production/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/Production/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/Production/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_Production, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.598 (0.033),61.132 (8.601),5.703 (0.321),31.724 (6.581),29.953 (7.956)
FP Auto,0.677 (0.073),62.837 (13.754),5.616 (0.554),28.122 (17.605),38.192 (17.159)
LSTM Orch,0.57 (0.019),100.853 (11.051),5.483 (0.738),34.618 (9.813),24.8 (9.469)
LSTM Auto,0.826 (0.022),81.037 (11.971),5.961 (0.783),26.952 (5.655),20.032 (7.286)
PN Orch,0.866 (0.081),4607.41 (5986.036),6.078 (0.557),2469.697 (3516.625),2170.932 (3074.013)
PN Auto,0.733 (0.022),92.835 (7.505),5.989 (0.419),47.648 (5.541),64.486 (4.077)


In [49]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,1,1,1,1
FP Auto,2,1,1,1,1
LSTM Orch,1,2,1,1,1
LSTM Auto,4,2,1,1,1
PN Orch,4,2,1,1,1
PN Auto,3,2,1,2,2


## BPIC12W

In [50]:
# Evaluation 2 / BPIC_2012W: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_BPIC_2012W = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/BPIC_2012W/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2012W/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_BPIC_2012W, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.151 (0.008),115.445 (9.301),1.847 (0.039),54.853 (6.129),89.069 (3.72)
FP Auto,0.157 (0.005),103.376 (13.237),1.872 (0.066),63.068 (9.648),96.791 (3.266)
LSTM Orch,0.166 (0.007),77.206 (8.512),1.831 (0.094),41.02 (6.838),72.984 (4.12)
LSTM Auto,0.277 (0.019),206.02 (34.72),1.798 (0.059),160.213 (35.587),114.262 (11.522)
PN Orch,0.256 (0.01),141.105 (15.43),2.09 (0.063),93.301 (12.73),92.72 (4.721)
PN Auto,0.302 (0.007),80.95 (7.652),1.966 (0.071),79.483 (4.223),115.238 (2.889)


In [51]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,1,3,1,2,2
FP Auto,1,2,1,2,3
LSTM Orch,2,1,1,1,1
LSTM Auto,4,5,1,5,4
PN Orch,3,4,3,4,2
PN Auto,5,1,2,3,4


## BPIC17W

In [52]:
# Evaluation 2 / BPIC_2017W: one shared test log, six simulated-log locations (FP/LSTM/PN x orch/auto).
# The test log is read from the Evaluation_1 agent_simulator folder.
PATH_TEST_LOG_BPIC_2017W = '../simulated_data/process_science_data/Evaluation_1/agent_simulator/BPIC_2017W/test_preprocessed.csv'

# Simulated-log location per variant
PATH_SIMULATED_LOG_FP_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/FP_orch'
PATH_SIMULATED_LOG_FP_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/FP_auto'
PATH_SIMULATED_LOG_LSTM_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/LSTM_orch'
PATH_SIMULATED_LOG_LSTM_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/LSTM_auto'
PATH_SIMULATED_LOG_PN_ORCH = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/PN_orch'
PATH_SIMULATED_LOG_PN_AUTO = '../simulated_data/process_science_data/Evaluation_2/BPIC_2017W/PN_auto'

# Labels and [test log, simulated location] pairs are listed in the same order.
name_experiments = ['FP Orch', 'FP Auto', 'LSTM Orch', 'LSTM Auto', 'PN Orch', 'PN Auto']
log_paths = [
    [PATH_TEST_LOG_BPIC_2017W, simulated_location]
    for simulated_location in (
        PATH_SIMULATED_LOG_FP_ORCH,
        PATH_SIMULATED_LOG_FP_AUTO,
        PATH_SIMULATED_LOG_LSTM_ORCH,
        PATH_SIMULATED_LOG_LSTM_AUTO,
        PATH_SIMULATED_LOG_PN_ORCH,
        PATH_SIMULATED_LOG_PN_AUTO,
    )
]

mean_results, results_df, ranking_df = main_(log_paths, name_experiments)
results_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,0.195 (0.003),218.429 (2.578),2.39 (0.024),40.69 (1.273),41.06 (2.178)
FP Auto,0.221 (0.004),238.303 (4.619),2.415 (0.014),47.704 (1.757),44.648 (1.974)
LSTM Orch,0.188 (0.003),233.481 (3.087),2.387 (0.012),37.585 (1.245),34.832 (1.432)
LSTM Auto,0.185 (0.002),241.444 (1.94),2.392 (0.017),50.825 (2.009),48.335 (2.343)
PN Orch,0.353 (0.003),171.904 (3.449),2.4 (0.024),44.755 (2.568),56.756 (2.668)
PN Auto,0.233 (0.002),180.37 (2.78),2.374 (0.019),29.075 (2.602),28.768 (1.907)


In [53]:
# Show the ranking table (third value returned by the preceding main_ call).
ranking_df

Unnamed: 0,N-Gram Distribution Distance,Absolute Event Distribution Distance,Circadian Event Distribution Distance,Relative Event Distribution Distance,Cycle Time Distribution Distance
FP Orch,3,3,1,3,3
FP Auto,4,4,1,5,4
LSTM Orch,2,4,1,2,2
LSTM Auto,1,4,1,6,5
PN Orch,6,1,1,4,6
PN Auto,5,2,1,1,1
