In [1]:
import os

import pandas as pd

from aaai_experiments import *

In [2]:
# Folder that holds the dataset files. Set the DATASETS_DIR environment variable
# to run the notebook on another machine; the default is the original location.
PATH = os.environ.get('DATASETS_DIR', r'C:\Users\User\Jupyter Projects\Research_2\Datasets')
# Later cells refer to files by bare name (e.g. 'bpi_2012.csv'); presumably they
# are resolved relative to the working directory, hence the chdir.
os.chdir(PATH)

## A*, A* incremental, and REACH evaluations

In [3]:
# Arguments forwarded to `load_and_preprocess_log`.
df_name = 'bpi_2012.csv'
random_seed = 304
n_traces = None
# NOTE(review): None presumably means "no bound" -- confirm against the loader.
min_len, max_len = 80, None

In [4]:
# Load the log; `map_dict` is later handed on as `activity_mapping_dict`.
df, map_dict = load_and_preprocess_log(
    df_name,
    min_len=min_len,
    max_len=max_len,
    n_traces=n_traces,
    random_seed=random_seed,
)

In [5]:
# Arguments forwarded to `compare_search_algorithms`.

# Trace selection
n_train_traces = 10
n_test_traces = None
allow_intersection = False

# Model handling
read_model_from_file = False
model_path = 'prAm6.txt'
only_return_model = False
activity_mapping_dict = map_dict  # mapping produced by `load_and_preprocess_log`

# Search configuration
algorithms = ['astar_extended']
time_limit = 10

In [7]:
# Collect the keyword arguments first, then run the comparison in one call.
_search_kwargs = dict(
    df=df,
    df_name=df_name,
    n_train_traces=n_train_traces,
    n_test_traces=n_test_traces,
    allow_intersection=allow_intersection,
    read_model_from_file=read_model_from_file,
    model_path=model_path,
    activity_mapping_dict=activity_mapping_dict,
    only_return_model=only_return_model,
    algorithms=algorithms,
    time_limit=time_limit,
)
compare_search_algorithms(**_search_kwargs)

## Running Horizon (+ heuristics) evaluations

In [3]:
# Arguments forwarded to `compare_window_based_baselines`.

# Input log and model
df_name = 'bpi_2012.csv'
model_path = ''
read_model_from_file = False

# Trace selection
n_train_traces, n_test_traces = 10, None
min_len, max_len = 80, None
allow_intersection = True

# Values to evaluate (one list per swept parameter)
window_lengths_lst = [15, 30, 50]
n_final_markings_lst = [1]
use_heuristics = True

In [5]:
# Collect the keyword arguments first, then run the window-based comparison.
_window_kwargs = dict(
    df_name=df_name,
    n_train_traces=n_train_traces,
    n_test_traces=n_test_traces,
    window_lengths_lst=window_lengths_lst,
    n_final_markings_lst=n_final_markings_lst,
    min_len=min_len,
    max_len=max_len,
    use_heuristics=use_heuristics,
    read_model_from_file=read_model_from_file,
    model_path=model_path,
    allow_intersection=allow_intersection,
)
res = compare_window_based_baselines(**_window_kwargs)

## Video dataset evaluations

### Our approach

In [1]:
# Settings shared by every video dataset in the loop that follows.
random_seed = 101
n_train_traces, n_test_traces = 10, None
allow_intersection = False
use_heuristics = True
window_lengths_lst = [20]
n_final_markings_lst = [1]

In [4]:
# Folder containing the pickled video datasets, and the files to evaluate.
df_path = r'C:\Users\User\Jupyter Projects\Research_2\Datasets\Long_Traces'
df_names = ['gtea_target.pkl', '50salads_target.pkl', 'breakfast_target.pkl']

# Number of training traces to use for each dataset file.
dataset_params = {
    'gtea_target.pkl': 10,
    '50salads_target.pkl': 10,
    'breakfast_target.pkl': 15,
}

for df_name in df_names:
    print(f"Processing dataset: {df_name}")

    # Load this dataset and look up its training-trace count.
    pickle_path = os.path.join(df_path, df_name)
    df = process_pickle_file(pickle_path)
    n_train_traces = dataset_params[df_name]

    # NOTE(review): the loaded DataFrame is passed through the `df_name` keyword
    # here, whereas the BPI cell passes a file name -- confirm the function
    # accepts both.
    _window_kwargs = dict(
        df_name=df,
        n_train_traces=n_train_traces,
        n_test_traces=n_test_traces,
        window_lengths_lst=window_lengths_lst,
        n_final_markings_lst=n_final_markings_lst,
        use_heuristics=use_heuristics,
        allow_intersection=allow_intersection,
        random_seed=random_seed,
    )
    res = compare_window_based_baselines(**_window_kwargs)

    # Results go next to the datasets, named after the text before the first '_'.
    result_save_path = os.path.join(df_path, f"{df_name.split('_')[0]}_results.csv")
    save_results(res, result_save_path)
    print(f"Results saved to {result_save_path}")

### Baselines

In [2]:
# Arguments forwarded to `compare_search_algorithms` for the baseline runs.
algorithms = ['astar', 'astar_extended', 'reach']
n_test_traces = None
allow_intersection = True
read_model_from_file = False
only_return_model = False
return_results = True

In [1]:
# Folder containing the pickled video datasets, the files to evaluate, and the
# folder that receives the per-algorithm result files.
df_path = r'C:\Users\User\Jupyter Projects\Research_2\Datasets\Long_Traces'
df_names = ['gtea_target.pkl', '50salads_target.pkl', 'breakfast_target.pkl']
save_path = r'C:\path\to\save\directory'  # Update with your actual save path

# Number of training traces to use for each dataset file.
dataset_params = {
    'gtea_target.pkl': 10,
    '50salads_target.pkl': 10,
    'breakfast_target.pkl': 15,
}

# Time limit handed to `compare_search_algorithms`; it is the same for every
# dataset, so it is set once instead of on every loop iteration.
time_limit = 120

# Create the output folder before any search runs, so a missing directory
# cannot make `to_csv` fail after the searches have already completed.
os.makedirs(save_path, exist_ok=True)

# Loop over each dataset
for df_name in df_names:
    print(f"Processing dataset: {df_name}")

    # Load this dataset and look up its training-trace count.
    df = process_pickle_file(os.path.join(df_path, df_name))
    n_train_traces = dataset_params[df_name]

    # With return_results set by the parameter cell, the call's return value is
    # iterated below as a mapping of algorithm name -> results.
    results = compare_search_algorithms(
        df=df,
        df_name=df_name,
        n_train_traces=n_train_traces,
        n_test_traces=n_test_traces,
        allow_intersection=allow_intersection,
        read_model_from_file=read_model_from_file,
        only_return_model=only_return_model,
        algorithms=algorithms,
        time_limit=time_limit,
        return_results=return_results
    )

    # Save each algorithm's results to a separate CSV file
    for alg, alg_results in results.items():
        result_file = os.path.join(save_path, f"{df_name.split('_')[0]}_{alg}_results.csv")

        # how='any' drops every row that contains at least one missing value.
        # NOTE(review): if only completely empty rows should be discarded,
        # this needs how='all' instead -- confirm the intent.
        results_df = pd.DataFrame(alg_results).dropna(how='any')
        results_df.to_csv(result_file, index=False)

        print(f"Results for algorithm '{alg}' saved to {result_file}")