## Grid Objectives
Iterating between min and max for each column

### Glossary
- **task**: Refers to the set of values (row) and corresponding keys to be aimed at sequentially.
- **objective**: Refers to one key (column) and respective value to be aimed at simultaneously during a task.
- **experiment**: Refers to one file containing a multiple of objectives and tasks for a fixed number of each, respectively. 

In [1]:
import itertools
import json
import numpy as np
import os
import pandas as pd

In [2]:
#Features between 0 and 1: 
normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_hist1', 'trace_len_hist2',
                            'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',
                            'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', 
                            'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', 
                            'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants', 
                            'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']

normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 
                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']

def abbrev_obj_keys(obj_keys):
    abbreviated_keys = []
    for obj_key in obj_keys:
        key_slices = obj_key.split("_")
        chars = []
        for key_slice in key_slices:
            for idx, single_char in enumerate(key_slice):
                if idx == 0 or single_char.isdigit():
                    chars.append(single_char)
        abbreviated_key = ''.join(chars)
        abbreviated_keys.append(abbreviated_key)
    return '_'.join(abbreviated_keys) 

In [3]:
def write_generator_experiment(experiment_path, objectives=["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]):
    first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
    second_dir = first_dir.replace("grid_","").replace("objectives","")

    experiment = [
      {
        'pipeline_step': 'event_logs_generation',
        'output_path':'output/generated/grid_2obj',
        'generator_params': {
          "experiment": {"input_path": experiment_path[3:],
            "objectives": objectives},
          'config_space': {
            'mode': [5, 20],
            'sequence': [0.01, 1],
            'choice': [0.01, 1],
            'parallel': [0.01, 1],
            'loop': [0.01, 1],
            'silent': [0.01, 1],
            'lt_dependency': [0.01, 1],
            'num_traces': [10, 10001],
            'duplicate': [0],
            'or': [0]
          },
          'n_trials': 200
        }
      },
      {
        'pipeline_step': 'feature_extraction',
        'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),
        'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
        'output_path': 'output/plots',
        'real_eventlog_path': 'data/BaselineED_feat.csv',
        'plot_type': 'boxplot'
  }
    ]

    #print("EXPERIMENT:", experiment[1]['input_path'])
    output_path = os.path.join('..', 'config_files','algorithm','grid_2obj')
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 
    with open(output_path, 'w') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")
    
    return experiment

def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    if n_para_obj==1:
        experiments = [[exp] for exp in objectives]
    else:
        experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments), experiments)
    
    parameters = "np.around(np.arange(0, 1.1,0.1),2), "
    tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]
    print(len(tasks))
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        experiment_path = os.path.join('..','data', 'grid_2obj')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv") 
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_generator_experiment(experiment_path, objectives=exp)
    #df.to_csv(f"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv" ,index=False)
        
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        
print(exp_test)

21 [('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_variants_per_number_of_traces'), ('ratio_most_common_variant', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('ratio_top_10_variants', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_expon

### Helper prototypes

In [4]:
df = pd.DataFrame(columns=["log","ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"])    

In [5]:
k=0
for i in np.arange(0, 1.1,0.2):
    for j in np.arange(0,0.55,0.1):
        k+=1
        new_entry = pd.Series({'log':f"objective_{k}", "ratio_top_20_variants":round(i,1),
                   "epa_normalized_sequence_entropy_linear_forgetting":round(j,1)})
        df = pd.concat([
                df, 
                pd.DataFrame([new_entry], columns=new_entry.index)]
           ).reset_index(drop=True)
    

In [6]:
df.to_csv("../data/grid_objectives.csv" ,index=False)

In [7]:
df

Unnamed: 0,log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
0,objective_1,0.0,0.0
1,objective_2,0.0,0.1
2,objective_3,0.0,0.2
3,objective_4,0.0,0.3
4,objective_5,0.0,0.4
5,objective_6,0.0,0.5
6,objective_7,0.2,0.0
7,objective_8,0.2,0.1
8,objective_9,0.2,0.2
9,objective_10,0.2,0.3


## Objectives from real logs
(Feature selection)

In [8]:
bpic_features = pd.read_csv("../data/BaselineED_feat.csv", index_col=None)
#bpic_features = pd.read_csv("../gedi/output/features/real_event_logs.csv", index_col=None)

#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)
print(bpic_features.shape)
print(len(bpic_features), " Event-Logs: ", bpic_features.sort_values('log')['log'].unique())

#bpic_features.rename(columns={"variant_entropy":"epa_variant_entropy", "normalized_variant_entropy":"epa_normalized_variant_entropy", "sequence_entropy":"epa_sequence_entropy", "normalized_sequence_entropy":"epa_normalized_sequence_entropy", "sequence_entropy_linear_forgetting":"epa_sequence_entropy_linear_forgetting", "normalized_sequence_entropy_linear_forgetting":"epa_normalized_sequence_entropy_linear_forgetting", "sequence_entropy_exponential_forgetting":"epa_sequence_entropy_exponential_forgetting", "normalized_sequence_entropy_exponential_forgetting":"epa_normalized_sequence_entropy_exponential_forgetting"},
#          errors="raise", inplace=True)

bpic_features.head()
#bpic_features.to_csv("../data/BaselineED_feat.csv", index=False)

(34, 178)
34  Event-Logs:  ['BPI2016_Complaints' 'BPI2016_Questions' 'BPI2016_Werkmap_Messages'
 'BPIC15_1' 'BPIC15_2' 'BPIC15_3' 'BPIC15_4' 'BPIC15_5'
 'BPI_Challenge_2012' 'BPI_Challenge_2013_closed_problems'
 'BPI_Challenge_2013_incidents' 'BPI_Challenge_2013_open_problems'
 'BPI_Challenge_2017' 'BPI_Challenge_2017_Offer_log' 'BPI_Challenge_2018'
 'BPI_Challenge_2019' 'CoSeLoG_WABO_1' 'CoSeLoG_WABO_2' 'CoSeLoG_WABO_3'
 'CoSeLoG_WABO_4' 'CoSeLoG_WABO_5' 'Detail_Change'
 'Detail_Incident_Activity' 'Detail_Interaction' 'DomesticDeclarations'
 'Hospital_log' 'InternationalDeclarations' 'PermitLog'
 'PrepaidTravelCost' 'Receipt_WABO_CoSeLoG' 'RequestForPayment'
 'Road_Traffic_Fine_Management_Process' 'Sepsis_Cases_Event_Log' 'finale']


Unnamed: 0,log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,...,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
0,BPIC15_2,832,828,0.995192,1,132,53.310096,54.0,61,19.894977,...,,,,,,,,,,
1,BPI_Challenge_2018,43809,28457,0.64957,24,2973,57.391541,49.0,49,34.872131,...,,,,,,,,,,
2,Receipt_WABO_CoSeLoG,1434,116,0.080893,1,25,5.981172,6.0,6,2.166129,...,,,,,,,,,,
3,BPIC15_3,1409,1349,0.957417,3,124,42.356991,42.0,44,16.138406,...,,,,,,,,,,
4,BPI_Challenge_2019,251734,11973,0.047562,1,990,6.33972,5.0,5,13.057417,...,,,,,,,,,,


In [9]:
bpic_stats = bpic_features.describe().transpose()
normalized_feature_names = bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() 
normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 
                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']
print(normalized_feature_names)
bpic_features[['log']+normalized_feature_names]

['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']


KeyError: "['ratio_variants_per_number_of_traces'] not in index"

In [None]:
#Features between 0 and 1: 
def write_generator_bpic_experiment(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    for exp in experiments:
        experiment_path = os.path.join('..','data', 'BaselineED_feat')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv") 


        first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
        second_dir = first_dir.replace("grid_","").replace("objectives","")

        experiment = [
          {
            'pipeline_step': 'event_logs_generation',
            'output_path':'output/generated',
            'generator_params': {
              "experiment": {"input_path": "data/BaselineED_feat.csv",
                "objectives": exp},
              'config_space': {
                'mode': [5, 20],
                'sequence': [0.01, 1],
                'choice': [0.01, 1],
                'parallel': [0.01, 1],
                'loop': [0.01, 1],
                'silent': [0.01, 1],
                'lt_dependency': [0.01, 1],
                'num_traces': [10, 10001],
                'duplicate': [0],
                'or': [0]
              },
              'n_trials': 200
            }
          },
          {
            'pipeline_step': 'feature_extraction',
            'input_path': os.path.join('output', 'features', 'generated', 'BaselineED_feat', second_dir),
            'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
            'output_path': 'output/plots',
            'real_eventlog_path': 'data/BaselineED_feat.csv',
            'plot_type': 'boxplot'
      }
        ]

        output_path = os.path.join('..', 'config_files','algorithm','BaselineED_feat')
        os.makedirs(output_path, exist_ok=True)
        output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 

        with open(output_path, 'w') as f:
            json.dump(experiment, f, ensure_ascii=False)
        print(f"Saved experiment config in {output_path}")
        return experiment


def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments))
    
    for exp in experiments:
        write_generator_bpic_experiment(objectives=exp)
        
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        
print(exp_test)

## Single objective from real logs
(Feature selection)

In [None]:
def write_single_objective_experiment(experiment_path, objectives=["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]):
    first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
    second_dir = first_dir.replace("grid_","").replace("objectives","")

    experiment = [
      {
        'pipeline_step': 'event_logs_generation',
        'output_path':os.path.join('output','generated', 'grid_1obj'),
        'generator_params': {
          "experiment": {"input_path": experiment_path[3:],
            "objectives": objectives},
          'config_space': {
            'mode': [5, 20],
            'sequence': [0.01, 1],
            'choice': [0.01, 1],
            'parallel': [0.01, 1],
            'loop': [0.01, 1],
            'silent': [0.01, 1],
            'lt_dependency': [0.01, 1],
            'num_traces': [10, 10001],
            'duplicate': [0],
            'or': [0]
          },
          'n_trials': 200
        }
      },
      {
        'pipeline_step': 'feature_extraction',
        'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),
        'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
        'output_path': 'output/plots',
        'real_eventlog_path': 'data/BaselineED_feat.csv',
        'plot_type': 'boxplot'
  }
    ]

    #print("EXPERIMENT:", experiment)
    output_path = os.path.join('..', 'config_files','algorithm','grid_experiments')
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 
    with open(output_path, 'w') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")
    
    return experiment

def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    if n_para_obj==1:
        experiments = [[exp] for exp in objectives]
    else:
        experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments), "experiments: ", experiments)
    
    parameters = "np.around(np.arange(0, 1.1,0.1),2), "
    tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]
    print(len(tasks))
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        experiment_path = os.path.join('..','data', 'grid_experiments')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv") 
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_single_objective_experiment(experiment_path, objectives=exp)
    #df.to_csv(f"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv" ,index=False)
        
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1)        
print(exp_test)