# Pipeline

imports

In [None]:
import csv
import datetime
import json
import os
import random
import shutil
import time
from itertools import product
from pathlib import Path

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [None]:
import sys

# Show the interpreter search path as it is before this cell touches it.
print(sys.path)

# Make the `src` directory below the current working directory importable.
src_path = os.path.join(os.getcwd(), 'src')
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

In [None]:
# Enable the IPython autoreload extension; mode 2 reloads imported modules
# before each cell execution, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from src.log_stats_calculation import *
from src.simulators.param_manipulation import *
from src.sim_execution_and_evalaution import *
from src.logging import log_simulation, initialize_simulation_log, save_params, set_up_experiment_output_dir, save_simulation_log, create_results_dataframe, save_results_dataframe
from src.search_strategies.grid_search import run_grid_search
from src.simulators.simod_discovery import discover_BPS_simod
from src.pipeline_utils import get_params_to_change, set_target_val_and_range, prepare_simulation_files
from src.evaluation import generate_and_plot_quadtree_metrics, plot_quadtree_metrics_over_depth, evaluate_quadtree_vs_simulation_log, evaluate_hyperquadtree_vs_simulation_log, compute_and_save_quadtree_metrics
from src.search_strategies.quadtree import adaptive_quadtree, plot_quadtree, write_nodes_visited_to_json, write_quadtree_nodes_to_file

In [None]:
from prosimos import simulation_engine, simulation_properties_parser
from simod.simulation.prosimos import *


# Pipeline

## 0. Input

In [None]:

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Name of the process; also used below to build the data path.
process_name = 'LoanApp_SR'

# PPIs to target. Single-PPI alternatives: ['lead_time'] or ['cost'].
target_ppis = ['lead_time', 'cost']
ppi_bounds = {'lead_time': 'upper', 'cost': 'upper'}
# Predefined target range per PPI (lead_time upper limit presumably in
# seconds, i.e. 11 h -- TODO confirm unit).
target_range = {
    'lead_time': [0, 11 * 60 * 60],
    'cost': [0, 60000],
}

ppi_range_factor = 0.2

# Simulation set-up
cases_to_simulate = 300
discover_bps_model = False
nr_simulations_per_scenario = 30
execute_strategy = ['grid_search', 'hyperquadtree']

confidence = 0.9
beta = 0.85

# Accepted values: 'custom' or 'simod'
calculate_stats = 'custom'
dt = datetime.datetime(
    year=2025, month=6, day=23, hour=9, minute=0,
    tzinfo=datetime.timezone.utc,
)
simulation_tool = 'simod'

print_intermediate_results = True

# Internal parameters
decimals = 3
simulation_results_confidence = True
# Accepted values: "mean" or "confidence"
in_out_criteria = "confidence"
simod_config_path = 'simulators/simod/resources/config/config.yml'
simod_directory = 'simulators/simod'
ppi_calculation = {
    'cost': {
        'type': 'total',
        # One of: full_duration, active_time, combined
        'method': 'full_duration',
        # Only applicable for the 'combined' method
        'weight': 1.5,
    },
    'lead_time': {'type': 'avg'},
}
update_parameters_list = ['resource_count', 'branching_probability']




In [None]:

# Parameters to change.
#
# Possible values for 'type':
#     - continuous  ("cont")
#     - discrete    ("disc")
#     - categorical ("cat")
#
# Shape of 'values' for each type:
#     - continuous:  [min, max]
#     - discrete:    [min, max]
#     - categorical: [list of possible values]
#
# NOTE(review): the key 'arriaval_distr_mean' is kept exactly as spelled,
# since it is a runtime key -- presumably matched by name in the project
# code; verify before renaming.
input_parameters = dict(
    arriaval_distr_mean=dict(
        type='cont',
        values=[0, 2 * 60 * 60 + 250],
        min_step_size=2.5 * 60,
    ),
    resource_count=dict(
        type='disc',
        values=[1, 20],  # 1 to 20 resources
        ignore=['applicant'],
        min_step_size=1,
    ),
    # Optional, currently disabled entry:
    # branching_probability=dict(
    #     type='cont',
    #     values=[0, 1],  # 0% to 100%
    #     # ignore=['node_645ee027-7ae8-4f3a-9127-f99918deb220'],  # Ignore all of these gateways
    #     use=['node_51629ebf-d0e1-49e5-ac00-74cb1ead72c6'],  # If given, only use these gateways
    #     min_step_size=0.1,
    # ),
)



In [None]:
# Collect the inputs defined in the cells above into a single `params` dict.
base_path = os.path.join('data', process_name)

# Start timestamp rendered as a string with a literal trailing 'Z'.
dt_str = dt.strftime("%Y-%m-%dT%H:%M:%SZ")

params = dict(
    process_name=process_name,
    base_path=base_path,
    target_ppis=target_ppis,
    ppi_bounds=ppi_bounds,
    ppi_range_factor=ppi_range_factor,
    cases_to_simulate=cases_to_simulate,
    starting_at=dt_str,
    simulation_tool=simulation_tool,
    nr_simulations_per_scenario=nr_simulations_per_scenario,
    decimals=decimals,
    simulation_results_confidence=simulation_results_confidence,
    calculate_stats=calculate_stats,
    confidence=confidence,
    beta=beta,
    in_out_criteria=in_out_criteria,
    print_intermediate_results=print_intermediate_results,
    execute_strategy=execute_strategy,
    simod_config_path=simod_config_path,
    simod_directory=simod_directory,
    ppi_calculation=ppi_calculation,
    input_parameters=input_parameters,
    update_parameters_list=update_parameters_list,
)

# `target_range` is optional: carry it over only when it has been defined.
try:
    params['target_range'] = target_range
except NameError:
    pass

results = {}

In [None]:


# Per-strategy settings, keyed by strategy name.
params['strategies'] = dict(
    grid_search=dict(
        # Alternative (disabled): granularity=25
        input='max_granularity',
    ),
    hyperquadtree=dict(
        min_depth=3,
        max_depth=8,
        # List of variations to run; options named in the original note:
        # 'random', 'midpoints'.
        variation=['midpoints'],
    ),
    quadtree=dict(
        min_depth=3,
        max_depth=6,
    ),
    diagonal_search=dict(
        stepsize_initial=0.1,
        step_max=20,
        adaptive_step=True,
        strictness=0.01,
    ),
    hill_climbing=dict(
        stepsize_initial=0.1,
        step_max=20,
        # Alternative (disabled): adaptive_step=True
        candidate_strategy='random_one',
        walk_reps_max=100,
    ),
)

### 0.5 Discovery

#### implement automatic calling of discovery

In [None]:
# Run the Simod-based discovery helper only when `discover_bps_model` is
# switched on in the input cell.
if discover_bps_model:

    discover_BPS_simod(params)

In [None]:


# Let prepare_simulation_files extend/replace `params`; later cells read
# entries such as params['json_path'] -- presumably set here, TODO confirm.
params = prepare_simulation_files(params)

##### BPMN plotting

In [None]:
# Display the process picture `<base_path>/<process_name>.png`, if available.
# This is best-effort: a missing or unreadable image must not stop the
# notebook, but interrupts (KeyboardInterrupt/SystemExit) are no longer
# swallowed and the real failure reason is reported.
name = params['process_name']
image_path = os.path.join(params['base_path'], f'{name}.png')

try:
    # Load and display the image
    img = mpimg.imread(image_path)
    fig = plt.figure(figsize=(12, 12))
    plt.imshow(img)
    plt.axis('off')  # Turn off the axis
    plt.show()
except FileNotFoundError:
    print(f"Image file not found at {image_path}")
except Exception as err:
    # Any other problem (e.g. unreadable file): report it, keep going.
    print(f"Could not display image at {image_path}: {err}")

## 1.  Set up

### Required Input

### Adaptation based on input ranges and parameters to change

In [None]:
# Store the result of get_params_to_change under params['params_to_change']
params['params_to_change'] = get_params_to_change(params)
# Initialize the simulation log (used as a DataFrame by the cells below)
# from the parameters to change
simulation_log = initialize_simulation_log(params['params_to_change'])

# Create the results table; each executed search strategy appends one
# summary row to it further down
results_df = create_results_dataframe()


### Set up background

In [None]:
# Set up experiment output directory; the helper returns the updated params
# (later cells read params['experiment_output_dir'] -- presumably set here,
# TODO confirm).
params = set_up_experiment_output_dir(params)

In [None]:
# If no target range was predefined, let set_target_val_and_range fill it in
# and record that run in the simulation log under the label 'orig_run'.
if 'target_range' not in params:

    params = set_target_val_and_range(params)

    simulation_log = log_simulation(
        simulation_log=simulation_log,
        algorithm='orig_run',
        params=params,
        target_ppi_dict=params['target_ppi_dict'],
        param_values=get_start_param_settings(params['params_to_change'], params),
    )
else:
    print("Use predefined Target PPI ranges.")

print('''\n#############################\n###  Intermediate output  ###''')
# The original PPI values may be absent (for example when the predefined
# branch above was taken), so look them up explicitly instead of hiding every
# possible error behind a bare `except: pass`.
orig_ppi_values = params.get('orig_target_ppi_val_dict') or {}
for ppi in params['target_ppis']:
    if ppi in orig_ppi_values:
        print(f'average {ppi}: {orig_ppi_values[ppi]}')
    val = params['target_range'][ppi]
    print(f'range: {val}\n')


In [None]:
# Save params to a JSON file (original note: data/process_name/output/params.json
# -- TODO confirm the exact location against save_params)
save_params(params)

# Write simulation log to CSV
save_simulation_log(simulation_log, params)

## 2. Search

### Search strategies

#### Grid Search Pipeline

In [None]:

%time

if 'grid_search' in params['execute_strategy']:

    # Start the timer
    start_time = time.perf_counter()

    simulation_log = run_grid_search(params, simulation_log=simulation_log)

    # Calculate elapsed time
    elapsed_time = time.perf_counter() - start_time

    save_simulation_log(simulation_log, params)

    result = pd.DataFrame([{
        'experiment': params.get('experiment_name', None),
        'algorithm': 'grid_search',
        'evals': len(simulation_log[simulation_log['algorithm'] == 'grid_search']),
        'time': elapsed_time,
        'n': params['nr_simulations_per_scenario'],
        'n_total': len(simulation_log[simulation_log['algorithm'] == 'grid_search']) * params['nr_simulations_per_scenario'],
        'acc': '-',   
        'mcc': '-'    
    }])
    # Append the new row to the DataFrame
    results_df = pd.concat([results_df, result], ignore_index=True)



In [None]:
# Count logged scenarios by their boolean `status` flag. The element-wise
# `== True` / `== False` comparisons are intentional (pandas masks).
inside_mask = simulation_log["status"] == True
outside_mask = simulation_log["status"] == False

success_count = len(simulation_log[inside_mask])
print(f"Number of simulation scenarios inside PPI range: {success_count}")

failed_count = len(simulation_log[outside_mask])
print(f"Number of simulation scenarios outside PPI range: {failed_count}")


#### Hill Climbing

In [None]:
# Run the hill-climbing strategy when it is selected in `execute_strategy`.
# The check uses 'hill_climbing', the same spelling as the key in
# params['strategies']; the previous misspelling could never match it.
if 'hill_climbing' in params['execute_strategy']:

    from src.search_strategies.hill_climbing import hill_descent

    # Reset the temp JSON file
    create_temp_json(input_path=params['json_path'], output_path=None)

    hc_settings = params['strategies']['hill_climbing']
    stepsize_initial = hc_settings['stepsize_initial']
    step_max = hc_settings['step_max']
    candidate_strategy = hc_settings['candidate_strategy']
    walk_reps_max = hc_settings['walk_reps_max']

    # Call the hill descent function. The parameters to change live in
    # `params`; there is no standalone `params_to_change` variable.
    hill_descent(
        params,
        step_size_initial=stepsize_initial,
        step_max=step_max,
        params_to_change=params['params_to_change'],
        candidate_strategy=candidate_strategy,
        walk_reps_max=walk_reps_max,
    )

    # Save the simulation log to a CSV file for later analysis.
    # NOTE(review): unlike the other strategies, the return value of
    # hill_descent is not assigned to `simulation_log`, and the log is written
    # via params['simulation_log_path'] instead of save_simulation_log --
    # confirm both against the hill_climbing module.
    simulation_log.to_csv(params['simulation_log_path'], index=False)


#### Hyperquadtree

In [None]:
from src.search_strategies.hyperquadtree import adaptive_hyperquadtree, write_hyperquadtree_nodes_to_file, write_nodes_visited_to_json

# Run the adaptive hyperquadtree search once per configured variation,
# persist its artefacts and append one summary row per run to `results_df`.
if 'hyperquadtree' in params['execute_strategy']:

    hqt_settings = params['strategies']['hyperquadtree']
    min_depth = hqt_settings['min_depth']
    max_depth = hqt_settings['max_depth']

    # 'variation' is iterated below, so it must be a list of names. Default to
    # a one-element list and wrap a bare string; iterating a string would loop
    # over its characters instead.
    variations = hqt_settings.get('variation', ['midpoints'])
    if isinstance(variations, str):
        variations = [variations]

    for variation in variations:

        algorithm = 'hyperquadtree'
        algorithm_name = f'{algorithm}_{variation}'

        # Reset the temp JSON file
        create_temp_json(input_path=params['json_path'], output_path=None)

        # Wall-clock duration of this search run.
        start_time = time.perf_counter()

        # Call the hyperquadtree function
        all_nodes, sampled_points, nodes_visited, simulation_log = adaptive_hyperquadtree(
            set_sim_params_get_sim_stats,
            is_in_target_range,
            params,
            simulation_log,
            min_depth=min_depth,
            max_depth=max_depth,
            variation=variation
        )

        elapsed_time = time.perf_counter() - start_time

        # log results
        save_simulation_log(simulation_log, params)

        # NOTE(review): the file names below do not contain the variation, so
        # several variations presumably write to the same files -- confirm.
        hqt_settings['paths'] = {
            'sampled_points': os.path.join(params['experiment_output_dir'], 'hyperquadtree_sampled_points.csv'),
            'nodes_visited': os.path.join(params['experiment_output_dir'], 'hyperquadtree_nodes_visited.csv'),
        }
        if params['print_intermediate_results']:
            print(params['strategies']['hyperquadtree']['paths'])

        save_params(params)
        write_hyperquadtree_nodes_to_file(all_nodes, params)
        write_nodes_visited_to_json(nodes_visited, params)

        max_depth_reached = len(nodes_visited)

        results_hqt = compute_and_save_quadtree_metrics(
            all_nodes,
            simulation_log,
            params,
            nodes_visited,
            max_depth=max_depth_reached,
            algorithm=algorithm,
            output_file_name='hyperquadtree_metrics_over_depth.csv'
        )

        # The last row of the metrics table is used as the final result.
        final_hqt_iteration = results_hqt.iloc[-1, :]

        # NOTE(review): rows are counted with the label 'hyperquadtree', not
        # `algorithm_name`; confirm which label adaptive_hyperquadtree logs.
        n_hqt_rows = len(simulation_log[simulation_log['algorithm'] == 'hyperquadtree'])

        result = pd.DataFrame([{
            # Same lookup as the grid search summary row; the former
            # `exp_name` was never defined in this notebook.
            'experiment': params.get('experiment_name', None),
            'algorithm': algorithm_name,
            'evals': final_hqt_iteration['nodes_computed'],
            'time': np.round(elapsed_time, params['decimals']),
            'n': params['nr_simulations_per_scenario'],
            'n_total': n_hqt_rows * params['nr_simulations_per_scenario'],
            'acc': np.round(final_hqt_iteration['accuracy'], params['decimals']),
            'mcc': np.round(final_hqt_iteration['mcc'], params['decimals'])
        }])
        results_df = pd.concat([results_df, result], ignore_index=True)


In [None]:
# Persist the per-strategy summary table collected in `results_df`.
save_results_dataframe(results_df, params)

#### Display Evaluation

In [None]:
# Show the hyperquadtree metrics table; `results_hqt` is only assigned inside
# the hyperquadtree cell, hence the same strategy guard here.
if 'hyperquadtree' in params['execute_strategy']:
    display(results_hqt)

In [None]:
# Add the runtime in minutes (one decimal) next to the raw `time` column and
# show a compact selection of the summary columns as the cell output.
results_df['time_minutes'] = results_df['time'].div(60).round(1)
colums_to_show = [
    'experiment',
    'algorithm',
    'evals',
    'time_minutes',
    'acc',
    'mcc',
]
results_df[colums_to_show]