In [1]:
import os
import shutil
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from ema_workbench import (
    Model, RealParameter, ScalarOutcome, MultiprocessingEvaluator,
    ema_logging, Constant, Scenario, HypervolumeMetric,
    GenerationalDistanceMetric, EpsilonIndicatorMetric,
    InvertedGenerationalDistanceMetric, SpacingMetric, Constraint)

from ema_workbench.em_framework.optimization import (
    EpsilonProgress, ArchiveLogger)

from problem_formulation import get_model_for_problem_formulation



In [54]:
def execute_search(eps_vals, eval_count, scen_list, n_reps=3):
    """Run repeated lever optimizations for every reference scenario.

    For each scenario in ``scen_list`` the optimization is repeated
    ``n_reps`` times. Each run writes three artefacts into
    ``./archives_epsilon``:

    * ``output__scen<name>__run<rep>.csv`` - the result returned by
      ``optimize``;
    * ``log__scen<name>__run<rep>.csv`` - the convergence data returned by
      ``optimize``;
    * ``optimization__eps<eps>__scen<name>__run<rep>.tar.gz`` - the file name
      handed to ``ArchiveLogger``. It is unique per run so that one run does
      not overwrite the archive of another.

    The output/log CSV names do not contain the epsilon values, so calling
    this function again with different ``eps_vals`` overwrites those CSVs.

    The function relies on a module-level ``sim_model`` being defined before
    it is called.

    Parameters
    ----------
    eps_vals : sequence of numbers
        Epsilon values forwarded to ``optimize(epsilons=...)``.
    eval_count : int
        Number of function evaluations forwarded as ``nfe``.
    scen_list : iterable
        Reference scenarios; each item must expose a ``name`` attribute.
    n_reps : int, optional
        Number of repetitions per scenario (default 3).

    Returns
    -------
    tuple(list, list)
        ``(results, convergence)`` with one entry per (scenario, repetition),
        ordered scenario-major.
    """
    output_data = []
    progress_data = []

    archive_path = "./archives_epsilon"
    os.makedirs(archive_path, exist_ok=True)
    temp_path = os.path.join(archive_path, "tmp")

    # These do not change between runs, so compute them once.
    lever_names = [x.name for x in sim_model.levers]
    outcome_names = [y.name for y in sim_model.outcomes]
    eps_tag = "_".join(str(e) for e in eps_vals)

    with MultiprocessingEvaluator(sim_model) as runner:
        for s in scen_list:
            for rep in range(n_reps):
                # Clear any leftover "tmp" directory before building a new
                # ArchiveLogger (presumably its staging area - TODO confirm
                # against the installed ema_workbench version).
                if os.path.exists(temp_path):
                    shutil.rmtree(temp_path)

                run_tag = f"scen{s.name}__run{rep}"

                metrics = [
                    ArchiveLogger(
                        archive_path,
                        lever_names,
                        outcome_names,
                        # Unique per run: a constant name would leave only
                        # the archive of the very last run on disk.
                        base_filename=f"optimization__eps{eps_tag}__{run_tag}.tar.gz",
                    ),
                    EpsilonProgress(),
                ]

                res, prog = runner.optimize(
                    nfe=eval_count,
                    searchover="levers",
                    epsilons=eps_vals,
                    constraints=None,
                    convergence=metrics,
                    reference=s,
                )

                res.to_csv(os.path.join(archive_path, f"output__{run_tag}.csv"))
                pd.DataFrame(prog).to_csv(
                    os.path.join(archive_path, f"log__{run_tag}.csv")
                )

                output_data.append(res)
                progress_data.append(prog)

    return output_data, progress_data



In [55]:
def read_optimization_data(folder):
    """Load and stack every ``output__*.csv`` file found directly in ``folder``.

    Files are read in sorted file-name order so the row order of the result
    is reproducible (``os.listdir`` gives no ordering guarantee).

    Parameters
    ----------
    folder : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matching files, each keeping its own
        index. An empty DataFrame is returned when no file matches.
    """
    matching = sorted(
        file for file in os.listdir(folder)
        if file.startswith('output__') and file.endswith('.csv')
    )
    if not matching:
        # pd.concat([]) would raise "No objects to concatenate".
        return pd.DataFrame()

    return pd.concat(
        [pd.read_csv(os.path.join(folder, file)) for file in matching]
    )


In [None]:

if __name__ == '__main__':
    # Driver: build the model and reference scenarios, then for each epsilon
    # setting run the search, reload the saved results and plot a scatter
    # matrix of the outcomes.
    ema_logging.log_to_stderr(ema_logging.INFO)
    # execute_search reads `sim_model` from module scope.
    sim_model, _ = get_model_for_problem_formulation(2)

    # One Scenario per CSV row: the 'scenario' column supplies the name and
    # every other column is passed through as a keyword argument.
    scen_df = pd.read_csv("./data/Selected_Scenarios.csv")
    scenario_bank = []
    for idx in range(scen_df.shape[0]):
        scen_data = {
            col: scen_df.loc[idx, col]
            for col in scen_df.columns
            if col != 'scenario'
        }
        scenario_bank.append(Scenario(scen_df.loc[idx, 'scenario'], **scen_data))

    # One epsilon per outcome, from coarse to fine
    # (assumed to follow the model's outcome order - TODO confirm).
    eps_sets = [
        [1000000, 1000000, 1000000, 10, 1000000],
        [10000, 10000, 10000, 1, 10000],
        [1000, 1000, 1000, 0.1, 1000]
    ]
    evals = 10000

    # Outcome columns shown in the scatter matrix; constant across epsilons.
    key_outputs = [
        'Expected Annual Damage',
        'Dike Investment Costs',
        'RfR Investment Costs',
        'Evacuation Costs',
        'Expected Number of Deaths'
    ]

    for eps in eps_sets:
        outputs, logs = execute_search(eps, evals, scenario_bank)
        # NOTE(review): this reads every output__*.csv in the folder, so
        # stale files from earlier runs with other scenario names would be
        # included - confirm the folder is clean before running.
        collected_data = read_optimization_data("./archives_epsilon")

        print(f"Total solutions for ε={eps}: {collected_data.shape[0]}")

        subset = collected_data[key_outputs]

        sns.pairplot(subset, diag_kind="kde", corner=True)
        plt.suptitle(f"Outcome Scatter Matrix for ε={eps}", fontsize=14)
        plt.tight_layout()
        plt.savefig(f"scatter_matrix_eps_{str(eps).replace(',','_')}.png")
        plt.show()
        # Release the figure so they do not pile up across iterations.
        plt.close()

[MainProcess/INFO] pool started with 12 workers

  0%|                                                | 0/10000 [00:00<?, ?it/s][A