In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# EMA Workbench imports
import ema_workbench
from ema_workbench import (
    Model, RealParameter, ScalarOutcome, MultiprocessingEvaluator,
    ema_logging, Constant, Scenario, HypervolumeMetric,
    GenerationalDistanceMetric, EpsilonIndicatorMetric,
    InvertedGenerationalDistanceMetric, SpacingMetric, Constraint)
from ema_workbench.em_framework.optimization import (
    EpsilonProgress, to_problem, ArchiveLogger, epsilon_nondominated)
from ema_workbench.analysis import parcoords
from ema_workbench.em_framework.optimization import EpsilonProgress

from problem_formulation import get_model_for_problem_formulation

In [3]:

def save_optimization_outputs(result, convergence, scenario_name, seed, output_dir="./archives"):
    """
    Save the optimization result and convergence data to CSV files.
    """
    os.makedirs(output_dir, exist_ok=True)

    result_path = os.path.join(output_dir, f"result__scen{scenario_name}__seed{seed}.csv")
    convergence_path = os.path.join(output_dir, f"convergence__scen{scenario_name}__seed{seed}.csv")

    result.to_csv(result_path, index=False)
    pd.DataFrame(convergence).to_csv(convergence_path, index=False)

In [17]:
def run_single_optimization(evaluator, model, scenario, seed, epsilon, nfe):
    """
    Run a single optimization instance for a given scenario and seed.
    """
    convergence_metrics = [        
                    ArchiveLogger(
                        "./archives",
                        [l.name for l in model.levers],
                        [o.name for o in model.outcomes],
                        base_filename="optimization.tar.gz",
                    ),
                    EpsilonProgress(),
                ]

    result, convergence = evaluator.optimize(
        nfe=nfe,
        searchover="levers",
        epsilons=epsilon,
        constraints=None,
        convergence=convergence_metrics,
        reference=scenario,
    )

    return result, convergence

In [25]:
def run_optimization(epsilon, nfe, scenarios, model):
    """
    Run optimization using the EMA Workbench.

    Parameters:
    epsilon (list): The epsilon values for the optimization.
    nfe (int): The number of function evaluations.
    scenarios (list): List of scenarios to be evaluated.
    model: The model to be used for optimization.

    Returns:
    tuple: A tuple containing results and convergences.
    """
    # Initialize lists to store results and convergence metrics
    results = []
    convergences = []

    # Use MultiprocessingEvaluator for parallel processing
    with MultiprocessingEvaluator(model) as evaluator:
        # Iterate over each scenario
        for scenario in scenarios:
            # Perform optimization three times for each scenario
            for i in range(3):
                # Define convergence metrics
                convergence_metrics = [
                    ArchiveLogger(
                        "./archives",
                        [l.name for l in model.levers],
                        [o.name for o in model.outcomes],
                        base_filename="optimization.tar.gz",
                    ),
                    EpsilonProgress(),
                ]

                # Run the optimization
                result, convergence = evaluator.optimize(
                    nfe=nfe,
                    searchover="levers",
                    epsilons=epsilon,
                    constraints=None,
                    convergence=convergence_metrics,
                    reference=scenario,
                )

                # Create result directory if it does not exist
                result_dir = "./archives"
                os.makedirs(result_dir, exist_ok=True)
                
                # Save the results and convergence metrics to CSV files
                result.to_csv(os.path.join(result_dir, f"result__scen{scenario.name}__seed{i}.csv"))
                pd.DataFrame(convergence).to_csv(os.path.join(result_dir, f"convergence__scen{scenario.name}__seed{i}.csv"))

                # Append the results and convergence metrics to the lists
                results.append(result)
                convergences.append(convergence)
    
    # Return the results and convergence metrics
    return results, convergences

In [26]:
def gather_optimization_results(path):
    """
    Collect and combine all optimization result CSVs from a given folder.

    Parameters:
    path (str): Path to the folder containing result CSV files.

    Returns:
    pd.DataFrame: A single DataFrame with all the loaded results.
    """
    result_frames = []

    for filename in os.listdir(path):
        if filename.endswith(".csv") and filename.startswith("result__"):
            full_path = os.path.join(path, filename)
            data = pd.read_csv(full_path)
            result_frames.append(data)

    if not result_frames:
        raise ValueError(f"No result files found in {path}")

    return pd.concat(result_frames, ignore_index=True)


In [35]:
import os
import shutil

# Ensure the archive directory exists
os.makedirs("archives", exist_ok=True)

# Ensure the 'tmp' subdirectory does not conflict
tmp_path = os.path.join("archives", "tmp")
if os.path.exists(tmp_path):
    if os.path.isfile(tmp_path):
        # If 'tmp' exists as a file, remove it
        os.remove(tmp_path)
    elif os.path.isdir(tmp_path):
        # If 'tmp' exists as a directory, clear its contents
        shutil.rmtree(tmp_path)
# Create the 'tmp' directory
os.makedirs(tmp_path, exist_ok=True)

def load_scenarios_from_csv(filepath):
    """
    Load and convert a scenario CSV file into a list of EMA Workbench Scenario objects.
    """
    df = pd.read_csv(filepath)
    scenario_list = []

    for idx, row in df.iterrows():
        scenario_data = {
            col: row[col]
            for col in df.columns
            if col.lower() != "run id" and col.lower() != "run_id"
        }
        scenario_name = row["Run ID"] if "Run ID" in df.columns else row["run_id"]
        scenario_obj = Scenario(scenario_name, **scenario_data)
        scenario_list.append(scenario_obj)

    return scenario_list

def plot_parallel_coordinates(results_df, epsilons, output_path):
    """
    Plot a parallel coordinates graph from selected outcome columns in the results.
    """
    selected_outcomes = [
        'Expected Annual Damage', 'Dike Investment Costs', 'RfR Investment Costs',
        'Evacuation Costs', 'Expected Number of Deaths', 'Total Investment Costs'
    ]

    subset = results_df[selected_outcomes]
    limits = parcoords.get_limits(subset)
    axes = parcoords.ParallelAxes(limits)
    axes.plot(subset)

    plt.title(f'Parallel Coordinates Plot for Epsilon {epsilons}')
    plt.show()

if __name__ == "__main__":
    # Enable logging at INFO level
    ema_logging.log_to_stderr(ema_logging.INFO)

    # Load the model setup for a specific problem formulation
    model, _ = get_model_for_problem_formulation(0)

    # Load scenario definitions
    scenario_file = "./data/selected_scenarios_outcomes.csv"
    scenario_list = load_scenarios_from_csv(scenario_file)

    # Optimization parameters
    epsilon_sets = [[1e6, 1e6, 1e6, 1e6, 10, 1e6]]
    function_evals = 10000

    for eps in epsilon_sets:
        # Run the optimization procedure
        results, _ = run_optimization(eps, function_evals, scenario_list, model)

        # Aggregate all results for analysis
        combined_results = gather_optimization_results("archives")

        # Generate and save the parallel coordinates plot
        plot_parallel_coordinates(combined_results, eps, f"parallel_coordinates_eps_{eps}.png")


[MainProcess/INFO] pool started with 8 workers
[MainProcess/INFO] terminating pool


FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\ralph\\Documents\\github mbd\\MBD-repository\\archives\\tmp'

In [12]:
plot_parallel_coordinates(results, epsilon_values)

NameError: name 'results' is not defined

In [22]:
def dominant_policy(results):
    # Initialize a list to store the indices of non-dominated policies
    non_dominated_indices = []
    # Loop over all pairs of policies
    for i in range(len(results)):
        for j in range(len(results)):
            # Skip if it's the same policy
            if i == j:
                continue

            # Check if policy i is dominated by policy j
            if all(results.iloc[j] <= results.iloc[i]) and any(results.iloc[j] < results.iloc[i]):
                break
        else:
            # If we didn't break from the loop, policy i is not dominated by any other policy
            non_dominated_indices.append(i)

    # Return only the non-dominated policies
    return results.iloc[non_dominated_indices]