In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from SALib.analyze import sobol
import warnings
from ema_workbench.analysis import feature_scoring
from ema_workbench.em_framework.salib_samplers import get_SALib_problem
from ema_workbench import (
    Model,
    Policy,
    ema_logging,
    SequentialEvaluator,
    MultiprocessingEvaluator,
    perform_experiments,
    Samplers,
    SequentialEvaluator,
)
import time
import itertools
import multiprocessing
from ema_workbench.em_framework import sample_uncertainties
# from dike_model_function import DikeNetwork 
from problem_formulation import get_model_for_problem_formulation, sum_over, sum_over_time
from ema_workbench.analysis import prim
from ema_workbench.em_framework.parameters import Constant

warnings.filterwarnings("ignore")

In [2]:
def normalize_out_dic(outcomes):
    norm_outcomes = {}
    for ooi in outcomes.keys():
        data = outcomes[ooi]
        mx = max(data)
        mn = min(data)
        if mx == mn:
            norm_outcomes[ooi] = data - mn
        else:
            norm_outcomes[ooi] = (data - mn)/(mx-mn)
    return norm_outcomes

In [3]:
dike_model, planning_steps = get_model_for_problem_formulation(6)

zero_policy = {"DaysToThreat": 0}
zero_policy.update({f"DikeIncrease {n}": 0 for n in planning_steps})
zero_policy.update({f"RfR {n}": 0 for n in planning_steps})

pol0 = {}
for key in dike_model.levers:
    s1, s2 = key.name.split("_")
    pol0.update({key.name: zero_policy[s2]})
policy0 = Policy("Policy 0", **pol0)

with MultiprocessingEvaluator(dike_model) as evaluator:
    results = evaluator.perform_experiments(scenarios=100,policies=policy0,uncertainty_sampling=Samplers.LHS)
exp, outcomes = results   
norm_new_out = normalize_out_dic(outcomes)
oois = list(outcomes.keys())

100%|████████████████████████████████████████| 100/100 [00:10<00:00,  9.77it/s]


In [4]:
def calculate_distance(data, oois, scenarios=None, distance='euclidean'):
    '''data is the outcomes of exploration results,
    scenarios is a list of scenario indices (decision variables), 
    oois is a list of variable names,
    distance is to choose the distance metric. options:
            bray-curtis, canberra, chebyshev, cityblock (manhattan), correlation, 
            cosine, euclidian, mahalanobis, minkowski, seuclidian,
            sqeuclidian, wminkowski
    returns a list of distance values
    '''
    #make a matrix of the data n_scenarios x oois
    scenario_data = np.zeros((len(scenarios), len(oois)))
    for i, s in enumerate(scenarios):
        for j, ooi in enumerate(oois):
            scenario_data[i][j] = data[ooi][s]
                
    distances = pdist(scenario_data, distance)
    return distances


In [5]:
def evaluate_diversity_single(x, data=norm_new_out, oois=oois, weight=0.5, distance='euclidean'):
    '''
    takes the outcomes and selected scenario set (decision variables), 
    returns a single 'diversity' value for the scenario set.
    outcomes : outcomes dictionary of the scenario ensemble
    decision vars : indices of the scenario set
    weight : weight given to the mean in the diversity metric. If 0, only minimum; if 1, only mean
    '''
    distances = calculate_distance(data, oois, list(x), distance)
    minimum = np.min(distances)
    mean = np.mean(distances)
    diversity = (1-weight)*minimum + weight*mean
    
    return [diversity]

In [6]:
def find_maxdiverse_scenarios(combinations):
    diversity = 0.0
    solutions = []
    for sc_set in combinations:
        temp_div = evaluate_diversity_single(list(sc_set))
        if temp_div[0] > diversity:
            diversity = temp_div[0]
            solutions = []
            solutions.append(sc_set)
        elif temp_div[0] == diversity:
            solutions.append(sc_set)
    #print("found diversity ", diversity)
    return diversity, solutions

In [None]:
# n_scenarios = 100000
# scenarios = sample_uncertainties(dike_model, n_scenarios)

n_scen = 10
indices = range(n_scen)
set_size = 4
combinations = itertools.combinations(indices, set_size)
combinations = list(combinations)

no_workers = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=no_workers)

with open('output_scenarioselection.txt', 'a') as file:

    start_time = time.time()
    #now, divide this data for each worker
    worker_data = np.array_split(combinations, no_workers)
           
    result = pool.imap(find_maxdiverse_scenarios, worker_data)
    print(result)        
    #find the max of these 8 
    max_diversity = 0.0
    for r in result:
        print("result : ", r)
        if r[0] >= max_diversity:
            max_diversity = r[0]
            solutions = []
            solutions.append(r[1])
        elif r[0] == max_diversity:
            solutions.append(r[1])                  

    end_time = time.time()
    file.write("Calculations took {} seconds.\n".format(end_time-start_time))
    print("Calculations took {} seconds.\n".format(end_time-start_time))
    file.write("maximum diversity and solutions: {}, {} \n\n".format(max_diversity, solutions))
    print("maximum diversity and solutions: {}, {} \n\n".format(max_diversity, solutions))


file.close()
    
pool.close()
pool.join()

<multiprocessing.pool.IMapIterator object at 0x00000198FE46FF50>


## Robustness Metrics