## Compare the sets of dams selected by MOO algorithm with emissions objective calculated from emission factors against the solutions of MOO runs where GHG emissions had been calculated explicitly with ReEmission (G-res methodology)
## T. Janus
### Created: 11/11/2024

In [None]:
from typing import List, Set, Any, Optional, Dict
from matplotlib.legend_handler import HandlerBase
from string import Template
import math
import pathlib
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from scipy.spatial import distance
from lib.notebook12 import ObjectiveCalculator, set_remap

# Load the mapping between ids used in the MOO algorithm and the IDs in the IFC database
map_file_path = pathlib.Path('outputs/moo/id_to_ifc.json')
with map_file_path.open('r') as file:
    # The keys are cast to int so the mapping can be indexed with integer optimisation ids
    id_map: Dict[int, int] = {
        int(key): value for key, value in json.load(file).items()
    }  # Maps optim ids to ifc ids

## Implemented indices
### Average Jaccard Index

For any two collections of sets, $A=\{A_1,A_2,\ldots,A_n\}$ and $B=\{B_1,B_2,\ldots,B_m\}$, the average Jaccard index is calculated as:

\begin{equation}
\bar{J}(A,B) = \frac{1}{n \times m} \sum_{i=1}^n \sum_{j=1}^m J(A_i, B_j)
\end{equation}

where $J(A_i, B_j)$ is the Jaccard index between sets $A_i$ and $B_j$ and is defined as

\begin{equation}
J(A_i, B_j) = \frac{|A_i \cap B_j|}{|A_i \cup B_j|}
\end{equation}

where $\left|A_i \cap B_j \right|$ is the cardinality of the intersection of sets $A_i$ and $B_j$, and $\left|A_i \cup B_j\right|$ is the cardinality of the union of sets $A_i$ and $B_j$.

In [None]:
# Helper functions
# Write statistics and comparison figures
def num_sets(dam_sets: Dict[int, Dict[str, List[Set[int]]]], bin_index: int, scenario: str) -> int:
    """Return how many dam sets are stored for ``scenario`` in bin ``bin_index``."""
    sets_in_bin = dam_sets[bin_index][scenario]
    return len(sets_in_bin)

def convert_to_set(dam_ids: str) -> Set[int]:
    """
    Parse the textual form of a set of integers, e.g. ``"{1, 2, 3}"``.

    Parameters
    ----------
    dam_ids : str
        Set literal as text. Whitespace around the items is optional, and
        the empty forms ``"{}"`` and ``"set()"`` are accepted.

    Returns
    -------
    Set[int]
        The parsed ids; an empty set for an empty literal.

    Raises
    ------
    ValueError
        If an item cannot be converted to ``int``.
    """
    text = dam_ids.strip()
    # repr(set()) is "set()", which has no braces to strip
    if text == 'set()':
        return set()
    # Split on bare commas and trim each item so "{1,2}" and "{1, 2}" both parse;
    # empty items (from "{}" or a trailing comma) are skipped
    items = (item.strip() for item in text.strip('{}').split(','))
    return {int(item) for item in items if item}

# Define similarity metrics
def cosine_similarity(vec1: List[float], vec2: List[float], verbose: bool = False) -> float:
    """
    Calculate the cosine similarity between two vectors.

    Parameters
    ----------
    vec1, vec2 : List[float]
        Vectors of equal length.
    verbose : bool
        If True, print the computed value.

    Returns
    -------
    float
        Cosine of the angle between the vectors (value between -1 and 1).
        No special handling is applied when either vector has zero norm.
    """
    A = np.array(vec1)
    B = np.array(vec2)
    cosine = float(np.dot(A, B) / (np.linalg.norm(A) * np.linalg.norm(B)))
    if verbose:
        print("Cosine Similarity:", cosine)
    return cosine

def jaccard_similarity(set1: Set[Any], set2: Set[Any], verbose: bool = False) -> float:
    """
    Calculate the Jaccard index |set1 ∩ set2| / |set1 ∪ set2| of two sets.

    Parameters
    ----------
    set1, set2 : Set[Any]
        The sets to compare.
    verbose : bool
        If True, print the computed value.

    Returns
    -------
    float
        Value between 0 (disjoint) and 1 (identical). Two empty sets are
        treated as identical and give 1.0 instead of dividing by zero.
    """
    union = set1.union(set2)
    if not union:
        # Both sets are empty: the ratio is 0/0, define it as full similarity
        similarity = 1.0
    else:
        similarity = len(set1.intersection(set2)) / len(union)
    if verbose:
        print("Jaccard Similarity:", similarity)
    return similarity

def average_jaccard_index(collection1, collection2, **kwargs):
    """
    Average the Jaccard index over every pair (set1, set2) with set1 taken
    from ``collection1`` and set2 taken from ``collection2``.

    Parameters
    ----------
    collection1, collection2 : iterable of sets
        The two collections to compare.
    **kwargs
        Forwarded unchanged to ``jaccard_similarity``.

    Returns
    -------
    float
        Mean pairwise Jaccard index, or NaN when there are no pairs
        (i.e. at least one collection is empty).
    """
    similarities = [
        jaccard_similarity(set1, set2, **kwargs)
        for set1 in collection1
        for set2 in collection2
    ]
    if not similarities:
        # np.mean([]) also yields NaN but emits RuntimeWarnings for every empty bin
        return np.nan
    return np.mean(similarities)


def jaccard_distance(set1: Set[Any], set2: Set[Any], verbose: bool = False) -> float:
    """
    Calculate the Jaccard distance |set1 Δ set2| / |set1 ∪ set2| of two sets,
    where Δ is the symmetric difference.

    Parameters
    ----------
    set1, set2 : Set[Any]
        The sets to compare.
    verbose : bool
        If True, print the computed value.

    Returns
    -------
    float
        Value between 0 (identical) and 1 (disjoint). Two empty sets are
        treated as identical and give 0.0 instead of dividing by zero.
    """
    union = set1.union(set2)
    if not union:
        # Both sets are empty: the ratio is 0/0, define it as zero distance
        dist = 0.0
    else:
        dist = len(set1.symmetric_difference(set2)) / len(union)
    if verbose:
        print("Jaccard Distance:", dist)
    return dist

def jaccard_score(y_true: np.ndarray, y_pred: np.ndarray, *args, **kwargs) -> np.double:
    """
    Thin wrapper around ``sklearn.metrics.jaccard_score``.

    All positional and keyword arguments are forwarded unchanged and the
    scikit-learn result is returned as is.
    """
    # This function has the same name as the scikit-learn function it wraps,
    # so it shadows the module-level import. Re-import under an alias to avoid
    # calling ourselves recursively.
    from sklearn.metrics import jaccard_score as _sklearn_jaccard_score
    return _sklearn_jaccard_score(y_true, y_pred, *args, **kwargs)

def hamming_distance(u: 'np.typing.ArrayLike', v: 'np.typing.ArrayLike', w: Optional['np.typing.ArrayLike'] = None) -> np.double:
    """Return ``scipy.spatial.distance.hamming`` of ``u`` and ``v``, optionally weighted by ``w``."""
    result = distance.hamming(u, v, w=w)
    return result

def find_closest_row(df: pd.DataFrame, target_value: Any, column: str) -> pd.Series:
    """
    Return the row of ``df`` whose entry in ``column`` is closest to ``target_value``.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to search.
    target_value : Any
        The value to find the closest match for; it is subtracted from the column.
    column : str
        Name of the column compared against ``target_value``.

    Returns
    -------
    pd.Series
        The row at the index label with the smallest absolute difference.
    """
    absolute_gaps = df[column].sub(target_value).abs()
    nearest_label = absolute_gaps.idxmin()
    return df.loc[nearest_label]

In [None]:
# Set paths
# Folders holding the output files from the algorithm with expansion / compression
sol_file_folder_5obj = pathlib.Path('moo_solver_CPAIOR/outputs/epsilon2_5obj')
sol_file_folder_3obj = pathlib.Path('moo_solver_CPAIOR/outputs/epsilon2_3obj_soued')
# One entry per optimisation run: the merged solutions file and the non-dominated solutions file.
# Both 5-objective runs read from the same folder.
options = {
    '5obj': {
        'merged_csv_file': sol_file_folder_5obj / 'merged_df_5obj.csv',
        'nondom_csv_file': sol_file_folder_5obj / 'em_int_nondom_df_5obj.csv',
    },
    '5obj_soued': {
        'merged_csv_file': sol_file_folder_5obj / 'merged_df_5obj_soued.csv',
        'nondom_csv_file': sol_file_folder_5obj / 'em_int_nondom_df_5obj_soued.csv',
    },
    '3obj_soued': {
        'merged_csv_file': sol_file_folder_3obj / 'merged_df_3obj_soued.csv',
        'nondom_csv_file': sol_file_folder_3obj / 'em_int_nondom_df_3obj_soued.csv',
    },
}

In [None]:
# Load the data
scale_column = 'HP Production [GWh/year]'
optim_names = ['5obj', '5obj_soued']
n_bins = 2000
data = dict()
for optim_name in optim_names:
    # Non-dominated solutions of this optimisation run; the textual dam-id sets become real sets
    df = pd.read_csv(options[optim_name]['nondom_csv_file'])
    df["Dam IDs"] = df["Dam IDs"].apply(convert_to_set)
    # Take independent copies: a 'bin' column is added to these frames later in the notebook,
    # and writing into a bare query() result triggers pandas' chained-assignment warning
    built = df.query('Scenario == "Built"').copy()
    not_built = df.query('Scenario == "Not Built"').copy()
    data[optim_name] = {
        'Built': built,
        'Not Built': not_built,
        'min_built': built[scale_column].min(),
        'max_built': built[scale_column].max(),
        'min_not_built': not_built[scale_column].min(),
        'max_not_built': not_built[scale_column].max(),
    }

In [None]:
# Find min and max values and bin data
def _production_extent(scenario):
    """Return (min, max) of `scale_column` for `scenario` across all runs in `optim_names`."""
    columns = [data[name][scenario][scale_column] for name in optim_names]
    return min(col.min() for col in columns), max(col.max() for col in columns)


def _integer_bin_ranges(edges):
    """Return the (lower, upper) pair of every bin, truncated to integers."""
    return [(int(lower), int(upper)) for lower, upper in zip(edges[:-1], edges[1:])]


def _with_bin_column(frame, edges):
    """Return a copy of `frame` with a zero-based 'bin' column derived from `scale_column`."""
    raw_bins = np.digitize(frame[scale_column], edges) - 1
    # A value equal to the last edge is digitized one past the final bin; clip it back so
    # that the maximum-production solution lands in bin n_bins - 1 instead of being lost.
    return frame.assign(bin=np.clip(raw_bins, 0, n_bins - 1))


min_built, max_built = _production_extent('Built')
min_not_built, max_not_built = _production_extent('Not Built')
bins_built = np.linspace(min_built, max_built, n_bins + 1)  # (n_bins + 1) edges create n_bins bins
bins_not_built = np.linspace(min_not_built, max_not_built, n_bins + 1)  # (n_bins + 1) edges create n_bins bins

# Calculate ranges for each bin and the mid-point of every range
bin_ranges_built = _integer_bin_ranges(bins_built)
mean_gen_built = [np.mean(bin_range) for bin_range in bin_ranges_built]
bin_ranges_not_built = _integer_bin_ranges(bins_not_built)
mean_gen_not_built = [np.mean(bin_range) for bin_range in bin_ranges_not_built]

for optim_name in optim_names:
    data[optim_name]['Built'] = _with_bin_column(data[optim_name]['Built'], bins_built)
    data[optim_name]['Not Built'] = _with_bin_column(data[optim_name]['Not Built'], bins_not_built)

In [None]:
from functools import reduce
# Width of every not-built production bin (upper edge minus lower edge) and its average
differences = [upper - lower for lower, upper in bin_ranges_not_built]
mean_difference = sum(differences) / len(bin_ranges_not_built)
# Only the last bare expression of the cell is shown as the cell output
mean_difference
bin_ranges_not_built[0][0] / 1_000, bin_ranges_not_built[-1][1] / 1_000
bin_ranges_built[0][0] / 1_000, bin_ranges_built[-1][1] / 1_000

In [None]:
# Report bin width and the production range covered, converted from GWh/year to TWh/year
bin_width_twh = mean_difference / 1_000
lowest_not_built_twh = bin_ranges_not_built[0][0] / 1_000
lowest_built_twh = bin_ranges_built[0][0] / 1_000
highest_not_built_twh = bin_ranges_not_built[-1][1] / 1_000
print(f"HP Production bins of width {bin_width_twh} TWh/year")
print(
    f"Production targets: {lowest_not_built_twh} "
    f"({lowest_built_twh} in case of built scenario) "
    f"up to {highest_not_built_twh} TWh/year"
)

In [None]:
# Initialize data structures for storing binned data: bin index -> optimisation run -> list of dam sets
dam_sets_in_bins_built = {
    bin_num: {name: [] for name in optim_names} for bin_num in range(n_bins)
}
dam_sets_in_bins_not_built = {
    bin_num: {name: [] for name in optim_names} for bin_num in range(n_bins)
}
# Fill the built scenario first, then the not built scenario
for scenario, binned_sets in (
    ('Built', dam_sets_in_bins_built),
    ('Not Built', dam_sets_in_bins_not_built),
):
    for optim_name in optim_names:
        data_sel = data[optim_name][scenario]
        for bin_num in range(n_bins):
            ids_in_bin = data_sel.loc[data_sel['bin'] == bin_num, "Dam IDs"]
            # Keep genuine sets only; anything else (e.g. a missing value) is discarded
            binned_sets[bin_num][optim_name] = [
                ids for ids in ids_in_bin if isinstance(ids, set)
            ]

## Calculate average Jaccard indices

In [None]:
def _mean_jaccard_per_bin(dam_sets_in_bins, mean_generation):
    """Map each bin index to [bin mid-point production, average Jaccard index of 5obj vs 5obj_soued]."""
    return {
        bin_index: [
            mean_generation[bin_index],
            average_jaccard_index(
                dam_sets_in_bins[bin_index]['5obj'],
                dam_sets_in_bins[bin_index]['5obj_soued'],
            ),
        ]
        for bin_index in range(n_bins)
    }


# The per-bin set counts computed here previously were never used (and the not-built loop
# counted the built sets by mistake), so they are no longer calculated.
# 1. Not built
ave_jaccard_sim_not_built = _mean_jaccard_per_bin(dam_sets_in_bins_not_built, mean_gen_not_built)
# 2. Built
ave_jaccard_sim_built = _mean_jaccard_per_bin(dam_sets_in_bins_built, mean_gen_built)

In [None]:
# Turn the per-bin dictionaries into labelled tables, one per scenario, and stack them
jaccard_column_names = {0: "HP Production [GWh/year]", 1: "Mean Jaccard Index"}
ave_jacard_df_built = (
    pd.DataFrame.from_dict(ave_jaccard_sim_built, orient='index')
    .rename(columns=jaccard_column_names)
    .assign(Status="Built")
)
ave_jacard_df_not_built = (
    pd.DataFrame.from_dict(ave_jaccard_sim_not_built, orient='index')
    .rename(columns=jaccard_column_names)
    .assign(Status="Not Built")
)
ave_jacard_df = pd.concat([ave_jacard_df_built, ave_jacard_df_not_built])
# Same production values expressed in TWh/year
ave_jacard_df["HP Production [TWh/year]"] = ave_jacard_df["HP Production [GWh/year]"] / 1_000

In [None]:
# Production of the first and the last bin of the built scenario (GWh/year)
first_built_bin = ave_jacard_df_built.iloc[0]
last_built_bin = ave_jacard_df_built.iloc[-1]
min_prod_target = first_built_bin['HP Production [GWh/year]']
max_prod_target = last_built_bin['HP Production [GWh/year]']

In [None]:
# Not-built bins between 41 and 43 TWh/year whose mean Jaccard index is below 0.78
not_built_41_43_filter = (
    '`Mean Jaccard Index` < 0.78 & Status == "Not Built" & `HP Production [TWh/year]` >41'
    ' & `HP Production [TWh/year]` < 43'
)
ave_jacard_df.query(not_built_41_43_filter)

In [None]:
# Built-scenario bins whose mean Jaccard index is below 0.75
built_low_similarity_filter = '`Mean Jaccard Index` < 0.75 & Status == "Built"'
ave_jacard_df.query(built_low_similarity_filter)

In [None]:
# Not-built bins above 160 TWh/year whose mean Jaccard index is below 0.75
not_built_above_160_filter = (
    '`Mean Jaccard Index` < 0.75 & Status == "Not Built" & `HP Production [TWh/year]` > 160'
)
ave_jacard_df.query(not_built_above_160_filter)

In [None]:
# Average of the mean Jaccard index over not-built bins above 50 TWh/year
not_built_above_50 = ave_jacard_df.query(
    'Status == "Not Built" & `HP Production [TWh/year]` > 50')
not_built_above_50["Mean Jaccard Index"].mean()

In [None]:
# Average of the mean Jaccard index over built bins above 50 TWh/year
built_above_50 = ave_jacard_df.query(
    'Status == "Built" & `HP Production [TWh/year]` > 50')
built_above_50["Mean Jaccard Index"].mean()

In [None]:
# Reload the raw non-dominated solutions of every optimisation run, keyed by run name
optim_outputs = {
    name: pd.read_csv(options[name]['nondom_csv_file'])
    for name in optim_names
}

In [None]:
# Preview the first rows of the built scenario
built_rows = ave_jacard_df.query('Status == "Built"')
built_rows.head()

In [None]:
# Pick data (row locations) manually - These rows contain production targets that are also picked in
# Notebook_12b, i.e. the notebook that selects optimization scenarios for visualisation and comparison
# The numbers are index labels (bin indices) looked up with `.loc` on the Status-filtered `ave_jacard_df`.
target_1a_loc = 948 # 114.90 TWh/year (Not Built) - not referenced anywhere else in this notebook
target_1b_loc = 346 # 42 TWh/year (Not Built)
target_2a_loc = 1188 # 150.4 TWh/year (Built)
target_2b_loc = 1690 # 204.76 TWh/year (Not Built)
# Only sc2_option[1] is read by the code: "a" anchors scenario II on target_2a_loc (Built),
# "b" anchors it on target_2b_loc (Not Built). Scenario I always uses target_1b_loc.
sc2_option = ("b", "a")

In [None]:
# Pick the HP production targets highlighted in the figure and look up their statistics.
# Scenario I is always anchored on the not-built row `target_1b_loc` (sc2_option[0] is not read);
# scenario II is anchored on the built or the not-built row depending on sc2_option[1].
EMISSIONS_COLUMN = 'GHG emissions [tonne CO<sub>2,eq</sub>/year]'


def _emissions_mt(optim_name, scenario, production_twh):
    """Return the emissions, divided by 1e6 (tonne -> Mt), of the `optim_name` solution of
    `scenario` whose production is closest to `production_twh` (TWh/year)."""
    closest_solution = find_closest_row(
        df=optim_outputs[optim_name].query(f'Scenario == "{scenario}"'),
        target_value=production_twh * 1_000,  # the solution files store production in GWh/year
        column='HP Production [GWh/year]',
    )
    return closest_solution[EMISSIONS_COLUMN] / 1_000_000


def _target_stats(jaccard_row, scenario):
    """Return (production [TWh/year], mean Jaccard index, emissions from the '5obj' run,
    emissions from the '5obj_soued' run) for one row of `ave_jacard_df`."""
    production_twh = jaccard_row['HP Production [TWh/year]']
    return (
        production_twh,
        jaccard_row['Mean Jaccard Index'],
        _emissions_mt('5obj', scenario, production_twh),
        _emissions_mt('5obj_soued', scenario, production_twh),
    )


def _closest_jaccard_row(status, production_twh):
    """Return the `ave_jacard_df` row with the given Status closest to `production_twh`."""
    return find_closest_row(
        df=ave_jacard_df.query(f'Status == "{status}"'),
        target_value=production_twh,
        column='HP Production [TWh/year]',
    )


# Scenario I: manually chosen not-built row, matched with the closest built row
target_1_nb = ave_jacard_df.query('Status == "Not Built"').loc[target_1b_loc]
(target_1_nb_value, target_1_nb_mji,
 target_1_nb_em_gres, target_1_nb_em_soued) = _target_stats(target_1_nb, "Not Built")

target_1_b = _closest_jaccard_row("Built", target_1_nb_value)
(target_1_b_value, target_1_b_mji,
 target_1_b_em_gres, target_1_b_em_soued) = _target_stats(target_1_b, "Built")

# Scenario II: one row is chosen manually, the other is the closest row of the opposite status
if sc2_option[1] == "a":
    target_2_b = ave_jacard_df.query('Status == "Built"').loc[target_2a_loc]
    target_2_nb = _closest_jaccard_row("Not Built", target_2_b['HP Production [TWh/year]'])
elif sc2_option[1] == "b":
    target_2_nb = ave_jacard_df.query('Status == "Not Built"').loc[target_2b_loc]
    target_2_b = _closest_jaccard_row("Built", target_2_nb['HP Production [TWh/year]'])
else:
    # Fail loudly instead of leaving the scenario II variables undefined or stale
    raise ValueError(f'sc2_option[1] must be "a" or "b", got {sc2_option[1]!r}')

(target_2_b_value, target_2_b_mji,
 target_2_b_em_gres, target_2_b_em_soued) = _target_stats(target_2_b, "Built")
(target_2_nb_value, target_2_nb_mji,
 target_2_nb_em_gres, target_2_nb_em_soued) = _target_stats(target_2_nb, "Not Built")

In [None]:
# Find objective functions for each of the four selected points
# NOTE(review): `df` is the frame left over from the data-loading loop, i.e. the non-dominated
# solutions of the last entry in `optim_names`; this cell only previews its first three rows.
df.head(3)

In [None]:
class CenteredTitleHandler(HandlerBase):
    """Legend handler whose only artist is the legend title drawn as centred text."""

    def create_artists(self, legend, orig_handle, xdescent, ydescent, width, height, fontsize, trans):
        """Return a one-element list holding a centred text artist with the legend title."""
        legend_title = legend.get_title()
        centred_text = plt.text(
            0, 0, legend_title.get_text(),
            ha="center", va="center",
            fontsize=legend_title.get_fontsize(),
        )
        return [centred_text]
    
def create_figure_txt(jaccard_index, em_gres, em_ipcc) -> str:
    """
    Build the three-line annotation text shown next to a highlighted point.

    Each of the three numbers is formatted with two decimals. The doubled
    dollar signs in the template are escapes that render as single ``$``
    characters in the returned text.
    """
    annotation = Template(
        "Mean Jaccard Index: ${jaccard_index}\n"
        "Net Emission (G-res): ${em_gres}, MtCO$$_{2e}$$/year \n"
        "Net Emission (EF): ${em_ipcc}, MtCO$$_{2e}$$/year"
    )
    raw_values = {
        "jaccard_index": jaccard_index,
        "em_gres": em_gres,
        "em_ipcc": em_ipcc,
    }
    formatted = {key: f"{value:.2f}" for key, value in raw_values.items()}
    return annotation.substitute(formatted)

In [None]:
def _highlight_target(axis, production_twh, jaccard_index, em_gres, em_ipcc, text_xy):
    """Annotate one selected production target on `axis` and ring its data point.

    `production_twh` is converted to GWh/year because the x axis is plotted in GWh/year.
    `text_xy` is the (x, y) position of the annotation box in data coordinates.
    """
    point = (production_twh * 1_000, jaccard_index)
    axis.annotate(
        text=create_figure_txt(
            jaccard_index=jaccard_index, em_gres=em_gres, em_ipcc=em_ipcc),
        xy=point,  # x and y coordinates of the data point
        xytext=text_xy,  # x and y coordinates for the annotation text
        color="#292828",
        fontsize=9,
        arrowprops={"color": "black", "arrowstyle": "-", "linewidth": 0.55},
        # Shaded, rounded rectangle around the text
        bbox=dict(
            boxstyle="round,pad=0.4",
            edgecolor="black",
            facecolor="white",
            linewidth=0.3,
            alpha=0.7,
        ),
    )
    axis.plot(
        *point,
        marker='o', markersize=13, markeredgecolor='black',
        markerfacecolor=(1, 1, 1, 0.9),
        markeredgewidth=1.0, alpha=0.8)


# Set seaborn style for publication-quality plots
sns.set(style="white", context="talk")
# Initialize the plot
fig = plt.figure(figsize=(10, 6))
ax = plt.subplot(111)

diverging_colors = sns.color_palette("RdBu", 2)

# Create scatter plot with hue based on 'Status'
sns.scatterplot(
    ax=ax,
    data=ave_jacard_df,
    x='HP Production [GWh/year]',
    y='Mean Jaccard Index',
    hue='Status',
    palette=diverging_colors,
    s=100,              # Set marker size
    alpha=0.7,
    edgecolor="k",      # Add edge color for markers
    linewidth=0.5       # Thin edge line
)
sns.despine()

# Format plot for publication
ax.set_ylabel('Mean Jaccard Index', fontsize=14, color='#4f4e4e')

# Set the main title
plt.suptitle(
    "Similarity Between Optimal Reservoir Portfolios for Two GHG Emission Criteria",
    fontsize=13,
    color='#4f4e4e',
    weight='bold',
    x=0.51,  # Adjust this to align with the subtitle
    y=0.93,  # Adjust this to align with the subtitle
)
# Set the subtitle
ax.set_title(
    "Explicitly Derived Emissions (G-res Methodology) vs Emission Factors (EF)",
    fontsize=12,
    color='#4f4e4e',
)
# tweak the title
ax.title.set_weight('normal')
ax.title.set_color('#4f4e4e')

# NOTE(review): this legend is replaced by the final plt.legend() call at the end of the cell;
# it is kept so that the layout computed by tight_layout() below is unchanged.
plt.legend(title='Status', title_fontsize='13', fontsize='11', loc='best',
           frameon=True,
           handler_map={None: CenteredTitleHandler()})
# Ensure tight layout for better spacing
plt.tight_layout()

# Style the tick marks
ax.xaxis.set_tick_params(size=8, color='#4f4e4e')
ax.yaxis.set_tick_params(size=8, color='#4f4e4e')

# change the color of the top and right spines to opaque gray
ax.spines['right'].set_color((.8, .8, .8))
ax.spines['top'].set_color((.8, .8, .8))

ax.set_xlim([0, 250_000])

# Keep the scatter handles for the final legend before further artists are added
handles, _ = ax.get_legend_handles_labels()

# Shade the production range below 45 TWh/year and mark its upper boundary
plt.axvspan(
    xmin=0,
    xmax=45_000,
    alpha=0.05,
    color="grey",
    zorder=0,  # Put shaded region behind plot
)
ax.axvline(x=45_000, color='k', linestyle='-', alpha=0.5, linewidth=0.5)

# Highlight the selected targets: scenario I (built, not built), then scenario II (not built, built)
highlighted_targets = [
    (target_1_b_value, target_1_b_mji, target_1_b_em_gres, target_1_b_em_soued, (20_000, 0.90)),
    (target_1_nb_value, target_1_nb_mji, target_1_nb_em_gres, target_1_nb_em_soued, (32_000, 0.46)),
    (target_2_nb_value, target_2_nb_mji, target_2_nb_em_gres, target_2_nb_em_soued, (160_000, 0.40)),
    (target_2_b_value, target_2_b_mji, target_2_b_em_gres, target_2_b_em_soued, (60_000, 0.25)),
]
for production_twh, mji, em_gres, em_soued, text_xy in highlighted_targets:
    _highlight_target(ax, production_twh, mji, em_gres, em_soued, text_xy)

# Dashed guides at the built-scenario production limits and at both built targets
for guide_x in (min_prod_target, target_1_b_value * 1_000, target_2_b_value * 1_000, max_prod_target):
    ax.axvline(x=guide_x, color='gray', linestyle='--', alpha=0.5)

# The data are plotted in GWh/year; relabel the ticks so the axis reads in TWh/year
x_ticks = [50_000, 100_000, 150_000, 200_000, 250_000]
new_labels = ["50", "100", "150", "200", "250"]  # New xtick labels
plt.xticks(ticks=x_ticks, labels=new_labels, fontsize=16)
plt.yticks(fontsize=16)
ax.set_xlabel('HP Production [TWh/year]', fontsize=14, color='#4f4e4e')

# Star and diamond markers at the lowest and highest built-scenario production
marker_edge_width = 0.7
marker_alpha = 0.9
ax.plot(
    min_prod_target, 0.2,
    marker='*', markersize=20, color='yellow', markeredgecolor='k',
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
ax.plot(
    max_prod_target, 0.2,
    marker='d', markersize=16, color="yellow", markeredgecolor='k',
    markeredgewidth=marker_edge_width, alpha=marker_alpha)

# Final size and style of both axis labels
for axis_label in (ax.xaxis.get_label(), ax.yaxis.get_label()):
    axis_label.set_style('normal')
    axis_label.set_size(16)

# Create the legend
leg = plt.legend(
    handles=handles,
    title="Optimization Scenario",
    title_fontsize=14,
    fontsize=12,
    bbox_to_anchor=(0.93, 0.19),  # Customize the position of the legend here
    frameon=False,  # Disable the legend border
    facecolor='white', framealpha=1,
    alignment='center',
    ncol=2,
    labels=[
        "Built",
        "Not Built",
    ],
)

# NOTE(review): relies on a private matplotlib attribute to left-align the legend contents
leg._legend_box.align = "left"

# Show the plot
plt.show()


In [None]:
# Save the figure in every requested format
fig_options = ['png', 'pdf', 'svg']
fig_folder = pathlib.Path("outputs/figures/moo")
# savefig does not create missing folders, so make sure the target exists first
fig_folder.mkdir(parents=True, exist_ok=True)
# Extra savefig arguments per supported format; formats not listed here are ignored
format_kwargs = {
    'png': {'dpi': 600, 'transparent': True},
    'pdf': {},
    'svg': {},
}
for extension, extra_kwargs in format_kwargs.items():
    if extension in fig_options:
        fig.savefig(fig_folder / f'mean_jaccard.{extension}', bbox_inches='tight', **extra_kwargs)

### The End