## Visualise optimization results
### T. Janus
### 15/01/2024

## TODO:
1. Visualise different dam scenarios on maps (borrow the maps from one of the previous notebooks).
2. Create a composite figure with tiles using facetgrid etc.

In [None]:
from dataclasses import dataclass, field
from typing import ClassVar, Dict, List, Any, Tuple, Set, Tuple, Sequence
from typing import TypeAlias, TypeVar, Generic
import subprocess
import pathlib
import numpy as np
import pandas as pd
import string
from datetime import datetime
from parse import parse
from ttp import ttp
import json
import gc
import bson
import pprint
import re
import ast
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx

import pygmo as pg
from tqdm import tqdm

import seaborn as sns # for Data visualization
import matplotlib
import matplotlib.pyplot as plt # for Data visualization
from IPython.display import display, HTML

from jinja2 import Template

%matplotlib inline

T = TypeVar("T")
GenericCollection: TypeAlias = Set[T] | Tuple[T] | List[T]
NumType= TypeVar('NumType', int, float)

In [None]:
def reduce_mem_usage(df, allow_float16: bool = False):
    """Downcast the numeric columns of `df` to smaller dtypes and report the memory saved.

    Every column whose dtype name starts with ``int`` or ``float`` is inspected; all
    other columns (objects, categories, unsigned integers, ...) are left untouched.

    * Integer columns are cast to the first of int8, uint8, int16, uint16, int32,
      uint32, int64 whose (inclusive) range contains the column's min and max.
    * Float columns are cast to float32 when their values lie strictly inside the
      float32 range and to float64 otherwise. Half precision is only used when
      `allow_float16` is True, because float16 keeps roughly three significant
      digits and would visibly round the data.

    Args:
        df: Dataframe to shrink. It is modified in place.
        allow_float16: Permit downcasting float columns to float16.

    Returns:
        The same dataframe object, with downcast columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Ordered from smallest to largest; the first type that fits wins.
    int_candidates = (
        np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64)
    float_candidates = ((np.float16,) if allow_float16 else ()) + (np.float32,)

    for col in df.columns:
        type_name = str(df[col].dtype)
        if type_name.startswith('int'):
            c_min = df[col].min()
            c_max = df[col].max()
            # For an empty column min/max are NaN, every comparison is False and
            # the column is left as it is.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif type_name.startswith('float'):
            c_min = df[col].min()
            c_max = df[col].max()
            # Strict bounds keep columns containing +/-inf (or only NaN) in float64.
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min < c_min and c_max < limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard against a zero-sized frame so the report never divides by zero.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))
    return df

In [None]:
#matplotlib.rcParams['font.family'] = ['monospace', 'sans-serif']

In [None]:
# Auxiliary functions
def read_id_ifc_map(file_name: pathlib.Path = pathlib.Path('outputs/moo/id_to_ifc.json')):
    """Load the JSON mapping from optimization dam IDs to IFC dam IDs.

    JSON object keys are strings, so they are converted to `int` before the
    dictionary is returned; values are returned as stored in the file.
    """
    with open(file_name, 'r') as handle:
        raw_map = json.load(handle)
    id_to_ifc = {}
    for raw_key, ifc_id in raw_map.items():
        id_to_ifc[int(raw_key)] = ifc_id
    return id_to_ifc
    
def set_remap(
        value_list: Set[NumType], value_map, missing_val_id: int = -99, 
        safe: bool = False) -> Set[NumType]:
    """Translate every member of `value_list` through `value_map` and return the result as a set.

    With `safe=False` (default) a value absent from `value_map` raises `KeyError`.
    With `safe=True` such values are replaced by `missing_val_id` instead.
    """
    if not safe:
        # Strict lookup: unknown values are an error.
        return {value_map[item] for item in value_list}
    # Lenient lookup: unknown values collapse onto the placeholder ID.
    return {value_map.get(item, missing_val_id) for item in value_list}

def get_every_n_row(df: pd.DataFrame, n: int) -> pd.DataFrame:
    """Return every n-th row of a dataframe, starting with the first row.

    Used for 'skimming-down' large chunks of data to help with post-processing
    and prototyping visualisation.

    The result is an independent copy: columns can be added to it or modified
    without chained-assignment warnings and without touching `df`, and the
    (larger) source frame is not kept alive by the thinned one.

    Args:
        df: Dataframe to thin out.
        n: Step between retained rows; must be non-zero (it is used as a slice step).

    Returns:
        A new dataframe with rows 0, n, 2n, ... and their original index labels.
    """
    return df.iloc[::n].copy()

In [None]:
@dataclass
class SolutionFileParser:
    """ """
    file_path: str | pathlib.Path
    header_template: ClassVar[str] = """
Date/time: {{ date_time }}
Data file: {{ file_name }}
Wall time: {{ wall_time | to_float}} seconds.
CPU time: {{ cpu_time | to_float}} seconds.
seed: {{ seed | to_int}}
num_solutions: {{ num_solutions | to_int}}
# pruning steps (# nodes): {{ num_pruning_steps | to_int}}
Max policies considered: {{ max_policies | to_int}}
Policies considered: {{ num_policies | to_int}}
Pruned policies: {{ pruned_policies | to_int}}
epsilon: {{ epsilon | to_float}}
batch size: {{ batch_size | to_int }}
criteria: {{ criteria | ORPHRASE | split(',')}}
"""
    solution_template: str = ""
    header: str = ""
    solutions: str = ""
    data: Dict[str, Any] = field(default_factory=dict)
    
    def __post_init__(self) -> None:
        """ """
        with open(self.file_path, 'r') as file:
            raw_data = file.readlines()
            self.header = "".join(raw_data[0:13])
            self.solutions = "".join(raw_data[14:])
            
    def _create_solution_template(self) -> None:
        """ """
        sol_template = ""
        crit_template = "{{{{ {} | to_float }}}}"
        num_dams_template = "{{{{ {} | to_int }}}}"
        for criterion in self.data['header']['criteria']:
            sol_template += crit_template.format(criterion) + ", "
        sol_template += num_dams_template.format('num_dams') + ", "
        sol_template += "{{ dam_ids | ORPHRASE | split(' ')}}"
        self.solution_template = sol_template
        
    def parse(self) -> None:
        """ """
        # 1. Parse header
        parser_header = ttp(self.header, self.header_template)
        parser_header.parse()
        header_data = parser_header.result(structure="flat_list")[0]
        header_data['criteria'] = [criterion.strip() for criterion in header_data['criteria']]
        date_time_formatted = re.sub(r'_+', ',', header_data['date_time'])
        header_data['date_time'] = datetime.strptime(
            date_time_formatted, "%a,%b,%d,%H,%M,%S,%Y")
        self.data['header'] = header_data
        # 2. Dynamically create a solution template
        self._create_solution_template()
        # 3. Parse solutions
        parser_sol = ttp(self.solutions, self.solution_template)
        parser_sol.parse()
        solution_data = parser_sol.result(structure="flat_list")
        for solution in solution_data:
            solution['dam_ids'] = [int(dam_id) for dam_id in solution['dam_ids']]
        self.data['solutions'] = solution_data
        
    @property
    def solutions_df(self) -> pd.DataFrame:
        """ """
        df = pd.DataFrame(self.data['solutions'])
        if set(['loss_agri', 'loss_forest']).issubset(set(df.columns)):
            df['land_loss'] = df['loss_agri'] + df['loss_forest']
        # Calculate ghg intensity, for ghg in tonneCO2eq/year and energy in MWh/d
        # GHG intensity needs to be in gCO2eq/kWh
        df['ghg_intensity'] = df['ghg'] / df['energy'] * 1_000 / 365.25 / 24
        return df
        
        
    def to_json(self, json_file: str | pathlib.Path) -> None:
        """ """
        # Custom serialization function for datetime objects
        def serialize_datetime(obj: Any) -> str:
            if isinstance(obj, datetime):
                return obj.isoformat()
        
        with open(json_file, 'w') as file:
            json_string = json.dumps(self.data, default=serialize_datetime, indent=4)
            file.write(json_string)
            
    def to_bson(self, file_path: str | pathlib.Path) -> None:
        with open(file_path, 'wb') as bson_file:
            serialized_data = bson.dumps(self.data)
            bson_file.write(serialized_data)
            
    def to_csv(self, csv_file: str | pathlib.Path) -> None:
        """Save solutions to a csv file.
        Dam IDs are saved as a string representation of a list of integers. To retrieve
        the list of interegers, parse the dam_ids column
        df_read.dam_ids = df_read.dam_ids.map(ast.literal_eval) 
        (requires `import ast`)"""
        self.solutions_df.to_csv(csv_file, encoding='utf-8', index=False)


@dataclass
class OutputVisualiser:
    """Plotting helpers operating on a dataframe of optimization results.

    Attributes:
        data: Dataframe whose columns are plotted.
    """
    data: pd.DataFrame

    @property
    def columns(self) -> List[str]:
        """Column labels of `data` as a plain list."""
        return [*self.data.columns]

    def plot_parallel(
            self, columns: GenericCollection[str], color_col: str, title: str | None = None,
            color_limits: Tuple[float, float] = (0,200), **kwargs):
        """Draw, show and return a plotly parallel-coordinates figure.

        `columns` are the dimensions to draw, `color_col` the column driving the
        line colour and `color_limits` the colour range. Extra keyword arguments
        are forwarded to `px.parallel_coordinates`.
        """
        # Alternative colour scales that can be swapped in:
        #   px.colors.diverging.Tealrose, px.colors.sequential.Blues,
        #   px.colors.sequential.Oranges, px.colors.diverging.RdYlBu,
        #   color_continuous_scale=px.colors.diverging.Armyrose
        #   (optionally with color_continuous_midpoint=2)
        fixed_options = dict(
            color=color_col,
            dimensions=columns,
            color_continuous_scale=px.colors.diverging.Tealrose,
            width=1000,
            title=title,
            range_color=color_limits,
        )
        fig = px.parallel_coordinates(self.data, **fixed_options, **kwargs)
        fig.show()
        return fig

    def plot_scatter_2D(
            self, x_col: str, y_col: str, hue: str | None = None, size: str | None = None,
            palette: str = "hot", xlabel: str | None = None, ylabel: str | None = None) -> None:
        """Draw and show a seaborn scatter plot of `y_col` against `x_col`.

        `hue` and `size` optionally name the columns that drive marker colour and
        marker size; `xlabel` and `ylabel` are the axis labels.
        """
        marker_style = dict(
            edgecolor="k", facecolor="w", linewidth=0.2, linestyle='-')

        fig, ax = plt.subplots(figsize=(8, 5))
        sns.set_style('white')
        sns.set_context("paper", font_scale = 1)
        sns.despine(right = True)
        sns.scatterplot(
            data=self.data, x=x_col, y=y_col,
            hue=hue, size=size, palette=palette,
            marker='o', alpha=0.95, **marker_style)
        ax.legend(
            title='Development scenario / Firm Energy, MW',
            fontsize=10, title_fontsize=12, frameon=False, ncol=3)
        plt.xlabel(xlabel, fontsize=12)
        plt.ylabel(ylabel, fontsize=12)
        fig.show()
        
        
@dataclass
class ObjectiveCalculator:
    """Sum objective values over a selection of dams.

    Attributes:
        obj_df: Dataframe with one row per dam, indexed by dam ID, holding the
            columns listed in `obj_names`.
        ids: IDs of the selected dams. They must use the same ID scheme as the
            index of `obj_df` (IFC IDs).
        obj_names: Names of the objective columns that are summed.
    """
    obj_df: pd.DataFrame
    ids: List[NumType] = field(default_factory = list) # Need to be IFC_IDS
    obj_names: ClassVar[List[str]] = [
        'HP_mean', 'HP_firm', 'tot_em', 'crop_area_loss_km2', 'forest_area_loss_km2']

    def _filter_df(self) -> Tuple[pd.DataFrame, List[NumType]]:
        """Return the rows of `obj_df` whose index is in `ids`, plus the IDs not found.

        The missing IDs are reported once each, in the order they are iterated in `ids`.
        """
        requested = list(dict.fromkeys(self.ids))
        filtered_df = self.obj_df[self.obj_df.index.isin(requested)]
        found_indices = set(filtered_df.index.to_list())
        missed_indices = [_id for _id in requested if _id not in found_indices]
        return filtered_df, missed_indices

    @property
    def objectives(self) -> pd.Series:
        """Column-wise sum of the objective columns over the selected dams.

        IDs that are not present in `obj_df` do not contribute to the sums.
        """
        selected_df, _ = self._filter_df()
        return selected_df[self.obj_names].sum()

    
def map_ids(moo_ids: Sequence[int], id_map: Dict[int, int]) -> Sequence[int]:
    """Translate a sequence of IDs (e.g. dam IDs) through `id_map`.

    IDs without an entry in `id_map` are passed through unchanged. The result is
    a list in the same order as the input."""
    lookup = id_map.get
    return [lookup(dam_id, dam_id) for dam_id in moo_ids]


def find_solution_by_dam_numbers(
        df: pd.DataFrame, dam_ids: Set[int], column: str = 'dam_ids') -> pd.DataFrame:
    """Find row(s) of a dataframe whose selected dams match `dam_ids` exactly.

    Each cell of `column` is expected to hold a collection of dam IDs (list, set, ...).
    A row matches when its collection contains the same IDs as `dam_ids`,
    irrespective of order.

    Args:
        df: Dataframe with one solution per row.
        dam_ids: Dam IDs that define the solution to look for.
        column: Name of the column holding the dam ID collections.

    Returns:
        The matching rows (possibly none) with their original index.
    """
    target = set(dam_ids)
    # Compare cell by cell: `Series == set` would be treated as an element-wise
    # comparison against a list-like rather than as set equality.
    mask = df[column].map(lambda ids: set(ids) == target).astype(bool)
    return df[mask]

def return_row_by_criterion(df: pd.DataFrame, criterion: str, value: Any) -> pd.Series:
    """Return the row of `df` whose `criterion` column is closest to `value`.

    Closeness is the absolute difference; on ties the first such row wins."""
    distance_to_target = df[criterion].sub(value).abs()
    return df.loc[distance_to_target.idxmin()]

In [None]:
# IFC IDs of the dams whose status is 'Existing'
dam_data = pd.read_csv(pathlib.Path("outputs/moo/all_hp.csv"))
built_dam_ifc_ids: Set[int] = set(
    dam_data.loc[dam_data['status'] == 'Existing', 'ifc_id'].to_list())
# Mapping from optimization dam IDs to IFC dam IDs
id_ifc_map = read_id_ifc_map()
# Sorted IFC IDs of the dams included in the analysis.
# The lookup walks the keys 1..N, i.e. it requires the optimization IDs to be
# exactly 1, 2, ..., N.
ifc_ids = sorted(
    id_ifc_map[optim_id] for optim_id in range(1, len(id_ifc_map) + 1))

### Define constants

In [None]:
# Solver output files (algorithm with expansion / compression)
mya_nobuilt_5obj_filename = 'mya_5_obj_nobuilt.sol'
mya_built_5obj_filename = 'mya_5_obj_built.sol'
sol_file_folder = pathlib.Path('moo_solver_CPAIOR/outputs/epsilon2_5obj')
mya_nobuilt_5obj_path_cpaior = sol_file_folder / mya_nobuilt_5obj_filename
mya_built_5obj_path_cpaior = sol_file_folder / mya_built_5obj_filename

# Table with the data of all dams
dam_data_filename = pathlib.Path("outputs/moo/all_hp.csv")

# Mapping between the IDs used in the MOO algorithm and the IDs in the IFC database
map_file_path = pathlib.Path('outputs/moo/id_to_ifc.json')
with open(map_file_path, 'r') as file:
    # JSON keys are strings; the optimization IDs are integers
    id_map = {int(key): value for key, value in json.load(file).items()}
# Reverse lookup: IFC ID -> optimization ID
ifc_to_id_map = {ifc_id: optim_id for optim_id, ifc_id in id_map.items()}

In [None]:
# Dam table indexed by IFC ID
dam_df = pd.read_csv(dam_data_filename, index_col=0).set_index('ifc_id')
# IFC IDs of the dams flagged with status_int == 1, and the same dams in optimization IDs
built_dam_ids_ifc = set(dam_df.loc[dam_df['status_int'] == 1].index.to_list())
built_dam_ids_opt = set_remap(built_dam_ids_ifc, ifc_to_id_map)

In [None]:
# Count the non-null names among the dams with status 'Existing'
num_existing_dams = dam_df.loc[dam_df['status'] == 'Existing', 'name'].count()
print(f"Number of existing dams: {num_existing_dams}")

In [None]:
# Index labels (IFC IDs) of the dams with status 'Existing'
existing_dam_ids = set(dam_df.loc[dam_df['status'] == 'Existing'].index.to_list())
print(existing_dam_ids)

## Call the optimizer by calling external script using subprocess (not recommended)
### Better to read pre-calculated solutions as the optimization takes a long time

In [None]:
# Set to True to re-run the optimization instead of reading stored solutions
rerun_CPAIOR: bool = False

# Shell script that launches the optimization runs
script_path = 'moo_solver_CPAIOR/run_myanmar_dam_selection.sh'

if rerun_CPAIOR:
    try:
        # check=True turns a non-zero exit status into CalledProcessError
        subprocess.run(['bash', script_path], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing script: {e}")
    else:
        print("Optimization runs successful.")

## Parse outputs - 5 objective optimization

In [None]:
save_to_json: bool = False
save_to_csv: bool = False


def _parse_solution_file(
        sol_path: pathlib.Path, json_name: str, csv_name: str) -> SolutionFileParser:
    """Parse one solver output file and, if requested, export it to the solution folder."""
    parser = SolutionFileParser(sol_path)
    parser.parse()
    if save_to_json:
        parser.to_json(sol_file_folder / pathlib.Path(json_name))
    if save_to_csv:
        parser.to_csv(sol_file_folder / pathlib.Path(csv_name))
    return parser


# Solutions with no built dams
mya_nobuilt_parser_cpaior_5obj = _parse_solution_file(
    mya_nobuilt_5obj_path_cpaior, 'mya_5_obj_nobuilt.json', 'mya_5_obj_nobuilt.csv')
# Solutions with built dams
mya_built_parser_cpaior_5obj = _parse_solution_file(
    mya_built_5obj_path_cpaior, 'mya_5_obj_built.json', 'mya_5_obj_built.csv')

In [None]:
# Thinning of the merged data: n out of every n+1 rows are dropped,
# e.g. with a value of 3, three out of every four rows are removed
no_rows_to_remove: int = 6
filter_dataframe: bool = True

# Solution dataframes of both scenarios, passed through the memory reducer
df_nobuilt = reduce_mem_usage(mya_nobuilt_parser_cpaior_5obj.solutions_df)
df_built = reduce_mem_usage(mya_built_parser_cpaior_5obj.solutions_df)

# The parsers are no longer needed once the dataframes exist
del mya_nobuilt_parser_cpaior_5obj, mya_built_parser_cpaior_5obj

## Combine the result dataframes of the 'built' and 'not built' scenarios

In [None]:
# Label each solution set with its scenario before merging
df_nobuilt['Scenario'] = "Not Built"
df_built['Scenario'] = "Built"

# Stack both scenarios into one dataframe and give the columns display names
# (the HTML tags in the names are kept as-is; they are used as plot labels later on)
merged_df = pd.concat([df_built, df_nobuilt], ignore_index=True)
old_new_col_map = {
    'energy': "Mean annual HP, [MW]",
    'ghg': 'GHG emissions [tonne CO<sub>2,eq</sub>/year]', 
    'firm_energy': 'Firm HP, [MW]',
    'loss_agri': 'Agricultural land loss, [km<sup>2</sup>]',
    'loss_forest': 'Deforestation, [km<sup>2</sup>]',
    'num_dams': 'No. of selected dams',
    'dam_ids': 'Dam IDs',
    'land_loss': 'Land loss, [km<sup>2</sup>]',
    'ghg_intensity': 'GHG intensity [gCO<sub>2,eq</sub>/kWh]'}
merged_df.rename(columns=old_new_col_map, inplace=True)
# Dam IDs are stored as sets from here on
merged_df['Dam IDs'] = merged_df['Dam IDs'].apply(set)
merged_df['Firm Power Ratio, [%]'] = merged_df['Firm HP, [MW]'] / merged_df['Mean annual HP, [MW]'] * 100
# Numeric scenario flag: 1 = Built, 0 = Not Built
merged_df['Scenario, [1/0]'] = merged_df['Scenario'].map({'Built': 1, 'Not Built': 0})

# Define bin edges for land loss
bins = [0, 400, 800, 1200, 1600]
# Define labels for the bins
labels = ['0-400 km2', '400-800 km2', '800-1200 km2', '1200-1600 km2']
# right=False makes the bins left-closed: [0, 400), [400, 800), ...
# NOTE(review): values of 1600 km2 or more fall outside the last bin and become NaN - confirm
# that the data never exceeds this limit.
merged_df["Loss of Land [km<sup>2</sup>]"] = pd.cut(
    merged_df['Land loss, [km<sup>2</sup>]'], bins=bins, labels=labels, right=False)
# Arrange by scenario and energy in ascending order
merged_df = merged_df.sort_values(by=['Scenario', 'Mean annual HP, [MW]'], ascending=True)
# Introduce new columns (mean power in MW converted to energy per year / per day)
merged_df['HP Production [GWh/year]'] = merged_df["Mean annual HP, [MW]"] * 365.25 * 24 / 1_000
merged_df['Mean HP [GWh/d]'] = merged_df["Mean annual HP, [MW]"] * 24 / 1_000
merged_df['Firm HP [GWh/d]'] = merged_df['Firm HP, [MW]'] * 24 / 1_000

# Reduce size of some data in merged_df
merged_df['Scenario, [1/0]'] = merged_df['Scenario, [1/0]'].astype('uint8')
merged_df["Loss of Land [km<sup>2</sup>]"] = merged_df["Loss of Land [km<sup>2</sup>]"].astype('category')
# NOTE(review): int8 assumes no solution selects more than 127 dams - confirm.
merged_df['No. of selected dams'] = merged_df['No. of selected dams'].astype('int8')
# Use an automated method
merged_df = reduce_mem_usage(merged_df)

# Filter the dataframe if filtering is selected (keep one row out of every no_rows_to_remove+1)
if filter_dataframe:
    merged_df = get_every_n_row(merged_df, no_rows_to_remove+1)

# Perform non-dominated sorting in 2D for two 5D nondominated fronts: for Built and NotBuilt scenarios
# Collect the objective values of the 'Built' scenario
xy_pairs_list_built = merged_df\
    .loc[merged_df['Scenario'] =='Built', 
         ["Mean annual HP, [MW]", 'GHG emissions [tonne CO<sub>2,eq</sub>/year]', 
          'GHG intensity [gCO<sub>2,eq</sub>/kWh]', 'Firm Power Ratio, [%]']]
# HP is negated before the sorting and restored afterwards
# (presumably because the non-dominated sorting treats every objective as minimised - confirm)
xy_pairs_list_built["Mean annual HP, [MW]"] = xy_pairs_list_built["Mean annual HP, [MW]"] * -1
xy_pairs_list_built = xy_pairs_list_built.to_numpy().tolist()

# Same for the 'Not Built' scenario
xy_pairs_list_nobuilt = merged_df\
    .loc[merged_df['Scenario'] =='Not Built', 
         ["Mean annual HP, [MW]", 'GHG emissions [tonne CO<sub>2,eq</sub>/year]', 
          'GHG intensity [gCO<sub>2,eq</sub>/kWh]', 'Firm Power Ratio, [%]']]
xy_pairs_list_nobuilt["Mean annual HP, [MW]"] = xy_pairs_list_nobuilt["Mean annual HP, [MW]"] * -1
xy_pairs_list_nobuilt = xy_pairs_list_nobuilt.to_numpy().tolist()

xy_pair_array_built = np.array(xy_pairs_list_built)
xy_pair_array_nobuilt = np.array(xy_pairs_list_nobuilt)

# Find non-dominated fronts using the first two columns only: (-HP, GHG emissions).
# The results are used below as positional row indices into the per-scenario arrays.
non_dom_front_built = pg.non_dominated_front_2d(points=xy_pair_array_built[:,:2])
non_dom_front_nobuilt = pg.non_dominated_front_2d(points=xy_pair_array_nobuilt[:,:2])
# Convert back from negative to positive values
xy_pair_array_built[:,0] = xy_pair_array_built[:,0] * -1
xy_pair_array_nobuilt[:,0] = xy_pair_array_nobuilt[:,0] * -1
# Select nondominated points
xy_nondom_built = xy_pair_array_built[non_dom_front_built]
xy_nondom_nobuilt = xy_pair_array_nobuilt[non_dom_front_nobuilt]
xy_nondom_all = np.concatenate((xy_nondom_built, xy_nondom_nobuilt), axis=0)

In [None]:
# Pick the rows belonging to the 2D nondominated set of each scenario. The front
# indices are positional within the rows of the respective scenario.
em_int_nondom_built = merged_df[merged_df['Scenario'] == 'Built'].iloc[non_dom_front_built]
em_int_nondom_nobuilt = merged_df[merged_df['Scenario'] == 'Not Built'].iloc[non_dom_front_nobuilt]
# Merge both fronts (with a fresh 0..N-1 index) and order them by increasing mean annual HP
em_int_nondom_df = pd.concat([em_int_nondom_built, em_int_nondom_nobuilt], ignore_index=True)
em_int_nondom_df = em_int_nondom_df.sort_values("Mean annual HP, [MW]")

In [None]:
# Optionally store the nondominated set and the merged results as CSV files
save_to_file: bool = False
if save_to_file:
    for frame, csv_name in (
            (em_int_nondom_df, 'em_int_nondom_df.csv'),
            (merged_df, 'merged_df.csv')):
        frame.to_csv(csv_name, index=False)

In [None]:
# Optionally restore both dataframes from the CSV files written by the previous cell.
# NOTE(review): after a CSV round trip the 'Dam IDs' sets and the categorical columns
# come back as plain strings - convert them before reusing the cells below.
load_from_file: bool = False
if load_from_file:
    merged_df = pd.read_csv('merged_df.csv')
    em_int_nondom_df = pd.read_csv('em_int_nondom_df.csv')

In [None]:
# Statistics: solution counts of the full (unfiltered) scenario dataframes
number_of_solutions = sum(len(frame) for frame in (df_nobuilt, df_built))
print(f"Total number of solutions : {number_of_solutions}")
print(f"Scenario with built constructed dams {len(df_built)} solutions")
print(f"Scenario with zero constructed dams {len(df_nobuilt)} solutions")

In [None]:
# Show the column labels of the merged dataframe
merged_df.columns

In [None]:
# Solution of the 'Built' scenario with minimum energy generation (built dams only).
# merged_df is sorted by scenario and mean annual HP, so this is the first 'Built' row.
built_min = merged_df.loc[merged_df['Scenario'] == 'Built'].iloc[0]
built_min_damids, built_min_energy, built_min_ghg_intensity = (
    built_min[label]
    for label in (
        'Dam IDs', "Mean annual HP, [MW]", 'GHG intensity [gCO<sub>2,eq</sub>/kWh]'))
built_min

In [None]:
# Cross-check: recompute the objectives of the selected dams from the input data.
# The dam IDs of the solution are translated to IFC IDs first.
built_min_ifc_ids = set_remap(built_min['Dam IDs'], id_map)
oc_built = ObjectiveCalculator(dam_df, ids=built_min_ifc_ids)
print(
    f"Min HP from optimization: {built_min_energy} MW, min HP from input data: {oc_built.objectives['HP_mean']} MW")

In [None]:
# Show the three nondominated 'Built' solutions with the fewest selected dams
em_int_nondom_df[em_int_nondom_df['Scenario']=="Built"].sort_values(by='No. of selected dams').head(3)

In [None]:
# Dam ID sets of two solutions selected by their index label in em_int_nondom_df.
# NOTE(review): 7848 and 7599 are hard-coded labels; they only exist for a particular
# data set and thinning setting (`no_rows_to_remove`) - confirm before re-running.
dam_ids_7848 = em_int_nondom_df.loc[7848, 'Dam IDs']
dam_ids_7599 = em_int_nondom_df.loc[7599, 'Dam IDs']

In [None]:
# `plot_data` is the nondominated dataframe itself (no copy), so the new column is
# also available through `em_int_nondom_df`
plot_data = em_int_nondom_df
plot_data['HP Production [TWh/year]'] = plot_data['HP Production [GWh/year]'] / 1_000


def _closest_on_front(scenario: str, hp_twh_per_year) -> pd.Series:
    """Nondominated solution of `scenario` whose HP production is closest to the target."""
    return return_row_by_criterion(
        plot_data[plot_data['Scenario'] == scenario],
        'HP Production [TWh/year]', value=hp_twh_per_year)


# Get selected data points
# Current situation: the 'Built' solution with minimum energy, and its closest 'Not Built' match
current_solution = (built_min['HP Production [GWh/year]']/1_000, built_min_ghg_intensity)
notbuilt_current = _closest_on_front("Not Built", built_min['HP Production [GWh/year]']/1_000)
# Solutions closest to 100 and 200 TWh/year in both scenarios
built_100 = _closest_on_front("Built", 100)
notbuilt_100 = _closest_on_front("Not Built", 100)
built_200 = _closest_on_front("Built", 200)
notbuilt_200 = _closest_on_front("Not Built", 200)

In [None]:
# Show the 'Not Built' solution closest to the current HP production
notbuilt_current

In [None]:
# Bin edges and labels for the firm power ratio: a fine set for the full data ...
firm_power_bins = [0, 15, 30, 45, 60, 75, 100]
firm_power_labels = ['0%-15%', '15%-30%', '30%-45%', '45%-60%', '60%-75%', '75%-100%']
# ... and a coarse set for the nondominated solutions
firm_power_bins2 = [0, 25, 50, 75]
firm_power_labels2 = ['0%-25%', '25%-50%', '50%-75%']
# Bins are left-closed (right=False); ratios outside the bin range become NaN
for frame, edges, names in (
        (em_int_nondom_df, firm_power_bins2, firm_power_labels2),
        (merged_df, firm_power_bins, firm_power_labels)):
    frame['Firm Power Ratio Cat'] = pd.cut(
        frame['Firm Power Ratio, [%]'], bins=edges, labels=names, right=False)

In [None]:
def get_x_y_intensity_plot(data: pd.Series) -> Tuple[float, float]:
    """Return the (HP production, GHG intensity) coordinates of one solution row."""
    hp_production = data['HP Production [TWh/year]']
    ghg_intensity = data['GHG intensity [gCO<sub>2,eq</sub>/kWh]']
    return hp_production, ghg_intensity

# Figure: GHG intensity against HP production for the nondominated solutions, with the
# six selected scenario solutions annotated. Saved to figures/moo/GHG_intensity_HP_plot.png.

# Shared styling of the highlighted solution markers and their annotations
# (these names are also used by the GHG emissions figure in the next cell)
marker_edge_width = 0.7
marker_alpha = 0.9
bbox = dict(boxstyle="round", pad=0.15, facecolor='none', edgecolor='none')
annotation_shrink = 0.03

kwargs_ghg_intensity  =   {
    'edgecolor':'k',
    #'marker': 'o',
    'alpha': 0.05,
    'linewidth':0.2,
    'linestyle':'-',
}
# NOTE(review): custom_palette is not used in this cell (the scatter plot uses 'Set2').
custom_palette = ["#FFFFFF", "#808080", "#000000", "#3A0CA3", "#4361EE", "#4CC9F0"]
fig, ax = plt.subplots(figsize=(8, 5))
sns.set_style('white')
sns.set_context("paper", font_scale = 1)
sns.despine(right = True)
# Marker size encodes the firm power ratio, marker colour the land loss class
sns.scatterplot(
    x = 'HP Production [TWh/year]', y = 'GHG intensity [gCO<sub>2,eq</sub>/kWh]', 
    data = em_int_nondom_df, palette='Set2',
    size='Firm Power Ratio, [%]', hue="Loss of Land [km<sup>2</sup>]", sizes = (1,150),
    #size="Loss of Land [km<sup>2</sup>]", hue='Firm Power Ratio Cat', 
    #sizes = {'0-400 km2': 10, '400-800 km2': 30, '800-1200 km2': 60, '1200-1600 km2': 120},
    **kwargs_ghg_intensity)
legend1 = plt.legend(loc="upper right", fontsize='large', ncols=1)
legend1.set_frame_on(False)
# Replacement legend texts (mathtext instead of the HTML-tagged column names).
# NOTE(review): the texts are assigned by position, so the legend must contain at least
# this many entries in exactly this order, and the numeric size labels are hard-coded -
# confirm against the legend seaborn generates.
updated_text = [
    "Loss of Land, [km$^2$]",
    "$0-400$ km$^2$",
    "$400-800$ km$^2$",
    "$800-1200$ km$^2$",
    "$1200-1600$ km$^2$",
    "Firm Power Ratio, [%]",
    "$15$",
    "$30$",
    "$45$",
    "$60$",
    "$75$"
]
for ix, text in enumerate(updated_text):
    legend1.get_texts()[ix].set_text(text)
# Vertical line at the current HP production (x position of the current solution).
# The legend was created above, so the label of this line is not displayed.
plt.axvline(
    x=current_solution[0], color='grey', linestyle='--', linewidth=2,
    label='Current mean annual power production, GWh/year')
plt.text(current_solution[0]-8, 34, 'Current mean annual HP production', 
         rotation=90, va='center', ha='center', fontsize=12)

# Plot chosen solution scenarios
# I_b: current solution (built dams only), drawn as a yellow star
plt.plot(
    current_solution[0], current_solution[1], 
    marker='*', markersize=17, color='yellow', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$I_{b}$', xy=(current_solution[0], current_solution[1]), 
             xytext=(current_solution[0] + 20, current_solution[1] - 7.5),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
# Current not built
curr_nbuilt_x, curr_nbuilt_y = get_x_y_intensity_plot(notbuilt_current)
plt.annotate('$I_{nb}$', xy=(curr_nbuilt_x, curr_nbuilt_y), xytext=(curr_nbuilt_x + 13, curr_nbuilt_y + 10),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
plt.plot(
    curr_nbuilt_x, curr_nbuilt_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
# Other values: solutions closest to 100 TWh/year (II) and 200 TWh/year (III)
built_100_x, built_100_y = get_x_y_intensity_plot(built_100)
plt.plot(
    built_100_x, built_100_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$II_b$', xy=(built_100_x, built_100_y), xytext=(built_100_x + 20, built_100_y + 10),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
notbuilt_100_x, notbuilt_100_y = get_x_y_intensity_plot(notbuilt_100)
plt.plot(
    notbuilt_100_x, notbuilt_100_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$II_{nb}$', xy=(notbuilt_100_x, notbuilt_100_y), xytext=(notbuilt_100_x + 38, notbuilt_100_y + 19),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
built_200_x, built_200_y = get_x_y_intensity_plot(built_200)
plt.plot(
    built_200_x, built_200_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$III_{b}$', xy=(built_200_x, built_200_y), xytext=(built_200_x + 13, built_200_y + 10),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
notbuilt_200_x, notbuilt_200_y = get_x_y_intensity_plot(notbuilt_200)
plt.plot(
    notbuilt_200_x, notbuilt_200_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$III_{nb}$', xy=(notbuilt_200_x, notbuilt_200_y), 
             xytext=(notbuilt_200_x + 25, notbuilt_200_y -7),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)

# Axis formatting and export
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
ax.set_ylim(-5,70)
ax.set_xlim(0,250)
plt.xlabel("HP Production, TWh/year", fontsize=14)
plt.ylabel("GHG intensity, gCO$_{2,eq}$/kWh", fontsize=14)
fig.savefig(pathlib.Path('figures/moo/GHG_intensity_HP_plot.png'), transparent=True, dpi=300)

In [None]:
#### Plot
def get_x_y_emissions_plot(data: pd.Series) -> Tuple[float, float]:
    """Return the (HP production, GHG emissions) coordinates of one solution row.

    The emissions are divided by one million, i.e. converted from the tonnes of the
    source column to megatonnes."""
    hp_production = data['HP Production [TWh/year]']
    ghg_emissions_mt = data['GHG emissions [tonne CO<sub>2,eq</sub>/year]'] / 1_000_000
    return hp_production, ghg_emissions_mt

# Figure: GHG emissions against HP production. All solutions are drawn as a grey
# background cloud, the nondominated solutions on top, and the six selected scenario
# solutions are annotated. Saved to figures/moo/GHG_emissions_HP_plot.png.

# Plot columns: HP production in TWh/year and emissions in Mt CO2/year
merged_df['HP Production [TWh/year]'] = merged_df['HP Production [GWh/year]'] / 1_000
merged_df['GHG emissions [Mt CO2/year]'] = \
    merged_df['GHG emissions [tonne CO<sub>2,eq</sub>/year]'] / 1_000_000
em_int_nondom_df['GHG emissions [Mt CO2/year]'] = \
    em_int_nondom_df['GHG emissions [tonne CO<sub>2,eq</sub>/year]'] / 1_000_000

# Marker style of the background cloud (all solutions)
kwargs  =   {
    'edgecolor':'grey',
    'marker': 'o',
    'facecolor':'none',
    'linewidth':0.01,
    'linestyle':'-',
    'alpha': 0.4
}
# Marker style of the nondominated solutions
kwargs_nondom  =   {
    'edgecolor':'k',
    'marker': 'o',
    'linewidth':0.05,
    'linestyle':'-',
    'alpha': 1
}
# NOTE(review): custom_palette is not used in this cell (the scatter plot uses 'Set2').
custom_palette = ["#FFFFFF", "#808080", "#000000", "#3A0CA3", "#4361EE", "#4CC9F0"]
        
fig, ax = plt.subplots(figsize=(8, 5), frameon=False, dpi=100)
sns.set_style('white')
sns.set_context("paper", font_scale = 1)
sns.despine(right = True)
sns.scatterplot(
    x = 'HP Production [TWh/year]', 
    y = 'GHG emissions [Mt CO2/year]', 
    data = merged_df, 
    #palette='Set2', #'YlOrRd', 
    #hue='Firm Power Ratio Cat',
    s = 50,
    **kwargs)

# NOTE(review): this legend is superseded by `legend2` created below.
ax.legend(title='Development scenario / Firm Energy, MW', fontsize=10, 
          title_fontsize=12, frameon=False,
          ncol=3)

# Nondominated solutions: marker size encodes the firm power ratio class,
# marker colour the land loss class
sns.scatterplot(
    x = 'HP Production [TWh/year]', y = 'GHG emissions [Mt CO2/year]', 
    data = em_int_nondom_df, palette='Set2',
    size='Firm Power Ratio Cat', hue="Loss of Land [km<sup>2</sup>]", 
    #sizes = {'0-400 km2': 10, '400-800 km2': 30, '800-1200 km2': 60, '1200-1600 km2': 120},
    sizes = {'0%-25%': 10, '25%-50%': 50, '50%-75%': 100},
    #sizes = (1,200),
    **kwargs_nondom)

legend2 = plt.legend(loc="upper left", fontsize='large', bbox_to_anchor=(0.08, 1.05))
legend2.set_frame_on(False)
# Replacement legend texts (mathtext instead of the HTML-tagged column names).
# Raw strings are used for the percent labels: "\%" is an invalid escape sequence in a
# normal string literal (SyntaxWarning on recent Python versions); the resulting text,
# a backslash followed by '%', is unchanged.
# NOTE(review): the texts are assigned by position, so the legend must contain at least
# this many entries in exactly this order.
updated_text = [
    "Loss of Land, [km$^2$]",
    "$0-400$ km$^2$",
    "$400-800$ km$^2$",
    "$800-1200$ km$^2$",
    "$1200-1600$ km$^2$",
    "Firm Power Ratio, [%]",
    r"$0\% - 25\%$",
    r"$25\% - 50\%$",
    r"$50\% - 75\%$"
]
for ix, text in enumerate(updated_text):
    legend2.get_texts()[ix].set_text(text)

# Plot solution points
# I_b: current solution (built dams only), drawn as a yellow star
current_solution = (
    built_min['HP Production [GWh/year]']/1_000, 
    built_min['GHG emissions [tonne CO<sub>2,eq</sub>/year]']/1_000/1_000)
plt.plot(
    current_solution[0], current_solution[1], 
    marker='*', markersize=17, color='yellow', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$I_{b}$', xy=(current_solution[0], current_solution[1]), 
             xytext=(current_solution[0] + 10, current_solution[1] + 0.7),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
# Current not built
curr_nbuilt_x, curr_nbuilt_y = get_x_y_emissions_plot(notbuilt_current)
plt.plot(
    curr_nbuilt_x, curr_nbuilt_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$I_{nb}$', xy=(curr_nbuilt_x, curr_nbuilt_y), xytext=(curr_nbuilt_x + 25, curr_nbuilt_y + 1.5),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
# Other values: solutions closest to 100 TWh/year (II) and 200 TWh/year (III)
built_100_x, built_100_y = get_x_y_emissions_plot(built_100)
plt.plot(
    built_100_x, built_100_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$II_b$', xy=(built_100_x, built_100_y), xytext=(built_100_x + 15, built_100_y + 0.55),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
notbuilt_100_x, notbuilt_100_y = get_x_y_emissions_plot(notbuilt_100)
plt.plot(
    notbuilt_100_x, notbuilt_100_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$II_{nb}$', xy=(notbuilt_100_x, notbuilt_100_y), xytext=(notbuilt_100_x + 30, notbuilt_100_y + 1.6),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
built_200_x, built_200_y = get_x_y_emissions_plot(built_200)
plt.plot(
    built_200_x, built_200_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$III_{b}$', xy=(built_200_x, built_200_y), xytext=(built_200_x - 15, built_200_y + 0.7),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox)
notbuilt_200_x, notbuilt_200_y = get_x_y_emissions_plot(notbuilt_200)
plt.plot(
    notbuilt_200_x, notbuilt_200_y, 
    marker='o', markersize=12, color='white', markeredgecolor='k', 
    markeredgewidth=marker_edge_width, alpha=marker_alpha)
plt.annotate('$III_{nb}$', xy=(notbuilt_200_x, notbuilt_200_y), 
             xytext=(notbuilt_200_x + 23, notbuilt_200_y -0.25),
             arrowprops=dict(facecolor='black', shrink=annotation_shrink, width=2, headwidth = 8 ),
             fontsize=14, bbox=bbox) 

# Add vertical line denoting the current HP production.
# The legend was created above, so the label of this line is not displayed.
plt.axvline(
    x=current_solution[0], color='grey', linestyle='--', linewidth=2,
    label='Current mean annual power production, GWh/year')
plt.text(current_solution[0]-8, 2.5, 'Current mean annual HP production', 
         rotation=90, va='center', ha='center', fontsize=12)

# Double-headed vertical arrow with a text label, placed 20 TWh/year to the right of
# the current solution
arrow_x = current_solution[0] + 20
arrow_y_start = 0  # y-coordinate for the starting point of the arrow
arrow_y_end = 0.95  # y-coordinate for the ending point of the arrow
arrow_text = 'Forgone opportunity'  # Text to be displayed next to the arrow
# Plot the line with arrows
plt.annotate('', xy=(arrow_x, arrow_y_end), xytext=(arrow_x, arrow_y_start),
             arrowprops=dict(arrowstyle='<->', color='k', lw=1.5), annotation_clip=False)
# Add text annotation next to the arrow
plt.text(arrow_x + 3, (arrow_y_start + arrow_y_end) / 2 - 0.05, arrow_text, color='k', fontsize=12)

# Axis formatting and export
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
ax.set_ylim(-0.2,5)
ax.set_xlim(0,250)
plt.xlabel("HP Production, TWh/year", fontsize=14)
plt.ylabel("GHG Emissions, Mt CO$_2$ / annum", fontsize=14)
fig.savefig(pathlib.Path('figures/moo/GHG_emissions_HP_plot.png'), transparent=True, dpi=300)

In [None]:
# Construct a dictionary with scenarios as keys and lists of constructed dams as values
def map_ids_to_ifc(scenario_ids: Dict[str, Set[int]], id_map = id_ifc_map) -> Dict[str, List[int]]:
    """Translate the dam IDs of every scenario through ``id_map``.

    Args:
        scenario_ids: Scenario name -> collection of dam IDs to translate.
        id_map: Mapping handed to ``set_remap`` for the translation. Defaults to the
            notebook-level ``id_ifc_map``.

    Returns:
        Scenario name -> list of remapped IDs, i.e. ``list(set_remap(ids, id_map))``
        for every scenario. Lists (rather than sets) are returned because the result
        is dumped to JSON further down in this notebook.
    """
    # Use the `id_map` argument (not the global `id_ifc_map`) so that a caller-supplied
    # mapping is actually honoured.
    return {
        scenario: list(set_remap(optim_ids, id_map))
        for scenario, optim_ids in scenario_ids.items()
    }

# Remapped dam IDs for the six highlighted solutions, keyed by scenario label.
# map_ids_to_ifc returns lists, hence Dict[str, List[int]].
sc_dams: Dict[str, List[int]] = map_ids_to_ifc({
    "Ib": built_min['Dam IDs'],
    "Inb": notbuilt_current['Dam IDs'],
    "IIb": built_100['Dam IDs'],
    "IInb": notbuilt_100['Dam IDs'],
    "IIIb": built_200['Dam IDs'],
    "IIInb": notbuilt_200['Dam IDs']
})
# Serialise before opening the file so that a serialisation error cannot leave a
# truncated/empty optim_scenarios.json behind.
json_string = json.dumps(sc_dams, indent=4)
with open(pathlib.Path('intermediate/optim_scenarios.json'), 'w', encoding='utf-8') as file:
    file.write(json_string)

In [None]:
# Prepare merged_df for the parallel plot below: add total emissions expressed in
# Mt and swap the HTML-formatted column names for shorter labels with <br> line breaks.
# NOTE: the rename mutates merged_df in place, so cells that still refer to the
# original column names (e.g. the earlier figures) have to be run before this one.
tonnes_per_megatonne = 1_000_000
merged_df['GHG emissions<br>[Mt CO2,eq/year]'] = (
    merged_df['GHG emissions [tonne CO<sub>2,eq</sub>/year]'] / tonnes_per_megatonne
)

short_column_names = {
    'Agricultural land loss, [km<sup>2</sup>]': 'Agricultural land<br>loss, [km2]',
    'Deforestation, [km<sup>2</sup>]': 'Deforestation<br>[km2]',
    'Land loss, [km<sup>2</sup>]': 'Land loss<br>[km2]',
    'GHG intensity [gCO<sub>2,eq</sub>/kWh]': 'GHG intensity<br>[gCO2,eq/kWh]',
    'Mean HP [GWh/d]': 'Mean HP<br>[GWh/d]',
    'Firm HP [GWh/d]': 'Firm HP<br>[GWh/d]',
}
merged_df.rename(columns=short_column_names, inplace=True)

In [None]:
# Store the scenario flag as floats (presumably so it can be used as a numeric axis
# in the parallel plot below -- confirm).
scenario_col = 'Scenario, [1/0]'
merged_df[scenario_col] = merged_df[scenario_col].astype(float)

In [None]:
# Display the current column names of merged_df as a plain list.
merged_df.columns.tolist()

In [None]:
# Axes of the parallel plot, in display order: merged_df column -> axis title.
axis_labels = {
    'Mean HP<br>[GWh/d]': 'Mean HP',
    'Firm HP<br>[GWh/d]': 'Firm HP',
    'GHG emissions<br>[Mt CO2,eq/year]': 'GHG emissions',
    'Agricultural land<br>loss, [km2]': 'Agricultural land loss',
    'Deforestation<br>[km2]': 'Deforestation',
    'Land loss<br>[km2]': 'Land loss',
    'GHG intensity<br>[gCO2,eq/kWh]': 'GHG intensity',
    'Firm Power Ratio, [%]': 'Firm power ratio',
    'Scenario, [1/0]': 'Scenario, [1/0]',
}

mya_nobuilt_vis = OutputVisualiser(merged_df)
# The plotted columns are exactly the keys of axis_labels (dicts keep insertion order);
# lines are coloured by GHG intensity with the colour range set to (0, 60).
fig = mya_nobuilt_vis.plot_parallel(
    columns=tuple(axis_labels),
    labels=axis_labels,
    color_col='GHG intensity<br>[gCO2,eq/kWh]',
    color_limits=(0, 60),
)

In [None]:
# Embed the plotly figure into an HTML page using a jinja2 template.
output_html_path=r"figures/moo/parallel_plot.html"
input_template_path = r"figures/moo/parallel_plot_template.html"

# full_html=False: only the figure's HTML fragment is produced; the template supplies the page.
plotly_jinja_data = {"fig":fig.to_html(full_html=False)}
#consider also defining the include_plotlyjs parameter to point to an external Plotly.js as described above

# Read and render the template first, so that a missing or broken template does not
# leave an empty/truncated output file behind. The template is decoded as UTF-8 to
# match the encoding of the written output instead of the platform default.
with open(input_template_path, encoding="utf-8") as template_file:
    j2_template = Template(template_file.read())
rendered_html = j2_template.render(plotly_jinja_data)

with open(output_html_path, "w", encoding="utf-8") as output_file:
    output_file.write(rendered_html)