## Creation of input files for the optimization algorithm solving a problem of multiobjective hydroelectric dam selection in Myanmar
### T. Janus
### 05/01/2024, Modified on 04/11/2024

## TODO:
Change `Mong Ton` to `MongTon` in the Salween Pywr model(s) 

## TO FIX (not critical):
#### 1. The missing nodes problem
#### 2. The wrong mapping between ifc reservoirs and the labels assigned for MOO during file creation

In [None]:
from __future__ import annotations
from typing import List, Tuple, Set, Dict
from dataclasses import dataclass, field
import pathlib
import copy
import networkx as nx
import numpy as np
import pandas as pd
import random
import string
import pickle
import lib.graph as graphtools
from lib.graph import DamNetwork, NetworkSimplifier

In [None]:
# Locations of the Pywr model edge lists (used to build graph representations
# of the three Pywr models)
salween_edges_path = pathlib.Path("inputs/pywr_model_structs/salween_edges.json")
sittaung_edges_path = pathlib.Path("inputs/pywr_model_structs/sittaung_edges.json")
irrawaddy_edges_path = pathlib.Path("inputs/pywr_model_structs/irrawaddy_edges.json")
# Read the edge lists in the order Salween, Sittaung, Irrawaddy
salween_edges, sittaung_edges, irrawaddy_edges = (
    graphtools.get_model_edges(edges_path)
    for edges_path in (
        salween_edges_path, sittaung_edges_path, irrawaddy_edges_path))
# Read the Pywr node coordinates (used when plotting the model graphs),
# again in the order Salween, Sittaung, Irrawaddy
salween_coordinates, sittaung_coordinates, irrawaddy_coordinates = (
    graphtools.get_model_coordinates(pathlib.Path(coordinates_file))
    for coordinates_file in (
        "inputs/pywr_model_structs/salween_coordinates.json",
        "inputs/pywr_model_structs/sittaung_coordinates.json",
        "inputs/pywr_model_structs/irrawaddy_coordinates.json"))
# CSV file pairing IFC and Pywr dam/reservoir names
ifc_pywr_map_path = pathlib.Path("config/ifc_pywr_name_map.csv")
# CSV file with the dam/reservoir data (also the source of the IFC ids)
ifc_id_map_path = pathlib.Path("outputs/moo/all_hp.csv")
# Lookups: Pywr name -> IFC name, and IFC name -> IFC id
pywr_ifc_map = graphtools.pywr_ifc_map_from_csv(ifc_pywr_map_path)
ifc_name_ifc_id_map = graphtools.ifc_name_to_ifc_id_from_csv(ifc_id_map_path)

In [None]:
def remove_np_types(df: pd.DataFrame) -> pd.DataFrame:
    """Cast integer columns with ``astype(int)`` and float columns with ``astype(float)``.

    Columns of any other dtype are left untouched. The frame is modified in
    place and the same object is returned.

    NOTE(review): the casts keep the data in pandas/numpy-backed columns, so
    individual elements may still come back as numpy scalars - confirm
    whether callers rely on plain Python numbers.
    """
    dtype_checks = pd.api.types
    for name in df.columns:
        column = df[name]
        if dtype_checks.is_integer_dtype(column):
            target_type = int
        elif dtype_checks.is_float_dtype(column):
            target_type = float
        else:
            # Neither integer nor float: nothing to cast
            continue
        df[name] = df[name].astype(target_type)
    return df

In [None]:
@dataclass
class TestMOOInputData:
    """Consistency checks run on a dam network before MOO input files are built.

    The dam identifiers found in ``network`` are compared with the ``ifc_id``
    column of the dam table read from ``dam_data_file``. Results are printed,
    nothing is raised.
    """
    network: DamNetwork
    dam_data_file: pathlib.Path = pathlib.Path(
        "outputs/moo/all_hp.csv")
    # Always replaced in __post_init__ by the table read from `dam_data_file`
    dam_data: pd.DataFrame = field(default_factory = pd.DataFrame)

    def __post_init__(self) -> None:
        """Load the dam table from ``dam_data_file`` into ``dam_data``."""
        self.dam_data = pd.read_csv(self.dam_data_file)

    @property
    def true_data_num_duplicates(self) -> int:
        """Number of rows of the dam table whose ``ifc_id`` repeats an earlier one."""
        all_ids = self.dam_data['ifc_id']
        distinct_ids = set(all_ids)
        return len(all_ids) - len(distinct_ids)

    @property
    def true_data_num_dams(self) -> int:
        """Number of distinct ``ifc_id`` values in the dam table."""
        distinct_ids = set(self.dam_data['ifc_id'])
        return len(distinct_ids)

    @property
    def true_data_ifc_ids(self) -> List[int]:
        """Distinct ``ifc_id`` values of the dam table, in set iteration order."""
        distinct_ids = set(self.dam_data['ifc_id'])
        return list(distinct_ids)

    @property
    def network_nodes(self) -> List:
        """Nodes reported by ``network.get_nodes()``."""
        return self.network.get_nodes()

    def test_dams_in_graph(
            self, graph_type: str = "node-centric", dam_id_type: str = "ifc"):
        """Print whether the dams of the dam table are present in the graph.

        With ``graph_type='node-centric'`` the set of graph nodes is compared
        with the set of ``ifc_id`` values. With ``graph_type='edge-centric'``
        every ``ifc_id`` is looked up with
        ``network.find_edges_containing_node``. Any other ``graph_type``
        prints nothing. ``dam_id_type`` is accepted but currently has no
        effect.
        """
        if graph_type == 'node-centric':
            in_graph = set(self.network.get_nodes())
            in_table = set(self.true_data_ifc_ids)
            if in_graph == in_table:
                print("Graph data checks out")
                return
            print("Graph nodes and db nodes not equal")
            print(f"Nodes missing in graph: {in_table - in_graph}")
            print(f"Extra nodes in graph not in db: {in_graph - in_table}")
            return

        if graph_type == 'edge-centric':
            # A dam is missing when no edge of the graph refers to its id
            missing_dam_ids: List[int] = [
                dam_id for dam_id in self.true_data_ifc_ids
                if not self.network.find_edges_containing_node(node_id = dam_id)]
            if not missing_dam_ids:
                print("Graph data checks out")
                return
            print("Graph does not contain all required nodes")
            print(f"Nodes missing in graph: {missing_dam_ids}")

In [None]:
@dataclass
class NodeToEdgeConverter:
    """Convert a node-centric dam graph into an edge-centric one.

    In the input graph nodes represent dams and edges represent the
    connections between dams through the river network. In the converted
    graph nodes are the river (water) bodies upstream and downstream of a dam
    and each dam is an edge between those nodes. The change of representation
    is required for specifying the optimization problem as a value on a tree
    network.

    The converter works on a deep copy of the network it is given; the
    original network object is never modified.
    """
    network: DamNetwork
    verbose: bool = False

    def __post_init__(self) -> None:
        """Detach from the caller's network by deep-copying it."""
        self.network = copy.deepcopy(self.network)

    @property
    def edge_data(self, field: str = 'dam_id') -> List[str]:
        """Values of the ``dam_id`` attribute of every edge of the graph.

        In the dam network edges represent dams installed on river stretches.
        Because this is a property, ``field`` cannot be supplied by callers
        and always takes its default value.
        """
        edge_data = self.network.graph.edges(data=True)
        return [edge[2][field] for edge in edge_data]

    def _generate_random_int(
            self, minimum_number: int = 400,  maximum_number: int = 800) -> int:
        """Return a random integer N with ``minimum_number <= N <= maximum_number``."""
        return random.randint(minimum_number, maximum_number)

    def _draw_unused_id(
            self, minimum_id: int, maximum_id: int, used_ids: Set[int]) -> int:
        """Return an id in ``[minimum_id, maximum_id]`` that is not yet taken.

        An id is taken when it is in ``used_ids`` or is already a node of the
        graph. Random draws are tried first; if they keep colliding the range
        is scanned in ascending order so that the search always terminates.

        Raises:
            ValueError: if every id of the range is already taken.
        """
        graph = self.network.graph

        def is_free(candidate: int) -> bool:
            return candidate not in used_ids and candidate not in graph

        for _ in range(100):
            candidate = self._generate_random_int(minimum_id, maximum_id)
            if is_free(candidate):
                return candidate
        for candidate in range(minimum_id, maximum_id + 1):
            if is_free(candidate):
                return candidate
        raise ValueError(
            f"No unused node id left in the range {minimum_id}-{maximum_id}")

    def _add_dummy_edges(
            self, minimum_id: int, maximum_id: int, add_upstream: bool = True,
            add_downstream: bool = True) -> DamNetwork:
        """Attach a dummy node to every root node and/or every leaf node.

        The dummy nodes represent the river stretches upstream of root dams
        and downstream of leaf dams. They are needed before the graph in
        which dams are nodes is converted to the graph in which dams are
        edges and nodes are the river sections around them.

        Every root/leaf receives exactly one dummy node whose id is drawn from
        ``[minimum_id, maximum_id]`` and is guaranteed not to clash with
        another dummy node or with a node already in the graph.

        Raises:
            ValueError: if the id range is too small to serve all nodes.
        """
        generated_numbers: Set[int] = set()
        graph = self.network.graph
        if add_upstream:
            # Snapshot the roots: adding upstream nodes changes which nodes are roots
            for root_node in list(self.network.root_nodes):
                upstream_node_name = self._draw_unused_id(
                    minimum_id, maximum_id, generated_numbers)
                graph.add_node(upstream_node_name)
                graph.add_edge(upstream_node_name, root_node)
                generated_numbers.add(upstream_node_name)
                if self.verbose:
                    print(f"Added node: {upstream_node_name}, and edge {upstream_node_name} -> {root_node}")
        if add_downstream:
            # Snapshot the leaves for the same reason
            for leaf_node in list(self.network.leaf_nodes):
                downstream_node_name = self._draw_unused_id(
                    minimum_id, maximum_id, generated_numbers)
                graph.add_node(downstream_node_name)
                graph.add_edge(leaf_node, downstream_node_name)
                generated_numbers.add(downstream_node_name)
                if self.verbose:
                    print(f"Added node: {downstream_node_name}, and edge {leaf_node} -> {downstream_node_name}")
        return self.network

    def _reverse_graph(self, inplace: bool = True) -> nx.Graph:
        """Return the graph with all edge directions reversed.

        With ``inplace=True`` the reversed graph also replaces the graph held
        by the network.
        """
        G_rev = self.network.graph.reverse()
        if inplace:
            self.network.graph = G_rev
        return G_rev

    def _remove_duplicates(self) -> DamNetwork:
        """Merge edges that carry identical edge data and start at the same node.

        The first edge seen with a given set of attributes is kept. For every
        later edge with the same attributes and the same source node, the
        edges leaving its target node are re-attached to the target node of
        the kept edge; the duplicated edge, its old outgoing edges and its
        target node are then removed. Duplicates with a different source node
        are reported and skipped. Edges without data are ignored.

        Returns the network (modified in place).
        """
        graph = self.network.graph
        # Hashable edge data -> (source, target) of the first edge carrying it
        seen_edges = dict()
        old_dup_edges = set()
        old_out_edges = set()
        old_nodes = set()

        # Iterate over a copy because edges are added to the graph inside the loop
        for edge in copy.deepcopy(graph.edges(data=True)):
            edge_data = edge[2]
            if not edge_data:
                continue

            edge_data_hashable = tuple(sorted(edge_data.items()))

            if edge_data_hashable not in seen_edges:
                seen_edges[edge_data_hashable] = (edge[0], edge[1])
                continue

            # The edge repeats the data of an edge seen before
            dup_edge_source, dup_edge_target = edge[0], edge[1]
            seen_edge_source, seen_edge_target = seen_edges[edge_data_hashable]

            if dup_edge_source != seen_edge_source:
                # Only edges sharing the same parent node can be merged
                print(f"Problem with source edge {dup_edge_source} - {dup_edge_target} and\n")
                print(f"{seen_edge_source} - {seen_edge_target}")
                print("Skipping")
                continue

            # Move the edges leaving the duplicate's bottom node to the kept edge
            outgoing_edges = list(graph.out_edges(dup_edge_target, data=True))

            for outgoing_edge in outgoing_edges:
                new_outgoing_edge = (seen_edge_target, outgoing_edge[1], outgoing_edge[2])
                if self.verbose:
                    print(f"Adding new edge {new_outgoing_edge[0]} - {new_outgoing_edge[1]}")
                graph.add_edge(
                    new_outgoing_edge[0], new_outgoing_edge[1],  **new_outgoing_edge[2])
                # Removal is deferred until the whole graph has been scanned
                old_out_edges.add((outgoing_edge[0], outgoing_edge[1]))

            old_dup_edges.add((dup_edge_source, dup_edge_target))
            old_nodes.add(dup_edge_target)

        # Remove old edges and old nodes. An item may already be gone as a
        # side effect of an earlier removal, hence the tolerated errors.
        for edge in old_out_edges:
            try:
                graph.remove_edge(edge[0], edge[1])
                if self.verbose:
                    print(f"Removing old edge {edge[0]} - {edge[1]}")
            except nx.NetworkXError:
                pass
        for edge in old_dup_edges:
            try:
                graph.remove_edge(edge[0], edge[1])
                if self.verbose:
                    print(f"Removing duplicated edge {edge[0]} - {edge[1]}")
            except nx.NetworkXError:
                pass
        for node in old_nodes:
            try:
                graph.remove_node(node)
                if self.verbose:
                    print(f"Removing node {node}")
            except nx.NetworkXError:
                pass

        # Sanitize the graph by removing isolated nodes (there should not be any)
        isolated_nodes = list(nx.isolates(graph))
        if len(isolated_nodes) > 0:
            print("After removing duplicate edges, some isolated nodes are still present. Removing...")
            graph.remove_nodes_from(isolated_nodes)
        return self.network

    def convert(
            self, minimum_id: int, maximum_id: int,
            reverse: bool = True, rename_nodes: bool = True,
            remove_duplicates: bool = True,
            add_upstream: bool = True, add_downstream: bool = True) -> DamNetwork:
        """Replace the graph by its line graph so that dams become edges.

        Steps: add dummy upstream/downstream nodes with ids drawn from
        ``[minimum_id, maximum_id]``, build the line graph, label every new
        edge with a ``dam_id`` attribute, then optionally rename the nodes,
        remove duplicated edges and reverse the graph.
        """
        self._add_dummy_edges(minimum_id, maximum_id, add_upstream, add_downstream)
        self.network.graph = nx.line_graph(self.network.graph)
        for upstream, downstream in self.network.graph.edges():
            # Line-graph nodes are edges (u, v) of the old graph; the second
            # item of the upstream one is used as the dam id of the new edge
            self.network.graph[upstream][downstream]['dam_id'] = upstream[1]
        if rename_nodes:
            if self.verbose:
                print("Renaming nodes..")
            self.rename_nodes()
        if remove_duplicates:
            if self.verbose:
                print("Removing duplicates..")
            self._remove_duplicates()
        if reverse:
            if self.verbose:
                print("Reversing graph..")
            self._reverse_graph()
        return self.network

    def rename_nodes(self, inplace: bool = True) -> nx.DiGraph:
        """Relabel nodes with integers 0, 1, ... in depth-first preorder from the root.

        With ``inplace=True`` the relabelled graph also replaces the graph
        held by the network.

        Raises:
            ValueError: if the graph does not have exactly one root node.
        """
        root_nodes = self.network.root_nodes
        if len(root_nodes) != 1:
            raise ValueError(
                f"Graph may contain only one root node, {len(root_nodes)} root nodes found")
        # Depth-first traversal starting from the single root node
        dfs_nodes = list(nx.dfs_preorder_nodes(self.network.graph, source=root_nodes[0]))
        mapping = {old_label: new_label for new_label, old_label in enumerate(dfs_nodes)}
        renamed_graph = nx.relabel_nodes(self.network.graph, mapping)
        if inplace:
            self.network.graph = renamed_graph
        return renamed_graph

### Simplify, plot, combine and transform Myanmar dam networks

In [None]:
# Build one dam network per Pywr model (Salween, Sittaung, Irrawaddy)
salween_network, sittaung_network, irrawaddy_network = (
    DamNetwork.from_edges(basin_edges, coordinates=basin_coordinates)
    for basin_edges, basin_coordinates in (
        (salween_edges, salween_coordinates),
        (sittaung_edges, sittaung_coordinates),
        (irrawaddy_edges, irrawaddy_coordinates)))
# Dams (reservoirs) to retain when the networks are simplified
retained_node_list = list(pywr_ifc_map.keys())
# Outlet nodes of each model and the integer ids assigned to them
out_nodes_sal = {"outflow_Salween": 1001}
out_nodes_irr = {"Irrawaddy_Delta_output2": 1002}
out_nodes_sit = {
    "output_Rakhine_1": 1003,
    "output_Rakhine_2": 1004,
    "output_Rakhine_3": 1005,
    "output_Rakhine_4": 1006,
    "output_Rakhine_5": 1007,
    "output_Mekong": 1008,
    "output_Kok" : 1009,
    "output_Sittaung_1": 1010,
    "output_Sittaung_2" : 1011,
    "output_Sittaung_3" : 1012,
    "output_Tanintharyi_1" : 1013,
    "output_Tanintharyi_2" : 1014,
}

# The outlet nodes are retained as well
retained_node_list += [
    outlet_name
    for outlet_group in (out_nodes_sal, out_nodes_irr, out_nodes_sit)
    for outlet_name in outlet_group]
number_out_nodes = sum(
    map(len, (out_nodes_sal, out_nodes_irr, out_nodes_sit)))
print(
    f"Retaining {len(retained_node_list)} nodes, incl. {number_out_nodes}"
    f" output nodes and {len(retained_node_list)-number_out_nodes} dams")

In [None]:
# One simplifier per dam network
n_simplifier_1 = NetworkSimplifier(salween_network, logging=False)
n_simplifier_2 = NetworkSimplifier(sittaung_network, logging=False)
n_simplifier_3 = NetworkSimplifier(irrawaddy_network, logging=False)
# For each model: drop every node that is not retained, rename the remaining
# nodes with the name/id lookups and the model's outlet ids, reverse the graph
for _progress_message, _simplifier, _outlet_ids in (
        ("Simplifying the Salween model...", n_simplifier_1, out_nodes_sal),
        ("Simplifying the Sittaung model...", n_simplifier_2, out_nodes_sit),
        ("Simplifying the Irrawaddy model...", n_simplifier_3, out_nodes_irr)):
    print(_progress_message)
    _simplifier.simplify(lambda node : node not in retained_node_list, inplace=True)
    _simplifier.rename_nodes(
        (pywr_ifc_map, ifc_name_ifc_id_map, _outlet_ids), inplace=True)
    _simplifier.reverse_graph(inplace=True)
# Plot the three simplified graphs
print("Salween simplified graph")
n_simplifier_1.network.plot(font_size=10, figsize=(5,4))
print("Sittaung simplified graph")
n_simplifier_2.network.plot(font_size=10, figsize=(5,4))
print("Irrawaddy simplified graph")
n_simplifier_3.network.plot(font_size=10, figsize=(5,4))

In [None]:
# Combine the three simplified networks (Salween, Sittaung, Irrawaddy) into a
# single network sharing the same root node.
# In this network the nodes are reservoirs identified by their IFC ids and the
# edges are the connections between reservoirs defined by the river network.
combined_network = graphtools.combine_multiple_by_root_nodes(
    (n_simplifier_1.network, n_simplifier_2.network, n_simplifier_3.network))

In [None]:
# Check the combined (node-centric) network against the dam table:
# report the number of distinct dams and any node mismatch, then plot it
test1 = TestMOOInputData(
    network=combined_network, 
    dam_data_file= pathlib.Path("outputs/moo/all_hp.csv"))
print("Testing the combined network")
print(f"Num dams: {test1.true_data_num_dams}")
# Default arguments: node-centric comparison of graph nodes and IFC ids
test1.test_dams_in_graph()
combined_network.plot(font_size = 8, figsize=(7,5))

In [None]:
# Convert the node-centric network (dams are nodes) to the edge-centric one
# (dams are edges). The converter works on its own deep copy of the network.
converter = NodeToEdgeConverter(combined_network)
# Dummy node ids are drawn from 2000-5000; only downstream dummy nodes are
# added and the optional rename / de-duplicate / reverse steps are skipped
converter.convert(
    2000, 5000, reverse=False, remove_duplicates = False, rename_nodes = False, 
    add_upstream=False)
# Plot the combined disjoint graph
converter.network.plot(edge_data_field = 'dam_id', use_coords = False)
# inplace=False: converter.network itself is passed in unchanged mode and the
# joined network is taken from the return value
n_comb = graphtools.combine_disjoint_by_roots(converter.network, inplace=False)
# Plot the combined joined graph
n_comb.plot(edge_data_field = 'dam_id', use_coords = False)

In [None]:
# Check the edge-centric disjoint network (before merging): every IFC id of
# the dam table (default file) must appear on at least one edge
test2 = TestMOOInputData(network=converter.network)
print("Testing the disjoint edge-centric network representation")
test2.test_dams_in_graph(graph_type = "edge-centric")
# Number of ids returned by the network's unique_ids()
print(len(test2.network.unique_ids()))

In [None]:
# Check the joined edge-centric network: every IFC id of the dam table
# (default file) must appear on at least one edge
test3 = TestMOOInputData(network=n_comb)
print("Testing the combined edge-centric network representation")
test3.test_dams_in_graph(graph_type = "edge-centric")

## NOTE: Set `overwrite_n_comb` to True to overwrite the network file in `n_comb.pickle`

In [None]:
# Switch guarding the (re)creation of `n_comb.pickle`
overwrite_n_comb: bool = False
if not overwrite_n_comb:
    print("`n_comb.pickle` not written.")
else:
    print("Overwriting `n_comb.pickle`.")
    # A deep copy of the joined network is what gets pickled
    n_comb_copy = copy.deepcopy(n_comb)
    with open('n_comb.pickle', 'wb') as file_handle:
        pickle.dump(n_comb_copy, file_handle)

In [None]:
# Post-process the joined edge-centric network on a deep copy held by a
# second converter (verbose, so every change is printed)
converter2 = NodeToEdgeConverter(n_comb, verbose=True)
# Relabel nodes 0..N-1 in depth-first order from the single root
converter2.rename_nodes()
converter2.network.plot(edge_data_field = 'dam_id', use_coords = False)
# Merge edges that carry the same dam_id and leave the same node
converter2._remove_duplicates()
# Relabel again after nodes have been removed
converter2.rename_nodes()

In [None]:
# Check the joined edge-centric network after duplicated edges were removed:
# every IFC id of the dam table (default file) must appear on at least one edge
test3 = TestMOOInputData(network=converter2.network)
print("Testing the deduplicated combined edge-centric network representation")
test3.test_dams_in_graph(graph_type = "edge-centric")
# Number of ids returned by the network's unique_ids()
print(len(test3.network.unique_ids()))

In [None]:
# Report the size of the final graph. Expected: as many edges as there are
# unique IFC ids in the dam database, and one node more than edges.
_final_graph = converter2.network.graph
n_edges, n_nodes = len(_final_graph.edges), len(_final_graph.nodes)
print(f"Number of edges: {n_edges}, Number of nodes: {n_nodes}")

In [None]:
# Distinct dam ids carried by the edges of the final graph
dams: Set[int] = {
    edge_attributes['dam_id']
    for _, _, edge_attributes in converter2.network.graph.edges(data=True)}
print(f"Number of dams in graph = {len(dams)}")

## NOTE: Set `overwrite_n_final` to True to overwrite the final edge-centric network file in `n_final.pickle`

In [None]:
# Switch guarding the (re)creation of `n_final.pickle`
overwrite_n_final: bool = False
if not overwrite_n_final:
    print("`n_final.pickle` not written.")
else:
    print("Overwriting `n_final.pickle`.")
    # A deep copy of the final edge-centric network is what gets pickled
    n_final = copy.deepcopy(converter2.network)
    with open('n_final.pickle', 'wb') as file_handle:
        pickle.dump(n_final, file_handle)

## Create input file(s) for the MultiObjective Optimization Algorithm

In [None]:
# Mappings from objective names to the names of the corresponding columns of
# the `dam_node_data` dataframe. Two mappings are used; they differ only in
# the column supplying the GHG emissions.

# 1. Optimization runs with emissions calculated explicitly with ReEmission
#    (g-res methodology)
criteria_map_reemission = {
    "energy" : "HP_mean",
    "ghg" : "tot_em",
    "status" : "status_int",
    "firm_energy" : "HP_firm",
    "loss_agri" : "crop_area_loss_km2",
    "loss_forest" : "forest_area_loss_km2",
}
# 2. Optimization runs with emissions derived from the emission factors
#    published by Soued et al.: same columns, except for "ghg"
criteria_map_soued = {**criteria_map_reemission, "ghg" : "tot_em_soued"}

In [None]:
def create_moo_input(
        node_criteria: Tuple[str, ...], dam_criteria: Tuple[str, ...],
        network_graph: DamNetwork | nx.DiGraph, 
        dam_node_data: pd.DataFrame,
        output_path: pathlib.Path,
        id_ifc_map_path: pathlib.Path,
        column_mapping: Dict[str, str],
        force_status_value: int | None = None) -> nx.DiGraph:
    """Create an input text-file to the multiobjective optimization algorithm.

    The file consists of, in this order: a ``p`` line with the numbers of
    nodes, edges, node criteria and dam criteria; a ``dam_criteria`` line; a
    ``node_criteria`` line (only if node criteria are given); one ``d`` line
    per dam; one ``n`` line per node; an ``r`` line with the root node; one
    ``e`` line per edge.

    Args:
        node_criteria: Names of the node criteria. Only ``connectivity`` has a
            value (0); any other name is written as ``NA``.
        dam_criteria: Names of the dam criteria, in output order.
        network_graph: Edge-centric graph whose edges carry a ``dam_id``
            attribute. It is deep-copied, the argument is not modified.
        dam_node_data: Dam table indexed by the ids stored in ``dam_id``.
        output_path: Path of the text file to write.
        id_ifc_map_path: Path of the JSON file receiving the mapping from the
            dam numbers used in the text file to the original dam ids.
        column_mapping: Criterion name -> column of ``dam_node_data``. Names
            without an entry are used as column names directly.
        force_status_value: If given, replaces the value of the status
            criterion of every dam. Ignored when the status column is not
            among the requested dam criteria.

    Returns:
        The deep copy of ``network_graph`` made at the start. Its nodes keep
        their original labels, not the integer labels used in the file.

    Raises:
        ValueError: if the graph does not have exactly one root node.
    """
    output_path = pathlib.Path(output_path)
    id_ifc_map_path = pathlib.Path(id_ifc_map_path)
    n_node_criteria, n_dam_criteria = len(node_criteria), len(dam_criteria)

    # Currently connectivity is ignored and therefore, all values have been set to zero
    _node_criteria_name_value_map = {
        "connectivity" : 0
    }

    # Columns to retrieve from the dam_node_data dataframe
    dam_criteria_cols: List[str] = [column_mapping.get(name, name) for name in dam_criteria]
    # The status override only applies when the status column is written at
    # all; otherwise it would append a value the header does not announce
    status_col = column_mapping.get("status", "status")
    override_status = (
        force_status_value is not None and status_col in dam_criteria_cols)

    network_graph = copy.deepcopy(network_graph)
    if isinstance(network_graph, DamNetwork):
        graph = network_graph.graph
    else:
        graph = network_graph

    # Rename nodes such that the nodes are named with integers from 0 to N-1
    # where N is the number of nodes
    graph = nx.relabel.convert_node_labels_to_integers(
        graph, first_label=0, ordering='default')

    root_nodes = [node for node in graph.nodes if graph.in_degree(node) == 0]
    if not root_nodes:
        raise ValueError("Graph has no root node")
    if len(root_nodes) > 1:
        raise ValueError("Graph has more than one root node")
    root_node = root_nodes[0]

    dams: Set[int] = {data['dam_id'] for _, _, data in graph.edges(data=True)}
    print(f"Number of dams after in graph after renaming nodes = {len(dams)}")

    # Query the graph for its nodes (river sections) and edges (dams)
    nodes = graph.nodes
    edges = graph.edges(data=True)
    n_nodes, n_edges = len(nodes), len(edges)
    print(f"Processing graph with {n_nodes} nodes and {n_edges} edges.")
    dam_criteria_str = " ".join(dam_criteria)
    node_criteria_str = " ".join(node_criteria)
    print("Writing header lines")
    strings: List[str] = [
        f"p {n_nodes} {n_edges} {n_node_criteria} {n_dam_criteria}",
        f"dam_criteria {dam_criteria_str}",
    ]
    if len(node_criteria) > 0:
        strings.append(f"node_criteria {node_criteria_str}")

    # The solver requires dams to be listed in ascending order with indices
    # 1 to N. Edges are therefore ordered by their original dam id and
    # renumbered; the mapping back to the original ids is kept.
    id_to_ifc = dict()
    renumbered_edges = []  # (upstream node, downstream node, new dam number)
    ordered_edges = sorted(edges, key=lambda edge: edge[2]['dam_id'])
    for new_id, (u, v, edge_data) in enumerate(ordered_edges, start=1):
        id_to_ifc[new_id] = edge_data['dam_id']
        renumbered_edges.append((u, v, new_id))

    print(f"Saving the new-old dam ID map to {id_ifc_map_path.as_posix()}")
    graphtools.dict_to_json(id_to_ifc, id_ifc_map_path)

    # Dam lines (already in ascending order of the new dam number)
    for _, _, dam_id in renumbered_edges:
        # Work on a copy so that the override never touches dam_node_data
        dam_criteria_arr = dam_node_data.loc[
            id_to_ifc[dam_id], dam_criteria_cols].copy()
        if override_status:
            dam_criteria_arr[status_col] = force_status_value
        # Get rid of potential np.floating and np.integer types that may mess
        # up conversion to string
        dam_criteria_arr_values = [
            val.item() if isinstance(val, (np.integer, np.floating)) else val
            for val in dam_criteria_arr.values]
        dam_criteria_string = " ".join(map(str, dam_criteria_arr_values))
        strings.append(f"d {dam_id} {dam_criteria_string}")

    # Node lines (WATCH OUT: if node criteria are added we need to make sure
    # that nodes correspond to (modified) edges. Does not matter at this stage
    # as all nodes get the same values anyway)
    node_criteria_values_str = " ".join(
        str(_node_criteria_name_value_map.get(criterion, 'NA'))
        for criterion in node_criteria)
    for node in sorted(nodes):
        strings.append(f"n {node} {node_criteria_values_str}")

    # Root node line
    strings.append(f"r {root_node}")
    # Edge lines: upstream node, downstream node, new dam number
    for source_node, target_node, dam_id in renumbered_edges:
        strings.append(f"e {source_node} {target_node} {dam_id}")

    with open(output_path, 'w') as file:
        file.writelines(line + '\n' for line in strings)

    return network_graph

In [None]:
# Load the dam table and index it by IFC id
_raw_dam_table = pd.read_csv(ifc_id_map_path, index_col=[0])
# Note: numpy dtypes should be converted to non-numpy ones here, but although
#       the function seems to be written appropriately it does not fix the
#       issue, i.e. the dtypes are still numpy.
dam_node_data = remove_np_types(_raw_dam_table.set_index('ifc_id'))
# Note: the data contains repeated rows (for some reason) that hold the same
#       information. Only the first row of every IFC id is kept.
_is_first_occurrence = ~dam_node_data.index.duplicated(keep='first')
dam_node_data = dam_node_data[_is_first_occurrence]
dam_node_data.head()

### Create input files with multiple options

### Solver 1

In [None]:
final_network = converter2.network

# Dam criteria written to the input files
_solver_1_all_criteria = (
    "energy", "ghg", "status", "firm_energy", "loss_agri", "loss_forest")
_solver_1_energy_ghg_criteria = ("energy", "ghg", "status", "firm_energy")


def _write_solver_1_input(
        dam_criteria, column_mapping, force_status_value, output_file):
    """Write one Solver 1 input file; everything not passed in is shared by all files."""
    return create_moo_input(
        node_criteria = ('connectivity', ),
        dam_criteria = dam_criteria,
        network_graph = final_network.graph,
        dam_node_data = dam_node_data,
        id_ifc_map_path=pathlib.Path("outputs/moo/id_to_ifc.json"),
        force_status_value = force_status_value,
        column_mapping = column_mapping,
        output_path = pathlib.Path(output_file))


# -----------------------------------------------------------------------------
# I. GHG EMISSIONS CALCULATED WITH RE-EMISSION
# -----------------------------------------------------------------------------
# 5 objectives, built dam statuses included in the analysis
_write_solver_1_input(
    _solver_1_all_criteria, criteria_map_reemission, None,
    "moo_solver/solutions_and_inputs/mya_5_obj_built.txt")
# 5 objectives, no built dams (status forced to 0)
_write_solver_1_input(
    _solver_1_all_criteria, criteria_map_reemission, 0,
    "moo_solver/solutions_and_inputs/mya_5_obj_nobuilt.txt")
# -----------------------------------------------------------------------------
# II. GHG EMISSIONS CALCULATED FROM EMISSION FACTORS FROM SOUED ET AL.
# -----------------------------------------------------------------------------
# 5 objectives, built dam statuses included in the analysis
_write_solver_1_input(
    _solver_1_all_criteria, criteria_map_soued, None,
    "moo_solver/solutions_and_inputs/mya_5_obj_built_soued.txt")
# 5 objectives, no built dams (status forced to 0)
_write_solver_1_input(
    _solver_1_all_criteria, criteria_map_soued, 0,
    "moo_solver/solutions_and_inputs/mya_5_obj_nobuilt_soued.txt")

# 3 objectives, built dam statuses included in the analysis
_write_solver_1_input(
    _solver_1_energy_ghg_criteria, criteria_map_soued, None,
    "moo_solver/solutions_and_inputs/mya_3_obj_built_soued.txt")
# 3 objectives, no built dams (status forced to 0)
_write_solver_1_input(
    _solver_1_energy_ghg_criteria, criteria_map_soued, 0,
    "moo_solver/solutions_and_inputs/mya_3_obj_nobuilt_soued.txt")

### Solver 2

In [None]:
# Dam criteria written to the input files
_solver_2_all_criteria = (
    "energy", "ghg", "status", "firm_energy", "loss_agri", "loss_forest")
_solver_2_energy_ghg_criteria = ("energy", "ghg", "status", "firm_energy")


def _write_solver_2_input(
        dam_criteria, column_mapping, force_status_value, output_file):
    """Write one Solver 2 input file; everything not passed in is shared by all files."""
    return create_moo_input(
        node_criteria = ('connectivity', ),
        dam_criteria = dam_criteria,
        network_graph = final_network.graph,
        dam_node_data = dam_node_data,
        id_ifc_map_path=pathlib.Path("outputs/moo/id_to_ifc.json"),
        force_status_value = force_status_value,
        column_mapping = column_mapping,
        output_path = pathlib.Path(output_file))


# -----------------------------------------------------------------------------
# I. GHG EMISSIONS CALCULATED WITH RE-EMISSION
# -----------------------------------------------------------------------------
# 5 objectives, built dam statuses included in the analysis
_write_solver_2_input(
    _solver_2_all_criteria, criteria_map_reemission, None,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_5_obj_built.txt")
# 5 objectives, no built dams (status forced to 0)
_write_solver_2_input(
    _solver_2_all_criteria, criteria_map_reemission, 0,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_5_obj_nobuilt.txt")
# -----------------------------------------------------------------------------
# II. GHG EMISSIONS CALCULATED FROM EMISSION FACTORS FROM SOUED ET AL.
# -----------------------------------------------------------------------------
# 5 objectives, built dam statuses included in the analysis
_write_solver_2_input(
    _solver_2_all_criteria, criteria_map_soued, None,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_5_obj_built_soued.txt")
# 5 objectives, no built dams (status forced to 0)
_write_solver_2_input(
    _solver_2_all_criteria, criteria_map_soued, 0,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_5_obj_nobuilt_soued.txt")

# 3 objectives, built dam statuses included in the analysis
_write_solver_2_input(
    _solver_2_energy_ghg_criteria, criteria_map_soued, None,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_3_obj_built_soued.txt")
# 3 objectives, no built dams (status forced to 0)
_write_solver_2_input(
    _solver_2_energy_ghg_criteria, criteria_map_soued, 0,
    "moo_solver_CPAIOR/Basin_Input_Files/mya_3_obj_nobuilt_soued.txt")

## Tests on simple graph representations

In [None]:
# Manual check of the node-to-edge conversion on a five-node example graph.
# Disabled by default; set `test_conversion` to True to run it.
# Note that running it rebinds the names `edges` and `converter`.
test_conversion: bool = False
if test_conversion:
    edges = [(1, 2), (2, 3), (4, 3), (3, 5)]
    # Create a directed graph from the list of edges
    G = nx.DiGraph(edges)
    ex_network = DamNetwork(copy.deepcopy(G))
    ex_network.plot(edge_data_field = 'dam_id', figsize=(4,3))
    # The converter deep-copies `ex_network`; all steps below act on that copy
    converter = NodeToEdgeConverter(ex_network)
    converter._reverse_graph()
    # Dummy node ids are drawn from 666-777; nodes are renamed and duplicated
    # edges removed, the graph is not reversed again
    converter.convert(666, 777, reverse=False, remove_duplicates = True)
    converter.rename_nodes()
    print(converter.edge_data)
    # NOTE(review): this plots `ex_network`, which the converter never
    # modifies, so the unconverted graph is shown a second time - confirm
    # whether `converter.network` was intended here.
    ex_network.plot(edge_data_field = 'dam_id', figsize=(4,3))