Skip to content

Commit

Permalink
Merge pull request #635 from davide-f/fix_run
Browse files Browse the repository at this point in the history
I am merging this PR; no release notes are needed, as these are minor fixes.
Given that main is currently broken, I am merging it now, but feel free to add comments — the changes can be further improved.
I'll keep working on this branch.
  • Loading branch information
davide-f committed Mar 17, 2023
2 parents c408092 + b27f454 commit 508f80c
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 46 deletions.
6 changes: 3 additions & 3 deletions config.default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ cluster_options:
exclude_carriers: []
remove_stubs: true
remove_stubs_across_borders: true
p_threshold_drop_isolated: 1 # [MW] isolated buses are being discarded if bus mean power is below the specified threshold
p_threshold_merge_isolated: 10 # [MW] isolated buses are being merged into a single isolated bus if bus mean power is above the specified threshold
p_threshold_drop_isolated: 20 # [MW] isolated buses are being discarded if bus mean power is below the specified threshold
p_threshold_merge_isolated: 300 # [MW] isolated buses are being merged into a single isolated bus if bus mean power is above the specified threshold
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
Expand Down Expand Up @@ -103,7 +103,7 @@ clean_osm_data_options: # osm = OpenStreetMap

build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
group_close_buses: true # When "True", close buses are merged, guaranteeing voltage matching among line endings
group_tolerance_buses: 500 # [m] (default 500) Tolerance in meters of the close buses to merge
group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
split_overpassing_lines: true # When True, lines overpassing buses are split and connected to the buses
overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem.
Expand Down
6 changes: 3 additions & 3 deletions config.tutorial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ cluster_options:
exclude_carriers: []
remove_stubs: true
remove_stubs_across_borders: true
p_threshold_drop_isolated: 1 # [MW] isolated buses are being discarded if bus mean power is below the specified threshold
p_threshold_merge_isolated: 10 # [MW] isolated buses are being merged into a single isolated bus if bus mean power is above the specified threshold
p_threshold_drop_isolated: 20 # [MW] isolated buses are being discarded if bus mean power is below the specified threshold
p_threshold_merge_isolated: 300 # [MW] isolated buses are being merged into a single isolated bus if bus mean power is above the specified threshold
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
Expand Down Expand Up @@ -116,7 +116,7 @@ clean_osm_data_options:

build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
group_close_buses: true # When "True", close buses are merged, guaranteeing voltage matching among line endings
group_tolerance_buses: 500 # [m] (default 500) Tolerance in meters of the close buses to merge
group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
split_overpassing_lines: true # When True, lines overpassing buses are split and connected to the buses
overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem.
Expand Down
47 changes: 35 additions & 12 deletions scripts/clean_osm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def clean_frequency(df, default_frequency="50"):

# TODO: default frequency may be by country
df["tag_frequency"] = (
df["tag_frequency"].replace(repl_freq).fillna(default_frequency)
df["tag_frequency"].fillna(default_frequency).astype(str).replace(repl_freq)
)

return df
Expand All @@ -329,8 +329,11 @@ def clean_voltage(df):
"KV30": "30kV",
}

df.dropna(subset=["voltage"], inplace=True)

df["voltage"] = (
df["voltage"]
.astype(str)
.replace(repl_voltage)
.str.lower()
.str.replace(" ", "")
Expand All @@ -341,8 +344,6 @@ def clean_voltage(df):
# this line can be a fix for that if relevant
)

df.dropna(subset=["voltage"], inplace=True)

return df


Expand All @@ -367,7 +368,11 @@ def clean_circuits(df):
"1.": "1",
}

df["circuits"] = df["circuits"].replace(repl_circuits).str.replace(" ", "")
df["circuits"] = (
df["circuits"]
.replace(repl_circuits)
.map(lambda x: x.replace(" ", "") if isinstance(x, str) else x)
)

return df

Expand Down Expand Up @@ -399,7 +404,9 @@ def clean_cables(df):
"line": "1",
}

df["cables"] = df["cables"].replace(repl_cables).str.replace(" ", "")
df["cables"] = df["cables"].map(
lambda x: x.replace(" ", "") if isinstance(x, str) else x
)

return df

Expand Down Expand Up @@ -491,9 +498,13 @@ def fill_circuits(df):

def _get_circuits_status(df):
len_f = df["tag_frequency"].map(len)
len_c = df["circuits"].str.count(";") + 1
len_c = df["circuits"].map(
lambda x: x.count(";") + 1 if isinstance(x, str) else np.nan
)
isna_c = df["circuits"].isna()
len_cab = df["cables"].str.count(";") + 1
len_cab = df["cables"].map(
lambda x: x.count(";") + 1 if isinstance(x, str) else np.nan
)
isna_cab = df["cables"].isna()
return len_f, len_c, isna_c, len_cab, isna_cab

Expand All @@ -518,16 +529,20 @@ def _basic_cables(f_val, cables_req=cables_req, def_circ=2):

len_f, len_c, isna_c, len_cab, isna_cab = _get_circuits_status(df)

is_numeric_cables = ~pd.to_numeric(df["cables"], errors="coerce").isna()

to_fill = isna_c | (len_f != len_c)
to_fill_direct = to_fill & (len_cab == len_f)
to_fill_merge = to_fill & (len_cab > len_f)
to_fill_indirect = to_fill & ~to_fill_direct & df["cables"].str.isnumeric()
to_fill_indirect = to_fill & ~to_fill_direct & is_numeric_cables
to_fill_default = to_fill & ~to_fill_merge & ~to_fill_direct & ~to_fill_indirect

# length of cables match the frequency one
# matching uses directly only the cables series
df_match_by_cables = df[to_fill_direct][["tag_frequency", "cables"]].copy()
df_match_by_cables["cables"] = df_match_by_cables["cables"].str.split(";")
df_match_by_cables["cables"] = (
df_match_by_cables["cables"].astype(str).str.split(";")
)

def _filter_cables(row):
return ";".join(
Expand All @@ -543,7 +558,9 @@ def _filter_cables(row):

# length of cables elements is larger than frequency; the last cable data are merged to match
df_merge_by_cables = df[to_fill_merge][["tag_frequency", "cables"]].copy()
df_merge_by_cables["cables"] = df_merge_by_cables["cables"].str.split(";")
df_merge_by_cables["cables"] = (
df_merge_by_cables["cables"].astype(str).str.split(";")
)

def _parse_cables_to_len(row):
lf = len(row["tag_frequency"])
Expand Down Expand Up @@ -588,7 +605,7 @@ def _parse_cables_to_len(row):
)

# explode column
df["circuits"] = df["circuits"].str.split(";")
df["circuits"] = df["circuits"].astype(str).str.split(";")

return df

Expand Down Expand Up @@ -701,7 +718,10 @@ def prepare_generators_df(df_all_generators):
df_all_generators["power_output_MW"].astype(str).str.contains("MW")
]
df_all_generators["power_output_MW"] = (
df_all_generators["power_output_MW"].str.extract("(\\d+)").astype(float)
df_all_generators["power_output_MW"]
.astype(str)
.str.extract("(\\d+)")
.astype(float)
)

return df_all_generators
Expand Down Expand Up @@ -859,6 +879,9 @@ def clean_data(
df_all_substations["tag_substation"] == tag_substation
]

# clean voltage and make sure it is a string
df_all_substations = clean_voltage(df_all_substations)

df_all_substations = gpd.GeoDataFrame(
split_cells(pd.DataFrame(df_all_substations)),
crs=df_all_substations.crs,
Expand Down
9 changes: 5 additions & 4 deletions scripts/make_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,9 @@ def generate_scenario_by_country(

clean_country_list = create_country_list(country_list)

df_landlocked = pd.read_csv(
"https://raw.githubusercontent.com/openclimatedata/countrygroups/main/data/lldc.csv"
)
df_landlocked["countries"] = df_landlocked.Code.map(three_2_two_digits_country)
# file available from https://worldpopulationreview.com/country-rankings/landlocked-countries
df_landlocked = pd.read_csv("landlocked.csv")
df_landlocked["countries"] = df_landlocked.cca2.map(three_2_two_digits_country)

n_clusters = {
"MG": 3, # Africa
Expand All @@ -87,7 +86,9 @@ def generate_scenario_by_country(
"GM": 2,
"LR": 2,
"LS": 3,
"NE": 4,
"SL": 1,
"SZ": 4,
"TG": 1,
"CG": 2,
"GN": 3,
Expand Down
59 changes: 35 additions & 24 deletions scripts/simplify_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,17 +565,27 @@ def drop_isolated_nodes(n, threshold):
load_mean_origin = n.loads_t.p_set.mean().mean()

# duplicated sub-networks mean that there is at least one interconnection between buses
i_islands = n.buses[
off_buses = n.buses[
(~n.buses.duplicated(subset=["sub_network"], keep=False))
& (n.buses.carrier == "AC")
].index
]
i_islands = off_buses.index

if off_buses.empty:
return n

# skip an isolated bus for countries represented only by isolated nodes
for c in off_buses["country"].unique():
isc_offbus = off_buses["country"] == c
n_bus_off = isc_offbus.sum()
n_bus = (n.buses["country"] == c).sum()
if n_bus_off == n_bus:
i_islands = i_islands[i_islands != off_buses[isc_offbus].index[0]]

i_load_islands = n.loads_t.p_set.columns.intersection(i_islands)

# isolated buses without load should be discarded
isl_no_load = i_islands.difference(i_load_islands)
n.mremove("Bus", isl_no_load)
n.determine_network_topology()

# isolated buses with load lower than a specified threshold should be discarded as well
i_small_load = i_load_islands[
Expand All @@ -585,10 +595,12 @@ def drop_isolated_nodes(n, threshold):
if (i_islands.empty) | (len(i_small_load) == 0):
return n

i_loads_drop = n.loads[n.loads.bus.isin(i_small_load)].index
i_generators_drop = n.generators[n.generators.bus.isin(i_small_load)].index
i_to_drop = isl_no_load.to_list() + i_small_load.to_list()

n.mremove("Bus", i_small_load)
i_loads_drop = n.loads[n.loads.bus.isin(i_to_drop)].index
i_generators_drop = n.generators[n.generators.bus.isin(i_to_drop)].index

n.mremove("Bus", i_to_drop)
n.mremove("Load", i_loads_drop)
n.mremove("Generator", i_generators_drop)

Expand All @@ -598,7 +610,7 @@ def drop_isolated_nodes(n, threshold):
generators_mean_final = n.generators.p_nom.mean()

logger.info(
f"Dropped {len(i_small_load)} buses. The resulting load discrepancy is {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1}% for average load and generation capacity, respectively"
f"Dropped {len(i_to_drop)} buses. The resulting load discrepancy is {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1}% for average load and generation capacity, respectively"
)

return n
Expand Down Expand Up @@ -640,24 +652,23 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()):
n.loads_t.p_set[i_load_islands].mean(axis=0) <= threshold
]

if (i_islands.empty) | (len(i_suffic_load) == 0):
return n, n.buses.index.to_series()

# all the nodes to be merged should be mapped into a single node
agg_buses_list = []
countries_list = n.buses.country.unique()
for c in countries_list:
buses_in_country = n.buses.loc[i_suffic_load].country == c
i_aggreg_bus = n.buses.loc[i_suffic_load][buses_in_country].index[0]
agg_buses_list.append(i_aggreg_bus)

country_to_buses_dict = dict(zip(countries_list, agg_buses_list))
n_buses_df = n.buses[["country"]].copy().assign(bus_id=n.buses.index)
n_buses_df["agg_bus"] = n_buses_df.loc[i_suffic_load, "country"].map(
country_to_buses_dict
map_isolated_node_by_country = (
n.buses.loc[i_suffic_load].groupby("country")["bus_id"].first().to_dict()
)
isolated_buses_mapping = n.buses.loc[i_suffic_load, "country"].replace(
map_isolated_node_by_country
)
n_buses_df["agg_bus"].fillna(n_buses_df.bus_id, inplace=True)
busmap = n_buses_df["agg_bus"]
busmap = (
n.buses.index.to_series()
.replace(isolated_buses_mapping)
.astype(str)
.rename("busmap")
)

# return the original network if no changes are detected
if (busmap.index == busmap).all():
return n, n.buses.index.to_series()

bus_strategies, generator_strategies = get_aggregation_strategies(
aggregation_strategies
Expand Down

0 comments on commit 508f80c

Please sign in to comment.