Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finalize clean_build_shapes #572

Merged
merged 6 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion config.default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ build_shape_options:
nprocesses: 5 # number of processes to be used in build_shapes
worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout
gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout

contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model
clean_osm_data_options: # osm = OpenStreetMap
names_by_shapes: true # Set the country name based on the extended country shapes
threshold_voltage: 35000 # [V] assets below that voltage threshold will not be used (cable, line, generator, etc.)
Expand Down
2 changes: 1 addition & 1 deletion config.tutorial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ build_shape_options:
nprocesses: 5 # number of processes to be used in build_shapes
worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout
gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout

contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model
clean_osm_data_options:
names_by_shapes: true # Set the country name based on the extended country shapes
threshold_voltage: 35000 # [V] minimum voltage threshold to keep the asset (cable, line, generator, etc.) [V]
Expand Down
5 changes: 3 additions & 2 deletions scripts/build_bus_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import pypsa
from _helpers import REGION_COLS, configure_logging, two_2_three_digits_country
from shapely.geometry import Point, Polygon
from shapely.ops import unary_union
from vresutils.graph import voronoi_partition_pts

# from scripts.build_shapes import gadm
Expand All @@ -65,8 +66,6 @@ def custom_voronoi_partition_pts(points, outline, add_bounds_shape=True, multipl
----------
points : Nx2 - ndarray[dtype=float]
outline : Polygon
no_multipolygons : bool (default: False)
If true, replace each MultiPolygon by its largest component

Returns
-------
Expand All @@ -77,6 +76,8 @@ def custom_voronoi_partition_pts(points, outline, add_bounds_shape=True, multipl
from scipy.spatial import Voronoi
from shapely.geometry import Point, Polygon

outline = unary_union(outline)

points = np.asarray(points)

polygons_arr = []
Expand Down
17 changes: 15 additions & 2 deletions scripts/build_osm_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,8 +830,12 @@ def built_network(inputs, outputs, geo_crs, distance_crs):
country_list = input
bus_country_list = buses["country"].unique().tolist()

if len(bus_country_list) != len(country_list):
no_data_countries = set(country_list).difference(set(bus_country_list))
# it may happen that bus_country_list contains entries not relevant as a country name (e.g. "not found")
# difference can't give negative values; the following will return only releant country names
no_data_countries = list(set(country_list).difference(set(bus_country_list)))

if len(no_data_countries) > 0:

no_data_countries_shape = country_shapes[
country_shapes.index.isin(no_data_countries) == True
].reset_index()
Expand Down Expand Up @@ -861,6 +865,15 @@ def built_network(inputs, outputs, geo_crs, distance_crs):
crs=buses.crs,
)

non_allocated_countries = list(
set(country_list).symmetric_difference(set(bus_country_list))
)

if len(non_allocated_countries) > 0:
logger.error(
f"There following countries could not be allocated properly: {non_allocated_countries}"
)

# get transformers: modelled as lines connecting buses with different voltage
transformers = get_transformers(buses, lines)

Expand Down
134 changes: 105 additions & 29 deletions scripts/build_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import xarray as xr
from _helpers import (
configure_logging,
country_name_2_two_digits,
sets_path_to_root,
three_2_two_digits_country,
two_2_three_digits_country,
Expand All @@ -33,6 +34,7 @@
from tqdm import tqdm

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

sets_path_to_root("pypsa-earth")

Expand Down Expand Up @@ -80,7 +82,60 @@ def download_GADM(country_code, update=False, out_logging=False):
return GADM_inputfile_gpkg, GADM_filename


def get_GADM_layer(country_list, layer_id, geo_crs, update=False, outlogging=False):
def filter_gadm(
geodf,
layer,
cc,
contended_flag,
output_nonstd_to_csv=False,
):

# identify non standard geodf rows
geodf_non_std = geodf[geodf["GID_0"] != two_2_three_digits_country(cc)].copy()

if not geodf_non_std.empty:
logger.info(
f"Contended areas have been found for gadm layer {layer}. They will be treated according to {contended_flag} option"
)

# NOTE: in these options GID_0 is not changed because it is modified below
if contended_flag == "drop":
geodf.drop(geodf_non_std.index, inplace=True)
elif contended_flag != "set_by_country":
# "set_by_country" option is the default; if this elif applies, the desired option falls back to the default
logger.warning(
f"Value '{contended_flag}' for option contented_flag is not recognized.\n"
+ "Fallback to 'set_by_country'"
)

# force GID_0 to be the country code for the relevant countries
geodf["GID_0"] = cc
davide-f marked this conversation as resolved.
Show resolved Hide resolved

# country shape should have a single geomerty
if (layer == 0) and (geodf.shape[0] > 1):
logger.warning(
f"Country shape is composed by multiple shapes that are being merged in agreement to contented_flag option '{contended_flag}'"
)
# take the first row only to re-define geometry keeping other columns
geodf = geodf.iloc[[0]].set_geometry([geodf.unary_union])

# debug output to file
if output_nonstd_to_csv and not geodf_non_std.empty:
geodf_non_std.to_csv(
f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False
)

return geodf


def get_GADM_layer(
country_list,
layer_id,
geo_crs,
contended_flag,
update=False,
outlogging=False,
):
"""
Function to retrive a specific layer id of a geopackage for a selection of countries

Expand All @@ -101,11 +156,11 @@ def get_GADM_layer(country_list, layer_id, geo_crs, update=False, outlogging=Fal
# download file gpkg
file_gpkg, name_file = download_GADM(country_code, update, outlogging)

# # get layers of a geopackage
# get layers of a geopackage
list_layers = fiona.listlayers(file_gpkg)

# # get layer name
if (layer_id < 0) | (layer_id >= len(list_layers)):
# get layer name
if (layer_id < 0) or (layer_id >= len(list_layers)):
# when layer id is negative or larger than the number of layers, select the last layer
layer_id = len(list_layers) - 1

Expand All @@ -114,10 +169,13 @@ def get_GADM_layer(country_list, layer_id, geo_crs, update=False, outlogging=Fal
geo_crs
)

# convert country name representation of the main country (GID_0 column)
geodf_temp["GID_0"] = [
three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"]
]
geodf_temp = filter_gadm(
geodf=geodf_temp,
layer=layer_id,
cc=country_code,
contended_flag=contended_flag,
output_nonstd_to_csv=False,
)

# create a subindex column that is useful
# in the GADM processing of sub-national zones
Expand Down Expand Up @@ -151,21 +209,32 @@ def _simplify_polys(polys, minarea=0.0001, tolerance=0.008, filterremote=False):
return polys.simplify(tolerance=tolerance)


def countries(countries, geo_crs, update=False, out_logging=False):
def countries(countries, geo_crs, contended_flag, update=False, out_logging=False):
"Create country shapes"

if out_logging:
logger.info("Stage 1 of 4: Create country shapes")

# download data if needed and get the layer id 0, corresponding to the countries
df_countries = get_GADM_layer(countries, 0, geo_crs, update, out_logging)
df_countries = get_GADM_layer(
countries,
0,
geo_crs,
contended_flag,
update,
out_logging,
)

# select and rename columns
df_countries = df_countries[["GID_0", "geometry"]].copy()
df_countries.rename(columns={"GID_0": "name"}, inplace=True)

# set index and simplify polygons
ret_df = df_countries.set_index("name")["geometry"].map(_simplify_polys)
# there may be "holes" in the countries geometry which cause troubles along the workflow
# e.g. that is the case for enclaves like Dahagram–Angarpota for IN/BD
ret_df.apply(lambda x: make_valid(x) if not x.is_valid else x)
ret_df.reset_index()

return ret_df

Expand Down Expand Up @@ -248,26 +317,22 @@ def eez(countries, geo_crs, country_shapes, EEZ_gpkg, out_logging=False, distanc
# load data
df_eez = load_EEZ(countries, geo_crs, EEZ_gpkg)

ret_df = df_eez[["name", "geometry"]]
# create unique shape if country is described by multiple shapes
for c_code in countries:
selection = ret_df.name == c_code
n_offshore_shapes = selection.sum()

if n_offshore_shapes > 1:
# when multiple shapes per country, then merge polygons
geom = ret_df[selection].geometry.unary_union
ret_df.drop(ret_df[selection].index, inplace=True)
ret_df = ret_df.append(
{"name": c_code, "geometry": geom}, ignore_index=True
)
eez_countries = [cc for cc in countries if df_eez.name.str.contains(cc).any()]
ret_df = gpd.GeoDataFrame(
{
"name": eez_countries,
"geometry": [
df_eez.geometry.loc[df_eez.name == cc].geometry.unary_union
for cc in eez_countries
],
}
).set_index("name")

ret_df = ret_df.set_index("name")["geometry"].map(
ret_df = ret_df.geometry.map(
lambda x: _simplify_polys(x, minarea=0.001, tolerance=0.0001)
)

ret_df = ret_df.apply(lambda x: make_valid(x))
country_shapes = country_shapes.apply(lambda x: make_valid(x))

country_shapes_with_buffer = country_shapes.buffer(distance)
ret_df_new = ret_df.difference(country_shapes_with_buffer)
Expand Down Expand Up @@ -735,6 +800,7 @@ def gadm(
gdp_method,
countries,
geo_crs,
contended_flag,
layer_id=2,
update=False,
out_logging=False,
Expand All @@ -746,7 +812,7 @@ def gadm(
logger.info("Stage 4/4: Creation GADM GeoDataFrame")

# download data if needed and get the desired layer_id
df_gadm = get_GADM_layer(countries, layer_id, geo_crs, update)
df_gadm = get_GADM_layer(countries, layer_id, geo_crs, contended_flag, update)

# select and rename columns
df_gadm.rename(columns={"GID_0": "country"}, inplace=True)
Expand Down Expand Up @@ -783,6 +849,9 @@ def gadm(
# set index and simplify polygons
df_gadm.set_index("GADM_ID", inplace=True)
df_gadm["geometry"] = df_gadm["geometry"].map(_simplify_polys)
df_gadm.geometry = df_gadm.geometry.apply(
lambda r: make_valid(r) if not r.is_valid else r
)
df_gadm = df_gadm[df_gadm.geometry.is_valid & ~df_gadm.geometry.is_empty]

return df_gadm
Expand All @@ -805,15 +874,21 @@ def gadm(
out_logging = snakemake.config["build_shape_options"]["out_logging"]
year = snakemake.config["build_shape_options"]["year"]
nprocesses = snakemake.config["build_shape_options"]["nprocesses"]
contended_flag = snakemake.config["build_shape_options"]["contended_flag"]
EEZ_gpkg = snakemake.input["eez"]
worldpop_method = snakemake.config["build_shape_options"]["worldpop_method"]
gdp_method = snakemake.config["build_shape_options"]["gdp_method"]
geo_crs = snakemake.config["crs"]["geo_crs"]
distance_crs = snakemake.config["crs"]["distance_crs"]

country_shapes = countries(countries_list, geo_crs, update, out_logging)

country_shapes.reset_index().to_file(snakemake.output.country_shapes)
country_shapes = countries(
countries_list,
geo_crs,
contended_flag,
update,
out_logging,
)
country_shapes.to_file(snakemake.output.country_shapes)

offshore_shapes = eez(
countries_list, geo_crs, country_shapes, EEZ_gpkg, out_logging
Expand All @@ -831,6 +906,7 @@ def gadm(
gdp_method,
countries_list,
geo_crs,
contended_flag,
layer_id,
update,
out_logging,
Expand Down