From 2cb11cd0238a016511a6858d91e811f9e510ea07 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Wed, 22 Feb 2023 06:49:07 +0100 Subject: [PATCH 01/21] add conv files to mastr download --- src/egon/data/datasets.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index 2f53dc06a..6edeb105d 100755 --- a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -299,10 +299,14 @@ mastr: mastr_new: technologies: - - "wind" + - "biomass" + - "combustion" + - "gsgk" - "hydro" + - "nuclear" - "solar" - - "biomass" + - "storage" + - "wind" file_basename: "bnetza_mastr" deposit_id: 1132987 From 175300c2f13f1f70293cd91079172fea68628675 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Wed, 22 Feb 2023 09:42:45 +0100 Subject: [PATCH 02/21] added geocoded data to db --- src/egon/data/datasets.yml | 3 + src/egon/data/datasets/power_plants/mastr.py | 63 ++++++++++++++++++-- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index 6edeb105d..4e85696d6 100755 --- a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -309,6 +309,9 @@ mastr_new: - "wind" file_basename: "bnetza_mastr" deposit_id: 1132987 + geocoding_path: + - "data_bundle_egon_data" + - "mastr_geocoding" re_potential_areas: target: diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index cb2ef91f5..6e5c30da2 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -21,6 +21,8 @@ The data is used especially for the generation of status quo grids by ding0. 
""" +from pathlib import Path + from geoalchemy2 import Geometry from sqlalchemy import ( Boolean, @@ -35,18 +37,30 @@ import geopandas as gpd import pandas as pd -from egon.data import db +from egon.data import config, db from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW -import egon.data.config Base = declarative_base() TESTMODE_OFF = ( - egon.data.config.settings()["egon-data"]["--dataset-boundary"] - == "Everything" + config.settings()["egon-data"]["--dataset-boundary"] == "Everything" ) +class EgonMastrGeocoded(Base): + __tablename__ = "egon_mastr_geocoded" + __table_args__ = {"schema": "supply"} + + index = Column( + Integer, Sequence("mastr_geocoded_seq"), primary_key=True, index=True + ) + zip_and_municipality = Column(String) + latitude = Column(Float) + longitude = Column(Float) + altitude = Column(Float) + geometry = Column(Geometry("POINT", 4326)) + + class EgonPowerPlantsPv(Base): __tablename__ = "egon_power_plants_pv" __table_args__ = {"schema": "supply"} @@ -212,7 +226,43 @@ def voltage_levels(p: float) -> int: return units_gdf engine = db.engine() - cfg = egon.data.config.datasets()["power_plants"] + + # import geocoded data + cfg = config.datasets()["mastr_new"] + path_parts = cfg["geocoding_path"] + path = Path(*["."] + path_parts).resolve() + path = list(path.iterdir())[0] + + deposit_id_geocoding = int(path.parts[-1].split(".")[0].split("_")[-1]) + deposit_id_mastr = cfg["deposit_id"] + + if deposit_id_geocoding != deposit_id_mastr: + raise AssertionError( + f"The zenodo (sandbox) deposit ID {deposit_id_mastr} for the MaStR" + f" dataset is not matching with the geocoding version " + f"{deposit_id_geocoding}. Make sure to hermonize the data. When " + f"the MaStR dataset is updated also update the geocoding and " + f"update the egon data bundle. 
The geocoding can be done using: " + f"https://github.com/RLI-sandbox/mastr-geocoding" + ) + + geocoding_gdf = gpd.read_file(path) + + # remove failed requests + geocoding_gdf = geocoding_gdf.loc[geocoding_gdf.geometry.is_valid] + + EgonMastrGeocoded.__table__.drop(bind=engine, checkfirst=True) + EgonMastrGeocoded.__table__.create(bind=engine, checkfirst=True) + + geocoding_gdf.to_postgis( + name=EgonMastrGeocoded.__tablename__, + con=engine, + if_exists="append", + schema=EgonMastrGeocoded.__table_args__["schema"], + index=True, + ) + + cfg = config.datasets()["power_plants"] cols_mapping = { "all": { @@ -392,6 +442,9 @@ def voltage_levels(p: float) -> int: # write to DB print(f" Writing {len(units)} units to DB...") + target_tables[tech].__table__.drop(bind=engine, checkfirst=True) + target_tables[tech].__table__.create(bind=engine, checkfirst=True) + units.to_postgis( name=target_tables[tech].__tablename__, con=engine, From 8a9d6b9f08dbd714a3cf06c1fdd86eaf98c386a6 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Wed, 22 Feb 2023 15:34:51 +0100 Subject: [PATCH 03/21] add geocoding results to existing renewable tech tabels --- .../data/datasets/power_plants/__init__.py | 7 +- src/egon/data/datasets/power_plants/mastr.py | 260 ++++++++++++++---- 2 files changed, 209 insertions(+), 58 deletions(-) diff --git a/src/egon/data/datasets/power_plants/__init__.py b/src/egon/data/datasets/power_plants/__init__.py index 8b2bdc533..d5ca71334 100755 --- a/src/egon/data/datasets/power_plants/__init__.py +++ b/src/egon/data/datasets/power_plants/__init__.py @@ -60,7 +60,7 @@ class PowerPlants(Dataset): def __init__(self, dependencies): super().__init__( name="PowerPlants", - version="0.0.16", + version="0.0.17.dev", dependencies=dependencies, tasks=( create_tables, @@ -111,7 +111,10 @@ def create_tables(): ] for t in tables: db.execute_sql( - f"DROP TABLE IF EXISTS {t.__table_args__['schema']}.{t.__tablename__} CASCADE;" + f""" + DROP TABLE IF EXISTS 
{t.__table_args__['schema']}. + {t.__tablename__} CASCADE; + """ ) t.__table__.create(bind=engine, checkfirst=True) diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 6e5c30da2..19a9b1801 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -21,9 +21,12 @@ The data is used especially for the generation of status quo grids by ding0. """ +from __future__ import annotations + from pathlib import Path from geoalchemy2 import Geometry +from loguru import logger from sqlalchemy import ( Boolean, Column, @@ -35,6 +38,7 @@ ) from sqlalchemy.ext.declarative import declarative_base import geopandas as gpd +import numpy as np import pandas as pd from egon.data import config, db @@ -73,7 +77,10 @@ class EgonPowerPlantsPv(Base): commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum postcode = Column(String(5), nullable=True) # Postleitzahl city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde federal_state = Column(String(31), nullable=True) # Bundesland + site = Column(String, nullable=True) # Standort + zip_and_municipality = Column(String, nullable=True) site_type = Column(String(69), nullable=True) # Lage usage_sector = Column(String(36), nullable=True) # Nutzungsbereich @@ -100,6 +107,8 @@ class EgonPowerPlantsPv(Base): voltage_level = Column(Integer, nullable=True) voltage_level_inferred = Column(Boolean, nullable=True) + geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -115,7 +124,9 @@ class EgonPowerPlantsWind(Base): commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum postcode = Column(String(5), nullable=True) # Postleitzahl city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde federal_state = Column(String(31), nullable=True) # Bundesland + 
zip_and_municipality = Column(String, nullable=True) site_type = Column(String(17), nullable=True) # Lage manufacturer_name = Column(String(100), nullable=True) # Hersteller @@ -128,6 +139,8 @@ class EgonPowerPlantsWind(Base): voltage_level = Column(Integer, nullable=True) voltage_level_inferred = Column(Boolean, nullable=True) + geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -143,7 +156,9 @@ class EgonPowerPlantsBiomass(Base): commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum postcode = Column(String(5), nullable=True) # Postleitzahl city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) technology = Column(String(45), nullable=True) # Technologie fuel_name = Column(String(52), nullable=True) # Hauptbrennstoff @@ -155,6 +170,8 @@ class EgonPowerPlantsBiomass(Base): voltage_level = Column(Integer, nullable=True) voltage_level_inferred = Column(Boolean, nullable=True) + geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -170,7 +187,9 @@ class EgonPowerPlantsHydro(Base): commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum postcode = Column(String(5), nullable=True) # Postleitzahl city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) plant_type = Column(String(39), nullable=True) # ArtDerWasserkraftanlage water_origin = Column(String(20), nullable=True) # ArtDesZuflusses @@ -180,51 +199,115 @@ class EgonPowerPlantsHydro(Base): voltage_level = Column(Integer, nullable=True) voltage_level_inferred = Column(Boolean, nullable=True) + geometry_geocoded = Column(Boolean) + geom = 
Column(Geometry("POINT", 4326), index=True, nullable=True) -def import_mastr() -> None: - """Import MaStR data into database""" +def isfloat(num: str): + """ + Determine if string can be converted to float. + Parameters + ----------- + num : str + String to parse. + Returns + ------- + bool + Returns True in string can be parsed to float. + """ + try: + float(num) + return True + except ValueError: + return False + + +def zip_and_municipality_from_standort( + standort: str, +) -> tuple[str, bool]: + """ + Get zip code and municipality from Standort string split into a list. + Parameters + ----------- + standort : str + Standort as given from MaStR data. + Returns + ------- + str + Standort with only the zip code and municipality + as well a ', Germany' added. + """ + standort_list = standort.split() + + found = False + count = 0 + + for count, elem in enumerate(standort_list): + if len(elem) != 5: + continue + if not elem.isnumeric(): + continue + + found = True + + break + + if found: + cleaned_str = " ".join(standort_list[count:]) + + return cleaned_str, found + + logger.warning( + "Couldn't identify zip code. This entry will be dropped." + f" Original standort: {standort}." + ) + + return standort, found + + +def infer_voltage_level( + units_gdf: gpd.GeoDataFrame, +) -> gpd.GeoDataFrame: + """ + Infer nan values in voltage level derived from generator capacity to + the power plants. + + Parameters + ----------- + units_gdf : geopandas.GeoDataFrame + GeoDataFrame containing units with voltage levels from MaStR + Returnsunits_gdf: gpd.GeoDataFrame + ------- + geopandas.GeoDataFrame + GeoDataFrame containing units all having assigned a voltage level. 
+ """ + + def voltage_levels(p: float) -> int: + if p <= 100: + return 7 + elif p <= 200: + return 6 + elif p <= 5500: + return 5 + elif p <= 20000: + return 4 + elif p <= 120000: + return 3 + return 1 + + units_gdf["voltage_level_inferred"] = False + mask = units_gdf.voltage_level.isna() + units_gdf.loc[mask, "voltage_level_inferred"] = True + units_gdf.loc[mask, "voltage_level"] = units_gdf.loc[ + mask + ].Nettonennleistung.apply(voltage_levels) + + return units_gdf - def infer_voltage_level( - units_gdf: gpd.GeoDataFrame, - ) -> gpd.GeoDataFrame: - """ - Infer nan values in voltage level derived from generator capacity to - the power plants. - - Parameters - ----------- - units_gdf : geopandas.GeoDataFrame - GeoDataFrame containing units with voltage levels from MaStR - Returnsunits_gdf: gpd.GeoDataFrame - ------- - geopandas.GeoDataFrame - GeoDataFrame containing units all having assigned a voltage level. - """ - - def voltage_levels(p: float) -> int: - if p <= 100: - return 7 - elif p <= 200: - return 6 - elif p <= 5500: - return 5 - elif p <= 20000: - return 4 - elif p <= 120000: - return 3 - return 1 - - units_gdf["voltage_level_inferred"] = False - mask = units_gdf.voltage_level.isna() - units_gdf.loc[mask, "voltage_level_inferred"] = True - units_gdf.loc[mask, "voltage_level"] = units_gdf.loc[ - mask - ].Nettonennleistung.apply(voltage_levels) - - return units_gdf +def import_mastr() -> None: + """Import MaStR data into database""" engine = db.engine() # import geocoded data @@ -271,12 +354,14 @@ def voltage_levels(p: float) -> int: "Inbetriebnahmedatum": "commissioning_date", "Postleitzahl": "postcode", "Ort": "city", + "Gemeinde": "municipality", "Bundesland": "federal_state", "Nettonennleistung": "capacity", "Einspeisungsart": "feedin_type", }, "pv": { "Lage": "site_type", + "Standort": "site", "Nutzungsbereich": "usage_sector", "Hauptausrichtung": "orientation_primary", "HauptausrichtungNeigungswinkel": "orientation_primary_angle", @@ -345,8 +430,8 @@ 
def voltage_levels(p: float) -> int: technologies = ["pv", "wind", "biomass", "hydro"] for tech in technologies: # read units - print(f"===== Importing MaStR dataset: {tech} =====") - print(" Reading CSV and filtering data...") + logger.info(f"===== Importing MaStR dataset: {tech} =====") + logger.debug("Reading CSV and filtering data...") units = pd.read_csv( source_files[tech], usecols=( @@ -361,19 +446,19 @@ def voltage_levels(p: float) -> int: # drop units outside of Germany len_old = len(units) units = units.loc[units.Land == "Deutschland"] - print(f" {len_old-len(units)} units outside of Germany dropped...") + logger.debug( + f"{len_old - len(units)} units outside of Germany dropped..." + ) # filter for SH units if in testmode if not TESTMODE_OFF: - print( - """ TESTMODE: - Dropping all units outside of Schleswig-Holstein... - """ + logger.info( + "TESTMODE: Dropping all units outside of Schleswig-Holstein..." ) units = units.loc[units.Bundesland == "SchleswigHolstein"] # merge and rename voltage level - print(" Merging with locations and allocate voltage level...") + logger.debug("Merging with locations and allocate voltage level...") units = units.merge( locations[["MaStRNummer", "Spannungsebene"]], left_on="LokationMastrNummer", @@ -386,7 +471,7 @@ def voltage_levels(p: float) -> int: units = infer_voltage_level(units) # add geometry - print(" Adding geometries...") + logger.debug("Adding geometries...") units = gpd.GeoDataFrame( units, geometry=gpd.points_from_xy( @@ -394,15 +479,77 @@ def voltage_levels(p: float) -> int: ), crs=4326, ) - units_wo_geom = len( - units.loc[(units.Laengengrad.isna() | units.Laengengrad.isna())] + + units["geometry_geocoded"] = ( + units.Laengengrad.isna() | units.Laengengrad.isna() ) - print( - f" {units_wo_geom}/{len(units)} units do not have a geometry!" 
+ + units.loc[~units.geometry_geocoded, "geometry_geocoded"] = ~units.loc[ + ~units.geometry_geocoded, "geometry" + ].is_valid + + units_wo_geom = units["geometry_geocoded"].sum() + + logger.debug( + f"{units_wo_geom}/{len(units)} units do not have a geometry!" + " Adding geocoding results." ) + # determine zip and municipality string + mask = ( + units.Postleitzahl.apply(isfloat) + & ~units.Postleitzahl.isna() + & ~units.Gemeinde.isna() + ) + units["zip_and_municipality"] = np.nan + ok_units = units.loc[mask] + + units.loc[mask, "zip_and_municipality"] = ( + ok_units.Postleitzahl.astype(int).astype(str).str.zfill(5) + + " " + + ok_units.Gemeinde.astype(str).str.rstrip().str.lstrip() + + ", Deutschland" + ) + + # get zip and municipality from Standort + parse_df = units.loc[~mask] + + if not parse_df.empty and "Standort" in parse_df.columns: + init_len = len(parse_df) + + logger.info( + f"Parsing ZIP code and municipality from Standort for " + f"{init_len} values for {tech}." + ) + + parse_df[["zip_and_municipality", "drop_this"]] = ( + parse_df.Standort.astype(str) + .apply(zip_and_municipality_from_standort) + .tolist() + ) + + parse_df = parse_df.loc[parse_df.drop_this] + + if not parse_df.empty: + units.loc[ + parse_df.index, "zip_and_municipality" + ] = parse_df.zip_and_municipality + + # add geocoding to missing + units = units.merge( + right=geocoding_gdf[["zip_and_municipality", "geometry"]].rename( + columns={"geometry": "temp"} + ), + how="left", + on="zip_and_municipality", + ) + + units.loc[units.geometry_geocoded, "geometry"] = units.loc[ + units.geometry_geocoded, "temp" + ] + # drop unnecessary and rename columns - print(" Reformatting...") + logger.debug("Reformatting...") units.drop( columns=[ "LokationMastrNummer", @@ -411,6 +558,7 @@ def voltage_levels(p: float) -> int: "Breitengrad", "Spannungsebene", "Land", + "temp", ], inplace=True, ) @@ -431,7 +579,7 @@ def voltage_levels(p: float) -> int: units["th_capacity"] = units["th_capacity"] / 1e3 # 
assign bus ids - print(" Assigning bus ids...") + logger.debug("Assigning bus ids...") units = units.assign( bus_id=units.loc[~units.geom.x.isna()] .sjoin(mv_grid_districts[["bus_id", "geom"]], how="left") @@ -441,7 +589,7 @@ def voltage_levels(p: float) -> int: units["bus_id"] = units.bus_id.fillna(-1).astype(int) # write to DB - print(f" Writing {len(units)} units to DB...") + logger.info(f"Writing {len(units)} units to DB...") target_tables[tech].__table__.drop(bind=engine, checkfirst=True) target_tables[tech].__table__.create(bind=engine, checkfirst=True) From 7f08252c71a3f54c44851693dc232e0247718274 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Fri, 24 Feb 2023 07:58:54 +0100 Subject: [PATCH 04/21] handling of None and NaN in geom col --- src/egon/data/datasets/power_plants/mastr.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 19a9b1801..c9e02296a 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -27,6 +27,7 @@ from geoalchemy2 import Geometry from loguru import logger +from shapely.geometry import Point from sqlalchemy import ( Boolean, Column, @@ -548,6 +549,10 @@ def import_mastr() -> None: units.geometry_geocoded, "temp" ] + # fill None and NaN values with empty geom because to_postgis fails + # otherwise + units.geometry.fillna(Point(np.nan, np.nan), inplace=True) + # drop unnecessary and rename columns logger.debug("Reformatting...") units.drop( From 187e004d6fc076d944f054a998e0dcd21ee3a5d9 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Fri, 24 Feb 2023 10:00:27 +0100 Subject: [PATCH 05/21] download new data bundle --- src/egon/data/datasets.yml | 2 +- .../data/datasets/data_bundle/__init__.py | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index 4e85696d6..02888be81 100755 
--- a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -537,7 +537,7 @@ solar_rooftop: data-bundle: sources: zenodo: - deposit_id: 1095882 + deposit_id: 1167119 targets: file: 'data_bundle_egon_data.zip' diff --git a/src/egon/data/datasets/data_bundle/__init__.py b/src/egon/data/datasets/data_bundle/__init__.py index 015c09a12..10b1f82cf 100644 --- a/src/egon/data/datasets/data_bundle/__init__.py +++ b/src/egon/data/datasets/data_bundle/__init__.py @@ -2,13 +2,13 @@ """ +from pathlib import Path from urllib.request import urlretrieve +import shutil import zipfile -import egon.data.config +from egon.data import config from egon.data.datasets import Dataset -from pathlib import Path -import shutil def download(): @@ -23,9 +23,12 @@ def download(): if data_bundle_path.exists() and data_bundle_path.is_dir(): shutil.rmtree(data_bundle_path) # Get parameters from config and set download URL - sources = egon.data.config.datasets()["data-bundle"]["sources"]["zenodo"] - url = f"""https://sandbox.zenodo.org/record/{sources['deposit_id']}/files/data_bundle_egon_data.zip""" - target_file = egon.data.config.datasets()["data-bundle"]["targets"]["file"] + sources = config.datasets()["data-bundle"]["sources"]["zenodo"] + url = ( + f"https://sandbox.zenodo.org/record/{sources['deposit_id']}/files/" + "data_bundle_egon_data.zip" + ) + target_file = config.datasets()["data-bundle"]["targets"]["file"] # Retrieve files urlretrieve(url, target_file) @@ -36,12 +39,12 @@ def download(): class DataBundle(Dataset): def __init__(self, dependencies): - deposit_id = egon.data.config.datasets()["data-bundle"]["sources"][ - "zenodo" - ]["deposit_id"] + deposit_id = config.datasets()["data-bundle"]["sources"]["zenodo"][ + "deposit_id" + ] super().__init__( name="DataBundle", - version=str(deposit_id) + "-0.0.0", + version=str(deposit_id) + "-0.0.0.dev", dependencies=dependencies, tasks=(download), ) From 9744dfdb28b86559867cc9a7bf8850b5a552d738 Mon Sep 17 00:00:00 2001 From: IlkaCu 
Date: Fri, 24 Feb 2023 11:59:35 +0100 Subject: [PATCH 06/21] Update deposit ID for data_bundle download --- src/egon/data/datasets.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index 2f53dc06a..13842f2f0 100755 --- a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -530,7 +530,7 @@ solar_rooftop: data-bundle: sources: zenodo: - deposit_id: 1095882 + deposit_id: 1167119 targets: file: 'data_bundle_egon_data.zip' From cf10e699550cdbb3204503ceaade1b146045b157 Mon Sep 17 00:00:00 2001 From: IlkaCu Date: Fri, 24 Feb 2023 12:03:00 +0100 Subject: [PATCH 07/21] Update CHANGELOG.rst --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f86ebc43d..0118a40a0 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -486,6 +486,8 @@ Changed `#1026 `_ .. _#799: https://github.com/openego/eGon-data/issues/799 +* Change deposit ID for data_bundle download from zenodo sandbox + `#1110 `_ Bug Fixes --------- From 9792a26d60ab0576e69ab9b209fab92370ceb31d Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Fri, 24 Feb 2023 15:52:33 +0100 Subject: [PATCH 08/21] remove gens outside boundary or without geometry --- .../data/datasets/data_bundle/__init__.py | 4 +-- src/egon/data/datasets/power_plants/mastr.py | 27 ++++++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/egon/data/datasets/data_bundle/__init__.py b/src/egon/data/datasets/data_bundle/__init__.py index 10b1f82cf..867db59b3 100644 --- a/src/egon/data/datasets/data_bundle/__init__.py +++ b/src/egon/data/datasets/data_bundle/__init__.py @@ -44,7 +44,7 @@ def __init__(self, dependencies): ] super().__init__( name="DataBundle", - version=str(deposit_id) + "-0.0.0.dev", + version=f"{deposit_id}-0.0.0.dev", dependencies=dependencies, - tasks=(download), + tasks=(download,), ) diff --git a/src/egon/data/datasets/power_plants/mastr.py
b/src/egon/data/datasets/power_plants/mastr.py index c9e02296a..169ee6584 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -27,7 +27,6 @@ from geoalchemy2 import Geometry from loguru import logger -from shapely.geometry import Point from sqlalchemy import ( Boolean, Column, @@ -44,6 +43,9 @@ from egon.data import config, db from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW +from egon.data.datasets.power_plants.pv_rooftop_buildings import ( + federal_state_data, +) Base = declarative_base() @@ -451,6 +453,11 @@ def import_mastr() -> None: f"{len_old - len(units)} units outside of Germany dropped..." ) + # get boundary + boundary = ( + federal_state_data(geocoding_gdf.crs).dissolve().at[0, "geom"] + ) + # filter for SH units if in testmode if not TESTMODE_OFF: logger.info( @@ -549,9 +556,21 @@ def import_mastr() -> None: units.geometry_geocoded, "temp" ] - # fill None and NaN values with empty geom because to_postgis fails - # otherwise - units.geometry.fillna(Point(np.nan, np.nan), inplace=True) + init_len = len(units) + + logger.info( + "Dropping units outside boundary by geometry or without geometry" + "..." + ) + + units.dropna(subset=["geometry"], inplace=True) + + units = units.loc[units.geometry.within(boundary)] + + logger.debug( + f"{init_len - len(units)}/{init_len} " + f"({((init_len - len(units)) / init_len) * 100: g} %) dropped." 
+ ) # drop unnecessary and rename columns logger.debug("Reformatting...") From 042e34de7ddc9a2b86807fa8cab3167436c6efac Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Fri, 24 Feb 2023 15:58:22 +0100 Subject: [PATCH 09/21] moved mastr db classes into separate file --- src/egon/data/datasets/power_plants/mastr.py | 173 +----------------- .../datasets/power_plants/mastr_db_classes.py | 166 +++++++++++++++++ 2 files changed, 173 insertions(+), 166 deletions(-) create mode 100644 src/egon/data/datasets/power_plants/mastr_db_classes.py diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 169ee6584..5a1827c7e 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -25,188 +25,29 @@ from pathlib import Path -from geoalchemy2 import Geometry from loguru import logger -from sqlalchemy import ( - Boolean, - Column, - DateTime, - Float, - Integer, - Sequence, - String, -) -from sqlalchemy.ext.declarative import declarative_base import geopandas as gpd import numpy as np import pandas as pd from egon.data import config, db from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW +from egon.data.datasets.power_plants.mastr_db_classes import ( + EgonMastrGeocoded, + EgonPowerPlantsBiomass, + EgonPowerPlantsHydro, + EgonPowerPlantsPv, + EgonPowerPlantsWind, +) from egon.data.datasets.power_plants.pv_rooftop_buildings import ( federal_state_data, ) -Base = declarative_base() - TESTMODE_OFF = ( config.settings()["egon-data"]["--dataset-boundary"] == "Everything" ) -class EgonMastrGeocoded(Base): - __tablename__ = "egon_mastr_geocoded" - __table_args__ = {"schema": "supply"} - - index = Column( - Integer, Sequence("mastr_geocoded_seq"), primary_key=True, index=True - ) - zip_and_municipality = Column(String) - latitude = Column(Float) - longitude = Column(Float) - altitude = Column(Float) - geometry = Column(Geometry("POINT", 4326)) - - -class 
EgonPowerPlantsPv(Base): - __tablename__ = "egon_power_plants_pv" - __table_args__ = {"schema": "supply"} - - id = Column(Integer, Sequence("pp_pv_seq"), primary_key=True) - bus_id = Column(Integer, nullable=True) # Grid district id - gens_id = Column(String, nullable=True) # EinheitMastrNummer - - status = Column(String, nullable=True) # EinheitBetriebsstatus - commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum - postcode = Column(String(5), nullable=True) # Postleitzahl - city = Column(String(50), nullable=True) # Ort - municipality = Column(String, nullable=True) # Gemeinde - federal_state = Column(String(31), nullable=True) # Bundesland - site = Column(String, nullable=True) # Standort - zip_and_municipality = Column(String, nullable=True) - - site_type = Column(String(69), nullable=True) # Lage - usage_sector = Column(String(36), nullable=True) # Nutzungsbereich - orientation_primary = Column(String(11), nullable=True) # Hauptausrichtung - orientation_primary_angle = Column( - String(18), nullable=True - ) # HauptausrichtungNeigungswinkel - orientation_secondary = Column( - String(11), nullable=True - ) # Nebenausrichtung - orientation_secondary_angle = Column( - String(18), nullable=True - ) # NebenausrichtungNeigungswinkel - orientation_uniform = Column( - Boolean, nullable=True - ) # EinheitlicheAusrichtungUndNeigungswinkel - module_count = Column(Float, nullable=True) # AnzahlModule - - capacity = Column(Float, nullable=True) # Nettonennleistung - capacity_inverter = Column( - Float, nullable=True - ) # ZugeordneteWirkleistungWechselrichter in MW - feedin_type = Column(String(47), nullable=True) # Einspeisungsart - voltage_level = Column(Integer, nullable=True) - voltage_level_inferred = Column(Boolean, nullable=True) - - geometry_geocoded = Column(Boolean) - - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) - - -class EgonPowerPlantsWind(Base): - __tablename__ = "egon_power_plants_wind" - __table_args__ = 
{"schema": "supply"} - - id = Column(Integer, Sequence("pp_wind_seq"), primary_key=True) - bus_id = Column(Integer, nullable=True) # Grid district id - gens_id = Column(String, nullable=True) # EinheitMastrNummer - - status = Column(String, nullable=True) # EinheitBetriebsstatus - commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum - postcode = Column(String(5), nullable=True) # Postleitzahl - city = Column(String(50), nullable=True) # Ort - municipality = Column(String, nullable=True) # Gemeinde - federal_state = Column(String(31), nullable=True) # Bundesland - zip_and_municipality = Column(String, nullable=True) - - site_type = Column(String(17), nullable=True) # Lage - manufacturer_name = Column(String(100), nullable=True) # Hersteller - type_name = Column(String(100), nullable=True) # Typenbezeichnung - hub_height = Column(Float, nullable=True) # Nabenhoehe - rotor_diameter = Column(Float, nullable=True) # Rotordurchmesser - - capacity = Column(Float, nullable=True) # Nettonennleistung - feedin_type = Column(String(47), nullable=True) # Einspeisungsart - voltage_level = Column(Integer, nullable=True) - voltage_level_inferred = Column(Boolean, nullable=True) - - geometry_geocoded = Column(Boolean) - - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) - - -class EgonPowerPlantsBiomass(Base): - __tablename__ = "egon_power_plants_biomass" - __table_args__ = {"schema": "supply"} - - id = Column(Integer, Sequence("pp_biomass_seq"), primary_key=True) - bus_id = Column(Integer, nullable=True) # Grid district id - gens_id = Column(String, nullable=True) # EinheitMastrNummer - - status = Column(String, nullable=True) # EinheitBetriebsstatus - commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum - postcode = Column(String(5), nullable=True) # Postleitzahl - city = Column(String(50), nullable=True) # Ort - municipality = Column(String, nullable=True) # Gemeinde - federal_state = Column(String(31), nullable=True) 
# Bundesland - zip_and_municipality = Column(String, nullable=True) - - technology = Column(String(45), nullable=True) # Technologie - fuel_name = Column(String(52), nullable=True) # Hauptbrennstoff - fuel_type = Column(String(19), nullable=True) # Biomasseart - - capacity = Column(Float, nullable=True) # Nettonennleistung - th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung - feedin_type = Column(String(47), nullable=True) # Einspeisungsart - voltage_level = Column(Integer, nullable=True) - voltage_level_inferred = Column(Boolean, nullable=True) - - geometry_geocoded = Column(Boolean) - - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) - - -class EgonPowerPlantsHydro(Base): - __tablename__ = "egon_power_plants_hydro" - __table_args__ = {"schema": "supply"} - - id = Column(Integer, Sequence("pp_hydro_seq"), primary_key=True) - bus_id = Column(Integer, nullable=True) # Grid district id - gens_id = Column(String, nullable=True) # EinheitMastrNummer - - status = Column(String, nullable=True) # EinheitBetriebsstatus - commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum - postcode = Column(String(5), nullable=True) # Postleitzahl - city = Column(String(50), nullable=True) # Ort - municipality = Column(String, nullable=True) # Gemeinde - federal_state = Column(String(31), nullable=True) # Bundesland - zip_and_municipality = Column(String, nullable=True) - - plant_type = Column(String(39), nullable=True) # ArtDerWasserkraftanlage - water_origin = Column(String(20), nullable=True) # ArtDesZuflusses - - capacity = Column(Float, nullable=True) # Nettonennleistung - feedin_type = Column(String(47), nullable=True) # Einspeisungsart - voltage_level = Column(Integer, nullable=True) - voltage_level_inferred = Column(Boolean, nullable=True) - - geometry_geocoded = Column(Boolean) - - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) - - def isfloat(num: str): """ Determine if string can be converted to 
float. diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py new file mode 100644 index 000000000..601b93d70 --- /dev/null +++ b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -0,0 +1,166 @@ +from geoalchemy2 import Geometry +from sqlalchemy import ( + Boolean, + Column, + DateTime, + Float, + Integer, + Sequence, + String, +) +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class EgonMastrGeocoded(Base): + __tablename__ = "egon_mastr_geocoded" + __table_args__ = {"schema": "supply"} + + index = Column( + Integer, Sequence("mastr_geocoded_seq"), primary_key=True, index=True + ) + zip_and_municipality = Column(String) + latitude = Column(Float) + longitude = Column(Float) + altitude = Column(Float) + geometry = Column(Geometry("POINT", 4326)) + + +class EgonPowerPlantsPv(Base): + __tablename__ = "egon_power_plants_pv" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_pv_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + site = Column(String, nullable=True) # Standort + zip_and_municipality = Column(String, nullable=True) + + site_type = Column(String(69), nullable=True) # Lage + usage_sector = Column(String(36), nullable=True) # Nutzungsbereich + orientation_primary = Column(String(11), nullable=True) # Hauptausrichtung + orientation_primary_angle = Column( + String(18), nullable=True + ) # HauptausrichtungNeigungswinkel + orientation_secondary = 
Column( + String(11), nullable=True + ) # Nebenausrichtung + orientation_secondary_angle = Column( + String(18), nullable=True + ) # NebenausrichtungNeigungswinkel + orientation_uniform = Column( + Boolean, nullable=True + ) # EinheitlicheAusrichtungUndNeigungswinkel + module_count = Column(Float, nullable=True) # AnzahlModule + + capacity = Column(Float, nullable=True) # Nettonennleistung + capacity_inverter = Column( + Float, nullable=True + ) # ZugeordneteWirkleistungWechselrichter in MW + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsWind(Base): + __tablename__ = "egon_power_plants_wind" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_wind_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + site_type = Column(String(17), nullable=True) # Lage + manufacturer_name = Column(String(100), nullable=True) # Hersteller + type_name = Column(String(100), nullable=True) # Typenbezeichnung + hub_height = Column(Float, nullable=True) # Nabenhoehe + rotor_diameter = Column(Float, nullable=True) # Rotordurchmesser + + capacity = Column(Float, nullable=True) # Nettonennleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = 
Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsBiomass(Base): + __tablename__ = "egon_power_plants_biomass" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_biomass_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + technology = Column(String(45), nullable=True) # Technologie + fuel_name = Column(String(52), nullable=True) # Hauptbrennstoff + fuel_type = Column(String(19), nullable=True) # Biomasseart + + capacity = Column(Float, nullable=True) # Nettonennleistung + th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsHydro(Base): + __tablename__ = "egon_power_plants_hydro" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_hydro_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, 
nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + plant_type = Column(String(39), nullable=True) # ArtDerWasserkraftanlage + water_origin = Column(String(20), nullable=True) # ArtDesZuflusses + + capacity = Column(Float, nullable=True) # Nettonennleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) From 2458bd87b120295bfe849214bd54b85927966ac2 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Mon, 27 Feb 2023 11:38:16 +0100 Subject: [PATCH 10/21] changed data source to db for pv rooftop and removed doubling of geocoing --- .../data/datasets/power_plants/__init__.py | 2 - src/egon/data/datasets/power_plants/mastr.py | 2 - .../power_plants/pv_rooftop_buildings.py | 682 ++---------------- 3 files changed, 78 insertions(+), 608 deletions(-) diff --git a/src/egon/data/datasets/power_plants/__init__.py b/src/egon/data/datasets/power_plants/__init__.py index d5ca71334..fc77691b8 100755 --- a/src/egon/data/datasets/power_plants/__init__.py +++ b/src/egon/data/datasets/power_plants/__init__.py @@ -29,7 +29,6 @@ ) from egon.data.datasets.power_plants.pv_rooftop import pv_rooftop_per_mv_grid from egon.data.datasets.power_plants.pv_rooftop_buildings import ( - geocode_mastr_data, pv_rooftop_to_buildings, ) import egon.data.config @@ -73,7 +72,6 @@ def __init__(self, dependencies): pv_ground_mounted.insert, ( pv_rooftop_per_mv_grid, - geocode_mastr_data, pv_rooftop_to_buildings, ), }, diff --git a/src/egon/data/datasets/power_plants/mastr.py 
b/src/egon/data/datasets/power_plants/mastr.py index 5a1827c7e..9f56feb9c 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -455,8 +455,6 @@ def import_mastr() -> None: # write to DB logger.info(f"Writing {len(units)} units to DB...") - target_tables[tech].__table__.drop(bind=engine, checkfirst=True) - target_tables[tech].__table__.create(bind=engine, checkfirst=True) units.to_postgis( name=target_tables[tech].__tablename__, diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index 908fe3610..53136760e 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -8,7 +8,7 @@ * Drop generators which don't have any plausible capacity data (23.5MW > P > 0.1). * Randomly and weighted add a start-up date if it is missing. -* Extract zip and municipality from 'Standort' given in MaStR data. +* Extract zip and municipality from 'site' given in MaStR data. * Geocode unique zip and municipality combinations with Nominatim (1 sec delay). Drop generators for which geocoding failed or which are located outside the municipalities of Germany. 
@@ -42,13 +42,9 @@ from collections import Counter from functools import wraps -from pathlib import Path from time import perf_counter -from typing import Any from geoalchemy2 import Geometry -from geopy.extra.rate_limiter import RateLimiter -from geopy.geocoders import Nominatim from loguru import logger from numpy.random import RandomState, default_rng from pyproj.crs.crs import CRS @@ -63,8 +59,7 @@ from egon.data.datasets.electricity_demand_timeseries.hh_buildings import ( OsmBuildingsSynthetic, ) -from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW -from egon.data.datasets.power_plants.mastr import EgonPowerPlantsPv +from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv from egon.data.datasets.scenario_capacities import EgonScenarioCapacities from egon.data.datasets.zensus_vg250 import Vg250Gem @@ -73,91 +68,24 @@ SEED = int(config.settings()["egon-data"]["--random-seed"]) # TODO: move to yml -# mastr data -MASTR_RELEVANT_COLS = [ - "EinheitMastrNummer", - "Bruttoleistung", - "Bruttoleistung_extended", - "Nettonennleistung", - "InstallierteLeistung", - "zugeordneteWirkleistungWechselrichter", - "EinheitBetriebsstatus", - "Standort", - "Bundesland", - "Land", - "Landkreis", - "Gemeinde", - "Postleitzahl", - "Ort", - "GeplantesInbetriebnahmedatum", - "Inbetriebnahmedatum", - "GemeinsamerWechselrichterMitSpeicher", - "Lage", - "Leistungsbegrenzung", - "EinheitlicheAusrichtungUndNeigungswinkel", - "Hauptausrichtung", - "HauptausrichtungNeigungswinkel", - "Nebenausrichtung", -] - -MASTR_DTYPES = { - "EinheitMastrNummer": str, - "Bruttoleistung": float, - "Bruttoleistung_extended": float, - "Nettonennleistung": float, - "InstallierteLeistung": float, - "zugeordneteWirkleistungWechselrichter": float, - "EinheitBetriebsstatus": str, - "Standort": str, - "Bundesland": str, - "Land": str, - "Landkreis": str, - "Gemeinde": str, - # "Postleitzahl": int, # fails because of nan values - "Ort": str, - "GemeinsamerWechselrichterMitSpeicher": 
str, - "Lage": str, - "Leistungsbegrenzung": str, - # this will parse nan values as false wich is not always correct - # "EinheitlicheAusrichtungUndNeigungswinkel": bool, - "Hauptausrichtung": str, - "HauptausrichtungNeigungswinkel": str, - "Nebenausrichtung": str, - "NebenausrichtungNeigungswinkel": str, -} +# mastr datay -MASTR_PARSE_DATES = [ - "GeplantesInbetriebnahmedatum", - "Inbetriebnahmedatum", -] - -MASTR_INDEX_COL = "EinheitMastrNummer" +MASTR_INDEX_COL = "gens_id" EPSG = 4326 SRID = 3035 # data cleaning -MAX_REALISTIC_PV_CAP = 23500 -MIN_REALISTIC_PV_CAP = 0.1 -ROUNDING = 1 - -# geopy -MIN_DELAY_SECONDS = 1 -USER_AGENT = "rli_kh_geocoder" +MAX_REALISTIC_PV_CAP = 23500 / 10**3 +MIN_REALISTIC_PV_CAP = 0.1 / 10**3 # show additional logging information VERBOSE = False -EXPORT_DIR = Path(__name__).resolve().parent / "data" -EXPORT_FILE = "mastr_geocoded.gpkg" -EXPORT_PATH = EXPORT_DIR / EXPORT_FILE -DRIVER = "GPKG" - # Number of quantiles Q = 5 # Scenario Data -CARRIER = "solar_rooftop" SCENARIOS = ["eGon2035", "eGon100RE"] SCENARIO_TIMESTAMP = { "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"), @@ -190,26 +118,18 @@ ] MIN_BUILDING_SIZE = 10.0 -UPPER_QUNATILE = 0.95 +UPPER_QUANTILE = 0.95 LOWER_QUANTILE = 0.05 -COLS_TO_RENAME = { - "EinheitlicheAusrichtungUndNeigungswinkel": ( - "einheitliche_ausrichtung_und_neigungswinkel" - ), - "Hauptausrichtung": "hauptausrichtung", - "HauptausrichtungNeigungswinkel": "hauptausrichtung_neigungswinkel", -} - COLS_TO_EXPORT = [ "scenario", "bus_id", "building_id", "gens_id", "capacity", - "einheitliche_ausrichtung_und_neigungswinkel", - "hauptausrichtung", - "hauptausrichtung_neigungswinkel", + "orientation_uniform", + "orientation_primary", + "orientation_primary_angle", "voltage_level", "weather_cell_id", ] @@ -238,92 +158,58 @@ def timeit_wrapper(*args, **kwargs): @timer_func def mastr_data( index_col: str | int | list[str] | list[int], - usecols: list[str], - dtype: dict[str, Any] | None, - parse_dates: list[str] | 
None, -) -> pd.DataFrame: +) -> gpd.GeoDataFrame: """ - Read MaStR data from csv. + Read MaStR data from database. Parameters ----------- index_col : str, int or list of str or int Column(s) to use as the row labels of the DataFrame. - usecols : list of str - Return a subset of the columns. - dtype : dict of column (str) -> type (any), optional - Data type for data or columns. - parse_dates : list of names (str), optional - Try to parse given columns to datetime. Returns ------- pandas.DataFrame DataFrame containing MaStR data. """ - mastr_path = Path( - WORKING_DIR_MASTR_NEW - / config.datasets()["power_plants"]["sources"]["mastr_pv"] - ).resolve() - - mastr_df = pd.read_csv( - mastr_path, - index_col=index_col, - usecols=usecols, - dtype=dtype, - parse_dates=parse_dates, - ) - - mastr_df = mastr_df.loc[ - (mastr_df.EinheitBetriebsstatus == "InBetrieb") - & (mastr_df.Land == "Deutschland") - & (mastr_df.Lage == "Bauliche Anlagen (Hausdach, Gebäude und Fassade)") - ] - - if ( - config.settings()["egon-data"]["--dataset-boundary"] - == "Schleswig-Holstein" - ): - init_len = len(mastr_df) - - mastr_df = mastr_df.loc[mastr_df.Bundesland == "SchleswigHolstein"] - - logger.info( - f"Using only MaStR data within Schleswig-Holstein. " - f"{init_len - len(mastr_df)} of {init_len} generators are dropped." + with db.session_scope() as session: + query = session.query(EgonPowerPlantsPv).filter( + EgonPowerPlantsPv.status == "InBetrieb", + EgonPowerPlantsPv.site_type + == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"), ) + gdf = gpd.read_postgis( + query.statement, query.session.bind, index_col=index_col + ) + logger.debug("MaStR data loaded.") - return mastr_df + return gdf @timer_func def clean_mastr_data( - mastr_df: pd.DataFrame, + mastr_gdf: gpd.GeoDataFrame, max_realistic_pv_cap: int | float, min_realistic_pv_cap: int | float, - rounding: int, seed: int, -) -> pd.DataFrame: +) -> gpd.GeoDataFrame: """ Clean the MaStR data from implausible data. 
* Drop MaStR ID duplicates. * Drop generators with implausible capacities. * Drop generators without any kind of start-up date. - * Clean up Standort column and capacity. + * Clean up site column and capacity. Parameters ----------- - mastr_df : pandas.DataFrame + mastr_gdf : pandas.DataFrame DataFrame containing MaStR data. max_realistic_pv_cap : int or float Maximum capacity, which is considered to be realistic. min_realistic_pv_cap : int or float Minimum capacity, which is considered to be realistic. - rounding : int - Rounding to use when cleaning up capacity. E.g. when - rounding is 1 a capacity of 9.93 will be rounded to 9.9. seed : int Seed to use for random operations with NumPy and pandas. Returns @@ -331,361 +217,61 @@ def clean_mastr_data( pandas.DataFrame DataFrame containing cleaned MaStR data. """ - init_len = len(mastr_df) + init_len = len(mastr_gdf) # drop duplicates - mastr_df = mastr_df.loc[~mastr_df.index.duplicated()] - - # drop invalid entries in standort - index_to_drop = mastr_df.loc[ - (mastr_df.Standort.isna()) | (mastr_df.Standort.isnull()) - ].index - - mastr_df = mastr_df.loc[~mastr_df.index.isin(index_to_drop)] - - df = mastr_df[ - [ - "Bruttoleistung", - "Bruttoleistung_extended", - "Nettonennleistung", - "zugeordneteWirkleistungWechselrichter", - "InstallierteLeistung", - ] - ].round(rounding) - - # use only the smallest capacity rating if multiple are given - mastr_df = mastr_df.assign( - capacity=[ - most_plausible(p_tub, min_realistic_pv_cap) - for p_tub in df.itertuples(index=False) - ] - ) + mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()] # drop generators without any capacity info # and capacity of zero # and if the capacity is > 23.5 MW, because # Germanies largest rooftop PV is 23 MW # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379 - mastr_df = mastr_df.loc[ - (~mastr_df.capacity.isna()) - & (mastr_df.capacity <= max_realistic_pv_cap) - & (mastr_df.capacity > 
min_realistic_pv_cap) + mastr_gdf = mastr_gdf.loc[ + ~mastr_gdf.capacity.isna() + & (mastr_gdf.capacity <= max_realistic_pv_cap) + & (mastr_gdf.capacity > min_realistic_pv_cap) ] - # get zip and municipality - mastr_df[["zip_and_municipality", "drop_this"]] = pd.DataFrame( - mastr_df.Standort.astype(str) - .apply( - zip_and_municipality_from_standort, - args=(VERBOSE,), - ) - .tolist(), - index=mastr_df.index, - ) - - # drop invalid entries - mastr_df = mastr_df.loc[mastr_df.drop_this].drop(columns="drop_this") - - # add ", Deutschland" just in case - mastr_df = mastr_df.assign( - zip_and_municipality=(mastr_df.zip_and_municipality + ", Deutschland") - ) - # get consistent start-up date - mastr_df = mastr_df.assign( - start_up_date=mastr_df.Inbetriebnahmedatum, - ) - - mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[ - mastr_df.start_up_date.isna() - ].assign( - start_up_date=mastr_df.GeplantesInbetriebnahmedatum.loc[ - mastr_df.start_up_date.isna() - ] - ) - # randomly and weighted fill missing start-up dates - pool = mastr_df.loc[ - ~mastr_df.start_up_date.isna() - ].start_up_date.to_numpy() + pool = mastr_gdf.loc[ + ~mastr_gdf.commissioning_date.isna() + ].commissioning_date.to_numpy() - size = len(mastr_df) - len(pool) + size = len(mastr_gdf) - len(pool) if size > 0: - np.random.seed(seed) + rng = default_rng(seed=seed) - choice = np.random.choice( + choice = rng.choice( pool, size=size, replace=False, ) - mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[ - mastr_df.start_up_date.isna() - ].assign(start_up_date=choice) + mastr_gdf.loc[mastr_gdf.commissioning_date.isna()] = mastr_gdf.loc[ + mastr_gdf.commissioning_date.isna() + ].assign(commissioning_date=choice) logger.info( f"Randomly and weigthed added start-up date to {size} generators." 
) - mastr_df = mastr_df.assign( - start_up_date=pd.to_datetime(mastr_df.start_up_date, utc=True) + mastr_gdf = mastr_gdf.assign( + commissioning_date=pd.to_datetime( + mastr_gdf.commissioning_date, utc=True + ) ) - end_len = len(mastr_df) + end_len = len(mastr_gdf) logger.debug( f"Dropped {init_len - end_len} " f"({((init_len - end_len) / init_len) * 100:g}%)" f" of {init_len} rows from MaStR DataFrame." ) - return mastr_df - - -def zip_and_municipality_from_standort( - standort: str, - verbose: bool = False, -) -> tuple[str, bool]: - """ - Get zip code and municipality from Standort string split into a list. - Parameters - ----------- - standort : str - Standort as given from MaStR data. - verbose : bool - Logs additional info if True. - Returns - ------- - str - Standort with only the zip code and municipality - as well a ', Germany' added. - """ - if verbose: - logger.debug(f"Uncleaned String: {standort}") - - standort_list = standort.split() - - found = False - count = 0 - - for count, elem in enumerate(standort_list): - if len(elem) != 5: - continue - if not elem.isnumeric(): - continue - - found = True - - break - - if found: - cleaned_str = " ".join(standort_list[count:]) - - if verbose: - logger.debug(f"Cleaned String: {cleaned_str}") - - return cleaned_str, found - - logger.warning( - "Couldn't identify zip code. This entry will be dropped." - f" Original standort: {standort}." - ) - - return standort, found - - -def most_plausible( - p_tub: tuple, - min_realistic_pv_cap: int | float, -) -> float: - """ - Try to determine the most plausible capacity. - Try to determine the most plausible capacity from a given - generator from MaStR data. - Parameters - ----------- - p_tub : tuple - Tuple containing the different capacities given in - the MaStR data. - min_realistic_pv_cap : int or float - Minimum capacity, which is considered to be realistic. - Returns - ------- - float - Capacity of the generator estimated as the most realistic. 
- """ - count = Counter(p_tub).most_common(3) - - if len(count) == 1: - return count[0][0] - - val1 = count[0][0] - val2 = count[1][0] - - if len(count) == 2: - min_val = min(val1, val2) - max_val = max(val1, val2) - else: - val3 = count[2][0] - - min_val = min(val1, val2, val3) - max_val = max(val1, val2, val3) - - if min_val < min_realistic_pv_cap: - return max_val - - return min_val - - -def geocoder( - user_agent: str, - min_delay_seconds: int, -) -> RateLimiter: - """ - Setup Nominatim geocoding class. - Parameters - ----------- - user_agent : str - The app name. - min_delay_seconds : int - Delay in seconds to use between requests to Nominatim. - A minimum of 1 is advised. - Returns - ------- - geopy.extra.rate_limiter.RateLimiter - Nominatim RateLimiter geocoding class to use for geocoding. - """ - locator = Nominatim(user_agent=user_agent) - return RateLimiter( - locator.geocode, - min_delay_seconds=min_delay_seconds, - ) - - -def geocoding_data( - clean_mastr_df: pd.DataFrame, -) -> pd.DataFrame: - """ - Setup DataFrame to geocode. - Parameters - ----------- - clean_mastr_df : pandas.DataFrame - DataFrame containing cleaned MaStR data. - Returns - ------- - pandas.DataFrame - DataFrame containing all unique combinations of - zip codes with municipalities for geocoding. - """ - return pd.DataFrame( - data=clean_mastr_df.zip_and_municipality.unique(), - columns=["zip_and_municipality"], - ) - - -@timer_func -def geocode_data( - geocoding_df: pd.DataFrame, - ratelimiter: RateLimiter, - epsg: int, -) -> gpd.GeoDataFrame: - """ - Geocode zip code and municipality. - Extract latitude, longitude and altitude. - Transfrom latitude and longitude to shapely - Point and return a geopandas GeoDataFrame. - Parameters - ----------- - geocoding_df : pandas.DataFrame - DataFrame containing all unique combinations of - zip codes with municipalities for geocoding. 
- ratelimiter : geopy.extra.rate_limiter.RateLimiter - Nominatim RateLimiter geocoding class to use for geocoding. - epsg : int - EPSG ID to use as CRS. - Returns - ------- - geopandas.GeoDataFrame - GeoDataFrame containing all unique combinations of - zip codes with municipalities with matching geolocation. - """ - logger.info(f"Geocoding {len(geocoding_df)} locations.") - - geocode_df = geocoding_df.assign( - location=geocoding_df.zip_and_municipality.apply(ratelimiter) - ) - - geocode_df = geocode_df.assign( - point=geocode_df.location.apply( - lambda loc: tuple(loc.point) if loc else None - ) - ) - - geocode_df[["latitude", "longitude", "altitude"]] = pd.DataFrame( - geocode_df.point.tolist(), index=geocode_df.index - ) - - return gpd.GeoDataFrame( - geocode_df, - geometry=gpd.points_from_xy(geocode_df.longitude, geocode_df.latitude), - crs=f"EPSG:{epsg}", - ) - - -def merge_geocode_with_mastr( - clean_mastr_df: pd.DataFrame, geocode_gdf: gpd.GeoDataFrame -) -> gpd.GeoDataFrame: - """ - Merge geometry to original mastr data. - Parameters - ----------- - clean_mastr_df : pandas.DataFrame - DataFrame containing cleaned MaStR data. - geocode_gdf : geopandas.GeoDataFrame - GeoDataFrame containing all unique combinations of - zip codes with municipalities with matching geolocation. - Returns - ------- - gepandas.GeoDataFrame - GeoDataFrame containing cleaned MaStR data with - matching geolocation from geocoding. - """ - return gpd.GeoDataFrame( - clean_mastr_df.merge( - geocode_gdf[["zip_and_municipality", "geometry"]], - how="left", - left_on="zip_and_municipality", - right_on="zip_and_municipality", - ), - crs=geocode_gdf.crs, - ).set_index(clean_mastr_df.index) - - -def drop_invalid_entries_from_gdf( - gdf: gpd.GeoDataFrame, -) -> gpd.GeoDataFrame: - """ - Drop invalid entries from geopandas GeoDataFrame. - TODO: how to omit the logging from geos here??? - Parameters - ----------- - gdf : geopandas.GeoDataFrame - GeoDataFrame to be checked for validity. 
- Returns - ------- - gepandas.GeoDataFrame - GeoDataFrame with rows with invalid geometries - dropped. - """ - valid_gdf = gdf.loc[gdf.is_valid] - - logger.debug( - f"{len(gdf) - len(valid_gdf)} " - f"({(len(gdf) - len(valid_gdf)) / len(gdf) * 100:g}%) " - f"of {len(gdf)} values were invalid and are dropped." - ) - - return valid_gdf + return mastr_gdf @timer_func @@ -707,14 +293,14 @@ def municipality_data() -> gpd.GeoDataFrame: @timer_func def add_ags_to_gens( - valid_mastr_gdf: gpd.GeoDataFrame, + mastr_gdf: gpd.GeoDataFrame, municipalities_gdf: gpd.GeoDataFrame, ) -> gpd.GeoDataFrame: """ Add information about AGS ID to generators. Parameters ----------- - valid_mastr_gdf : geopandas.GeoDataFrame + mastr_gdf : geopandas.GeoDataFrame GeoDataFrame with valid and cleaned MaStR data. municipalities_gdf : geopandas.GeoDataFrame GeoDataFrame with municipality data. @@ -724,7 +310,7 @@ def add_ags_to_gens( GeoDataFrame with valid and cleaned MaStR data with AGS ID added. """ - return valid_mastr_gdf.sjoin( + return mastr_gdf.sjoin( municipalities_gdf, how="left", predicate="intersects", @@ -732,13 +318,13 @@ def add_ags_to_gens( def drop_gens_outside_muns( - valid_mastr_gdf: gpd.GeoDataFrame, + mastr_gdf: gpd.GeoDataFrame, ) -> gpd.GeoDataFrame: """ Drop all generators outside of municipalities. Parameters ----------- - valid_mastr_gdf : geopandas.GeoDataFrame + mastr_gdf : geopandas.GeoDataFrame GeoDataFrame with valid and cleaned MaStR data. Returns ------- @@ -746,77 +332,18 @@ def drop_gens_outside_muns( GeoDataFrame with valid and cleaned MaStR data with generatos without an AGS ID dropped. 
""" - gdf = valid_mastr_gdf.loc[~valid_mastr_gdf.ags.isna()] + gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()] logger.debug( - f"{len(valid_mastr_gdf) - len(gdf)} " - f"({(len(valid_mastr_gdf) - len(gdf)) / len(valid_mastr_gdf) * 100:g}%) " - f"of {len(valid_mastr_gdf)} values are outside of the municipalities" + f"{len(mastr_gdf) - len(gdf)} (" + f"{(len(mastr_gdf) - len(gdf)) / len(mastr_gdf) * 100:g}%)" + f" of {len(mastr_gdf)} values are outside of the municipalities" " and are therefore dropped." ) return gdf -class EgonMastrPvRoofGeocoded(Base): - __tablename__ = "egon_mastr_pv_roof_geocoded" - __table_args__ = {"schema": "supply"} - - zip_and_municipality = Column(String, primary_key=True, index=True) - location = Column(String) - point = Column(String) - latitude = Column(Float) - longitude = Column(Float) - altitude = Column(Float) - geometry = Column(Geometry(srid=EPSG)) - - -def create_geocoded_table(geocode_gdf): - """ - Create geocoded table mastr pv rooftop - Parameters - ----------- - geocode_gdf : geopandas.GeoDataFrame - GeoDataFrame containing geocoding information on pv rooftop locations. - """ - EgonMastrPvRoofGeocoded.__table__.drop(bind=engine, checkfirst=True) - EgonMastrPvRoofGeocoded.__table__.create(bind=engine, checkfirst=True) - - geocode_gdf.to_postgis( - name=EgonMastrPvRoofGeocoded.__table__.name, - schema=EgonMastrPvRoofGeocoded.__table__.schema, - con=db.engine(), - if_exists="append", - index=False, - # dtype={} - ) - - -def geocoded_data_from_db( - epsg: str | int, -) -> gpd.GeoDataFrame: - """ - Read OSM buildings data from eGo^n Database. - Parameters - ----------- - to_crs : pyproj.crs.crs.CRS - CRS to transform geometries to. - Returns - ------- - geopandas.GeoDataFrame - GeoDataFrame containing OSM buildings data. 
- """ - with db.session_scope() as session: - query = session.query( - EgonMastrPvRoofGeocoded.zip_and_municipality, - EgonMastrPvRoofGeocoded.geometry, - ) - - return gpd.read_postgis( - query.statement, query.session.bind, geom_col="geometry" - ).to_crs(f"EPSG:{epsg}") - - def load_mastr_data(): """Read PV rooftop data from MaStR CSV Note: the source will be replaced as soon as the MaStR data is available @@ -826,32 +353,22 @@ def load_mastr_data(): geopandas.GeoDataFrame GeoDataFrame containing MaStR data with geocoded locations. """ - mastr_df = mastr_data( + mastr_gdf = mastr_data( MASTR_INDEX_COL, - MASTR_RELEVANT_COLS, - MASTR_DTYPES, - MASTR_PARSE_DATES, ) - clean_mastr_df = clean_mastr_data( - mastr_df, + clean_mastr_gdf = clean_mastr_data( + mastr_gdf, max_realistic_pv_cap=MAX_REALISTIC_PV_CAP, min_realistic_pv_cap=MIN_REALISTIC_PV_CAP, seed=SEED, - rounding=ROUNDING, ) - geocode_gdf = geocoded_data_from_db(EPSG) - - mastr_gdf = merge_geocode_with_mastr(clean_mastr_df, geocode_gdf) - - valid_mastr_gdf = drop_invalid_entries_from_gdf(mastr_gdf) - municipalities_gdf = municipality_data() - valid_mastr_gdf = add_ags_to_gens(valid_mastr_gdf, municipalities_gdf) + clean_mastr_gdf = add_ags_to_gens(clean_mastr_gdf, municipalities_gdf) - return drop_gens_outside_muns(valid_mastr_gdf) + return drop_gens_outside_muns(clean_mastr_gdf) class OsmBuildingsFiltered(Base): @@ -1649,21 +1166,6 @@ def cap_per_bus_id( return df.loc[df.control != "Slack"] - # overlay_gdf = overlay_gdf.assign(capacity=np.nan) - # - # for cap, nuts in scenario_df[["capacity", "nuts"]].itertuples(index=False): - # nuts_gdf = overlay_gdf.loc[overlay_gdf.nuts == nuts] - # - # capacity = nuts_gdf.building_area.multiply( - # cap / nuts_gdf.building_area.sum() - # ) - # - # overlay_gdf.loc[nuts_gdf.index] = overlay_gdf.loc[ - # nuts_gdf.index - # ].assign(capacity=capacity.multiply(conversion).to_numpy()) - # - # return overlay_gdf[["bus_id", "capacity"]].groupby("bus_id").sum() - def 
determine_end_of_life_gens( mastr_gdf: gpd.GeoDataFrame, @@ -1689,7 +1191,7 @@ def determine_end_of_life_gens( before = mastr_gdf.capacity.sum() mastr_gdf = mastr_gdf.assign( - age=scenario_timestamp - mastr_gdf.start_up_date + age=scenario_timestamp - mastr_gdf.commissioning_date ) mastr_gdf = mastr_gdf.assign( @@ -1739,9 +1241,9 @@ def calculate_max_pv_cap_per_building( "capacity", "end_of_life", "building_id", - "EinheitlicheAusrichtungUndNeigungswinkel", - "Hauptausrichtung", - "HauptausrichtungNeigungswinkel", + "orientation_uniform", + "orientation_primary", + "orientation_primary_angle", ] ], how="left", @@ -1872,9 +1374,9 @@ def probabilities( ] if properties is None: properties = [ - "EinheitlicheAusrichtungUndNeigungswinkel", - "Hauptausrichtung", - "HauptausrichtungNeigungswinkel", + "orientation_uniform", + "orientation_primary", + "orientation_primary_angle", "load_factor", ] @@ -2504,7 +2006,7 @@ def voltage_levels(p: float) -> int: return buildings_gdf -def add_start_up_date( +def add_commissioning_date( buildings_gdf: gpd.GeoDataFrame, start: pd.Timestamp, end: pd.Timestamp, @@ -2533,7 +2035,7 @@ def add_start_up_date( date_range = pd.date_range(start=start, end=end, freq="1D") return buildings_gdf.assign( - start_up_date=rng.choice(date_range, size=len(buildings_gdf)) + commissioning_date=rng.choice(date_range, size=len(buildings_gdf)) ) @@ -2608,7 +2110,7 @@ def allocate_scenarios( mastr_gdf, cap_ranges=CAP_RANGES, min_building_size=MIN_BUILDING_SIZE, - upper_quantile=UPPER_QUNATILE, + upper_quantile=UPPER_QUANTILE, lower_quantile=LOWER_QUANTILE, ) @@ -2634,9 +2136,9 @@ def allocate_scenarios( ) return ( - add_start_up_date( + add_commissioning_date( meta_buildings_gdf, - start=last_scenario_gdf.start_up_date.max(), + start=last_scenario_gdf.commissioning_date.max(), end=SCENARIO_TIMESTAMP[scenario], seed=SEED, ), @@ -2670,7 +2172,7 @@ def create_scenario_table(buildings_gdf): bind=engine, checkfirst=True ) - 
buildings_gdf.rename(columns=COLS_TO_RENAME).assign( + buildings_gdf.assign( capacity=buildings_gdf.capacity.div(10**3) # kW -> MW )[COLS_TO_EXPORT].reset_index().to_sql( name=EgonPowerPlantPvRoofBuildingScenario.__table__.name, @@ -2681,36 +2183,6 @@ def create_scenario_table(buildings_gdf): ) -def geocode_mastr_data(): - """ - Read PV rooftop data from MaStR CSV - TODO: the source will be replaced as soon as the MaStR data is available - in DB. - """ - mastr_df = mastr_data( - MASTR_INDEX_COL, - MASTR_RELEVANT_COLS, - MASTR_DTYPES, - MASTR_PARSE_DATES, - ) - - clean_mastr_df = clean_mastr_data( - mastr_df, - max_realistic_pv_cap=MAX_REALISTIC_PV_CAP, - min_realistic_pv_cap=MIN_REALISTIC_PV_CAP, - seed=SEED, - rounding=ROUNDING, - ) - - geocoding_df = geocoding_data(clean_mastr_df) - - ratelimiter = geocoder(USER_AGENT, MIN_DELAY_SECONDS) - - geocode_gdf = geocode_data(geocoding_df, ratelimiter, EPSG) - - create_geocoded_table(geocode_gdf) - - def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: sql = """ SELECT building_id, zensus_population_id @@ -2748,7 +2220,9 @@ def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return buildings_gdf -def add_bus_ids_sq(buildings_gdf: gpd.GeoDataFrame,) -> gpd.GeoDataFrame: +def add_bus_ids_sq( + buildings_gdf: gpd.GeoDataFrame, +) -> gpd.GeoDataFrame: """Add bus ids for status_quo units Parameters @@ -2788,7 +2262,7 @@ def pv_rooftop_to_buildings(): all_buildings_gdf = ( desagg_mastr_gdf.assign(scenario="status_quo") .reset_index() - .rename(columns={"geometry": "geom", "EinheitMastrNummer": "gens_id"}) + .rename(columns={"geometry": "geom", "gens_id": "gens_id"}) ) scenario_buildings_gdf = all_buildings_gdf.copy() From 53a62de0a5f004fdf12072719156bff9041e8b16 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Mon, 27 Feb 2023 16:55:26 +0100 Subject: [PATCH 11/21] match MW and kW values --- .../power_plants/pv_rooftop_buildings.py | 61 +++++++++---------- 1 file changed, 
28 insertions(+), 33 deletions(-) diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index 53136760e..636763eeb 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -68,8 +68,6 @@ SEED = int(config.settings()["egon-data"]["--random-seed"]) # TODO: move to yml -# mastr datay - MASTR_INDEX_COL = "gens_id" EPSG = 4326 @@ -95,7 +93,7 @@ # Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp # https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf -MODUL_CAP = 0.4 # kWp +MODUL_CAP = 0.4 / 10**3 # MWp MODUL_SIZE = 1.096 * 1.754 # m² PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE @@ -112,9 +110,9 @@ ROOF_FACTOR = 0.5 CAP_RANGES = [ - (0, 30), - (30, 100), - (100, float("inf")), + (0, 30 / 10**3), + (30 / 10**3, 100 / 10**3), + (100 / 10**3, float("inf")), ] MIN_BUILDING_SIZE = 10.0 @@ -199,8 +197,6 @@ def clean_mastr_data( * Drop MaStR ID duplicates. * Drop generators with implausible capacities. - * Drop generators without any kind of start-up date. - * Clean up site column and capacity. Parameters ----------- @@ -1160,7 +1156,6 @@ def cap_per_bus_id( WHERE carrier = 'solar_rooftop' AND scn_name = '{scenario}' """ - # TODO: woher kommen die Slack rows??? df = db.select_dataframe(sql, index_col="bus_id") @@ -1216,6 +1211,7 @@ def calculate_max_pv_cap_per_building( ) -> gpd.GeoDataFrame: """ Calculate the estimated maximum possible PV capacity per building. 
+ Parameters ----------- buildings_gdf : geopandas.GeoDataFrame @@ -1368,9 +1364,9 @@ def probabilities( """ if cap_ranges is None: cap_ranges = [ - (0, 30), - (30, 100), - (100, float("inf")), + (0, 30 / 10**3), + (30 / 10**3, 100 / 10**3), + (100 / 10**3, float("inf")), ] if properties is None: properties = [ @@ -1408,6 +1404,7 @@ def cap_share_per_cap_range( """ Calculate the share of PV capacity from the total PV capacity within capacity ranges. + Parameters ----------- mastr_gdf : geopandas.GeoDataFrame @@ -1423,9 +1420,9 @@ def cap_share_per_cap_range( """ if cap_ranges is None: cap_ranges = [ - (0, 30), - (30, 100), - (100, float("inf")), + (0, 30 / 10**3), + (30 / 10**3, 100 / 10**3), + (100 / 10**3, float("inf")), ] cap_share_dict = {} @@ -1467,9 +1464,9 @@ def mean_load_factor_per_cap_range( """ if cap_ranges is None: cap_ranges = [ - (0, 30), - (30, 100), - (100, float("inf")), + (0, 30 / 10**3), + (30 / 10**3, 100 / 10**3), + (100 / 10**3, float("inf")), ] load_factor_dict = {} @@ -1518,9 +1515,9 @@ def building_area_range_per_cap_range( """ if cap_ranges is None: cap_ranges = [ - (0, 30), - (30, 100), - (100, float("inf")), + (0, 30 / 10**3), + (30 / 10**3, 100 / 10**3), + (100 / 10**3, float("inf")), ] building_area_range_dict = {} @@ -1747,6 +1744,7 @@ def desaggregate_pv( ) -> gpd.GeoDataFrame: """ Desaggregate PV capacity on buildings within a given grid district. + Parameters ----------- buildings_gdf : geopandas.GeoDataFrame @@ -1818,7 +1816,7 @@ def desaggregate_pv( continue - pv_target = cap_df.at[bus_id, "capacity"] * 1000 + pv_target = cap_df.at[bus_id, "capacity"] logger.debug(f"pv_target: {pv_target}") @@ -1837,8 +1835,8 @@ def desaggregate_pv( if pot_buildings_gdf.max_cap.sum() < pv_missing: logger.error( f"In grid {bus_id} there is less PV potential (" - f"{pot_buildings_gdf.max_cap.sum():g} kW) than allocated PV " - f"capacity ({pv_missing:g} kW). 
The average roof utilization " + f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV " + f"capacity ({pv_missing:g} MW). The average roof utilization " f"will be very high." ) @@ -1881,8 +1879,7 @@ def desaggregate_pv( logger.debug("Desaggregated scenario.") logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}") logger.debug( - f"Generator capacity: " - f"{allocated_buildings_gdf.capacity.sum() / 1000: g}" + f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}" ) return gpd.GeoDataFrame( @@ -2000,8 +1997,8 @@ def voltage_levels(p: float) -> int: # Infer missing values mask = buildings_gdf.voltage_level.isna() buildings_gdf.loc[mask, "voltage_level"] = buildings_gdf.loc[ - mask - ].capacity.apply(voltage_levels) + mask, "capacity" + ].apply(voltage_levels) return buildings_gdf @@ -2172,9 +2169,7 @@ def create_scenario_table(buildings_gdf): bind=engine, checkfirst=True ) - buildings_gdf.assign( - capacity=buildings_gdf.capacity.div(10**3) # kW -> MW - )[COLS_TO_EXPORT].reset_index().to_sql( + buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql( name=EgonPowerPlantPvRoofBuildingScenario.__table__.name, schema=EgonPowerPlantPvRoofBuildingScenario.__table__.schema, con=db.engine(), @@ -2210,8 +2205,8 @@ def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: if buildings_gdf.weather_cell_id.isna().any(): missing = buildings_gdf.loc[ - buildings_gdf.weather_cell_id.isna() - ].building_id.tolist() + buildings_gdf.weather_cell_id.isna(), "building_id" + ].tolist() raise ValueError( f"Following buildings don't have a weather cell id: {missing}" From ed2a6bf6137cffe058aa21123145a37559b987e8 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Tue, 28 Feb 2023 11:54:36 +0100 Subject: [PATCH 12/21] bugfix unit MW kW --- src/egon/data/datasets/power_plants/pv_rooftop.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/egon/data/datasets/power_plants/pv_rooftop.py b/src/egon/data/datasets/power_plants/pv_rooftop.py 
index 2a2f85600..11dfd0ad1 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop.py @@ -206,7 +206,6 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): # district than there is rooftop potential max_cap_per_bus_df = ( valid_buildings_gdf[["max_cap", "bus_id"]].groupby("bus_id").sum() - / 1000 * MAX_THEORETICAL_PV_OCCUPANCY ) From 2eaa45a65f28e15ffc891abc1220a5ace6aaa8aa Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Tue, 28 Feb 2023 12:57:56 +0100 Subject: [PATCH 13/21] debug double id column --- .../power_plants/pv_rooftop_buildings.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index 636763eeb..1bdc31445 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -178,7 +178,7 @@ def mastr_data( gdf = gpd.read_postgis( query.statement, query.session.bind, index_col=index_col - ) + ).drop(columns="id") logger.debug("MaStR data loaded.") @@ -2083,6 +2083,9 @@ def allocate_scenarios( ROOF_FACTOR, ) + print(mastr_gdf.columns.tolist()) + print(buildings_gdf.columns.tolist()) + mastr_gdf = calculate_building_load_factor( mastr_gdf, buildings_gdf, @@ -2143,7 +2146,7 @@ def allocate_scenarios( ) -class EgonPowerPlantPvRoofBuildingScenario(Base): +class EgonPowerPlantPvRoofBuilding(Base): __tablename__ = "egon_power_plants_pv_roof_building" __table_args__ = {"schema": "supply"} @@ -2153,25 +2156,21 @@ class EgonPowerPlantPvRoofBuildingScenario(Base): building_id = Column(Integer) gens_id = Column(String, nullable=True) capacity = Column(Float) - einheitliche_ausrichtung_und_neigungswinkel = Column(Float) - hauptausrichtung = Column(String) - hauptausrichtung_neigungswinkel = Column(String) + orientation_uniform = Column(Float) + orientation_primary = 
Column(String) + orientation_primary_angle = Column(String) voltage_level = Column(Integer) weather_cell_id = Column(Integer) def create_scenario_table(buildings_gdf): """Create mapping table pv_unit <-> building for scenario""" - EgonPowerPlantPvRoofBuildingScenario.__table__.drop( - bind=engine, checkfirst=True - ) - EgonPowerPlantPvRoofBuildingScenario.__table__.create( - bind=engine, checkfirst=True - ) + EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True) + EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True) buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql( - name=EgonPowerPlantPvRoofBuildingScenario.__table__.name, - schema=EgonPowerPlantPvRoofBuildingScenario.__table__.schema, + name=EgonPowerPlantPvRoofBuilding.__table__.name, + schema=EgonPowerPlantPvRoofBuilding.__table__.schema, con=db.engine(), if_exists="append", index=False, From d8971f5bd798f59ede9cb778a3e94c7bf821b87f Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Tue, 28 Feb 2023 13:21:15 +0100 Subject: [PATCH 14/21] removed double determination of voltage level --- .../power_plants/pv_rooftop_buildings.py | 62 +------------------ 1 file changed, 1 insertion(+), 61 deletions(-) diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index 1bdc31445..c22aedbb3 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -1946,63 +1946,6 @@ def add_buildings_meta_data( return buildings_gdf -def add_voltage_level( - buildings_gdf: gpd.GeoDataFrame, -) -> gpd.GeoDataFrame: - """ - Get voltage level data from mastr table and assign to units. Infer missing - values derived from generator capacity to the power plants. - - Parameters - ----------- - buildings_gdf : geopandas.GeoDataFrame - GeoDataFrame containing OSM buildings data with desaggregated PV - plants. 
- Returns - ------- - geopandas.GeoDataFrame - GeoDataFrame containing OSM building data with voltage level per - generator. - """ - - def voltage_levels(p: float) -> int: - if p <= 100: - return 7 - elif p <= 200: - return 6 - elif p <= 5500: - return 5 - elif p <= 20000: - return 4 - elif p <= 120000: - return 3 - return 1 - - # Join mastr table - with db.session_scope() as session: - query = session.query( - EgonPowerPlantsPv.gens_id, - EgonPowerPlantsPv.voltage_level, - ) - voltage_levels_df = pd.read_sql( - query.statement, query.session.bind, index_col=None - ) - buildings_gdf = buildings_gdf.merge( - voltage_levels_df, - left_on="gens_id", - right_on="gens_id", - how="left", - ) - - # Infer missing values - mask = buildings_gdf.voltage_level.isna() - buildings_gdf.loc[mask, "voltage_level"] = buildings_gdf.loc[ - mask, "capacity" - ].apply(voltage_levels) - - return buildings_gdf - - def add_commissioning_date( buildings_gdf: gpd.GeoDataFrame, start: pd.Timestamp, @@ -2083,9 +2026,6 @@ def allocate_scenarios( ROOF_FACTOR, ) - print(mastr_gdf.columns.tolist()) - print(buildings_gdf.columns.tolist()) - mastr_gdf = calculate_building_load_factor( mastr_gdf, buildings_gdf, @@ -2294,4 +2234,4 @@ def pv_rooftop_to_buildings(): all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf) # export scenario - create_scenario_table(add_voltage_level(all_buildings_gdf)) + create_scenario_table(all_buildings_gdf) From 48c78b1818646bce4121f0209e6700b231b3c08c Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 11:39:43 +0100 Subject: [PATCH 15/21] added combustion mastr data --- src/egon/data/datasets.yml | 1 + src/egon/data/datasets/power_plants/mastr.py | 15 +++++++- .../datasets/power_plants/mastr_db_classes.py | 37 ++++++++++++++++++- .../power_plants/pv_rooftop_buildings.py | 2 +- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index 02888be81..e969f1a5b 100755 --- 
a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -367,6 +367,7 @@ power_plants: mastr_biomass: "bnetza_mastr_biomass_cleaned.csv" mastr_hydro: "bnetza_mastr_hydro_cleaned.csv" mastr_location: "location_elec_generation_raw.csv" + mastr_combustion: "bnetza_mastr_combustion_cleaned.csv" mastr_combustion_without_chp: "supply.egon_mastr_conventional_without_chp" mastr_storage: "bnetza_mastr_storage_cleaned.csv" mastr_gsgk: "bnetza_mastr_gsgk_cleaned.csv" diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 9f56feb9c..3c8e1a817 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -35,6 +35,7 @@ from egon.data.datasets.power_plants.mastr_db_classes import ( EgonMastrGeocoded, EgonPowerPlantsBiomass, + EgonPowerPlantsCombustion, EgonPowerPlantsHydro, EgonPowerPlantsPv, EgonPowerPlantsWind, @@ -224,7 +225,7 @@ def import_mastr() -> None: }, "biomass": { "Technologie": "technology", - "Hauptbrennstoff": "fuel_name", + "Hauptbrennstoff": "main_fuel", "Biomasseart": "fuel_type", "ThermischeNutzleistung": "th_capacity", }, @@ -232,6 +233,13 @@ def import_mastr() -> None: "ArtDerWasserkraftanlage": "plant_type", "ArtDesZuflusses": "water_origin", }, + "combustion": { + "Energietraeger": "carrier", + "Hauptbrennstoff": "main_fuel", + "WeitererHauptbrennstoff": "other_main_fuel", + "Technologie": "technology", + "ThermischeNutzleistung": "th_capacity", + }, } source_files = { @@ -239,12 +247,15 @@ def import_mastr() -> None: "wind": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"], "biomass": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_biomass"], "hydro": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_hydro"], + "combustion": WORKING_DIR_MASTR_NEW + / cfg["sources"]["mastr_combustion"], } target_tables = { "pv": EgonPowerPlantsPv, "wind": EgonPowerPlantsWind, "biomass": EgonPowerPlantsBiomass, "hydro": EgonPowerPlantsHydro, + "combustion": 
EgonPowerPlantsCombustion, } vlevel_mapping = { "Höchstspannung": 1, @@ -271,7 +282,7 @@ def import_mastr() -> None: ) # import units - technologies = ["pv", "wind", "biomass", "hydro"] + technologies = ["pv", "wind", "biomass", "hydro", "combustion"] for tech in technologies: # read units logger.info(f"===== Importing MaStR dataset: {tech} =====") diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py index 601b93d70..c31ce6e97 100644 --- a/src/egon/data/datasets/power_plants/mastr_db_classes.py +++ b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -123,7 +123,7 @@ class EgonPowerPlantsBiomass(Base): zip_and_municipality = Column(String, nullable=True) technology = Column(String(45), nullable=True) # Technologie - fuel_name = Column(String(52), nullable=True) # Hauptbrennstoff + main_fuel = Column(String(52), nullable=True) # Hauptbrennstoff fuel_type = Column(String(19), nullable=True) # Biomasseart capacity = Column(Float, nullable=True) # Nettonennleistung @@ -164,3 +164,38 @@ class EgonPowerPlantsHydro(Base): geometry_geocoded = Column(Boolean) geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsCombustion(Base): + __tablename__ = "egon_power_plants_combustion" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_combustion_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + carrier = 
Column(String) # Energietraeger + main_fuel = Column(String) # Hauptbrennstoff + other_main_fuel = Column(String) # WeitererHauptbrennstoff + technology = Column(String) # Technologie + + plant_type = Column(String(39), nullable=True) # ArtDerWasserkraftanlage + water_origin = Column(String(20), nullable=True) # ArtDesZuflusses + + capacity = Column(Float, nullable=True) # Nettonennleistung + th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index c22aedbb3..9794b19dc 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -2196,7 +2196,7 @@ def pv_rooftop_to_buildings(): all_buildings_gdf = ( desagg_mastr_gdf.assign(scenario="status_quo") .reset_index() - .rename(columns={"geometry": "geom", "gens_id": "gens_id"}) + .rename(columns={"geometry": "geom"}) ) scenario_buildings_gdf = all_buildings_gdf.copy() From 16809248f55891bb395c41ccdd1cde91a296facc Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 11:56:21 +0100 Subject: [PATCH 16/21] added gsgk mastr data --- src/egon/data/datasets/power_plants/mastr.py | 9 ++++- .../datasets/power_plants/mastr_db_classes.py | 35 +++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 3c8e1a817..77e9cb405 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -36,6 +36,7 @@ EgonMastrGeocoded, EgonPowerPlantsBiomass, 
EgonPowerPlantsCombustion, + EgonPowerPlantsGsgk, EgonPowerPlantsHydro, EgonPowerPlantsPv, EgonPowerPlantsWind, @@ -240,6 +241,10 @@ def import_mastr() -> None: "Technologie": "technology", "ThermischeNutzleistung": "th_capacity", }, + "gsgk": { + "Energietraeger": "carrier", + "Technologie": "technology", + }, } source_files = { @@ -249,6 +254,7 @@ def import_mastr() -> None: "hydro": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_hydro"], "combustion": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_combustion"], + "gsgk": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_gsgk"], } target_tables = { "pv": EgonPowerPlantsPv, @@ -256,6 +262,7 @@ def import_mastr() -> None: "biomass": EgonPowerPlantsBiomass, "hydro": EgonPowerPlantsHydro, "combustion": EgonPowerPlantsCombustion, + "gsgk": EgonPowerPlantsGsgk, } vlevel_mapping = { "Höchstspannung": 1, @@ -282,7 +289,7 @@ def import_mastr() -> None: ) # import units - technologies = ["pv", "wind", "biomass", "hydro", "combustion"] + technologies = ["pv", "wind", "biomass", "hydro", "combustion", "gsgk"] for tech in technologies: # read units logger.info(f"===== Importing MaStR dataset: {tech} =====") diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py index c31ce6e97..dcbf8a473 100644 --- a/src/egon/data/datasets/power_plants/mastr_db_classes.py +++ b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -70,7 +70,6 @@ class EgonPowerPlantsPv(Base): voltage_level_inferred = Column(Boolean, nullable=True) geometry_geocoded = Column(Boolean) - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -102,7 +101,6 @@ class EgonPowerPlantsWind(Base): voltage_level_inferred = Column(Boolean, nullable=True) geometry_geocoded = Column(Boolean) - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -133,7 +131,6 @@ class EgonPowerPlantsBiomass(Base): voltage_level_inferred = Column(Boolean, nullable=True) geometry_geocoded = 
Column(Boolean) - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -162,7 +159,6 @@ class EgonPowerPlantsHydro(Base): voltage_level_inferred = Column(Boolean, nullable=True) geometry_geocoded = Column(Boolean) - geom = Column(Geometry("POINT", 4326), index=True, nullable=True) @@ -187,9 +183,6 @@ class EgonPowerPlantsCombustion(Base): other_main_fuel = Column(String) # WeitererHauptbrennstoff technology = Column(String) # Technologie - plant_type = Column(String(39), nullable=True) # ArtDerWasserkraftanlage - water_origin = Column(String(20), nullable=True) # ArtDesZuflusses - capacity = Column(Float, nullable=True) # Nettonennleistung th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung feedin_type = Column(String(47), nullable=True) # Einspeisungsart @@ -197,5 +190,33 @@ class EgonPowerPlantsCombustion(Base): voltage_level_inferred = Column(Boolean, nullable=True) geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsGsgk(Base): + __tablename__ = "egon_power_plants_gsgk" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_gsgk_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + carrier = Column(String) # Energietraeger + technology = Column(String) # Technologie + + capacity = Column(Float, nullable=True) # Nettonennleistung + th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung + 
feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + geometry_geocoded = Column(Boolean) geom = Column(Geometry("POINT", 4326), index=True, nullable=True) From 4e7b8cb322e278a4b87e6b5b4f5fe0cc3f4b230e Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 13:15:28 +0100 Subject: [PATCH 17/21] added nuclear mastr data --- src/egon/data/datasets.yml | 11 +++---- src/egon/data/datasets/power_plants/mastr.py | 18 +++++++++++- .../datasets/power_plants/mastr_db_classes.py | 29 +++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml index e969f1a5b..74cc78e64 100755 --- a/src/egon/data/datasets.yml +++ b/src/egon/data/datasets.yml @@ -362,15 +362,16 @@ hotmaps_current_policy_scenario_heat_demands_buildings: power_plants: sources: - mastr_pv: "bnetza_mastr_solar_cleaned.csv" - mastr_wind: "bnetza_mastr_wind_cleaned.csv" mastr_biomass: "bnetza_mastr_biomass_cleaned.csv" - mastr_hydro: "bnetza_mastr_hydro_cleaned.csv" - mastr_location: "location_elec_generation_raw.csv" mastr_combustion: "bnetza_mastr_combustion_cleaned.csv" mastr_combustion_without_chp: "supply.egon_mastr_conventional_without_chp" - mastr_storage: "bnetza_mastr_storage_cleaned.csv" mastr_gsgk: "bnetza_mastr_gsgk_cleaned.csv" + mastr_hydro: "bnetza_mastr_hydro_cleaned.csv" + mastr_location: "location_elec_generation_raw.csv" + mastr_nuclear: "bnetza_mastr_nuclear_cleaned.csv" + mastr_pv: "bnetza_mastr_solar_cleaned.csv" + mastr_storage: "bnetza_mastr_storage_cleaned.csv" + mastr_wind: "bnetza_mastr_wind_cleaned.csv" capacities: "supply.egon_scenario_capacities" geom_germany: "boundaries.vg250_sta_union" geom_federal_states: "boundaries.vg250_lan" diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 77e9cb405..2e3b05ea8 100644 --- 
a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -38,6 +38,7 @@ EgonPowerPlantsCombustion, EgonPowerPlantsGsgk, EgonPowerPlantsHydro, + EgonPowerPlantsNuclear, EgonPowerPlantsPv, EgonPowerPlantsWind, ) @@ -245,6 +246,10 @@ def import_mastr() -> None: "Energietraeger": "carrier", "Technologie": "technology", }, + "nuclear": { + "Energietraeger": "carrier", + "Technologie": "technology", + }, } source_files = { @@ -255,6 +260,7 @@ def import_mastr() -> None: "combustion": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_combustion"], "gsgk": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_gsgk"], + "nuclear": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_nuclear"], } target_tables = { "pv": EgonPowerPlantsPv, @@ -263,6 +269,7 @@ def import_mastr() -> None: "hydro": EgonPowerPlantsHydro, "combustion": EgonPowerPlantsCombustion, "gsgk": EgonPowerPlantsGsgk, + "nuclear": EgonPowerPlantsNuclear, } vlevel_mapping = { "Höchstspannung": 1, @@ -289,7 +296,16 @@ def import_mastr() -> None: ) # import units - technologies = ["pv", "wind", "biomass", "hydro", "combustion", "gsgk"] + technologies = [ + "pv", + "wind", + "biomass", + "hydro", + "combustion", + "gsgk", + "nuclear", + ] + for tech in technologies: # read units logger.info(f"===== Importing MaStR dataset: {tech} =====") diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py index dcbf8a473..435f5f079 100644 --- a/src/egon/data/datasets/power_plants/mastr_db_classes.py +++ b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -220,3 +220,32 @@ class EgonPowerPlantsGsgk(Base): geometry_geocoded = Column(Boolean) geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsNuclear(Base): + __tablename__ = "egon_power_plants_nuclear" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_gsgk_seq"), primary_key=True) + bus_id = Column(Integer, 
nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, nullable=True) + + carrier = Column(String) # Energietraeger + technology = Column(String) # Technologie + + capacity = Column(Float, nullable=True) # Nettonennleistung + th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) From 2eb6a4b230f7a5ce82307977c5c95a57ee0122ca Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 13:24:33 +0100 Subject: [PATCH 18/21] added storage mastr data --- src/egon/data/datasets/power_plants/mastr.py | 12 +++++++ .../datasets/power_plants/mastr_db_classes.py | 33 ++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 2e3b05ea8..74180aa60 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -40,6 +40,7 @@ EgonPowerPlantsHydro, EgonPowerPlantsNuclear, EgonPowerPlantsPv, + EgonPowerPlantsStorage, EgonPowerPlantsWind, ) from egon.data.datasets.power_plants.pv_rooftop_buildings import ( @@ -250,6 +251,12 @@ def import_mastr() -> None: "Energietraeger": "carrier", "Technologie": "technology", }, + "storage": { + "Energietraeger": "carrier", + 
"Technologie": "technology", + "Batterietechnologie": "battery_type", + "Pumpspeichertechnologie": "pump_storage_type", + }, } source_files = { @@ -261,7 +268,9 @@ def import_mastr() -> None: / cfg["sources"]["mastr_combustion"], "gsgk": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_gsgk"], "nuclear": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_nuclear"], + "storage": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_storage"], } + target_tables = { "pv": EgonPowerPlantsPv, "wind": EgonPowerPlantsWind, @@ -270,7 +279,9 @@ def import_mastr() -> None: "combustion": EgonPowerPlantsCombustion, "gsgk": EgonPowerPlantsGsgk, "nuclear": EgonPowerPlantsNuclear, + "storage": EgonPowerPlantsStorage, } + vlevel_mapping = { "Höchstspannung": 1, "UmspannungZurHochspannung": 2, @@ -304,6 +315,7 @@ def import_mastr() -> None: "combustion", "gsgk", "nuclear", + "storage", ] for tech in technologies: diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py index 435f5f079..7891dd796 100644 --- a/src/egon/data/datasets/power_plants/mastr_db_classes.py +++ b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -226,7 +226,36 @@ class EgonPowerPlantsNuclear(Base): __tablename__ = "egon_power_plants_nuclear" __table_args__ = {"schema": "supply"} - id = Column(Integer, Sequence("pp_gsgk_seq"), primary_key=True) + id = Column(Integer, Sequence("pp_nuclear_seq"), primary_key=True) + bus_id = Column(Integer, nullable=True) # Grid district id + gens_id = Column(String, nullable=True) # EinheitMastrNummer + + status = Column(String, nullable=True) # EinheitBetriebsstatus + commissioning_date = Column(DateTime, nullable=True) # Inbetriebnahmedatum + postcode = Column(String(5), nullable=True) # Postleitzahl + city = Column(String(50), nullable=True) # Ort + municipality = Column(String, nullable=True) # Gemeinde + federal_state = Column(String(31), nullable=True) # Bundesland + zip_and_municipality = Column(String, 
nullable=True) + + carrier = Column(String) # Energietraeger + technology = Column(String) # Technologie + + capacity = Column(Float, nullable=True) # Nettonennleistung + th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung + feedin_type = Column(String(47), nullable=True) # Einspeisungsart + voltage_level = Column(Integer, nullable=True) + voltage_level_inferred = Column(Boolean, nullable=True) + + geometry_geocoded = Column(Boolean) + geom = Column(Geometry("POINT", 4326), index=True, nullable=True) + + +class EgonPowerPlantsStorage(Base): + __tablename__ = "egon_power_plants_storage" + __table_args__ = {"schema": "supply"} + + id = Column(Integer, Sequence("pp_storage_seq"), primary_key=True) bus_id = Column(Integer, nullable=True) # Grid district id gens_id = Column(String, nullable=True) # EinheitMastrNummer @@ -240,6 +269,8 @@ class EgonPowerPlantsNuclear(Base): carrier = Column(String) # Energietraeger technology = Column(String) # Technologie + battery_type = Column(String) # Batterietechnologie + pump_storage_type = Column(String) # Pumpspeichertechnologie capacity = Column(Float, nullable=True) # Nettonennleistung th_capacity = Column(Float, nullable=True) # ThermischeNutzleistung From f06a23e01b36485bd6fe44294c128e2e5e3474b3 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 13:32:01 +0100 Subject: [PATCH 19/21] adapt changelog --- CHANGELOG.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0118a40a0..78cff845c 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -252,6 +252,8 @@ Added `PR #903 `_ * Add low flex scenario 'eGon2035_lowflex' `#822 `_ +* Add MaStR geocoding and handling of conventional generators + `#1095 `_ .. _PR #159: https://github.com/openego/eGon-data/pull/159 .. _PR #703: https://github.com/openego/eGon-data/pull/703 @@ -484,10 +486,13 @@ Changed created for a single process. This fixes issue `#799`_. 
* Insert rural heat per supply technology `#1026 <https://github.com/openego/eGon-data/issues/1026>`_ - -.. _#799: https://github.com/openego/eGon-data/issues/799 * Change desposit ID for data_bundle download from zenodo sandbox `#1110 <https://github.com/openego/eGon-data/issues/1110>`_ +* Use MaStR geocoding results for pv rooftop to buildings mapping workflow + `#1095 <https://github.com/openego/eGon-data/issues/1095>`_ + +.. _#799: https://github.com/openego/eGon-data/issues/799 + Bug Fixes --------- From c74113b7dcf2c8d33cb3971545acf6b060c56e6c Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 14:45:26 +0100 Subject: [PATCH 20/21] adapt docs --- docs/data.rst | 12 ++++++++++++ src/egon/data/datasets/power_plants/mastr.py | 1 + 2 files changed, 13 insertions(+) diff --git a/docs/data.rst b/docs/data.rst index ba551d6fa..e4e3cf6b9 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -7,3 +7,15 @@ Scenarios Published data ============== + +Data bundle +----------- + +The data bundle is published on +`zenodo `_. It contains several data +sets, which serve as a basis for eGon-data. One such data set is the geocoding +for the `MaStR data set `_ which is +used for eGon-data as well. Whenever the MaStR data set is updated, it is +necessary to redo the geocoding with the new data set and update the data +bundle accordingly. The geocoding can be done based on the +`mastr-geocoding repository `_. 
diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 74180aa60..2aad61c09 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -331,6 +331,7 @@ def import_mastr() -> None: ), index_col=None, dtype={"Postleitzahl": str}, + low_memory=False, ).rename(columns=cols_mapping) # drop units outside of Germany From ce779b52f0b6d01e19efb6abb3b70b5f2e844a73 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Mar 2023 14:48:46 +0100 Subject: [PATCH 21/21] set dataset versions --- src/egon/data/datasets/data_bundle/__init__.py | 2 +- src/egon/data/datasets/power_plants/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/egon/data/datasets/data_bundle/__init__.py b/src/egon/data/datasets/data_bundle/__init__.py index 867db59b3..928547fde 100644 --- a/src/egon/data/datasets/data_bundle/__init__.py +++ b/src/egon/data/datasets/data_bundle/__init__.py @@ -44,7 +44,7 @@ def __init__(self, dependencies): ] super().__init__( name="DataBundle", - version=f"{deposit_id}-0.0.0.dev", + version=f"{deposit_id}-0.0.1", dependencies=dependencies, tasks=(download,), ) diff --git a/src/egon/data/datasets/power_plants/__init__.py b/src/egon/data/datasets/power_plants/__init__.py index fc77691b8..672431240 100755 --- a/src/egon/data/datasets/power_plants/__init__.py +++ b/src/egon/data/datasets/power_plants/__init__.py @@ -59,7 +59,7 @@ class PowerPlants(Dataset): def __init__(self, dependencies): super().__init__( name="PowerPlants", - version="0.0.17.dev", + version="0.0.17", dependencies=dependencies, tasks=( create_tables,