Merge pull request #1063 from openego/features/#519-update-mastr-data-features_#1051-add-new-mastr-data

Update MaStR data and adapt PV+wind methods
openego · Dec 23, 2022 · a79ac81 · a79ac81
2 parents 8329d3e + 4e3e6ad
commit a79ac81
Show file tree

Hide file tree

Showing 16 changed files with 932 additions and 343 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -237,6 +237,8 @@ Added
   `#988 <https://github.com/openego/eGon-data/issues/988>`_
 * Add load areas
   `#1014 <https://github.com/openego/eGon-data/issues/1014>`_
+* Add new MaStR dataset
+  `#1051 <https://github.com/openego/eGon-data/issues/1051>`_
 
 .. _PR #159: https://github.com/openego/eGon-data/pull/159
 .. _PR #703: https://github.com/openego/eGon-data/pull/703
@@ -448,6 +450,8 @@ Changed
   `#987 <https://github.com/openego/eGon-data/issues/987>`_
 * Add eGon2021 scenario to demandregio dataset
   `#1035 <https://github.com/openego/eGon-data/issues/1035>`_
+* Update MaStR dataset
+  `#519 <https://github.com/openego/eGon-data/issues/519>`_
 * Add missing VOM costs for heat sector components
   `#942 <https://github.com/openego/eGon-data/issues/942>`_
 * Disaggregate industry demands to OSM areas and industrial sites

diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml
@@ -297,6 +297,15 @@ mastr:
   file_basename: "bnetza_mastr"
   deposit_id: 808086
 
+mastr_new:
+  technologies:
+    - "wind"
+    - "hydro"
+    - "solar"
+    - "biomass"
+  file_basename: "bnetza_mastr"
+  deposit_id: 1132987
+
 re_potential_areas:
   target:
     schema: "supply"
@@ -347,6 +356,7 @@ hotmaps_current_policy_scenario_heat_demands_buildings:
 power_plants:
   sources:
       mastr_pv: "bnetza_mastr_solar_cleaned.csv"
+      mastr_wind: "bnetza_mastr_wind_cleaned.csv"
       mastr_biomass: "bnetza_mastr_biomass_cleaned.csv"
       mastr_hydro: "bnetza_mastr_hydro_cleaned.csv"
       mastr_location: "location_elec_generation_raw.csv"

diff --git a/src/egon/data/datasets/chp/__init__.py b/src/egon/data/datasets/chp/__init__.py
@@ -3,6 +3,8 @@
 (CHP) plants.
 """
 
+from pathlib import Path
+
 from geoalchemy2 import Geometry
 from shapely.ops import nearest_points
 from sqlalchemy import Boolean, Column, Float, Integer, Sequence, String
@@ -11,6 +13,7 @@
 from sqlalchemy.orm import sessionmaker
 import geopandas as gpd
 import pandas as pd
+import pypsa
 
 from egon.data import config, db
 from egon.data.datasets import Dataset
@@ -19,17 +22,16 @@
     assign_use_case,
     existing_chp_smaller_10mw,
     extension_per_federal_state,
+    extension_to_areas,
     select_target,
 )
+from egon.data.datasets.mastr import WORKING_DIR_MASTR_OLD
 from egon.data.datasets.power_plants import (
     assign_bus_id,
     assign_voltage_level,
     filter_mastr_geometry,
     scale_prox2now,
 )
-import pypsa
-from egon.data.datasets.chp.small_chp import extension_to_areas
-from pathlib import Path
 
 Base = declarative_base()
 
@@ -248,9 +250,9 @@ def insert_biomass_chp(scenario):
     target = select_target("biomass", scenario)
 
     # import data for MaStR
-    mastr = pd.read_csv(cfg["sources"]["mastr_biomass"]).query(
-        "EinheitBetriebsstatus=='InBetrieb'"
-    )
+    mastr = pd.read_csv(
+        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_biomass"]
+    ).query("EinheitBetriebsstatus=='InBetrieb'")
 
     # Drop entries without federal state or 'AusschließlichWirtschaftszone'
     mastr = mastr[
@@ -278,7 +280,9 @@ def insert_biomass_chp(scenario):
 
     # Assign bus_id
     if len(mastr_loc) > 0:
-        mastr_loc["voltage_level"] = assign_voltage_level(mastr_loc, cfg)
+        mastr_loc["voltage_level"] = assign_voltage_level(
+            mastr_loc, cfg, WORKING_DIR_MASTR_OLD
+        )
         mastr_loc = assign_bus_id(mastr_loc, cfg)
     mastr_loc = assign_use_case(mastr_loc, cfg["sources"])
 

diff --git a/src/egon/data/datasets/chp/match_nep.py b/src/egon/data/datasets/chp/match_nep.py
@@ -8,6 +8,7 @@
 
 from egon.data import config, db
 from egon.data.datasets.chp.small_chp import assign_use_case
+from egon.data.datasets.mastr import WORKING_DIR_MASTR_OLD
 from egon.data.datasets.power_plants import (
     assign_bus_id,
     assign_voltage_level,
@@ -112,7 +113,7 @@ def select_chp_from_mastr(sources):
 
     # Read-in data from MaStR
     MaStR_konv = pd.read_csv(
-        sources["mastr_combustion"],
+        WORKING_DIR_MASTR_OLD / sources["mastr_combustion"],
         delimiter=",",
         usecols=[
             "Nettonennleistung",
@@ -323,6 +324,7 @@ def insert_large_chp(sources, target, EgonChp):
     MaStR_konv["voltage_level"] = assign_voltage_level(
         MaStR_konv.rename({"el_capacity": "Nettonennleistung"}, axis=1),
         config.datasets()["chp_location"],
+        WORKING_DIR_MASTR_OLD
     )
 
     # Initialize DataFrame for matched CHPs
@@ -376,6 +378,7 @@ def insert_large_chp(sources, target, EgonChp):
     MaStR_konv["voltage_level"] = assign_voltage_level(
         MaStR_konv.rename({"el_capacity": "Nettonennleistung"}, axis=1),
         config.datasets()["chp_location"],
+        WORKING_DIR_MASTR_OLD
     )
 
     # Match CHP from NEP list with aggregated MaStR units

diff --git a/src/egon/data/datasets/emobility/motorized_individual_travel/model_timeseries.py b/src/egon/data/datasets/emobility/motorized_individual_travel/model_timeseries.py
@@ -586,7 +586,7 @@ def write_to_db(write_lowflex_model: bool) -> None:
         """Write model data to eTraGo tables"""
 
         @db.check_db_unique_violation
-        def write_bus(scenario_name: str) -> None:
+        def write_bus(scenario_name: str) -> int:
             # eMob MIT bus
             emob_bus_id = db.next_etrago_id("bus")
             with db.session_scope() as session:

diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py
@@ -1,56 +1,75 @@
+"""
+Download datasets from the Marktstammdatenregister (MaStR) unit registry.
+It incorporates two different datasets:
+
+Dump 2021-05-03
+* Source: https://sandbox.zenodo.org/record/808086
+* Used technologies: PV plants, wind turbines, biomass, hydro plants,
+  combustion, nuclear, gsgk, storage
+* Data is further processed in dataset
+  :py:class:`egon.data.datasets.power_plants.PowerPlants`
+
+Dump 2022-11-17
+* Source: https://sandbox.zenodo.org/record/1132839
+* Used technologies: PV plants, wind turbines, biomass, hydro plants
+* Data is further processed in module
+  :py:mod:`egon.data.datasets.power_plants.mastr` of dataset `PowerPlants`
+
+Todo: Finish docstring
+TBD
+"""
+
 from functools import partial
+from pathlib import Path
 from urllib.request import urlretrieve
 import os
 
 from egon.data.datasets import Dataset
 import egon.data.config
 
+WORKING_DIR_MASTR_OLD = Path(".", "bnetza_mastr", "dump_2021-05-03")
+WORKING_DIR_MASTR_NEW = Path(".", "bnetza_mastr", "dump_2022-11-17")
 
-def download_mastr_data(data_stages=None):
-    """
-    Download MaStR data from Zenodo
-
-    Parameters
-    ----------
-    data_stages: list
-        Select data stages you want to download data for. Possible values:
-        'raw', 'cleaned'. Defaults to 'cleaned' if omitted.
-    """
-    # Process inputs
-    if not data_stages:
-        data_stages = ["cleaned"]
-
-    # Get parameters from config and set download URL
-    data_config = egon.data.config.datasets()["mastr"]
-    zenodo_files_url = (
-        f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
-    )
-
-    files = []
-    for technology in data_config["technologies"]:
-        # Download raw data
-        if "raw" in data_stages:
-            files.append(
-                f"{data_config['file_basename']}_{technology}_raw.csv"
-            )
-        # Download cleaned data
-        if "cleaned" in data_stages:
+
+def download_mastr_data():
+    """Download both MaStR dumps from Zenodo into their working directories.
+
+    For each of the config sections ``mastr`` and ``mastr_new``, the cleaned
+    CSV file of every configured technology plus
+    ``location_elec_generation_raw.csv`` is fetched from the Zenodo sandbox
+    record given by the section's ``deposit_id``. Files that already exist
+    in the target directory are not downloaded again.
+    """
+
+    def download(dataset_name, download_dir):
+        """Fetch the files of one MaStR dump.
+
+        Parameters
+        ----------
+        dataset_name: str
+            Key of the dataset's section in the datasets config.
+        download_dir: pathlib.Path
+            Directory the files are written to.
+        """
+        print(f"Downloading dataset {dataset_name} to {download_dir} ...")
+        # Get parameters from config and set download URL
+        data_config = egon.data.config.datasets()[dataset_name]
+        zenodo_files_url = (
+            f"https://sandbox.zenodo.org/record/"
+            f"{data_config['deposit_id']}/files/"
+        )
+
+        files = []
+        for technology in data_config["technologies"]:
             files.append(
                 f"{data_config['file_basename']}_{technology}_cleaned.csv"
             )
-        files.append("datapackage.json")
         files.append("location_elec_generation_raw.csv")
 
-    # Retrieve specified files
-    for filename in files:
-        if not os.path.isfile(filename):
-            urlretrieve(zenodo_files_url + filename, filename)
+        # Retrieve specified files
+        for filename in files:
+            # Check the path the file is actually saved to, so that
+            # files already downloaded are skipped on a rerun.
+            target_file = download_dir / filename
+            if not target_file.is_file():
+                urlretrieve(zenodo_files_url + filename, target_file)
+
+    # mkdir(exist_ok=True) makes a separate existence check unnecessary
+    WORKING_DIR_MASTR_OLD.mkdir(exist_ok=True, parents=True)
+    WORKING_DIR_MASTR_NEW.mkdir(exist_ok=True, parents=True)
+
+    download(dataset_name="mastr", download_dir=WORKING_DIR_MASTR_OLD)
+    download(dataset_name="mastr_new", download_dir=WORKING_DIR_MASTR_NEW)
 
 
 mastr_data_setup = partial(
     Dataset,
     name="MastrData",
-    version="0.0.0",
+    version="0.0.1",
     dependencies=[],
     tasks=(download_mastr_data,),
 )