From 87f0412ec79f484fc4beebab7583bb4e1c7336cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 09:47:30 +0100
Subject: [PATCH 1/6] Add function to download MaStR data from Zenodo

---
 src/egon/data/datasets.yml       | 13 ++++++++++++
 src/egon/data/importing/mastr.py | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 src/egon/data/importing/mastr.py

diff --git a/src/egon/data/datasets.yml b/src/egon/data/datasets.yml
index e18413422..93391c47d 100644
--- a/src/egon/data/datasets.yml
+++ b/src/egon/data/datasets.yml
@@ -84,3 +84,16 @@ scenario_input:
     paths:
       "capacities": "NEP2035_V2021_scnC2035.xlsx"
       "list_conv_pp": "Kraftwerksliste_NEP_2021_konv.csv"
+
+mastr:
+  technologies:
+    - "wind"
+    - "hydro"
+    - "solar"
+    - "biomass"
+    - "combustion"
+    - "nuclear"
+    - "gsgk"
+    - "storage"
+  file_basename: "bnetza_mastr"
+  deposit_id: 740153
diff --git a/src/egon/data/importing/mastr.py b/src/egon/data/importing/mastr.py
new file mode 100644
index 000000000..d98ee5d29
--- /dev/null
+++ b/src/egon/data/importing/mastr.py
@@ -0,0 +1,34 @@
+import os
+from urllib.request import urlretrieve
+import egon.data.config
+
+
+def download_mastr_data(data_stages=["cleaned"]):
+    """
+    Download MaStR data from Zenodo
+
+    Parameters
+    ----------
+    data_stages: list
+        Select data stages you want to download data for. Possible values:
+        'raw', 'cleaned'
+    """
+
+    # Get parameters from config and set download URL
+    data_config = egon.data.config.datasets()["mastr"]
+    zenodo_files_url = f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
+
+    files = []
+    for technology in data_config["technologies"]:
+        # Download raw data
+        if "raw" in data_stages:
+            files.append(f"{data_config['file_basename']}_{technology}_raw.csv")
+        # Download cleaned data
+        if "cleaned" in data_stages:
+            files.append(f"{data_config['file_basename']}_{technology}_cleaned.csv")
+    files.append("datapackage.json")
+
+    # Retrieve specified files
+    for filename in files:
+        if not os.path.isfile(filename):
+            urlretrieve(zenodo_files_url + filename, filename)

From 813a35639e1c3a69fbad0f95ec182ee6c977d8e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 10:05:04 +0100
Subject: [PATCH 2/6] Add MaStR data download to pipeline

---
 src/egon/data/airflow/dags/pipeline.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/egon/data/airflow/dags/pipeline.py b/src/egon/data/airflow/dags/pipeline.py
index 497b35714..aadf61e7b 100644
--- a/src/egon/data/airflow/dags/pipeline.py
+++ b/src/egon/data/airflow/dags/pipeline.py
@@ -15,6 +15,7 @@
 import egon.data.processing.power_plants as power_plants
 import egon.data.importing.nep_input_data as nep_input
 import egon.data.importing.etrago as etrago
+import egon.data.importing.mastr as mastr
 
 # Prepare connection to db for operators
 airflow_db_connection()
@@ -138,3 +139,10 @@
         python_callable = etrago.create_tables
     )
     setup >> etrago_input_data
+
+    # Retrieve MaStR data
+    retrieve_mastr_data = PythonOperator(
+        task_id="retrieve_mastr_data",
+        python_callable=mastr.download_mastr_data
+    )
+    setup >> retrieve_mastr_data

From 0aada2ca4bc76675b7b5ff455f92fbfe6797540d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 12:05:27 +0100
Subject: [PATCH 3/6] Add a note in CHANGELOG

---
 CHANGELOG.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index be5b22dd0..ad5e91db0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -35,6 +35,8 @@ Added
   `#91 `_
 * DemandRegio data import for annual electricity demands
   `#5 `_
+* Download cleaned open-MaStR data from Zenodo
+  `#14 `_
 
 Changed
 -------

From d2c838c5aecfaae3b52cfdc7732bb344d18da9dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 12:11:59 +0100
Subject: [PATCH 4/6] Apply black code style

---
 src/egon/data/importing/mastr.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/egon/data/importing/mastr.py b/src/egon/data/importing/mastr.py
index d98ee5d29..f02344726 100644
--- a/src/egon/data/importing/mastr.py
+++ b/src/egon/data/importing/mastr.py
@@ -16,16 +16,22 @@ def download_mastr_data(data_stages=["cleaned"]):
 
     # Get parameters from config and set download URL
     data_config = egon.data.config.datasets()["mastr"]
-    zenodo_files_url = f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
+    zenodo_files_url = (
+        f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
+    )
 
     files = []
     for technology in data_config["technologies"]:
         # Download raw data
         if "raw" in data_stages:
-            files.append(f"{data_config['file_basename']}_{technology}_raw.csv")
+            files.append(
+                f"{data_config['file_basename']}_{technology}_raw.csv"
+            )
         # Download cleaned data
         if "cleaned" in data_stages:
-            files.append(f"{data_config['file_basename']}_{technology}_cleaned.csv")
+            files.append(
+                f"{data_config['file_basename']}_{technology}_cleaned.csv"
+            )
     files.append("datapackage.json")
 
     # Retrieve specified files

From 93d6151604489aec4f763d90747819578d4f4221 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 12:12:06 +0100
Subject: [PATCH 5/6] Apply isort

---
 src/egon/data/importing/mastr.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/egon/data/importing/mastr.py b/src/egon/data/importing/mastr.py
index f02344726..d3b0c1d22 100644
--- a/src/egon/data/importing/mastr.py
+++ b/src/egon/data/importing/mastr.py
@@ -1,5 +1,6 @@
-import os
 from urllib.request import urlretrieve
+import os
+
 import egon.data.config
 
 

From 427e9d24f12845d75da1bb829473d2262e7b6e4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Ple=C3=9Fmann?=
Date: Thu, 4 Mar 2021 12:36:22 +0100
Subject: [PATCH 6/6] Make default argument safer

For explanation see:
https://florimond.dev/blog/articles/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
---
 src/egon/data/importing/mastr.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/egon/data/importing/mastr.py b/src/egon/data/importing/mastr.py
index d3b0c1d22..918e64522 100644
--- a/src/egon/data/importing/mastr.py
+++ b/src/egon/data/importing/mastr.py
@@ -4,7 +4,7 @@
 import egon.data.config
 
 
-def download_mastr_data(data_stages=["cleaned"]):
+def download_mastr_data(data_stages=None):
     """
     Download MaStR data from Zenodo
 
@@ -12,8 +12,11 @@ def download_mastr_data(data_stages=["cleaned"]):
     ----------
     data_stages: list
         Select data stages you want to download data for. Possible values:
-        'raw', 'cleaned'
+        'raw', 'cleaned'. Defaults to 'cleaned' if omitted.
     """
+    # Process inputs
+    if not data_stages:
+        data_stages = ["cleaned"]
 
     # Get parameters from config and set download URL
     data_config = egon.data.config.datasets()["mastr"]