Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to download MaStR data from Zenodo #130

Merged
merged 6 commits into from Mar 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -35,6 +35,8 @@ Added
`#91 <https://github.com/openego/eGon-data/issues/91>`_
* DemandRegio data import for annual electricity demands
`#5 <https://github.com/openego/eGon-data/issues/5>`_
* Download cleaned open-MaStR data from Zenodo
`#14 <https://github.com/openego/eGon-data/issues/14>`_

Changed
-------
Expand Down
8 changes: 8 additions & 0 deletions src/egon/data/airflow/dags/pipeline.py
Expand Up @@ -15,6 +15,7 @@
import egon.data.processing.power_plants as power_plants
import egon.data.importing.nep_input_data as nep_input
import egon.data.importing.etrago as etrago
import egon.data.importing.mastr as mastr

# Prepare connection to db for operators
airflow_db_connection()
Expand Down Expand Up @@ -138,3 +139,10 @@
python_callable = etrago.create_tables
)
setup >> etrago_input_data

# Retrieve MaStR data
# Task that calls `mastr.download_mastr_data` without arguments, so the
# function's default data stage selection applies.
retrieve_mastr_data = PythonOperator(
task_id="retrieve_mastr_data",
python_callable=mastr.download_mastr_data
)
# Make the download task a downstream task of `setup`.
setup >> retrieve_mastr_data
13 changes: 13 additions & 0 deletions src/egon/data/datasets.yml
Expand Up @@ -84,3 +84,16 @@ scenario_input:
paths:
"capacities": "NEP2035_V2021_scnC2035.xlsx"
"list_conv_pp": "Kraftwerksliste_NEP_2021_konv.csv"

# Settings for the MaStR data download
# (looked up as datasets()["mastr"] by download_mastr_data).
mastr:
# One CSV file per listed technology and requested data stage is downloaded.
technologies:
- "wind"
- "hydro"
- "solar"
- "biomass"
- "combustion"
- "nuclear"
- "gsgk"
- "storage"
# Prefix of the CSV file names: <file_basename>_<technology>_<stage>.csv
file_basename: "bnetza_mastr"
# Zenodo record ID that is inserted into the download URL.
deposit_id: 740153
44 changes: 44 additions & 0 deletions src/egon/data/importing/mastr.py
@@ -0,0 +1,44 @@
from urllib.request import urlretrieve
import os

import egon.data.config


def download_mastr_data(data_stages=None):
    """
    Download MaStR data from Zenodo

    For every technology listed in the ``mastr`` section of the datasets
    configuration, one CSV file per requested data stage is downloaded,
    followed by ``datapackage.json``. Files are stored under their plain
    file name, i.e. relative to the current working directory. A file that
    already exists is not downloaded again.

    Each file is first written to ``<filename>.part`` and only moved to its
    final name after the download finished successfully. This way an
    interrupted download never leaves a truncated file behind that a later
    run would mistake for a complete one.

    Parameters
    ----------
    data_stages: list
        Select data stages you want to download data for. Possible values:
        'raw', 'cleaned'. Defaults to 'cleaned' if omitted.

    Raises
    ------
    urllib.error.URLError
        If a file cannot be retrieved from Zenodo (raised by
        ``urlretrieve``). The partial download is removed in that case.
    """
    # Process inputs
    if not data_stages:
        data_stages = ["cleaned"]

    # Get parameters from config and set download URL
    data_config = egon.data.config.datasets()["mastr"]
    zenodo_files_url = (
        f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
    )
    file_basename = data_config["file_basename"]

    # Collect the names of all files to retrieve, keeping the order
    # raw -> cleaned per technology
    files = []
    for technology in data_config["technologies"]:
        # Download raw data
        if "raw" in data_stages:
            files.append(f"{file_basename}_{technology}_raw.csv")
        # Download cleaned data
        if "cleaned" in data_stages:
            files.append(f"{file_basename}_{technology}_cleaned.csv")
    files.append("datapackage.json")

    # Retrieve specified files
    for filename in files:
        if os.path.isfile(filename):
            continue

        # Download to a temporary name first; the final name only ever
        # refers to a completely downloaded file.
        partial_filename = f"{filename}.part"
        try:
            urlretrieve(zenodo_files_url + filename, partial_filename)
            os.replace(partial_filename, filename)
        finally:
            # After a successful os.replace() the partial file is gone and
            # this is a no-op; after a failure it removes the leftovers.
            if os.path.isfile(partial_filename):
                os.remove(partial_filename)