Skip to content

Commit

Permalink
Merge pull request #130 from openego/features/#14-integrate-open-mast…
Browse files Browse the repository at this point in the history
…r-data

Add function to download cleaned MaStR data from Zenodo.

Post-processing of this data will happen in #134
  • Loading branch information
gplssm committed Mar 4, 2021
2 parents 52f8952 + 427e9d2 commit d04b8e8
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -35,6 +35,8 @@ Added
`#91 <https://github.com/openego/eGon-data/issues/91>`_
* DemandRegio data import for annual electricity demands
`#5 <https://github.com/openego/eGon-data/issues/5>`_
* Download cleaned open-MaStR data from Zenodo
`#14 <https://github.com/openego/eGon-data/issues/14>`_

Changed
-------
Expand Down
8 changes: 8 additions & 0 deletions src/egon/data/airflow/dags/pipeline.py
Expand Up @@ -15,6 +15,7 @@
import egon.data.processing.power_plants as power_plants
import egon.data.importing.nep_input_data as nep_input
import egon.data.importing.etrago as etrago
import egon.data.importing.mastr as mastr

# Prepare connection to db for operators
airflow_db_connection()
Expand Down Expand Up @@ -138,3 +139,10 @@
python_callable = etrago.create_tables
)
setup >> etrago_input_data

# Retrieve MaStR data: wrap mastr.download_mastr_data in its own task.
# It is called without arguments here, so the function's default data
# stage selection applies. The `>>` below registers the task as
# downstream of `setup`.
retrieve_mastr_data = PythonOperator(
task_id="retrieve_mastr_data",
python_callable=mastr.download_mastr_data
)
setup >> retrieve_mastr_data
13 changes: 13 additions & 0 deletions src/egon/data/datasets.yml
Expand Up @@ -84,3 +84,16 @@ scenario_input:
paths:
"capacities": "NEP2035_V2021_scnC2035.xlsx"
"list_conv_pp": "Kraftwerksliste_NEP_2021_konv.csv"

# Settings read by download_mastr_data() in egon/data/importing/mastr.py
mastr:
# One CSV file per technology and selected data stage is downloaded
technologies:
- "wind"
- "hydro"
- "solar"
- "biomass"
- "combustion"
- "nuclear"
- "gsgk"
- "storage"
# File name prefix: <file_basename>_<technology>_<raw|cleaned>.csv
file_basename: "bnetza_mastr"
# Zenodo record id that is inserted into the download URL
deposit_id: 740153
44 changes: 44 additions & 0 deletions src/egon/data/importing/mastr.py
@@ -0,0 +1,44 @@
from urllib.request import urlretrieve
import os

import egon.data.config


def download_mastr_data(data_stages=None):
    """
    Download MaStR data from Zenodo

    For every technology listed in the ``mastr`` section of the datasets
    configuration, one CSV file per selected data stage is retrieved. The
    file ``datapackage.json`` is retrieved in addition. Files are stored
    under their plain file name, i.e. relative to the current working
    directory. Files that already exist are not downloaded again.

    Parameters
    ----------
    data_stages: list
        Select data stages you want to download data for. Possible values:
        'raw', 'cleaned'. Defaults to 'cleaned' if omitted.
    """
    # Process inputs
    if not data_stages:
        data_stages = ["cleaned"]

    # Get parameters from config and set download URL
    data_config = egon.data.config.datasets()["mastr"]
    zenodo_files_url = (
        f"https://sandbox.zenodo.org/record/{data_config['deposit_id']}/files/"
    )

    # Collect file names: per technology, raw data first, then cleaned data
    files = []
    for technology in data_config["technologies"]:
        for stage in ("raw", "cleaned"):
            if stage in data_stages:
                files.append(
                    f"{data_config['file_basename']}_{technology}_{stage}.csv"
                )
    files.append("datapackage.json")

    # Retrieve specified files
    for filename in files:
        if os.path.isfile(filename):
            continue

        # Download to a temporary name and move it into place afterwards.
        # Otherwise an interrupted download would leave a truncated file
        # under the final name, which the existence check above would then
        # treat as complete on every later run.
        partial_filename = filename + ".part"
        try:
            urlretrieve(zenodo_files_url + filename, partial_filename)
            os.replace(partial_filename, filename)
        finally:
            # Only present if the download or the move failed
            if os.path.isfile(partial_filename):
                os.remove(partial_filename)

0 comments on commit d04b8e8

Please sign in to comment.