Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/sphinx/source/reference/iotools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,17 @@ lower quality.
iotools.read_crn


ECMWF ERA5
^^^^^^^^^^

A global reanalysis dataset providing weather and solar resource data.

.. autosummary::
   :toctree: generated/

   iotools.get_era5


Generic data file readers
-------------------------

Expand Down
3 changes: 2 additions & 1 deletion docs/sphinx/source/whatsnew/v0.13.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Enhancements
:py:func:`~pvlib.singlediode.bishop88_mpp`,
:py:func:`~pvlib.singlediode.bishop88_v_from_i`, and
:py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)

* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
  ERA5 reanalysis data. (:pull:`2573`)


Documentation
Expand Down
1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@
from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
from pvlib.iotools.era5 import get_era5 # noqa: F401
196 changes: 196 additions & 0 deletions pvlib/iotools/era5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import requests
import pandas as pd
from io import BytesIO, StringIO
import zipfile
import time


# Maps ERA5 variable names (both the short and the long spelling) to pvlib
# variable names. Entry order matters: get_era5 inverts this mapping to
# accept pvlib names as input, and for duplicated values the later entry
# (the long name) is the one that survives the inversion.
VARIABLE_MAP = {
    # ERA5 short names
    'd2m': 'temp_dew',
    't2m': 'temp_air',
    'sp': 'pressure',
    'ssrd': 'ghi',
    'tp': 'precipitation',

    # ERA5 long names
    '2m_dewpoint_temperature': 'temp_dew',
    '2m_temperature': 'temp_air',
    'surface_pressure': 'pressure',
    'surface_solar_radiation_downwards': 'ghi',
    'total_precipitation': 'precipitation',
}


def same(x):
    """Identity converter: return ``x`` unchanged (no unit conversion)."""
    return x


def k_to_c(temp_k):
    """Convert a temperature from kelvin to degrees Celsius."""
    kelvin_offset = 273.15
    return temp_k - kelvin_offset


def j_to_w(j):
    """
    Divide an energy value by 3600.

    Presumably converts energy accumulated over one hour (J) into the mean
    power over that hour (W), 3600 being the seconds in an hour -- confirm
    against the ERA5 accumulation period.
    """
    seconds_per_hour = 3600
    return j / seconds_per_hour


def m_to_cm(m):
    """
    Convert a length from meters to centimeters.

    Parameters
    ----------
    m : numeric or array-like
        Value(s) in meters.

    Returns
    -------
    Same type as ``m``, in centimeters.
    """
    # 1 m == 100 cm, so the conversion multiplies (the previous
    # implementation divided, yielding values 10^4 times too small)
    return m * 100


# Unit converter applied to each returned column, keyed by ERA5 short name.
# Columns not listed here are passed through unchanged by get_era5.
UNITS = {
    # wind components: left as-is
    'u100': same,
    'v100': same,
    'u10': same,
    'v10': same,
    # temperatures: kelvin -> degrees Celsius
    'd2m': k_to_c,
    't2m': k_to_c,
    # pressure: left as-is
    'msl': same,
    'sst': k_to_c,
    'skt': k_to_c,
    'sp': same,
    # radiation: divided by 3600
    'ssrd': j_to_w,
    'strd': j_to_w,
    # precipitation: meters -> centimeters
    'tp': m_to_cm,
}

Comment on lines +25 to +56
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefix all of this with _ to discourage others from using it, even within pvlib.


def get_era5(latitude, longitude, start, end, variables, api_key,
             map_variables=True, timeout=60,
             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
    """
    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.

    A CDS API key is needed to access this API. Register for one at [1]_.

    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
    the available variables. Data are available on a 0.25° x 0.25° grid.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude: float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First day of the requested period.
    end : datetime like or str
        Last day of the requested period.
    variables : list of str
        List of variable names to retrieve. ERA5 names and pvlib names
        (see :const:`VARIABLE_MAP`) are both accepted. See [3]_ for options.
    api_key : str
        ECMWF CDS API key.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. Also converts units of some variables. See variable
        :const:`VARIABLE_MAP` and :const:`UNITS`.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become available
        before timeout. The same value is used as the timeout of each
        individual HTTP request.
    url : str, optional
        API endpoint URL.

    Raises
    ------
    requests.exceptions.Timeout
        If ``timeout`` is reached without the job finishing.
    requests.exceptions.HTTPError
        If fetching the job results or downloading the dataset fails.
    Exception
        If the request is rejected on submission, or the job fails.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the start of the interval.
    meta : dict
        Metadata.

    References
    ----------
    .. [1] https://cds.climate.copernicus.eu/
    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
    """  # noqa: E501
    # NOTE(review): timezone-aware inputs are formatted in their own time
    # zone here; a reviewer asked whether they should be localized to UTC
    # first. Left unchanged pending a decision.
    start = pd.to_datetime(start).strftime("%Y-%m-%d")
    end = pd.to_datetime(end).strftime("%Y-%m-%d")

    # the endpoint paths below are appended directly, so tolerate a base
    # URL given without its trailing slash
    if not url.endswith('/'):
        url = url + '/'

    headers = {'PRIVATE-TOKEN': api_key}

    # allow variables to be specified with pvlib names. Later entries of
    # VARIABLE_MAP win in the inversion, so pvlib names map to whichever
    # ERA5 spelling is listed last.
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]

    # Step 1: submit data request (add it to the queue)
    params = {
        "inputs": {
            "variable": variables,
            "location": {"longitude": longitude, "latitude": latitude},
            "date": [f"{start}/{end}"],
            "data_format": "csv"
        }
    }
    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers,
                             timeout=timeout)
    if not response.ok:
        # likely need to accept license. Report the server's message, and
        # fall back to the raw body if it is not valid JSON so that the
        # real error is not masked by a decoding error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception(detail)
    submission_response = response.json()

    job_id = submission_response['jobID']

    # Step 2: poll until the data request is ready
    slug = "jobs/" + job_id
    # seconds between polls
    # NOTE(review): a reviewer suggested considering 0.5 here
    poll_interval = 1
    # measure elapsed time rather than counting polls, so that the time
    # spent inside each request counts toward the timeout as well
    deadline = time.monotonic() + timeout
    while True:
        response = requests.get(url + slug, headers=headers, timeout=timeout)
        poll_response = response.json()
        job_status = poll_response['status']

        if job_status == 'successful':
            break  # ready to proceed to next step
        elif job_status == 'failed':
            msg = (
                'Request failed. Please check the ECMWF website for details: '
                'https://cds.climate.copernicus.eu/requests?tab=all'
            )
            raise Exception(msg)

        if time.monotonic() >= deadline:
            raise requests.exceptions.Timeout(
                'Request timed out. Try increasing the timeout parameter or '
                'reducing the request size.'
            )

        time.sleep(poll_interval)

    # Step 3: get the download link for our requested dataset
    slug = "jobs/" + job_id + "/results"
    response = requests.get(url + slug, headers=headers, timeout=timeout)
    response.raise_for_status()
    results_response = response.json()
    download_url = results_response['asset']['value']['href']

    # Step 4: finally, download our dataset. it's a zipfile of one CSV
    response = requests.get(download_url, timeout=timeout)
    # fail with the HTTP error rather than an opaque bad-zipfile error
    response.raise_for_status()
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        filename = archive.namelist()[0]
        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
    df = pd.read_csv(csvbuffer)

    # and parse into the usual formats
    metadata = submission_response['metadata']  # include messages from ECMWF
    metadata['jobID'] = job_id
    if not df.empty:
        # coordinates as reported in the returned data
        metadata['latitude'] = df['latitude'].values[0]
        metadata['longitude'] = df['longitude'].values[0]

    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])

    if map_variables:
        # convert units and rename
        for shortname in df.columns:
            converter = UNITS.get(shortname, same)
            df[shortname] = converter(df[shortname])
        df = df.rename(columns=VARIABLE_MAP)

    return df, metadata
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def nrel_api_key():
reason='requires solaranywhere credentials')


try:
    # Attempt to load ECMWF API key used for testing
    # pvlib.iotools.get_era5
    ecmwf_api_key = os.environ["ECMWF_API_KEY"]
    # the name was originally misspelled; keep it as an alias so that any
    # existing reference continues to work
    ecwmf_api_key = ecmwf_api_key
    has_ecmwf_credentials = True
except KeyError:
    has_ecmwf_credentials = False

# skip marker for tests that need a real ECMWF API key in the environment
requires_ecmwf_credentials = pytest.mark.skipif(
    not has_ecmwf_credentials,
    reason='requires ECMWF credentials')


try:
import statsmodels # noqa: F401
has_statsmodels = True
Expand Down
81 changes: 81 additions & 0 deletions tests/iotools/test_era5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
tests for pvlib/iotools/era5.py
"""

import pandas as pd
import pytest
import pvlib
import requests
import os
from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials


@pytest.fixture
def params():
    """Keyword arguments for a one-day, two-variable ``get_era5`` request."""
    # read the key first so a missing variable fails before anything else
    key = os.environ["ECMWF_API_KEY"]

    request = dict(
        latitude=40.01, longitude=-80.01,
        start='2020-06-01', end='2020-06-01',
        variables=['ghi', 'temp_air'],
    )
    request['api_key'] = key
    return request


@pytest.fixture
def expected():
    """Expected hourly data (after variable mapping) for the ``params`` request."""
    hourly_index = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59",
                                 freq="h", tz="UTC")
    hourly_index.name = 'valid_time'
    columns = {
        'temp_air': [16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4,
                     8.5, 8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9,
                     21.5, 21.6, 21., 20.7],
        'ghi': [153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
                427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1,
                685.2, 526.9, 331.4],
    }
    return pd.DataFrame(columns, index=hourly_index)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5(params, expected):
    """Default call returns mapped, unit-converted data and metadata."""
    data, metadata = pvlib.iotools.get_era5(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False,
                                  atol=0.1)
    # the reported coordinates differ from the requested 40.01, -80.01
    assert metadata['longitude'] == -80.0
    assert metadata['latitude'] == 40.0
    assert isinstance(metadata['jobID'], str)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_map_variables(params, expected):
    """With ``map_variables=False`` the raw ERA5 names and units are kept."""
    data, metadata = pvlib.iotools.get_era5(**params, map_variables=False)
    raw_names = {'temp_air': 't2m', 'ghi': 'ssrd'}
    expected = expected.rename(columns=raw_names)
    # apply unit conversions manually
    data['t2m'] = data['t2m'] - 273.15
    data['ssrd'] = data['ssrd'] / 3600
    pd.testing.assert_frame_equal(data, expected, check_freq=False,
                                  atol=0.1)
    assert metadata['longitude'] == -80.0
    assert metadata['latitude'] == 40.0
    assert isinstance(metadata['jobID'], str)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_error(params):
    """Requesting an unknown variable raises the 'Request failed' error."""
    bad_params = {**params, 'variables': ['nonexistent']}
    expected_message = 'Request failed. Please check the ECMWF website'
    with pytest.raises(Exception, match=expected_message):
        pvlib.iotools.get_era5(**bad_params)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timeout(params):
    """A one-second timeout raises the 'Request timed out' Timeout error."""
    expected_message = 'Request timed out. Try increasing'
    with pytest.raises(requests.exceptions.Timeout, match=expected_message):
        pvlib.iotools.get_era5(**params, timeout=1)
Loading