diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst
index 12db7d6818..f54ba59928 100644
--- a/docs/sphinx/source/reference/iotools.rst
+++ b/docs/sphinx/source/reference/iotools.rst
@@ -237,6 +237,17 @@ lower quality.
 
    iotools.read_crn
 
+ECMWF ERA5
+^^^^^^^^^^
+
+A global reanalysis dataset providing weather and solar resource data.
+
+.. autosummary::
+   :toctree: generated/
+
+   iotools.get_era5
+
+
 Generic data file readers
 -------------------------
 
diff --git a/docs/sphinx/source/whatsnew/v0.13.2.rst b/docs/sphinx/source/whatsnew/v0.13.2.rst
index e12f7277e5..8df594b643 100644
--- a/docs/sphinx/source/whatsnew/v0.13.2.rst
+++ b/docs/sphinx/source/whatsnew/v0.13.2.rst
@@ -27,7 +27,8 @@ Enhancements
   :py:func:`~pvlib.singlediode.bishop88_mpp`,
   :py:func:`~pvlib.singlediode.bishop88_v_from_i`, and
   :py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)
-
+* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
+  ERA5 reanalysis data. (:pull:`2573`)
 
 
 Documentation
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
index 75663507f3..0f54bce232 100644
--- a/pvlib/iotools/__init__.py
+++ b/pvlib/iotools/__init__.py
@@ -45,3 +45,4 @@
 from pvlib.iotools.meteonorm import get_meteonorm_observation_training  # noqa: F401, E501
 from pvlib.iotools.meteonorm import get_meteonorm_tmy  # noqa: F401
 from pvlib.iotools.nasa_power import get_nasa_power  # noqa: F401
+from pvlib.iotools.era5 import get_era5  # noqa: F401
diff --git a/pvlib/iotools/era5.py b/pvlib/iotools/era5.py
new file mode 100644
index 0000000000..06165cf55a
--- /dev/null
+++ b/pvlib/iotools/era5.py
@@ -0,0 +1,215 @@
+import requests
+import pandas as pd
+from io import BytesIO, StringIO
+import zipfile
+import time
+
+
+VARIABLE_MAP = {
+    # short names
+    'd2m': 'temp_dew',
+    't2m': 'temp_air',
+    'sp': 'pressure',
+    'ssrd': 'ghi',
+    'tp': 'precipitation',
+
+    # long names
+    '2m_dewpoint_temperature': 'temp_dew',
+    '2m_temperature': 'temp_air',
+    'surface_pressure': 'pressure',
+    'surface_solar_radiation_downwards': 'ghi',
+    'total_precipitation': 'precipitation',
+}
+
+
+def same(x):
+    """Return ``x`` unchanged (no unit conversion needed)."""
+    return x
+
+
+def k_to_c(temp_k):
+    """Convert a temperature from kelvin to degrees Celsius."""
+    return temp_k - 273.15
+
+
+def j_to_w(j):
+    """Convert energy accumulated over one hour [J] to mean power [W]."""
+    return j / 3600
+
+
+def m_to_cm(m):
+    """Convert a length from meters to centimeters."""
+    return m * 100
+
+
+UNITS = {
+    'u100': same,
+    'v100': same,
+    'u10': same,
+    'v10': same,
+    'd2m': k_to_c,
+    't2m': k_to_c,
+    'msl': same,
+    'sst': k_to_c,
+    'skt': k_to_c,
+    'sp': same,
+    'ssrd': j_to_w,
+    'strd': j_to_w,
+    'tp': m_to_cm,
+}
+
+
+def get_era5(latitude, longitude, start, end, variables, api_key,
+             map_variables=True, timeout=60,
+             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
+    """
+    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.
+
+    A CDS API key is needed to access this API. Register for one at [1]_.
+
+    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
+    the available variables. Data are available on a 0.25° x 0.25° grid.
+
+    Parameters
+    ----------
+    latitude : float
+        In decimal degrees, north is positive (ISO 19115).
+    longitude : float
+        In decimal degrees, east is positive (ISO 19115).
+    start : datetime like or str
+        First day of the requested period.
+    end : datetime like or str
+        Last day of the requested period.
+    variables : list of str
+        List of variable names to retrieve. pvlib names listed in
+        :const:`VARIABLE_MAP` are translated to ERA5 names; any other name
+        is passed to the API unchanged. See [3]_ for options.
+    api_key : str
+        ECMWF CDS API key.
+    map_variables : bool, default True
+        When true, renames columns of the DataFrame to pvlib variable names
+        where applicable. Also converts units of some variables. See variable
+        :const:`VARIABLE_MAP` and :const:`UNITS`.
+    timeout : int, default 60
+        Number of seconds to wait for the requested data to become available
+        before timeout. Also used as the timeout of each HTTP request.
+    url : str, optional
+        API endpoint URL.
+
+    Returns
+    -------
+    data : pd.DataFrame
+        Time series data. The index corresponds to the start of the interval.
+    meta : dict
+        Metadata.
+
+    Raises
+    ------
+    requests.exceptions.Timeout
+        If ``timeout`` is reached without the job finishing.
+    requests.exceptions.HTTPError
+        If fetching the results link or the data file fails.
+    Exception
+        If the request is rejected on submission or the job fails.
+
+    References
+    ----------
+    .. [1] https://cds.climate.copernicus.eu/
+    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
+    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
+    """  # noqa: E501
+    start = pd.to_datetime(start).strftime("%Y-%m-%d")
+    end = pd.to_datetime(end).strftime("%Y-%m-%d")
+
+    headers = {'PRIVATE-TOKEN': api_key}
+
+    # allow variables to be specified with pvlib names. Several ERA5 names
+    # share one pvlib name; the last VARIABLE_MAP entry (long name) wins.
+    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
+    variables = [reverse_map.get(k, k) for k in variables]
+
+    # Step 1: submit data request (add it to the queue)
+    params = {
+        "inputs": {
+            "variable": variables,
+            "location": {"longitude": longitude, "latitude": latitude},
+            "date": [f"{start}/{end}"],
+            "data_format": "csv"
+        }
+    }
+    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
+    response = requests.post(url + slug, json=params, headers=headers,
+                             timeout=timeout)
+    if not response.ok:
+        # likely need to accept license. The error body is not guaranteed
+        # to be JSON, so fall back to the raw text.
+        try:
+            error_detail = response.json()
+        except ValueError:
+            error_detail = response.text
+        raise Exception(error_detail)
+    submission_response = response.json()
+
+    job_id = submission_response['jobID']
+
+    # Step 2: poll until the data request is ready
+    slug = "jobs/" + job_id
+    poll_interval = 1  # seconds between status checks
+    num_polls = 0
+    while True:
+        response = requests.get(url + slug, headers=headers, timeout=timeout)
+        poll_response = response.json()
+        job_status = poll_response['status']
+
+        if job_status == 'successful':
+            break  # ready to proceed to next step
+        elif job_status == 'failed':
+            msg = (
+                'Request failed. Please check the ECMWF website for details: '
+                'https://cds.climate.copernicus.eu/requests?tab=all'
+            )
+            raise Exception(msg)
+
+        num_polls += 1
+        if num_polls * poll_interval > timeout:
+            raise requests.exceptions.Timeout(
+                'Request timed out. Try increasing the timeout parameter or '
+                'reducing the request size.'
+            )
+
+        time.sleep(poll_interval)
+
+    # Step 3: get the download link for our requested dataset
+    slug = "jobs/" + job_id + "/results"
+    response = requests.get(url + slug, headers=headers, timeout=timeout)
+    response.raise_for_status()
+    results_response = response.json()
+    download_url = results_response['asset']['value']['href']
+
+    # Step 4: finally, download our dataset. it's a zipfile of one CSV
+    response = requests.get(download_url, timeout=timeout)
+    response.raise_for_status()
+    # close the archive once the CSV text has been extracted
+    with zipfile.ZipFile(BytesIO(response.content)) as archive:
+        filename = archive.namelist()[0]
+        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
+    df = pd.read_csv(csvbuffer)
+
+    # and parse into the usual formats
+    metadata = submission_response['metadata']  # include messages from ECMWF
+    metadata['jobID'] = job_id
+    if not df.empty:
+        metadata['latitude'] = df['latitude'].values[0]
+        metadata['longitude'] = df['longitude'].values[0]
+
+    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
+    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])
+
+    if map_variables:
+        # convert units and rename
+        for shortname in df.columns:
+            converter = UNITS.get(shortname, same)
+            df[shortname] = converter(df[shortname])
+        df = df.rename(columns=VARIABLE_MAP)
+
+    return df, metadata
diff --git a/tests/conftest.py b/tests/conftest.py
index 0dc957751b..6207001c0a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -130,6 +130,19 @@ def nrel_api_key():
     reason='requires solaranywhere credentials')
 
 
+try:
+    # Attempt to load ECMWF API key used for testing
+    # pvlib.iotools.get_era5
+    ecmwf_api_key = os.environ["ECMWF_API_KEY"]
+    has_ecmwf_credentials = True
+except KeyError:
+    has_ecmwf_credentials = False
+
+requires_ecmwf_credentials = pytest.mark.skipif(
+    not has_ecmwf_credentials,
+    reason='requires ECMWF credentials')
+
+
 try:
     import statsmodels  # noqa: F401
     has_statsmodels = True
diff --git a/tests/iotools/test_era5.py b/tests/iotools/test_era5.py
new file mode 100644
index 0000000000..c9e1fee39a
--- /dev/null
+++ b/tests/iotools/test_era5.py
@@ -0,0 +1,88 @@
+"""
+tests for pvlib/iotools/era5.py
+"""
+
+import pandas as pd
+import pytest
+import pvlib
+import requests
+import os
+from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials
+
+
+@pytest.fixture
+def params():
+    api_key = os.environ["ECMWF_API_KEY"]
+
+    return {
+        'latitude': 40.01, 'longitude': -80.01,
+        'start': '2020-06-01', 'end': '2020-06-01',
+        'variables': ['ghi', 'temp_air'],
+        'api_key': api_key,
+    }
+
+
+@pytest.fixture
+def expected():
+    index = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59", freq="h",
+                          tz="UTC")
+    index.name = 'valid_time'
+    temp_air = [16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4, 8.5,
+                8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9, 21.5,
+                21.6, 21., 20.7]
+    ghi = [153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
+           427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1, 685.2,
+           526.9, 331.4]
+    df = pd.DataFrame({'temp_air': temp_air, 'ghi': ghi}, index=index)
+    return df
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5(params, expected):
+    df, meta = pvlib.iotools.get_era5(**params)
+    pd.testing.assert_frame_equal(df, expected, check_freq=False, atol=0.1)
+    assert meta['longitude'] == -80.0
+    assert meta['latitude'] == 40.0
+    assert isinstance(meta['jobID'], str)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_map_variables(params, expected):
+    df, meta = pvlib.iotools.get_era5(**params, map_variables=False)
+    expected = expected.rename(columns={'temp_air': 't2m', 'ghi': 'ssrd'})
+    df['t2m'] -= 273.15  # apply unit conversions manually
+    df['ssrd'] /= 3600
+    pd.testing.assert_frame_equal(df, expected, check_freq=False, atol=0.1)
+    assert meta['longitude'] == -80.0
+    assert meta['latitude'] == 40.0
+    assert isinstance(meta['jobID'], str)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_error(params):
+    params['variables'] = ['nonexistent']
+    match = 'Request failed. Please check the ECMWF website'
+    with pytest.raises(Exception, match=match):
+        df, meta = pvlib.iotools.get_era5(**params)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_timeout(params):
+    match = 'Request timed out. Try increasing'
+    with pytest.raises(requests.exceptions.Timeout, match=match):
+        df, meta = pvlib.iotools.get_era5(**params, timeout=1)
+
+
+def test_era5_unit_conversions():
+    # offline regression check of the map_variables unit converters
+    assert pvlib.iotools.era5.k_to_c(273.15) == pytest.approx(0.0)
+    assert pvlib.iotools.era5.j_to_w(3600.0) == pytest.approx(1.0)
+    assert pvlib.iotools.era5.m_to_cm(1.0) == pytest.approx(100.0)