-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Add function for accessing ERA5 #2573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b274e1e
d3bd426
0b5ba09
396833b
749fd5e
6828769
8c1ab6d
fd7f06d
8f4da88
9906815
ee7474d
f34309e
3c8f2f2
a0aa2c8
ac6fe82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
import requests | ||
import pandas as pd | ||
from io import BytesIO, StringIO | ||
import zipfile | ||
import time | ||
|
||
|
||
# One row per variable: (ERA5 short name, ERA5 long name, pvlib name).
_ERA5_NAME_TABLE = (
    ('d2m', '2m_dewpoint_temperature', 'temp_dew'),
    ('t2m', '2m_temperature', 'temp_air'),
    ('sp', 'surface_pressure', 'pressure'),
    ('ssrd', 'surface_solar_radiation_downwards', 'ghi'),
    ('tp', 'total_precipitation', 'precipitation'),
)

# Maps both ERA5 spellings of a variable to its pvlib name. Short names are
# inserted first and long names second, so key order matches a hand-written
# literal listing all short names followed by all long names.
VARIABLE_MAP = {
    **{short_name: pvlib_name
       for short_name, _, pvlib_name in _ERA5_NAME_TABLE},
    **{long_name: pvlib_name
       for _, long_name, pvlib_name in _ERA5_NAME_TABLE},
}
|
||
|
||
def same(x):
    """Identity converter: hand ``x`` back without any unit change."""
    unchanged = x
    return unchanged
|
||
|
||
def k_to_c(temp_k):
    """Convert a temperature from kelvin to degrees Celsius."""
    kelvin_offset = 273.15
    return temp_k - kelvin_offset
|
||
|
||
def j_to_w(j):
    """Convert energy accumulated over one hour (J) to mean power (W)."""
    seconds_per_hour = 3600
    return j / seconds_per_hour
|
||
|
||
def m_to_cm(m):
    """
    Convert a length from meters to centimeters.

    Parameters
    ----------
    m : numeric or array-like
        Length in meters.

    Returns
    -------
    Same type as ``m``
        Length in centimeters.
    """
    # 1 m == 100 cm, so the conversion multiplies (dividing would yield
    # hundreds of meters instead).
    return m * 100
|
||
|
||
# Unit converter applied to each ERA5 column, keyed by ERA5 short name.
# Columns that are not listed here are left unconverted by the caller.
UNITS = dict(
    # wind components
    u100=same,
    v100=same,
    u10=same,
    v10=same,
    # temperatures and pressures
    d2m=k_to_c,
    t2m=k_to_c,
    msl=same,
    sst=k_to_c,
    skt=k_to_c,
    sp=same,
    # hourly accumulations
    ssrd=j_to_w,
    strd=j_to_w,
    tp=m_to_cm,
)
|
||
Comment on lines
+25
to
+56
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefix all of this with |
||
|
||
def _era5_utc_date(when):
    """Format ``when`` as a ``YYYY-MM-DD`` date, expressed in UTC if aware."""
    when = pd.to_datetime(when)
    if when.tzinfo is not None:
        # The returned index is labeled UTC, so tz-aware bounds are moved to
        # UTC before being truncated to a date. Naive values are used as-is.
        when = when.tz_convert('UTC')
    return when.strftime("%Y-%m-%d")


def _era5_json(response):
    """Decode a response body as JSON, preferring an HTTP error if it isn't."""
    try:
        return response.json()
    except ValueError:
        # A non-JSON body on an error status (e.g. a gateway error page) is
        # better reported as the HTTP error than as a JSON decoding failure.
        response.raise_for_status()
        raise


def _era5_read_zipped_csv(content):
    """Read the first member of an in-memory zip archive as a CSV table."""
    with zipfile.ZipFile(BytesIO(content)) as archive:
        filename = archive.namelist()[0]
        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
    return pd.read_csv(csvbuffer)


def get_era5(latitude, longitude, start, end, variables, api_key,
             map_variables=True, timeout=60,
             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
    """
    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.

    A CDS API key is needed to access this API. Register for one at [1]_.

    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
    the available variables. Data are available on a 0.25° x 0.25° grid.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude: float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First day of the requested period. Timezone-aware values are
        converted to UTC before the date is taken.
    end : datetime like or str
        Last day of the requested period. Timezone-aware values are
        converted to UTC before the date is taken.
    variables : list of str
        List of variable names to retrieve. ERA5 names and the pvlib names
        in :const:`VARIABLE_MAP` are accepted; a single name may be given
        as a plain string. See [3]_ for options.
    api_key : str
        ECMWF CDS API key.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. Also converts units of some variables. See variable
        :const:`VARIABLE_MAP` and :const:`UNITS`.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become available
        before timeout. Also used as the timeout of each HTTP request.
    url : str, optional
        API endpoint URL.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the start of the interval.
    meta : dict
        Metadata.

    Raises
    ------
    requests.exceptions.Timeout
        If ``timeout`` is reached without the job finishing.
    requests.exceptions.HTTPError
        If the server answers with an error status and a non-JSON body, or
        if downloading the result file fails.
    Exception
        If the request is rejected by the server or the job fails.

    References
    ----------
    .. [1] https://cds.climate.copernicus.eu/
    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
    """  # noqa: E501
    start = _era5_utc_date(start)
    end = _era5_utc_date(end)

    headers = {'PRIVATE-TOKEN': api_key}

    # a bare string would otherwise be iterated character by character
    if isinstance(variables, str):
        variables = [variables]

    # allow variables to be specified with pvlib names
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]

    # Step 1: submit data request (add it to the queue)
    params = {
        "inputs": {
            "variable": variables,
            "location": {"longitude": longitude, "latitude": latitude},
            "date": [f"{start}/{end}"],
            "data_format": "csv"
        }
    }
    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers,
                             timeout=timeout)
    submission_response = _era5_json(response)
    if not response.ok:
        raise Exception(submission_response)  # likely need to accept license

    job_id = submission_response['jobID']

    # Step 2: poll until the data request is ready
    slug = "jobs/" + job_id
    poll_interval = 1  # seconds between status checks
    polling_started = time.monotonic()
    while True:
        response = requests.get(url + slug, headers=headers, timeout=timeout)
        poll_response = _era5_json(response)
        job_status = poll_response.get('status')

        if job_status == 'successful':
            break  # ready to proceed to next step
        if job_status == 'failed':
            msg = (
                'Request failed. Please check the ECMWF website for details: '
                'https://cds.climate.copernicus.eu/requests?tab=all'
            )
            raise Exception(msg)
        if not response.ok:
            # an error answer will never turn into a finished job, so stop
            # now instead of polling until the timeout expires
            raise Exception(poll_response)

        # measure elapsed wall time so that slow polls count toward timeout
        if time.monotonic() - polling_started > timeout:
            raise requests.exceptions.Timeout(
                'Request timed out. Try increasing the timeout parameter or '
                'reducing the request size.'
            )

        time.sleep(poll_interval)

    # Step 3: get the download link for our requested dataset
    slug = "jobs/" + job_id + "/results"
    response = requests.get(url + slug, headers=headers, timeout=timeout)
    results_response = _era5_json(response)
    if not response.ok:
        raise Exception(results_response)
    download_url = results_response['asset']['value']['href']

    # Step 4: finally, download our dataset. it's a zipfile of one CSV
    response = requests.get(download_url, timeout=timeout)
    response.raise_for_status()
    df = _era5_read_zipped_csv(response.content)

    # and parse into the usual formats
    # include messages from ECMWF; copy so the response payload is not mutated
    metadata = dict(submission_response.get('metadata') or {})
    metadata['jobID'] = job_id
    if not df.empty:
        metadata['latitude'] = df['latitude'].values[0]
        metadata['longitude'] = df['longitude'].values[0]

    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])

    if map_variables:
        # convert units and rename
        for shortname in df.columns:
            converter = UNITS.get(shortname, same)
            df[shortname] = converter(df[shortname])
        df = df.rename(columns=VARIABLE_MAP)

    return df, metadata
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
""" | ||
tests for pvlib/iotools/era5.py | ||
""" | ||
|
||
import pandas as pd | ||
import pytest | ||
import pvlib | ||
import requests | ||
import os | ||
from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials | ||
|
||
|
||
@pytest.fixture
def params():
    """Keyword arguments for a one-day, two-variable ``get_era5`` request."""
    request_kwargs = dict(
        latitude=40.01,
        longitude=-80.01,
        start='2020-06-01',
        end='2020-06-01',
        variables=['ghi', 'temp_air'],
    )
    # the key is read from the environment rather than stored in the test
    request_kwargs['api_key'] = os.environ["ECMWF_API_KEY"]
    return request_kwargs
|
||
|
||
@pytest.fixture
def expected():
    """Reference hourly frame for 2020-06-01 (UTC) with pvlib column names."""
    hours = pd.date_range("2020-06-01 00:00", periods=24, freq="h",
                          tz="UTC", name='valid_time')
    columns = {
        'temp_air': [
            16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4, 8.5,
            8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9, 21.5,
            21.6, 21., 20.7,
        ],
        'ghi': [
            153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
            427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1, 685.2,
            526.9, 331.4,
        ],
    }
    return pd.DataFrame(columns, index=hours)
|
||
|
||
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5(params, expected):
    """Default call returns the reference frame and the expected metadata."""
    data, metadata = pvlib.iotools.get_era5(**params)
    pd.testing.assert_frame_equal(
        data, expected, check_freq=False, atol=0.1,
    )
    # the requested point (40.01, -80.01) is reported as (40.0, -80.0)
    assert metadata['longitude'] == -80.0
    assert metadata['latitude'] == 40.0
    assert isinstance(metadata['jobID'], str)
|
||
|
||
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_map_variables(params, expected):
    """With ``map_variables=False`` columns keep ERA5 names and raw units."""
    raw, metadata = pvlib.iotools.get_era5(**params, map_variables=False)
    unmapped = expected.rename(columns={'temp_air': 't2m', 'ghi': 'ssrd'})

    # apply unit conversions manually so the reference values can be reused
    raw['t2m'] = raw['t2m'] - 273.15
    raw['ssrd'] = raw['ssrd'] / 3600

    pd.testing.assert_frame_equal(
        raw, unmapped, check_freq=False, atol=0.1,
    )
    assert metadata['longitude'] == -80.0
    assert metadata['latitude'] == 40.0
    assert isinstance(metadata['jobID'], str)
|
||
|
||
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_error(params):
    """Requesting an unknown variable raises the 'Request failed' error."""
    bad_request = {**params, 'variables': ['nonexistent']}
    expected_message = 'Request failed. Please check the ECMWF website'
    with pytest.raises(Exception, match=expected_message):
        pvlib.iotools.get_era5(**bad_request)
|
||
|
||
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timeout(params):
    """A one-second ``timeout`` raises ``requests.exceptions.Timeout``."""
    expected_message = 'Request timed out. Try increasing'
    timeout_error = requests.exceptions.Timeout
    with pytest.raises(timeout_error, match=expected_message):
        pvlib.iotools.get_era5(**params, timeout=1)
Uh oh!
There was an error while loading. Please reload this page.