Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/sphinx/source/reference/iotools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,18 @@ lower quality.
iotools.read_crn


MERRA-2
^^^^^^^

A global reanalysis dataset providing weather, aerosol, and solar irradiance
data.

.. autosummary::
:toctree: generated/

iotools.get_merra2


Generic data file readers
-------------------------

Expand Down
4 changes: 3 additions & 1 deletion docs/sphinx/source/whatsnew/v0.13.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Enhancements
:py:func:`~pvlib.singlediode.bishop88_mpp`,
:py:func:`~pvlib.singlediode.bishop88_v_from_i`, and
:py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)

* Add :py:func:`~pvlib.iotools.get_merra2`, a function for accessing
MERRA-2 reanalysis data. (:pull:`2572`)


Documentation
Expand All @@ -53,4 +54,5 @@ Maintenance

Contributors
~~~~~~~~~~~~
* Kevin Anderson (:ghuser:`kandersolar`)

1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@
from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
from pvlib.iotools.merra2 import get_merra2 # noqa: F401
154 changes: 154 additions & 0 deletions pvlib/iotools/merra2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import pandas as pd
import requests
from io import StringIO


# Translation from MERRA-2 variable names to pvlib column names, applied by
# get_merra2 when ``map_variables=True``.  Variables that are not listed
# here keep their MERRA-2 name.
VARIABLE_MAP = {
    # irradiance and surface albedo
    'SWGDN': 'ghi',
    'SWGDNCLR': 'ghi_clear',
    'ALBEDO': 'albedo',
    # near-surface meteorology
    'T2M': 'temp_air',
    'T2MDEW': 'temp_dew',
    'PS': 'pressure',
    # aerosols
    'TOTEXTTAU': 'aod550',
}


def _to_utc_naive(dt):
    """Return ``dt`` as a ``pd.Timestamp`` in UTC with the timezone dropped."""
    dt = pd.to_datetime(dt)
    if dt.tzinfo is not None:
        # convert to utc, then drop tz so that isoformat() is clean
        dt = dt.tz_convert("UTC").tz_localize(None)
    return dt


def _parse_merra2_csv(content, dataset, map_variables):
    """Convert the CSV text of a point request into ``(data, meta)``."""
    df = pd.read_csv(StringIO(content))
    df.index = pd.to_datetime(df['time'])

    lat_col = 'latitude[unit="degrees_north"]'
    lon_col = 'longitude[unit="degrees_east"]'

    # these columns repeat the same value on every row, so the first row
    # is enough for the metadata
    meta = {
        'dataset': dataset,
        'station': df['station'].values[0],
        'latitude': df[lat_col].values[0],
        'longitude': df[lon_col].values[0],
    }

    # drop the non-data columns
    df = df.drop(columns=['time', 'station', lat_col, lon_col])

    # column names are like T2M[unit="K"] by default. extract the unit
    # for the metadata, then rename col to just T2M.  A column without
    # the bracketed suffix keeps its name and gets a unit of None instead
    # of failing on tuple unpacking / indexing.
    units = {}
    rename = {}
    for col in df.columns:
        name, _, suffix = col.partition("[")
        quoted = suffix.split('"')
        units[name] = quoted[1] if len(quoted) > 1 else None
        rename[col] = name

    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta


def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological data from
    NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work. See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : list of str
        List of variable names to retrieve. See the documentation of the
        specific dataset you are accessing for options.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the middle of the interval.
    meta : dict
        Metadata with keys ``'dataset'``, ``'station'``, ``'latitude'``,
        ``'longitude'`` and ``'units'``. ``'units'`` maps each retrieved
        variable, under its original MERRA-2 name (regardless of
        ``map_variables``), to the unit string reported in the response,
        or to ``None`` if the response gives no unit for it.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years, when converted to UTC.
    requests.HTTPError
        If the server answers the data request with an error status.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    - M2T1NXRAD.5.12.4: SWGDN, SWGDNCLR, ALBEDO
    - M2T1NXSLV.5.12.4: T2M, U10M, V10M, T2MDEW, PS
    - M2T1NXAER.5.12.4: TOTEXTTAU

    Note that MERRA2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
    """

    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    start = _to_utc_naive(start)
    end = _to_utc_naive(end)

    # the URL below addresses a single yearly aggregation, so a request
    # cannot span two calendar years
    if start.year != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")

    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{start.year}.ncml"
    )

    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

    with requests.Session() as session:
        session.auth = auth
        # The second request goes to whatever URL the first one ended at,
        # this time with the credentials passed explicitly.
        # NOTE(review): presumably the first request is redirected to the
        # EarthData login service -- confirm before changing this sequence.
        login = session.request('get', url, params=parameters)
        response = session.get(login.url, auth=auth, params=parameters)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    return _parse_merra2_csv(content, dataset, map_variables)
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ def nrel_api_key():
reason='requires solaranywhere credentials')


# NASA EarthData credentials used for testing pvlib.iotools.get_merra2.
# Both environment variables must be present; if either lookup fails the
# credentials are treated as unavailable.
try:
    earthdata_username = os.environ["EARTHDATA_USERNAME"]
    earthdata_password = os.environ["EARTHDATA_PASSWORD"]
except KeyError:
    has_earthdata_credentials = False
else:
    has_earthdata_credentials = True

# marker that skips a test when the credentials above could not be loaded
requires_earthdata_credentials = pytest.mark.skipif(
    not has_earthdata_credentials,
    reason='requires EarthData credentials',
)


try:
import statsmodels # noqa: F401
has_statsmodels = True
Expand Down
83 changes: 83 additions & 0 deletions tests/iotools/test_merra2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
tests for pvlib/iotools/merra2.py
"""

import pandas as pd
import pytest
import pvlib
import os
from tests.conftest import RERUNS, RERUNS_DELAY, requires_earthdata_credentials


@pytest.fixture
def params():
    """Keyword arguments for a small ``get_merra2`` request.

    The credentials are read from the ``EARTHDATA_USERNAME`` and
    ``EARTHDATA_PASSWORD`` environment variables.
    """
    credentials = {
        'username': os.environ["EARTHDATA_USERNAME"],
        'password': os.environ["EARTHDATA_PASSWORD"],
    }
    request = {
        'latitude': 40.01,
        'longitude': -80.01,
        'start': '2020-06-01 15:00',
        'end': '2020-06-01 20:00',
        'dataset': 'M2T1NXRAD.5.12.4',
        'variables': ['ALBEDO', 'SWGDN'],
    }
    return {**request, **credentials}


@pytest.fixture
def expected():
    """Expected data (pvlib column names) for the ``params`` request."""
    times = pd.date_range("2020-06-01 15:30", "2020-06-01 20:30", freq="h",
                          tz="UTC", name='time')
    columns = {
        'albedo': [0.163931, 0.1609407, 0.1601474, 0.1612476, 0.164664,
                   0.1711341],
        'ghi': [930., 1002.75, 1020.25, 981.25, 886.5, 743.5],
    }
    return pd.DataFrame(columns, index=times)


@pytest.fixture
def expected_meta():
    """Expected metadata dict for the ``params`` request."""
    units = {'ALBEDO': '1', 'SWGDN': 'W m-2'}
    return dict(
        dataset='M2T1NXRAD.5.12.4',
        station='GridPointRequestedAt[40.010N_80.010W]',
        latitude=40.0,
        longitude=-80.0,
        units=units,
    )


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2(params, expected, expected_meta):
    """A default call returns the expected data and metadata."""
    data, metadata = pvlib.iotools.get_merra2(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False)
    assert metadata == expected_meta


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_map_variables(params, expected, expected_meta):
    """With ``map_variables=False`` the MERRA-2 column names are kept."""
    data, metadata = pvlib.iotools.get_merra2(**params, map_variables=False)
    merra2_names = {'albedo': 'ALBEDO', 'ghi': 'SWGDN'}
    unmapped = expected.rename(columns=merra2_names)
    pd.testing.assert_frame_equal(data, unmapped, check_freq=False)
    assert metadata == expected_meta


def test_get_merra2_error():
    """A request whose start and end fall in different years is rejected."""
    credentials = dict(username='anything', password='anything')
    with pytest.raises(ValueError, match='must be in the same year'):
        pvlib.iotools.get_merra2(40, -80, '2019-12-31', '2020-01-02',
                                 dataset='anything', variables=[],
                                 **credentials)


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_timezones(params, expected, expected_meta):
    """Timezone-aware ``start``/``end`` inputs give the same result."""
    # express both bounds in a non-UTC zone without changing the instant
    shifted = {}
    for bound in ('start', 'end'):
        as_utc = pd.to_datetime(params[bound]).tz_localize('UTC')
        shifted[bound] = as_utc.tz_convert('Etc/GMT+5')
    params.update(shifted)

    data, metadata = pvlib.iotools.get_merra2(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False)
    assert metadata == expected_meta
Loading