diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst index 12db7d6818..b2b7826113 100644 --- a/docs/sphinx/source/reference/iotools.rst +++ b/docs/sphinx/source/reference/iotools.rst @@ -237,6 +237,18 @@ lower quality. iotools.read_crn +MERRA-2 +^^^^^^^ + +A global reanalysis dataset providing weather, aerosol, and solar irradiance +data. + +.. autosummary:: + :toctree: generated/ + + iotools.get_merra2 + + Generic data file readers ------------------------- diff --git a/docs/sphinx/source/whatsnew/v0.13.2.rst b/docs/sphinx/source/whatsnew/v0.13.2.rst index e12f7277e5..1edc8d77b7 100644 --- a/docs/sphinx/source/whatsnew/v0.13.2.rst +++ b/docs/sphinx/source/whatsnew/v0.13.2.rst @@ -27,7 +27,8 @@ Enhancements :py:func:`~pvlib.singlediode.bishop88_mpp`, :py:func:`~pvlib.singlediode.bishop88_v_from_i`, and :py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`) - +* Add :py:func:`~pvlib.iotools.get_merra2`, a function for accessing + MERRA-2 reanalysis data. 
import pandas as pd
from io import StringIO


# MERRA-2 variable names -> pvlib variable names
VARIABLE_MAP = {
    'SWGDN': 'ghi',
    'SWGDNCLR': 'ghi_clear',
    'ALBEDO': 'albedo',
    'T2M': 'temp_air',
    'T2MDEW': 'temp_dew',
    'PS': 'pressure',
    'TOTEXTTAU': 'aod550',
}

# non-data columns that the CSV response carries next to the variables
_LATITUDE_COLUMN = 'latitude[unit="degrees_north"]'
_LONGITUDE_COLUMN = 'longitude[unit="degrees_east"]'


def _to_utc_naive(dt):
    """Return ``dt`` as a tz-naive timestamp; tz-aware input is first
    converted to UTC, tz-naive input is returned as parsed."""
    dt = pd.to_datetime(dt)
    if dt.tzinfo is not None:
        # convert to utc, then drop tz so that isoformat() is clean
        dt = dt.tz_convert("UTC").tz_localize(None)
    return dt


def _split_unit(column):
    """Split a label like ``T2M[unit="K"]`` into ``('T2M', 'K')``.

    The unit is ``None`` when the label carries no quoted unit.
    """
    name, _, remainder = column.partition("[")
    pieces = remainder.split('"')
    unit = pieces[1] if len(pieces) > 1 else None
    return name, unit


def _parse_merra2_csv(content, dataset, map_variables):
    """Turn the CSV text of a response into ``(data, meta)``."""
    df = pd.read_csv(StringIO(content))
    df.index = pd.to_datetime(df['time'])

    meta = {
        'dataset': dataset,
        'station': df['station'].values[0],
        'latitude': df[_LATITUDE_COLUMN].values[0],
        'longitude': df[_LONGITUDE_COLUMN].values[0],
    }

    # drop the non-data columns
    df = df.drop(
        columns=['time', 'station', _LATITUDE_COLUMN, _LONGITUDE_COLUMN])

    # column names are like T2M[unit="K"] by default. extract the unit
    # for the metadata, then rename col to just T2M
    units = {}
    rename = {}
    for col in df.columns:
        name, unit = _split_unit(col)
        units[name] = unit
        rename[col] = name

    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta


def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True, timeout=30):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological data from
    NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work. See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : str or list of str
        Variable name(s) to retrieve. A single name may be given as a
        plain string. See the documentation of the specific dataset you
        are accessing for options.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    timeout : int or float, default 30
        Time in seconds passed to each HTTP request as its timeout.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the middle of the interval.
    meta : dict
        Metadata. Contains the keys ``'dataset'``, ``'station'``,
        ``'latitude'``, ``'longitude'`` and ``'units'``; units are keyed
        by the original (unmapped) variable names.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years, when converted to UTC.
    requests.HTTPError
        If the final response reports an HTTP error status.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    - M2T1NXRAD.5.12.4: SWGDN, SWGDNCLR, ALBEDO
    - M2T1NXSLV.5.12.4: T2M, U10M, V10M, T2MDEW, PS
    - M2T1NXAER.5.12.4: TOTEXTTAU

    Note that MERRA2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
    """

    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    start = _to_utc_naive(start)
    end = _to_utc_naive(end)

    # the request URL below names a single year, so one call cannot
    # span a year boundary
    if start.year != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")
    year = start.year

    # a bare string would otherwise be joined character by character
    if isinstance(variables, str):
        variables = [variables]

    # imported here so the validation above runs without the HTTP stack
    import requests

    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{year}.ncml"
    )

    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

    # NOTE(review): the two-step request (fetch once, then re-request the
    # resolved URL with credentials) looks like the EarthData login
    # redirect pattern -- confirm against the GES DISC access guide.
    with requests.Session() as session:
        session.auth = auth
        login = session.request('get', url, params=parameters,
                                timeout=timeout)
        response = session.get(login.url, auth=auth, params=parameters,
                               timeout=timeout)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    return _parse_merra2_csv(content, dataset, map_variables)
"""
tests for pvlib/iotools/merra2.py
"""

import os

import pandas as pd
import pytest

import pvlib
from tests.conftest import RERUNS, RERUNS_DELAY, requires_earthdata_credentials


def _assert_result(result, expected_frame, expected_metadata):
    """Compare a ``(data, meta)`` result against the expected pair."""
    frame, metadata = result
    pd.testing.assert_frame_equal(frame, expected_frame, check_freq=False)
    assert metadata == expected_metadata


@pytest.fixture
def params():
    """Keyword arguments for a short single-point M2T1NXRAD request."""
    credentials = {
        'username': os.environ["EARTHDATA_USERNAME"],
        'password': os.environ["EARTHDATA_PASSWORD"],
    }
    request = {
        'latitude': 40.01,
        'longitude': -80.01,
        'start': '2020-06-01 15:00',
        'end': '2020-06-01 20:00',
        'dataset': 'M2T1NXRAD.5.12.4',
        'variables': ['ALBEDO', 'SWGDN'],
    }
    return {**request, **credentials}


@pytest.fixture
def expected():
    """Expected data (pvlib column names) for the ``params`` request."""
    columns = {
        'albedo': [0.163931, 0.1609407, 0.1601474,
                   0.1612476, 0.164664, 0.1711341],
        'ghi': [930., 1002.75, 1020.25, 981.25, 886.5, 743.5],
    }
    times = pd.date_range("2020-06-01 15:30", "2020-06-01 20:30", freq="h",
                          tz="UTC")
    times.name = 'time'
    return pd.DataFrame(columns, index=times)


@pytest.fixture
def expected_meta():
    """Expected metadata for the ``params`` request."""
    units = {'ALBEDO': '1', 'SWGDN': 'W m-2'}
    return {
        'dataset': 'M2T1NXRAD.5.12.4',
        'station': 'GridPointRequestedAt[40.010N_80.010W]',
        'latitude': 40.0,
        'longitude': -80.0,
        'units': units,
    }


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2(params, expected, expected_meta):
    result = pvlib.iotools.get_merra2(**params)
    _assert_result(result, expected, expected_meta)


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_map_variables(params, expected, expected_meta):
    result = pvlib.iotools.get_merra2(**params, map_variables=False)
    # without mapping, the columns keep their MERRA-2 names
    unmapped = expected.rename(columns={'albedo': 'ALBEDO', 'ghi': 'SWGDN'})
    _assert_result(result, unmapped, expected_meta)


def test_get_merra2_error():
    # raised before any request is made, so no credentials are needed
    with pytest.raises(ValueError, match='must be in the same year'):
        pvlib.iotools.get_merra2(40, -80, '2019-12-31', '2020-01-02',
                                 username='anything', password='anything',
                                 dataset='anything', variables=[])


@requires_earthdata_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2_timezones(params, expected, expected_meta):
    # check with tz-aware start/end inputs
    for bound in ('start', 'end'):
        naive = pd.to_datetime(params[bound])
        params[bound] = naive.tz_localize('UTC').tz_convert('Etc/GMT+5')
    result = pvlib.iotools.get_merra2(**params)
    _assert_result(result, expected, expected_meta)