Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Have pvlib.iotools.read_solrad return metadata #1968

Merged
merged 7 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/sphinx/source/whatsnew/v0.10.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ v0.10.4 (Anticipated March, 2024)
Enhancements
~~~~~~~~~~~~
* Added the Huld PV model used by PVGIS (:pull:`1940`)

* Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools
convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`)

Bug fixes
~~~~~~~~~
Expand Down
59 changes: 36 additions & 23 deletions pvlib/iotools/solrad.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Functions to read data from the NOAA SOLRAD network.
"""
"""Functions to read data from the NOAA SOLRAD network."""

import numpy as np
import pandas as pd
import requests
import io

# pvlib conventions
BASE_HEADERS = (
Expand Down Expand Up @@ -49,8 +49,15 @@

def read_solrad(filename):
"""
Read NOAA SOLRAD fixed-width file into pandas dataframe. The SOLRAD
network is described in [1]_ and [2]_.
Read NOAA SOLRAD fixed-width file into pandas dataframe.

The SOLRAD network is described in [1]_ and [2]_.

.. versionchanged:: 0.10.4
The function now returns a tuple where the first element is a dataframe
AdamRJensen marked this conversation as resolved.
Show resolved Hide resolved
and the second element is a dictionary containing metadata. Previous
versions of this function only returned a dataframe.


Parameters
----------
Expand All @@ -62,6 +69,8 @@ def read_solrad(filename):
data : DataFrame
A dataframe with DatetimeIndex and all of the variables in the
file.
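metadata : dict
Station metadata parsed from the first two lines of the file, with keys
``'station_name'``, ``'latitude'``, ``'longitude'``, ``'altitude'``
(elevation above mean sea level in meters), and ``'TZ'`` (displacement in
hours from local standard time).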

Notes
-----
Expand Down Expand Up @@ -91,19 +100,28 @@ def read_solrad(filename):
widths = WIDTHS
dtypes = DTYPES

meta = {}

if str(filename).startswith('ftp') or str(filename).startswith('http'):
response = requests.get(filename)
file_buffer = io.StringIO(response.content.decode())
else:
file_buffer = open(str(filename), 'r')

AdamRJensen marked this conversation as resolved.
Show resolved Hide resolved
AdamRJensen marked this conversation as resolved.
Show resolved Hide resolved
# The first line has the name of the station, and the second gives the
# station's latitude, longitude, elevation above mean sea level in meters,
# and the displacement in hours from local standard time.
meta['station_name'] = file_buffer.readline().strip()

meta_line = file_buffer.readline().split()
meta['latitude'] = float(meta_line[0])
meta['longitude'] = float(meta_line[1])
meta['altitude'] = float(meta_line[2])
meta['TZ'] = int(meta_line[3])

# read in data
data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
widths=widths, na_values=-9999.9)

# loop here because dtype kwarg not supported in read_fwf until 0.20
for (col, _dtype) in zip(data.columns, dtypes):
ser = data[col].astype(_dtype)
if _dtype == 'float64':
# older versions of pandas/numpy read '-9999.9' as
# -9999.8999999999996 and fail to set nan in read_fwf,
# so manually set nan
ser = ser.where(ser > -9999, other=np.nan)
data[col] = ser
data = pd.read_fwf(file_buffer, header=None, names=names,
widths=widths, na_values=-9999.9, dtypes=dtypes)

# set index
# columns do not have leading 0s, so must zfill(2) to comply
Expand All @@ -114,10 +132,5 @@ def read_solrad(filename):
data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
dts['minute'], format='%Y%m%d%H%M', utc=True)
data = data.set_index(dtindex)
try:
# to_datetime(utc=True) does not work in older versions of pandas
data = data.tz_localize('UTC')
except TypeError:
pass

return data
return data, meta
32 changes: 25 additions & 7 deletions pvlib/tests/iotools/test_solrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import pytest

from pvlib.iotools import solrad
from ..conftest import DATA_DIR, assert_frame_equal
from ..conftest import DATA_DIR, assert_frame_equal, RERUNS, RERUNS_DELAY


testfile = DATA_DIR / 'abq19056.dat'
testfile_mad = DATA_DIR / 'msn19056.dat'

https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/'
'2019/msn19056.dat')

columns = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
Expand Down Expand Up @@ -87,15 +88,32 @@
'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
'float64', 'float64']
meta = {'station_name': 'Albuquerque', 'latitude': 35.03796,
'longitude': -106.62211, 'altitude': 1617, 'TZ': -7}
meta_mad = {'station_name': 'Madison', 'latitude': 43.07250,
'longitude': -89.41133, 'altitude': 271, 'TZ': -6}


@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
(testfile, index, columns, values, dtypes),
(testfile_mad, index, columns_mad, values_mad, dtypes_mad)
@pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [
(testfile, index, columns, values, dtypes, meta),
(testfile_mad, index, columns_mad, values_mad, dtypes_mad, meta_mad)
])
def test_read_solrad(testfile, index, columns, values, dtypes):
def test_read_solrad(testfile, index, columns, values, dtypes, meta):
expected = pd.DataFrame(values, columns=columns, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = solrad.read_solrad(testfile)
out, m = solrad.read_solrad(testfile)
assert_frame_equal(out, expected)
assert m == meta


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_read_solrad_https():
# Test reading of https files.
# If this test begins failing, SOLRAD's data structure or data
# archive may have changed.
local_data, _ = solrad.read_solrad(testfile_mad)
remote_data, _ = solrad.read_solrad(https_testfile)
# local file only contains four rows to save space
assert_frame_equal(local_data, remote_data.iloc[:4])