Skip to content

Commit

Permalink
Have pvlib.iotools.read_solrad return metadata (#1968)
Browse files Browse the repository at this point in the history
* Remove junk

* Add function

* Add tests

* Update v0.10.4.rst

* Remove double testfile

* Change elevation to altitude

* use "with open" from code review
  • Loading branch information
AdamRJensen committed Mar 5, 2024
1 parent 09cf445 commit 33045d2
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 31 deletions.
3 changes: 2 additions & 1 deletion docs/sphinx/source/whatsnew/v0.10.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ v0.10.4 (Anticipated March, 2024)
Enhancements
~~~~~~~~~~~~
* Added the Huld PV model used by PVGIS (:pull:`1940`)

* Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools
convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`)

Bug fixes
~~~~~~~~~
Expand Down
60 changes: 37 additions & 23 deletions pvlib/iotools/solrad.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Functions to read data from the NOAA SOLRAD network.
"""
"""Functions to read data from the NOAA SOLRAD network."""

import numpy as np
import pandas as pd
import requests
import io

# pvlib conventions
BASE_HEADERS = (
Expand Down Expand Up @@ -49,8 +49,15 @@

def read_solrad(filename):
"""
Read NOAA SOLRAD fixed-width file into pandas dataframe. The SOLRAD
network is described in [1]_ and [2]_.
Read NOAA SOLRAD fixed-width file into pandas dataframe.
The SOLRAD network is described in [1]_ and [2]_.
.. versionchanged:: 0.10.4
The function now returns a tuple where the first element is a dataframe
and the second element is a dictionary containing metadata. Previous
versions of this function only returned a dataframe.
Parameters
----------
Expand All @@ -62,6 +69,8 @@ def read_solrad(filename):
data: Dataframe
A dataframe with DatetimeIndex and all of the variables in the
file.
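metadata : dict
Station metadata parsed from the first two lines of the file, with keys
``'station_name'``, ``'latitude'``, ``'longitude'``, ``'altitude'`` and ``'TZ'``.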
Notes
-----
Expand Down Expand Up @@ -91,19 +100,29 @@ def read_solrad(filename):
widths = WIDTHS
dtypes = DTYPES

meta = {}

if str(filename).startswith('ftp') or str(filename).startswith('http'):
response = requests.get(filename)
file_buffer = io.StringIO(response.content.decode())
else:
with open(str(filename), 'r') as file_buffer:
file_buffer = io.StringIO(file_buffer.read())

# The first line has the name of the station, and the second gives the
# station's latitude, longitude, elevation above mean sea level in meters,
# and the displacement in hours from local standard time.
meta['station_name'] = file_buffer.readline().strip()

meta_line = file_buffer.readline().split()
meta['latitude'] = float(meta_line[0])
meta['longitude'] = float(meta_line[1])
meta['altitude'] = float(meta_line[2])
meta['TZ'] = int(meta_line[3])

# read in data
data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
widths=widths, na_values=-9999.9)

# loop here because dtype kwarg not supported in read_fwf until 0.20
for (col, _dtype) in zip(data.columns, dtypes):
ser = data[col].astype(_dtype)
if _dtype == 'float64':
# older versions of pandas/numpy read '-9999.9' as
# -9999.8999999999996 and fail to set nan in read_fwf,
# so manually set nan
ser = ser.where(ser > -9999, other=np.nan)
data[col] = ser
data = pd.read_fwf(file_buffer, header=None, names=names,
widths=widths, na_values=-9999.9, dtypes=dtypes)

# set index
# columns do not have leading 0s, so must zfill(2) to comply
Expand All @@ -114,10 +133,5 @@ def read_solrad(filename):
data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
dts['minute'], format='%Y%m%d%H%M', utc=True)
data = data.set_index(dtindex)
try:
# to_datetime(utc=True) does not work in older versions of pandas
data = data.tz_localize('UTC')
except TypeError:
pass

return data
return data, meta
32 changes: 25 additions & 7 deletions pvlib/tests/iotools/test_solrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import pytest

from pvlib.iotools import solrad
from ..conftest import DATA_DIR, assert_frame_equal
from ..conftest import DATA_DIR, assert_frame_equal, RERUNS, RERUNS_DELAY


testfile = DATA_DIR / 'abq19056.dat'
testfile_mad = DATA_DIR / 'msn19056.dat'

https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/'
'2019/msn19056.dat')

columns = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
Expand Down Expand Up @@ -87,15 +88,32 @@
'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
'float64', 'float64']
# Expected metadata dictionaries for the Albuquerque (``testfile``) and
# Madison (``testfile_mad``) test files; test_read_solrad compares these
# against the second element returned by solrad.read_solrad.
meta = {'station_name': 'Albuquerque', 'latitude': 35.03796,
'longitude': -106.62211, 'altitude': 1617, 'TZ': -7}
meta_mad = {'station_name': 'Madison', 'latitude': 43.07250,
'longitude': -89.41133, 'altitude': 271, 'TZ': -6}


@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
(testfile, index, columns, values, dtypes),
(testfile_mad, index, columns_mad, values_mad, dtypes_mad)
@pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [
(testfile, index, columns, values, dtypes, meta),
(testfile_mad, index, columns_mad, values_mad, dtypes_mad, meta_mad)
])
def test_read_solrad(testfile, index, columns, values, dtypes):
def test_read_solrad(testfile, index, columns, values, dtypes, meta):
expected = pd.DataFrame(values, columns=columns, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = solrad.read_solrad(testfile)
out, m = solrad.read_solrad(testfile)
assert_frame_equal(out, expected)
assert m == meta


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_read_solrad_https():
    """Check that a SOLRAD file read over https matches the local copy.

    If this test begins failing, SOLRAD's data structure or data archive
    may have changed.
    """
    expected_frame, _ = solrad.read_solrad(testfile_mad)
    fetched_frame, _ = solrad.read_solrad(https_testfile)
    # The local file only contains four rows to save space, so only the
    # first four rows of the remote file are compared.
    first_rows = fetched_frame.iloc[:4]
    assert_frame_equal(expected_frame, first_rows)

0 comments on commit 33045d2

Please sign in to comment.