In [4]:
%matplotlib inline
import h5pyd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy.spatial import cKDTree

## Get file from NSRDB

Following examples from notebooks here: https://github.com/NREL/hsds-examples/blob/master/notebooks/03_NSRDB_introduction.ipynb

For more information on what data is available from the files, see the NSRDB documentation @ NREL: https://developer.nrel.gov/docs/

Actual documentation on how to use the h5py dataset object: https://docs.h5py.org/en/stable/quick.html#quick


For this to work you must first install h5pyd:

pip install --user h5pyd
Next you'll need to configure HSDS:

    `hsconfigure`
and enter at the prompt:

    hs_endpoint = https://developer.nrel.gov/api/hsds
    hs_username = None
    hs_password = None
    hs_api_key = ul1bgjdq34XTFiAN4rOh8eadBuJhUtsaEFMyWoJr
    The example API key here is for demonstation and is rate-limited per IP. To get your own API key, visit https://developer.nrel.gov/signup/

You can also add the above contents to a configuration file at ~/.hscfg

Or they can be passed in like as we've done below.

In [5]:
# Open the desired year of nsrdb data
# server endpoint, username, password is found via a config file
f = h5pyd.File("/nrel/nsrdb/v3/nsrdb_2012.h5", 'r', 'https://developer.nrel.gov/api/hsds', None, None, None, 'ul1bgjdq34XTFiAN4rOh8eadBuJhUtsaEFMyWoJr')

In [6]:
list(f.attrs)  # list attributes belonging to the root group

['Version']

In [7]:
f.attrs['Version']   # attributes can be used to provide desriptions of the content

'3.0.6'

## Data Sets

In [8]:
list(f)  # list the datasets in the file

['air_temperature',
 'alpha',
 'aod',
 'asymmetry',
 'cld_opd_dcomp',
 'cld_reff_dcomp',
 'clearsky_dhi',
 'clearsky_dni',
 'clearsky_ghi',
 'cloud_press_acha',
 'cloud_type',
 'coordinates',
 'dew_point',
 'dhi',
 'dni',
 'fill_flag',
 'ghi',
 'meta',
 'ozone',
 'relative_humidity',
 'solar_zenith_angle',
 'ssa',
 'surface_albedo',
 'surface_pressure',
 'time_index',
 'total_precipitable_water',
 'wind_direction',
 'wind_speed']

In [15]:
# Full resolution subset of California
meta = pd.DataFrame(f['meta'][...])
CA = meta.loc[meta['state'] == b'California'] # Note .h5 saves strings as bit-strings
CA.head()

Unnamed: 0,latitude,longitude,elevation,timezone,country,state,county,urban,population,landcover
70276,32.529999,-117.099998,55.0625,-8,b'United States',b'California',b'San Diego',b'None',32326,130
70588,32.57,-117.099998,7.1,-8,b'United States',b'California',b'San Diego',b'Tijuana',27971,190
70589,32.57,-117.059998,24.92,-8,b'United States',b'California',b'San Diego',b'Tijuana',51608,190
70590,32.57,-117.019997,96.599998,-8,b'United States',b'California',b'San Diego',b'Tijuana',15236,110
70591,32.57,-116.980003,140.600006,-8,b'United States',b'California',b'San Diego',b'Tijuana',2949,130


In [None]:
# Extract datetime index for datasets
time_index = pd.to_datetime(f['time_index'][...].astype(str))
march = time_index.month == 3
np.where(march)[0]

In [26]:
# geolocation coordinates
coordinates_dset = f['coordinates']

# y axis
global_horizontal_irradiance_dset = f['ghi']

# x axes
air_temperature_dset = f['air_temperature']
relative_humidity_dset = f['relative_humidity']
cloud_optical_depth_dset = f['cld_opd_dcomp']
cloud_effective_radius_dset = f['cld_reff_dcomp']
cloud_type_dset = f['cloud_type']

array([2880, 2881, 2882, ..., 4365, 4366, 4367], dtype=int64)

In [24]:
air_temperature_dset[np.where(march)[0]]

TypeError: PointSelection __getitem__ only works with bool arrays

In [20]:
# Extract coordinates (lat, lon)
print(dict(f['coordinates'].attrs))
coords = f['coordinates'][...]

{'description': '(latitude, longitude)'}


In [43]:
global_horizontal_irradiance_dset[np.where(march)[0],0]


array([763, 752, 728, ..., 225, 565, 576], dtype=int16)

In [None]:
df = pd.DataFrame()
df['longitude'] = coords[::10, 1]
df['latitude'] = coords[::10, 0]