# HRRR Subhourly Benchmark

We are going to benchmark the speed of calculating the speed to create a timeseries of wind speed and direction from the HRRR aggregation using s3 and gcs. There is a large difference in the performance of both running in the `us-east-1`` region. 

In [8]:
import xarray as xr
import fsspec
import numpy as np
import gribberish
import pyproj

In [3]:
ROTCON_P = 0.622515
LON_XX_P = -97.5
LAT_TAN_P = 38.5
CRS = '+proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5'

to_xy = pyproj.Transformer.from_crs('epsg:4326', CRS, always_xy=True).transform

def wind_speed_dir(u, v, lng):
    """Calculate wind speed (in mph) and direction from u and v components.
    """
    angle2 = ROTCON_P*(lng-LON_XX_P)*0.017453
    sinx2 = np.sin(angle2)
    cosx2 = np.cos(angle2)

    un = cosx2 * u + sinx2 * v
    vn = -sinx2 * u + cosx2 * v

    wind_speed = np.sqrt(un**2 + vn**2) * 2.23694
    wind_dir = (270 - np.arctan2(vn, un) * 180 / np.pi) % 360

    return wind_speed, wind_dir

def get_forecast(ds, lat, lng):
    x_sel, y_sel = to_xy(lng, lat)
    
    selected_data = ds.interp(x=x_sel, y=y_sel, method='linear')
    time = selected_data.time
    u = selected_data.ugrd
    v = selected_data.vgrd

    wind_speed, wind_dir = wind_speed_dir(u, v, lng)

    return {
        'time': time.values,
        'wind_speed': wind_speed.values,
        'wind_dir': wind_dir.values,
    }

## fsspec + xarray + s3
First lets load the dataset using `fsspec` and s3

In [4]:
fs_fsspec_s3 = fsspec.filesystem("reference", target_protocol='s3', target_options={'anon': True}, fo=f's3://nextgen-dmac/kerchunk/hrrr_subhourly.json', remote_protocol='s3', rem={'anon': True})
m = fs_fsspec_s3.get_mapper("")
ds_fsspec_s3 = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={'time': 1})
ds_fsspec_s3

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.54 MiB 14.54 MiB Shape (1059, 1799) (1059, 1799) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059,

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.54 MiB 14.54 MiB Shape (1059, 1799) (1059, 1799) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059,

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


Then time how long it takes to extract the wind forecast for a single point.

In [5]:
%%time
forecast = get_forecast(ds_fsspec_s3, 38.5, -97.5)
forecast

CPU times: user 644 ms, sys: 64.6 ms, total: 709 ms
Wall time: 2.73 s


{'time': array(['2023-07-25T14:15:00', '2023-07-25T14:30:00',
        '2023-07-25T14:45:00', '2023-07-25T15:00:00',
        '2023-07-25T15:15:00', '2023-07-25T15:30:00',
        '2023-07-25T15:45:00', '2023-07-25T16:00:00',
        '2023-07-25T16:15:00', '2023-07-25T16:30:00',
        '2023-07-25T16:45:00', '2023-07-25T17:00:00',
        '2023-07-25T17:15:00', '2023-07-25T17:30:00',
        '2023-07-25T17:45:00', '2023-07-25T18:00:00',
        '2023-07-25T18:15:00', '2023-07-25T18:30:00',
        '2023-07-25T18:45:00', '2023-07-25T19:00:00',
        '2023-07-25T19:15:00', '2023-07-25T19:30:00',
        '2023-07-25T19:45:00', '2023-07-25T20:00:00',
        '2023-07-25T20:15:00', '2023-07-25T20:30:00',
        '2023-07-25T20:45:00', '2023-07-25T21:00:00',
        '2023-07-25T21:15:00', '2023-07-25T21:30:00',
        '2023-07-25T21:45:00', '2023-07-25T22:00:00',
        '2023-07-25T22:15:00', '2023-07-25T22:30:00',
        '2023-07-25T22:45:00', '2023-07-25T23:00:00',
        '2023-07-25T

## fsspec + xarray + gcs
First lets load the dataset using `fsspec` and gcs

In [6]:
fs_fsspec_gcs = fsspec.filesystem("reference", target_protocol='gcs', target_options={'anon': True}, fo=f'gs://squall-hrrr/hrrr_subhourly.json', remote_protocol='gcs', rem={'anon': True})
m = fs_fsspec_gcs.get_mapper("")
ds_fsspec_gcs = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={'time': 1})
ds_fsspec_gcs

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.54 MiB 14.54 MiB Shape (1059, 1799) (1059, 1799) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059,

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 14.54 MiB 14.54 MiB Shape (1059, 1799) (1059, 1799) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059,

Unnamed: 0,Array,Chunk
Bytes,14.54 MiB,14.54 MiB
Shape,"(1059, 1799)","(1059, 1799)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.02 GiB 14.54 MiB Shape (72, 1059, 1799) (1, 1059, 1799) Dask graph 72 chunks in 2 graph layers Data type float64 numpy.ndarray",1799  1059  72,

Unnamed: 0,Array,Chunk
Bytes,1.02 GiB,14.54 MiB
Shape,"(72, 1059, 1799)","(1, 1059, 1799)"
Dask graph,72 chunks in 2 graph layers,72 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
%%time
forecast = get_forecast(ds_fsspec_gcs, 38.5, -97.5)
forecast

CPU times: user 10.6 s, sys: 2.43 s, total: 13 s
Wall time: 49.2 s


{'time': array(['2023-07-25T14:15:00', '2023-07-25T14:30:00',
        '2023-07-25T14:45:00', '2023-07-25T15:00:00',
        '2023-07-25T15:15:00', '2023-07-25T15:30:00',
        '2023-07-25T15:45:00', '2023-07-25T16:00:00',
        '2023-07-25T16:15:00', '2023-07-25T16:30:00',
        '2023-07-25T16:45:00', '2023-07-25T17:00:00',
        '2023-07-25T17:15:00', '2023-07-25T17:30:00',
        '2023-07-25T17:45:00', '2023-07-25T18:00:00',
        '2023-07-25T18:15:00', '2023-07-25T18:30:00',
        '2023-07-25T18:45:00', '2023-07-25T19:00:00',
        '2023-07-25T19:15:00', '2023-07-25T19:30:00',
        '2023-07-25T19:45:00', '2023-07-25T20:00:00',
        '2023-07-25T20:15:00', '2023-07-25T20:30:00',
        '2023-07-25T20:45:00', '2023-07-25T21:00:00',
        '2023-07-25T21:15:00', '2023-07-25T21:30:00',
        '2023-07-25T21:45:00', '2023-07-25T22:00:00',
        '2023-07-25T22:15:00', '2023-07-25T22:30:00',
        '2023-07-25T22:45:00', '2023-07-25T23:00:00',
        '2023-07-25T