In [None]:
import functools
import io
import os
import os.path as osp
import timeit
from contextlib import redirect_stdout
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from osgeo import gdal, osr

from grib_to_geotiff import grib_to_geotiff
from grib_to_geotiff2 import grib_to_geotiff2

The purpose of this notebook is to demonstrate data extraction from HRRR grib files. The grib files are retrieved via `wrfxpy/src/ingest/retrieve_gribs`. The HRRR bands are extracted from the 3d Pressure Levels model. The bands and their definitions can be found at the [HRRR inventory](https://www.nco.ncep.noaa.gov/pmb/products/hrrr/hrrr.t00z.wrfprsf00.grib2.shtml). Each HRRR band needed for training FMDA models is extracted and saved as an individual geotiff file. The code below demonstrates how that is done, given existing `.grib2` files.

# Extract HRRR Bands

A dataframe determines which bands to extract and provides useful metadata on those fields. The code loops over the rows of the dataframe and saves each band as an individual `.tif` file.

In [None]:
# One row per HRRR band to extract. Columns:
#   Band      - band number passed to grib_to_geotiff
#   hrrr_name - HRRR variable abbreviation
#   dict_name - short name for the field
#   descr     - level and human-readable description with units
# Every list must have exactly one entry per band, in the same order.
band_df_hrrr = pd.DataFrame({
    'Band': [616, 620, 624, 628, 629, 661, 561, 612, 643],
    'hrrr_name': ['TMP', 'RH', "WIND", 'PRATE', 'APCP',
                  'DSWRF', 'SOILW', 'CNWAT', 'GFLUX'],
    'dict_name': ["temp", "rh", "wind", "rain", "precip_accum",
                  "solar", "soilm", "canopyw", "groundflux"],
    # Trailing comma on every entry: a missing comma silently concatenates
    # adjacent string literals and shifts all later descriptions by one row.
    'descr': [
        '2m Temperature [K]',
        '2m Relative Humidity [%]',
        '10m Wind Speed [m/s]',
        'surface Precip. Rate [kg/m^2/s]',
        'surface Total Precipitation [kg/m^2]',
        'surface Downward Short-Wave Radiation Flux [W/m^2]',
        '0.0m below ground Volumetric Soil Moisture Content [Fraction]',
        'Plant Canopy Surface Water [kg/m^2]',
        'surface Ground Heat Flux [W/m^2]',
    ],
})

band_df_hrrr

In [None]:
# Path to the HRRR grib2 file to read (relative to the notebook's working directory)
grib_path = "hrrr.t02z.wrfprsf00.grib2"
# Directory joined with the grib file's stem to form the output filename prefix
# ("." = current working directory)
outpath = "."

In [None]:
# Extract every band listed in band_df_hrrr, one grib_to_geotiff call per band.
# The output prefix is <outpath>/<grib file name without its final extension>;
# it does not depend on the band, so it is built once up front.
filename_prefix = osp.join(outpath, Path(osp.basename(grib_path)).stem)
for band, descr in zip(band_df_hrrr["Band"], band_df_hrrr["descr"]):
    print("~"*25)
    print(filename_prefix)
    print(f"Extracting band {band}, {descr}")
    grib_to_geotiff(grib_path, filename_prefix, band)

# Speed Benchmark Test

The methodology above saves individual bands in their own `.tif` files, so there is one file for each band and each hour. Below we compare the speed of this methodology with another approach that saves the needed HRRR bands in a single `.tif` file per hour. This alternative method appears faster for extracting and writing data, but it is no faster when it comes to reading in the data. Since the speed of reading the data into a model is our primary concern, we won't change the methodology in the above sections.

## Run both functions

In [None]:
def suppress_print_output(func):
    """Return a version of ``func`` whose stdout output is discarded.

    Everything the wrapped call writes to ``sys.stdout`` (e.g. via ``print``)
    is redirected into a throwaway in-memory buffer for the duration of the
    call. Output written to stderr is not affected.

    Parameters
    ----------
    func : callable
        Function to silence.

    Returns
    -------
    callable
        Wrapper that accepts the same arguments as ``func``, returns whatever
        ``func`` returns, and keeps ``func``'s name and docstring.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with io.StringIO() as buf, redirect_stdout(buf):
            # Propagate the result instead of silently dropping it.
            return func(*args, **kwargs)
    return wrapper

def f1_verbose():
    """Per-band method: call ``grib_to_geotiff`` once for each entry of ``bands``.

    Reads the notebook-level ``grib_path`` and ``bands`` and passes the
    output prefix "AAA" to every call.
    NOTE(review): ``bands`` is not assigned in the visible cells - confirm it
    is defined before this function is called.
    """
    output_prefix = "AAA"
    for band_number in bands:
        grib_to_geotiff(grib_path, output_prefix, band_number)
def f2_verbose():
    """Combined method: a single ``grib_to_geotiff2`` call for the whole ``bands`` list.

    Reads the notebook-level ``grib_path`` and ``bands`` and passes the
    output prefix "BBB".
    """
    output_prefix = "BBB"
    grib_to_geotiff2(grib_path, output_prefix, bands)

# Silenced versions of the two extraction routines (stdout discarded via
# suppress_print_output), so running and timing them does not flood the notebook.
#   f1: per-band method (f1_verbose)   f2: combined method (f2_verbose)
f1 = suppress_print_output(f1_verbose)
f2 = suppress_print_output(f2_verbose)

In [None]:
# Run the per-band extraction once; the equality checks below open AAA.<band>.tif
# files, presumably written by this call - confirm against grib_to_geotiff.
f1()

In [None]:
# Run the combined extraction once; the equality checks below open BBB.fmda_bands.tif,
# presumably written by this call - confirm against grib_to_geotiff2.
f2()

## Check Equality

In [None]:
# Read band 1 of the per-band file AAA.585.tif into an array
ds = gdal.Open("AAA.585.tif")
data1 = ds.GetRasterBand(1).ReadAsArray()

In [None]:
# Read band 1 of the combined file BBB.fmda_bands.tif into an array
ds = gdal.Open("BBB.fmda_bands.tif")
data2 = ds.GetRasterBand(1).ReadAsArray()

In [None]:
# True only if every element of the two arrays is equal
np.all(data1 == data2)

In [None]:
# Per-band file for band 628 vs. the 4th band of the combined file
ds = gdal.Open("AAA.628.tif")
data1 = ds.GetRasterBand(1).ReadAsArray()

ds = gdal.Open("BBB.fmda_bands.tif")
data2 = ds.GetRasterBand(4).ReadAsArray()

np.all(data1 == data2)

In [None]:
# Per-band file for band 664 vs. the last band of the combined file
ds = gdal.Open("AAA.664.tif")
data1 = ds.GetRasterBand(1).ReadAsArray()

ds = gdal.Open("BBB.fmda_bands.tif")
last_band_index = len(bands)
data2 = ds.GetRasterBand(last_band_index).ReadAsArray()

np.all(data1 == data2)

## Time Methods

### Time Band Extraction

In [None]:
# Total seconds for 20 runs of the per-band extraction
timeit.timeit(f1, number=20)

In [None]:
# Total seconds for 20 runs of the combined extraction
timeit.timeit(f2, number=20)

### Time Reading

In [None]:
# Show the band list that the extraction and read functions iterate over.
# NOTE(review): `bands` is not assigned in any cell visible here - confirm it is
# defined before this point, otherwise a fresh-kernel run raises NameError.
bands

In [None]:
def r1():
    """Read band 1 of every per-band file ``AAA.<band>.tif`` listed in ``bands``.

    The arrays are read and immediately discarded; the function exists only
    so that the read time can be measured with ``timeit``.
    """
    for band_number in bands:
        with gdal.Open(f"AAA.{band_number}.tif") as ds:
            ds.GetRasterBand(1).ReadAsArray()


def r2():
    """Read ``len(bands)`` bands from the combined file ``BBB.fmda_bands.tif``.

    Counterpart of ``r1`` for the single-file layout: the file is opened once
    and its bands are read in order. The arrays are discarded; only the read
    time matters.
    """
    with gdal.Open("BBB.fmda_bands.tif") as ds:
        # Raster band indices start at 1, not 0.
        for band_index in range(1, len(bands) + 1):
            ds.GetRasterBand(band_index).ReadAsArray()

In [None]:
# Run the per-band read once before timing it
r1()

In [None]:
# Total seconds for 100 runs of the per-band read
timeit.timeit(r1, number=100)

In [None]:
# Total seconds for 100 runs of the combined-file read
timeit.timeit(r2, number=100)