In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import pickle
import os
import os.path as osp
from osgeo import gdal, osr
from scipy.interpolate import griddata, RegularGridInterpolator
from synoptic.services import stations_timeseries, stations_metadata



# FMDA Dictionary Tutorial

The purpose of this notebook is to demonstrate creating fmda dictionaries to be used for training ML models of fuel moisture. This notebook combines the techniques from other notebooks in this directory, so see `interpolation_tutorial` and `synopticpy_tutorial` for more information. This code will live in `wrfxpy` in the python module `build_fmda_dict.py`.

**Goals:** given a user input of a date range and latitude/longitude bounding box, return a dictionary with top-level keys for each RAWS station within the bounding box that has fuel moisture data, and then for each station subdictionaries of formatted static location information, RAWS ground-level sensor data, and atmospheric data from HRRR interpolated to the station location.

This notebook will demonstrate retrieving RAWS data using `SynopticPy`, but within `wrfxpy` for older times this data is retrieved from a stash of saved fuel moisture data.

*Note:* this requires a formatted stash of geotiff files, which are bands extracted from HRRR grib files.

## User Inputs

Below we manually enter the user inputs to define the spatiotemporal frame for the data collection. Within `wrfxpy`, these arguments are entered from the command line and read within python as system arguments with `sys.argv[...]`. The arguments should be formatted as:

* `start`: (str) start time formatted as "YYYYmmDDHHMM"
* `end`: (str) end time formatted as "YYYYmmDDHHMM"
* `bbox`: (list) of format `[lonmin, latmin, lonmax, latmax]` (mimicking format from `SynopticPy`)

In [2]:
# Spatiotemporal frame for the data collection (times are "YYYYmmDDHHMM" strings)
start = "202401010000" # Jan 1, 2024, midnight UTC
end = "202401010200" # Jan 1, 2024, 2am UTC
bbox = [-105, 37, -103, 39]

# Format times as datetime
# NOTE(review): strptime returns naive datetimes; they are treated as UTC by convention only
t0 = datetime.strptime(start, "%Y%m%d%H%M")
t1 = datetime.strptime(end, "%Y%m%d%H%M")

## Static Data Objects

Below are objects declared at the start of `build_fmda_dict.py` and used throughout. They include a dataframe of HRRR data bands, determined from [HRRR documentation](https://www.nco.ncep.noaa.gov/pmb/products/hrrr/hrrr.t00z.wrfprsf00.grib2.shtml). Also, there is a file path string object `hrrrpath` which points to the stash of formatted geotiff files. For this tutorial, those data simply live in the same directory.

In [42]:
# Table of the HRRR bands used in this notebook, one row per band:
#   Band      - band number used in the geotiff file names
#   hrrr_name - HRRR field name
#   dict_name - short name for the field
#   descr     - human-readable description with units
# NOTE: the solar bands 'DLWRF', 'USWRF', 'ULWRF' are deliberately left out.
# Downward shortwave is expected theoretically to be the most useful solar field;
# RAWS have downward shortwave sensors, so these could be compared to model fields.
band_df_hrrr = pd.DataFrame(
    [
        (585, 'GUST', "wind", 'surface Wind Speed (Gust) [m/s]'),
        (616, 'TMP', "temp", '2 m Temperature [K]'),
        (620, 'RH', "rh", '2 m Relative Humidity [%]'),
        (628, 'PRATE', "rain", 'surface Precip. Rate [kg/m^2/s]'),
        (629, 'APCP', "precip_accum", 'surface Total Precipitation [kg/m^2]'),
        (661, 'DSWRF', "solarDS", 'surface Downward Short-Wave Radiation Flux [W/m^2]'),
    ],
    columns=['Band', 'hrrr_name', 'dict_name', 'descr'],
)

# Directory holding the stash of atmospheric geotiff files
hrrrpath = "geotiff_files"

In [4]:
# Announce the bounding box being processed, followed by a separator rule
print(f"Building FMDA Dictionary for RAWS Sites within {bbox}", "~"*50, sep="\n")

Building FMDA Dictionary for RAWS Sites within [-105, 37, -103, 39]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Get RAWS Station Level Data

In [5]:
# Metadata for the stations inside the bounding box, filtered on the fuel_moisture variable
sts = stations_metadata(bbox=bbox, vars=["fuel_moisture"])

# Keyword arguments for the per-station timeseries requests
params = {
    "stid": ["PLACEHOLDER"],  # placeholder; the real station ID is supplied in the loop
    "vars": [
        "air_temp",
        "relative_humidity",
        "precip_accum",
        "fuel_moisture",
        "wind_speed",
        "solar_radiation",
    ],
    "start": t0,
    "end": t1,
}


 🚚💨 Speedy Delivery from Synoptic API [metadata]: https://api.synopticdata.com/v2/stations/metadata?bbox=-105,37,-103,39&vars=fuel_moisture&token=🙈HIDDEN



In [6]:
def format_precip(precipa):
    """Convert accumulated precipitation into per-observation precipitation.

    Inputs:
        precipa: (array-like) accumulated precipitation values, in time order
    Returns:
        (np.ndarray, float64) array of the same length as `precipa`. The first
        entry is NaN (no previous observation to difference against), and any
        difference that is negative or greater than 100 is replaced by NaN.

    NOTE(review): the result is "hourly" rain only if observations are hourly -- confirm
    against the caller. The 100 cutoff is in whatever units `precipa` uses -- TODO confirm.
    """
    accum = np.array(precipa, dtype='float64')
    if accum.size == 0:
        # Nothing to difference; keep the output aligned with the (empty) input
        return accum
    rain = np.diff(accum) # first difference to convert accumulated to per-observation
    rain = np.insert(rain, 0, np.nan) # add NaN entry to account for diff
    # np.nan (not np.NaN): the capitalized alias was removed in NumPy 2.0
    # Highest ever recorded hourly rainfall in inches is about 16: https://www.weather.gov/owp/hdsc_world_record
    rain[rain > 100] = np.nan # filter out erroneously high
    rain[rain < 0] = np.nan # filter out negative, results from diff function after precipa goes to zero
    return rain

In [7]:
def format_raws_df(df):
    """Split a station timeseries dataframe into location info and sensor arrays.

    Inputs:
        df: (dataframe) station timeseries with a datetime index. `df.attrs` must
            hold "STID", "latitude", "longitude" and "ELEVATION"; "UNITS" is read
            if present.
    Returns: (tuple) with fields (loc, raws)
        loc: (dict) static station info with keys "STID", "lat", "lon", "elev"
        raws: (dict) one NumPy array per dataframe column, plus
            "time_raws": array of index times formatted as '%Y-%m-%dT%H:%M:%SZ'
            "hours": number of rows in the dataframe
            "rain": output of format_precip, only when "precip_accum" is a column
    """

    ## Format Return Dictionaries
    loc = {
        "STID": df.attrs["STID"],
        'lat' : df.attrs['latitude'],
        'lon' : df.attrs['longitude'],
        'elev': df.attrs["ELEVATION"]
    }

    ## Extract times from dataframe index
    # NOTE(review): the trailing 'Z' is a literal; this assumes the index is already in UTC -- confirm
    times = df.index.strftime('%Y-%m-%dT%H:%M:%SZ').to_numpy()

    ## Convert dataframe to dictionary of NumPy arrays
    raws = {key: np.array(value) for key, value in df.to_dict(orient = "list").items()}

    raws["time_raws"]=times
    raws["hours"]=len(times)

    ## Convert C to K
    # A station may not report air_temp at all, so look it up defensively
    # instead of raising KeyError on the units table or on the data column.
    units = df.attrs.get("UNITS") or {}
    if "air_temp" in raws and units.get("air_temp") == "Celsius":
        print("Converting RAWS temp from C to K")
        raws["air_temp"] = raws["air_temp"]+273.15

    ## Calculate Hourly Precipitation from accumulated
    if "precip_accum" in df.columns:
        print("Calculating hourly precipitation")
        raws["rain"] = format_precip(raws["precip_accum"])

    return loc, raws

In [8]:
# Function to return nested dictionary,
# Top-level keys are station ID joined with the start time as YYYYmm
# Next-level keys are location data ("loc") and RAWS sensor data ("RAWS")
def build_raws_dict(sts):
    """Collect RAWS timeseries for each station and format them into a nested dict.

    Inputs:
        sts: iterable of station IDs. When a dataframe is passed (the output of
            stations_metadata), iterating it yields its column labels --
            NOTE(review): presumably those are the station IDs; confirm.
    Returns:
        (dict) keyed by "<station>_<YYYYmm>" (year/month of the global `t0`),
        each value being {"loc": ..., "RAWS": ...} from format_raws_df. Stations
        whose data has no "fuel_moisture" column, or whose request raises
        AssertionError, are skipped.

    Reads the module-level `params` and `t0`.
    """
    out_dict = {} # set up return dictionary

    for st in sts:
        print("~"*50)
        print(f"Collecting RAWS data for {st}")
        # Build a per-request copy so the shared module-level `params` is not mutated
        query = {**params, "stid": [st]}
        try:
            dat = stations_timeseries(verbose="HIDE", **query)

            if "fuel_moisture" in dat.columns:
                print("Collected FMC data")
                loc, raws = format_raws_df(dat)
                title = f"{st}_{t0.year}{t0.strftime('%m')}"
                out_dict[title] = {"loc":loc, "RAWS": raws}
            else:
                print("No FMC found for this station and time")
        except AssertionError as e:
            # Report the failure and move on to the next station
            print("AssertionError caught:", e)

    return out_dict

In [9]:
# Request a timeseries for every station in `sts` and gather location + RAWS data
out_dict = build_raws_dict(sts)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Collecting RAWS data for CCEC2

 🚚💨 Speedy Delivery from Synoptic API [timeseries]: https://api.synopticdata.com/v2/stations/timeseries?stid=CCEC2&vars=air_temp,relative_humidity,precip_accum,fuel_moisture,wind_speed,solar_radiation&start=202401010000&end=202401010200&token=🙈HIDDEN

Collected FMC data
Converting RAWS temp from C to K
Calculating hourly precipitation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Collecting RAWS data for CUHC2

 🚚💨 Speedy Delivery from Synoptic API [timeseries]: https://api.synopticdata.com/v2/stations/timeseries?stid=CUHC2&vars=air_temp,relative_humidity,precip_accum,fuel_moisture,wind_speed,solar_radiation&start=202401010000&end=202401010200&token=🙈HIDDEN

Collected FMC data
Converting RAWS temp from C to K
Calculating hourly precipitation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Collecting RAWS data for CCYC2

 🚚💨 Speedy Delivery from Synoptic API [timeseries]: https://api.synopticdata.

### View collected data after this step

*Note:* this is for illustration only, not done within `wrfxpy`

In [10]:
# Top-level keys: one "<station>_<YYYYmm>" entry per station that returned fuel moisture data
out_dict.keys()

dict_keys(['CCEC2_202401', 'CUHC2_202401', 'CCYC2_202401', 'RRAC2_202401', 'TT568_202401', 'TT689_202401'])

In [11]:
# Show the full entry stored under the first key of the dictionary
out_dict[list(out_dict)[0]]

{'loc': {'STID': 'CCEC2', 'lat': 37.5425, 'lon': -104.03194, 'elev': 5422},
 'RAWS': {'air_temp': array([273.15 , 270.928]),
  'fuel_moisture': array([7.4, 7.3]),
  'precip_accum': array([0., 0.]),
  'relative_humidity': array([64., 81.]),
  'solar_radiation': array([14.,  0.]),
  'wind_speed': array([2.68, 2.68]),
  'time_raws': array(['2024-01-01T00:23:00Z', '2024-01-01T01:23:00Z'], dtype=object),
  'hours': 2,
  'rain': array([nan,  0.])}}

## Get HRRR Data

Using dictionary produced by RAWS data retrieval above, fill with time series of interpolated HRRR data at each location. 

In [47]:
def get_projection_info(ds, epsg = 4326):
    """Return what is needed to map point coordinates onto a geotiff's pixel grid.

    Inputs:
        ds: (osgeo.gdal.Dataset) an opened geotiff file (a HRRR band)
        epsg: (int) EPSG code of the point coordinates, default 4326 for lon/lat
    Return: (tuple) with fields (ct, gt_inv)
        ct: (osgeo.osr.CoordinateTransformation) from the `epsg` reference
            system to the projection of the file
        gt_inv: (tuple) output of gdal.InvGeoTransform, also could be found
            with gdalinfo on command line
    Raises:
        NotImplementedError: if the dataset has more than one raster band
    """
    # Fail with an exception rather than exiting the interpreter, which would
    # kill the notebook kernel (and `sys` is not imported in this file).
    if ds.RasterCount > 1:
        raise NotImplementedError('Not Implemented for multiple Raster bands')

    gt = ds.GetGeoTransform()
    gp = ds.GetProjection()

    # Reference system of the input points
    point_srs = osr.SpatialReference()
    point_srs.ImportFromEPSG(epsg)
    # GDAL>=3: make sure it's x/y
    # see https://trac.osgeo.org/gdal/wiki/rfc73_proj6_wkt2_srsbarn
    point_srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    # Reference system of the file, read from its projection WKT
    file_srs = osr.SpatialReference()
    file_srs.ImportFromWkt(gp)

    ct = osr.CoordinateTransformation(point_srs, file_srs)
    gt_inv = gdal.InvGeoTransform(gt)

    return ct, gt_inv

In [55]:
def build_hrrr_path(d, band):
    """Build the path of the geotiff file for one time and one HRRR band.

    Inputs:
        d: (datetime) time of the file; only its date and hour are used
        band: (int) HRRR band number
    Returns: (str) filepath to geotiff file, of the form
        <hrrrpath>/<YYYYmmdd>/hrrr.t<HH>z.wrfprsf00.<band>.tif
    """
    # The stash keeps one directory per day and one file per hour and band
    day_dir, hh = d.strftime("%Y%m%d %H").split(" ")
    filename = "hrrr.t{}z.wrfprsf00.{}.tif".format(hh, band)
    return osp.join(hrrrpath, day_dir, filename)

In [70]:
def build_hrrr_dict(tstart, tend, dat):
    """Walk the geotiff stash for every band and hour between tstart and tend.

    Inputs:
        tstart: (datetime) start time
        tend: (datetime) end time
        dat: (dict) dictionary, output of build_raws_dict
            NOTE(review): `dat` is not used yet; no HRRR values are read or stored.
    Returns: None
    Raises:
        FileNotFoundError: if any expected geotiff file is missing from the stash

    Reads the module-level `band_df_hrrr`.
    """
    # Hourly timestamps from tstart to tend.
    # A Timedelta is used as the frequency because the "1H" alias is deprecated in pandas >= 2.2.
    dates = pd.date_range(start=tstart, end=tend, freq=pd.Timedelta(hours=1))

    # Get Projection data from first band from band_df_hrrr,
    # reuse projection info for other bands
    # NOTE: this results in 1 extra read of geotiff files, but doing it for clarity
    d = dates[0]
    band = band_df_hrrr.Band[0]
    tpath = build_hrrr_path(d, band)
    print(f"Opening: {tpath}")
    if not osp.exists(tpath):
        raise FileNotFoundError(f"The file '{tpath}' does not exist.")
    ds = gdal.Open(tpath)
    ct, gt_inv = get_projection_info(ds)
    ds = None # close connection

    # Loop over bands, build time series for each station in dictionary
    for _, row in band_df_hrrr.iterrows():
        print("~"*50)
        band = row["Band"]
        print(f"Building Time Series for band: {band}, {row['descr']}")
        for d in dates:
            tpath = build_hrrr_path(d, band)
            print(f"Opening: {tpath}")
            # Same check as for the first file, so a gap in the stash is reported by name
            if not osp.exists(tpath):
                raise FileNotFoundError(f"The file '{tpath}' does not exist.")
            ds = gdal.Open(tpath)

            ds = None # close connection
    return

In [71]:
# Print a separator, then walk the HRRR geotiff files for the requested time window
print("~"*50)
build_hrrr_dict(t0, t1, out_dict)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Opening: geotiff_files\20240101\hrrr.t00z.wrfprsf00.585.tif
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building Time Series for band: 585, surface Wind Speed (Gust) [m/s]
Opening: geotiff_files\20240101\hrrr.t00z.wrfprsf00.585.tif
Opening: geotiff_files\20240101\hrrr.t01z.wrfprsf00.585.tif
Opening: geotiff_files\20240101\hrrr.t02z.wrfprsf00.585.tif
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building Time Series for band: 616, 2 m Temperature [K]
Opening: geotiff_files\20240101\hrrr.t00z.wrfprsf00.616.tif
Opening: geotiff_files\20240101\hrrr.t01z.wrfprsf00.616.tif
Opening: geotiff_files\20240101\hrrr.t02z.wrfprsf00.616.tif
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Building Time Series for band: 620, 2 m Relative Humidity [%]
Opening: geotiff_files\20240101\hrrr.t00z.wrfprsf00.620.tif
Opening: geotiff_files\20240101\hrrr.t01z.wrfprsf00.620.tif
Opening: geotiff_files\20240101\hrrr.t02z.wrfprsf00.620.tif
~~~~~~~~~~