# Quality assurance and quality control

## Low level winds and temperatures

In [185]:
%%capture
from collections import namedtuple
from metpy.units import units

import glob
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
import numpy as np
import os
import pandas as pd
import re
import sys
import xarray as xr

sys.path.insert(1, "/Users/lukeconibear/repos/wxapps/")

from apps.low_level_winds.src.low_level_winds import low_level_winds, utils

# Figure defaults for the whole notebook. "text.usetex" hands text rendering
# to an external LaTeX installation, so LaTeX must be available on the PATH.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "serif",
        # The family selected above is "serif", so the preferred typeface has
        # to be registered under "font.serif" (it was previously placed under
        # "font.sans-serif", where it is ignored for serif text).
        "font.serif": ["Computer Modern Roman"],
        "axes.grid": False,
        "savefig.dpi": 700,
        "figure.figsize": [8, 6],
        "font.size": 14,
    }
)

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*".
# Pick whichever spelling this installation provides; if neither is listed,
# fall back to the original name so the usual matplotlib error is raised.
_colorblind_styles = [
    style_name
    for style_name in ("seaborn-v0_8-colorblind", "seaborn-colorblind")
    if style_name in plt.style.available
]
plt.style.use(_colorblind_styles[:1] or ["seaborn-colorblind"])

# Keep xarray reprs compact by collapsing the data section.
xr.set_options(display_expand_data=False)

In [159]:
# Raw HRRR GRIB2 inputs, ordered by filename.
hrrr_raw_filenames = glob.glob(
    "../../wxapps/apps/low_level_winds/tests/test_data/HRRR*.grb2"
)
hrrr_raw_filenames.sort()

# Processed NetCDF outputs, ordered by filename. They are later paired with
# the raw inputs by position, so both lists must sort consistently.
hrrr_out_filenames = glob.glob(
    "../../pangeo_tools/low_level_winds_temps/processed_data/*hrrr*.nc"
)
hrrr_out_filenames.sort()

In [163]:
def run_half_hrrr_conus(hrrr_raw_filename):
    """Build the reference low-level fields from a raw HRRR file.

    The hybrid-level fields ("gh", "t", "u", "v") are opened, restricted to
    the levels whose minimum "gh" value is at most 300, passed through
    ``low_level_winds.regrid_ds`` and then converted into air temperature
    (degC), wind speed and wind direction.

    Parameters
    ----------
    hrrr_raw_filename : str
        Path of the raw HRRR GRIB2 file.

    Returns
    -------
    xarray.Dataset
        Result of ``xr.combine_by_coords`` holding
        ``air_temperature_low_level``, ``wind_speed_low_level`` and
        ``wind_direction_low_level``.
    """

    def _zeros_to_nan(field):
        # Exact 0.0 values are treated as fill added by the regridding.
        return xr.where(field == 0, np.nan, field)

    hybrid_ds = low_level_winds.open_hrrr_dataset(
        filename=hrrr_raw_filename,
        filter_by_keys={"typeOfLevel": "hybrid"},
        data_vars=["gh", "t", "u", "v"],
    )

    # Keep only the hybrid levels that reach down to 300 or below somewhere in
    # the domain (presumably metres of geopotential height -- TODO confirm).
    low_level_indices = []
    for level_index in range(len(hybrid_ds.hybrid)):
        lowest_height = hybrid_ds["gh"].isel(hybrid=level_index).min().values
        if lowest_height <= 300:
            low_level_indices.append(level_index)
    hybrid_ds = hybrid_ds.isel(hybrid=low_level_indices)

    # Target grid definition.
    spacing_lat = 0.027  # approx 3km
    spacing_lon = 0.027  # approx 3km
    lat_upper = np.round(hybrid_ds.lat.max().values, 0)
    lat_lower = np.round(hybrid_ds.lat.min().values, 0)
    # Longitudes need to be between -180 and 180.
    lon_upper = np.round(hybrid_ds.lon.max().values, 0) - 360
    lon_lower = np.round(hybrid_ds.lon.min().values, 0) - 360

    # The weights file name encodes the source and target grid sizes.
    n_lat_source = len(hybrid_ds.y.values)
    n_lon_source = len(hybrid_ds.x.values)
    n_lat_target = int((lat_upper - lat_lower) / spacing_lat)
    n_lon_target = int((lon_upper - lon_lower) / spacing_lon)
    weights_filename = f"bilinear_{n_lat_source}x{n_lon_source}_{n_lat_target}x{n_lon_target}.nc"

    use_existing_weights = True
    is_periodic = False
    write_weights = True

    regridded_ds = low_level_winds.regrid_ds(
        hybrid_ds,
        spacing_lat,
        spacing_lon,
        lat_upper,
        lat_lower,
        lon_upper,
        lon_lower,
        use_existing_weights,
        weights_filename,
        is_periodic,
        write_weights,
    )

    # Temperature: mask the fill values first, then convert K -> degC.
    temperature_kelvin = _zeros_to_nan(regridded_ds["t"])
    temperature_degc = (
        (temperature_kelvin * units("K"))
        .metpy.convert_units("degC")
        .metpy.dequantify()
    )

    # Winds: derive speed and direction from the components, then mask.
    u_component = regridded_ds["u"] * units("m/s")
    v_component = regridded_ds["v"] * units("m/s")
    wind_speed = _zeros_to_nan(
        mpcalc.wind_speed(u_component, v_component).metpy.dequantify()
    )
    wind_direction = _zeros_to_nan(
        mpcalc.wind_direction(u_component, v_component).metpy.dequantify()
    )

    named_fields = {
        "air_temperature_low_level": temperature_degc,
        "wind_speed_low_level": wind_speed,
        "wind_direction_low_level": wind_direction,
    }
    return xr.combine_by_coords(
        [xr.Dataset({name: field}) for name, field in named_fields.items()]
    )

In [164]:
# Relative tolerances 0.1 .. 0.5, rounded to one decimal so that their string
# form can be used as a dictionary key.
threshold_ratios = list(map(lambda value: round(value, 1), np.linspace(0.1, 0.5, 5)))
# Absolute tolerances 1 .. 5 as plain Python integers.
threshold_diffs = list(map(int, np.linspace(1, 5, 5)))

In [165]:
# Names of the data variables that are compared between the regridded raw
# HRRR fields and the processed output files.
data_variables = [
    "air_temperature_low_level",
    "wind_speed_low_level",
    "wind_direction_low_level",
]

In [172]:
# Accumulators across all forecast files:
# data variable -> str(threshold) -> list of per-file results.
overall_threshold_percentages_of_ratios_within = {}
overall_threshold_percentages_of_diffs_within = {}
for data_variable in data_variables:
    overall_threshold_percentages_of_ratios_within[f"{data_variable}"] = dict(
        (str(threshold_ratio), []) for threshold_ratio in threshold_ratios
    )
    overall_threshold_percentages_of_diffs_within[f"{data_variable}"] = dict(
        (str(threshold_diff), []) for threshold_diff in threshold_diffs
    )

In [167]:
# Each pair names the index on the raw "hybrid" dimension and the index on the
# processed "lev" dimension that are compared against each other.
# NOTE(review): the pairing (0 <-> 0, 1 <-> 3) presumably matches levels at
# the same height -- confirm against the processing code.
LevelPairs = namedtuple("LevelPairs", "hybrid lev")
pair1 = LevelPairs(0, 0)
pair2 = LevelPairs(1, 3)
pairs = pair1, pair2

In [168]:
def evaluate_forecast(hrrr_raw_filename, hrrr_out_filename):
    """Compare a processed output file against the regridded raw HRRR fields.

    For every entry of ``data_variables`` and every level pair in ``pairs``
    the raw field (``hybrid`` index) is compared with the processed field
    (``lev`` index, first time step) in two ways: the element-wise
    difference ``raw - processed`` and the element-wise ratio
    ``raw / processed``. For each tolerance the fraction of grid cells that
    fall within the tolerance is recorded.

    Parameters
    ----------
    hrrr_raw_filename : str
        Path of the raw HRRR GRIB2 file (passed to ``run_half_hrrr_conus``).
    hrrr_out_filename : str
        Path of the processed NetCDF file to evaluate.

    Returns
    -------
    tuple of (dict, dict)
        ``(diffs, ratios)``. Both map data variable name ->
        ``str(threshold)`` -> list with one entry per level pair. Entries are
        fractions (1.0 means every grid cell is within the tolerance), not
        percentages.

    Raises
    ------
    ValueError
        If the raw and processed fields of a level pair differ in shape.

    Notes
    -----
    NOTE(review): for ``wind_direction_low_level`` the plain difference and
    ratio do not account for the 0/360 wrap-around, so directions either side
    of north are reported as far apart.
    """
    ds_hrrr_regridded_combined = run_half_hrrr_conus(hrrr_raw_filename)

    threshold_percentages_of_diffs_within_data_variables = {}
    threshold_percentages_of_ratios_within_data_variables = {}

    # The context manager releases the file handle once all values are read;
    # the notebook calls this function once per forecast file.
    with xr.open_dataset(hrrr_out_filename) as ds_hrrr_out:
        for data_variable in data_variables:
            # Cells that are NaN in the processed output are excluded from
            # the denominator, and subtracted from the NaN count below.
            always_nan_gridcells = (
                ds_hrrr_out[data_variable].isel(lev=0, time=0).isnull().sum().values
            )
            non_nan_gridcells = (
                ds_hrrr_out.lon.shape[0] * ds_hrrr_out.lat.shape[0]
            ) - always_nan_gridcells

            threshold_percentages_of_ratios_within = {
                str(threshold_ratio): [] for threshold_ratio in threshold_ratios
            }
            threshold_percentages_of_diffs_within = {
                str(threshold_diff): [] for threshold_diff in threshold_diffs
            }

            for pair in pairs:
                raw_values = (
                    ds_hrrr_regridded_combined[data_variable]
                    .isel(hybrid=pair.hybrid)
                    .values
                )
                interpolated = ds_hrrr_out[data_variable].isel(
                    lev=pair.lev, time=0
                )
                interpolation_values = interpolated.values

                if raw_values.shape != interpolation_values.shape:
                    raise ValueError(
                        f"Shape mismatch for {data_variable}: raw field is "
                        f"{raw_values.shape}, processed field is "
                        f"{interpolation_values.shape}"
                    )

                # Take dims/coords from the selected variable itself: the
                # order of a Dataset's dims mapping is not guaranteed to
                # match the axis order of the variable's array.
                actual_diff = xr.DataArray(
                    np.subtract(raw_values, interpolation_values),
                    dims=interpolated.dims,
                    coords=interpolated.coords,
                )
                actual_ratio = xr.DataArray(
                    np.divide(raw_values, interpolation_values),
                    dims=interpolated.dims,
                    coords=interpolated.coords,
                )

                for threshold_ratio in threshold_ratios:
                    # Compare the signed ratio: taking its absolute value
                    # would accept a sign-flipped value (ratio near -1).
                    actual_ratio_within = actual_ratio.where(
                        (actual_ratio > (1.0 - threshold_ratio))
                        & (actual_ratio < (1.0 + threshold_ratio))
                    )

                    # NaNs after masking = cells outside the tolerance plus
                    # the cells that were NaN to begin with.
                    threshold_percentage_of_values_within = 1 - (
                        (
                            actual_ratio_within.isnull().sum().values
                            - always_nan_gridcells
                        )
                        / non_nan_gridcells
                    )

                    threshold_percentages_of_ratios_within[
                        str(threshold_ratio)
                    ].append(threshold_percentage_of_values_within)

                for threshold_diff in threshold_diffs:
                    actual_diff_within = actual_diff.where(
                        np.abs(actual_diff) < threshold_diff
                    )

                    threshold_percentage_of_diffs_within = 1 - (
                        (
                            actual_diff_within.isnull().sum().values
                            - always_nan_gridcells
                        )
                        / non_nan_gridcells
                    )

                    threshold_percentages_of_diffs_within[
                        str(threshold_diff)
                    ].append(threshold_percentage_of_diffs_within)

            threshold_percentages_of_diffs_within_data_variables[
                data_variable
            ] = threshold_percentages_of_diffs_within
            threshold_percentages_of_ratios_within_data_variables[
                data_variable
            ] = threshold_percentages_of_ratios_within

    return (
        threshold_percentages_of_diffs_within_data_variables,
        threshold_percentages_of_ratios_within_data_variables,
    )

In [181]:
%%capture
# Raw and processed files are paired by position in their sorted lists, so
# fail before any expensive processing if a processed file is missing.
if len(hrrr_out_filenames) < len(hrrr_raw_filenames):
    raise ValueError(
        f"Found {len(hrrr_raw_filenames)} raw files but only "
        f"{len(hrrr_out_filenames)} processed files to compare against"
    )

for hrrr_raw_filename, hrrr_out_filename in zip(
    hrrr_raw_filenames, hrrr_out_filenames
):
    (
        threshold_percentages_of_diffs_within_data_variables,
        threshold_percentages_of_ratios_within_data_variables,
    ) = evaluate_forecast(hrrr_raw_filename, hrrr_out_filename)

    for data_variable in data_variables:
        # One entry (the per-level-pair list) is appended per forecast file.
        for (
            threshold_ratio,
            ratio_results,
        ) in overall_threshold_percentages_of_ratios_within[data_variable].items():
            ratio_results.append(
                threshold_percentages_of_ratios_within_data_variables[data_variable][
                    threshold_ratio
                ]
            )

        # The difference-based results were computed but never collected.
        for (
            threshold_diff,
            diff_results,
        ) in overall_threshold_percentages_of_diffs_within[data_variable].items():
            diff_results.append(
                threshold_percentages_of_diffs_within_data_variables[data_variable][
                    threshold_diff
                ]
            )

In [182]:
# Report, per data variable and tolerance, the mean fraction of grid cells
# whose raw/processed ratio lies within the tolerance (NaN entries ignored).
for data_variable in data_variables:
    print(data_variable)
    ratios_by_threshold = overall_threshold_percentages_of_ratios_within[
        data_variable
    ]
    for threshold_ratio in ratios_by_threshold:
        threshold_values = ratios_by_threshold[threshold_ratio]
        tolerance_text = (
            f"Percentage of ratios within {100 * float(threshold_ratio):0.0f}%:"
        )
        mean_text = f" {100 * np.nanmean(threshold_values):0.3f}%"
        print(tolerance_text + mean_text)

    print()

air_temperature_low_level
Percentage of ratios within 10%: 100.000%
Percentage of ratios within 20%: 100.000%
Percentage of ratios within 30%: 100.000%
Percentage of ratios within 40%: 100.000%
Percentage of ratios within 50%: 100.000%

wind_speed_low_level
Percentage of ratios within 10%: 99.684%
Percentage of ratios within 20%: 99.923%
Percentage of ratios within 30%: 99.964%
Percentage of ratios within 40%: 99.978%
Percentage of ratios within 50%: 99.989%

wind_direction_low_level
Percentage of ratios within 10%: 99.707%
Percentage of ratios within 20%: 99.806%
Percentage of ratios within 30%: 99.842%
Percentage of ratios within 40%: 99.863%
Percentage of ratios within 50%: 99.879%



In [189]:
# List the raw input files by base name (text after the last "/").
for hrrr_raw_filename in hrrr_raw_filenames:
    base_name = hrrr_raw_filename.rsplit("/", 1)[-1]
    print(base_name)

HRRR_NAT_2022071421_f000.grb2
HRRR_NAT_2022071421_f001.grb2
HRRR_NAT_2022071421_f002.grb2
HRRR_NAT_2022071422_f000.grb2
HRRR_NAT_2022071422_f001.grb2
HRRR_NAT_2022071422_f002.grb2
HRRR_NAT_2022071423_f000.grb2
HRRR_NAT_2022071423_f001.grb2
HRRR_NAT_2022071423_f002.grb2
HRRR_NAT_2022071500_f000.grb2


In [190]:
# Sanity check: number of raw HRRR files matched by the glob pattern.
len(hrrr_raw_filenames)

10