In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import numpy as np
import calendar
import time
from matplotlib import colors
from sklearn import preprocessing
import cartopy.crs as crs
import cartopy.feature as cfeature

In [None]:
def get_model_rmse_months_clim_divs(
    model, fcast_and_error_df, max_fhour, var, units, minmax
):
    """Compute per-station RMSE of one error column, by month and overall.

    Rows whose ``lead_time_ONLY_HOURS`` exceeds ``max_fhour`` are discarded,
    the ``var`` column is squared, and the root of the mean squared value is
    taken per group.

    Parameters
    ----------
    model, units, minmax
        Accepted for signature compatibility; not used by this function.
    fcast_and_error_df : pandas.DataFrame
        Must provide a datetime ``time`` column plus ``station``,
        ``lead_time_ONLY_HOURS`` and ``var`` columns. It is not modified.
    max_fhour : number
        Largest lead time (inclusive) to keep.
    var : str
        Name of the error column to evaluate.

    Returns
    -------
    tuple of pandas.Series
        ``(rmse_months, rmse_stns, rmse_months_stns)``. The first and third
        are both indexed by (month of ``time``, station) and hold equal
        values; the second is indexed by station only. Every Series is named
        ``f"{var}_sq"`` even though the values are RMSEs, not squares.
    """
    # Filter before copying so only the surviving rows are duplicated; the
    # explicit copy keeps the column assignment below off the caller's frame.
    within_window = fcast_and_error_df["lead_time_ONLY_HOURS"] <= max_fhour
    errors = fcast_and_error_df.loc[within_window].copy()

    sq_col = f"{var}_sq"
    errors[sq_col] = errors[var] ** 2

    rmse_months = np.sqrt(
        errors.groupby([errors.time.dt.month, "station"])[sq_col].mean()
    )
    rmse_stns = np.sqrt(errors.groupby(["station"])[sq_col].mean())

    # The third return value used to be an identical second groupby; hand back
    # an independent copy so callers can still mutate either one freely.
    return rmse_months, rmse_stns, rmse_months.copy()


def get_monthly_rmse_data(fhour_end, var):
    """Build one long table of monthly per-station RMSE for each NWP model.

    Reads the notebook-level frames ``gfs_fcast_and_error_df`` and
    ``nam_fcast_and_error_df`` (plus ``hrrr_fcast_and_error_df`` when
    ``fhour_end <= 18``) and the lookup dicts ``var_dict`` / ``var_val_range``.

    For every model the monthly RMSE Series is flattened with
    ``reset_index()``, tagged with an ``"NWP Model"`` column, and the mean of
    its ``f"{var}_sq"`` column is printed next to the model name.

    Returns
    -------
    pandas.DataFrame
        The per-model tables concatenated in GFS, NAM, (HRRR) order.
    """
    model_frames = [
        ("GFS", gfs_fcast_and_error_df),
        ("NAM", nam_fcast_and_error_df),
    ]
    # HRRR is only included for windows of 18 forecast hours or fewer.
    if fhour_end <= 18:
        model_frames.append(("HRRR", hrrr_fcast_and_error_df))

    monthly_tables = []
    for model_name, model_df in model_frames:
        monthly_rmse, _stn_rmse, _monthly_stn_rmse = get_model_rmse_months_clim_divs(
            model_name,
            model_df,
            fhour_end,
            var,
            var_dict.get(var),
            var_val_range.get(var),
        )
        monthly_rmse = monthly_rmse.reset_index()
        monthly_rmse["NWP Model"] = model_name
        print(model_name, monthly_rmse[f"{var}_sq"].mean())
        monthly_tables.append(monthly_rmse)

    return pd.concat(monthly_tables)


def plot_rmse_boxplots(df, fhour_end, var):
    """Draw per-station RMSE boxplots per model with season-coloured points.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``station``, ``time`` (month number 1-12), ``"NWP Model"`` and
        ``f"{var}_sq"`` columns.
    fhour_end : number
        When greater than 18, HRRR is left out of the strip-plot palettes and
        the legend is built for two models instead of three.
    var : str
        Error variable name; selects the ``f"{var}_sq"`` column and is shown
        in the y-axis label.
    """
    rmse_col = f"{var}_sq"
    models = ["GFS", "NAM", "HRRR"] if fhour_end <= 18 else ["GFS", "NAM"]
    # Number of hue levels; also the stride used to pick legend handles below.
    h_ind = len(models)

    # (legend label, month numbers, point colour) for each season.
    seasons = [
        ("DJF", [12, 1, 2], "dodgerblue"),
        ("MAM", [3, 4, 5], "darkorchid"),
        ("JJA", [6, 7, 8], "darkgreen"),
        ("SON", [9, 10, 11], "coral"),
    ]

    plt.figure(figsize=(12, 5), dpi=100)
    ax = sns.boxplot(
        x="station",
        y=rmse_col,
        data=df,
        hue="NWP Model",
        width=0.7,
        zorder=4,
        palette="binary",
        medianprops=dict(color="white"),
    )
    for _label, months, color in seasons:
        # Every model gets the same colour within a season, so colour encodes
        # the season and the dodge position encodes the model.
        sns.stripplot(
            x="station",
            y=rmse_col,
            data=df[df["time"].isin(months)],
            hue="NWP Model",
            size=5,
            dodge=True,
            zorder=5,
            palette={model_name: color for model_name in models},
            ax=ax,
        )

    handles, labels = ax.get_legend_handles_labels()
    # Keep the first h_ind handles (the models), then one handle out of every
    # h_ind after that — presumably one per season's strip plot; verify the
    # handle order against the installed seaborn version.
    plt.legend(
        handles[0:h_ind] + handles[h_ind::h_ind],
        labels[0:h_ind] + [label for label, _months, _color in seasons],
        loc=2,
        borderaxespad=0.0,
    )
    plt.xticks(rotation=90)
    plt.grid(True, alpha=0.4, zorder=1)
    # The label used to be the literal "RMSE t2m_error" whatever `var` was.
    plt.ylabel(f"RMSE {var}")
    # NOTE(review): fixed y-range of 1.0-4.0; may clip variables other than t2m.
    plt.ylim(1.0, 4.0)
    plt.xlabel("Station")
    plt.tight_layout()

In [None]:
def read_data(
    init,
    years=("2018", "2019", "2020", "2021"),
    savedir="/home/aevans/ai2es/processed_data/frcst_err/",
):
    """Load the yearly forecast-and-error parquet files for GFS, NAM and HRRR.

    Parameters
    ----------
    init : str
        Initialization hour used in the file names (e.g. ``"12"``).
    years : iterable of str, optional
        Years to load; defaults to 2018-2021.
    savedir : str, optional
        Directory prefix holding the parquet files. It is concatenated
        directly with the file name, so it must end with a path separator.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(gfs, nam, hrrr)`` — note the GFS frame comes first. Each frame is
        the concatenation of its yearly files with rows at
        ``lead_time_ONLY_HOURS == 0.0`` removed.
    """

    def _load_model(model_prefix):
        # Read and stack one model's yearly files.
        yearly = [
            pd.read_parquet(
                f"{savedir}{model_prefix}_fcast_and_error_df_{init}z_{year}_mask_water_ny.parquet"
            )
            for year in years
        ]
        combined = pd.concat(yearly)
        # need to remove the random forecasts that have forecast hours 0
        # these are random because they only exist in the files that Ryan T. provided
        return combined[combined["lead_time_ONLY_HOURS"] != 0.0]

    gfs_fcast_and_error_df = _load_model("gfs")
    nam_fcast_and_error_df = _load_model("nam")
    hrrr_fcast_and_error_df = _load_model("hrrr")
    return gfs_fcast_and_error_df, nam_fcast_and_error_df, hrrr_fcast_and_error_df

In [None]:
def err_plot(df, month, up):
    """Scatter station errors on a map, coloured and sized by ``t2m_error_sq``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``lat``, ``lon`` and ``t2m_error_sq`` columns.
    month, up
        Only interpolated into the figure title.
    """
    projPC = crs.PlateCarree()
    # Pad the data's bounding box by one degree on every side. West is the
    # smallest longitude and east the largest (these were swapped before).
    latN = df["lat"].max() + 1
    latS = df["lat"].min() - 1
    lonW = df["lon"].min() - 1
    lonE = df["lon"].max() + 1

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gridliner = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a Gridliner setting; assigning ad-hoc attributes on
    # the axes (as before) had no effect.
    # NOTE(review): top_labels/right_labels are the cartopy >= 0.18 names —
    # confirm against the installed version.
    gridliner.top_labels = False
    gridliner.right_labels = False

    plotter = ax.scatter(
        x=df["lon"],
        y=df["lat"],
        c=df["t2m_error_sq"],
        s=df["t2m_error_sq"] * 50,  # marker area scales with the plotted value
        marker="o",
        edgecolor="black",
        cmap="jet",
        transform=projPC,
    )
    titl_Str = f"t2m_Forecast Error {up} {month}"
    ax.set_title(titl_Str, size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()
    fig.colorbar(plotter, ax=ax)

In [None]:
def count_plot(tops, bottoms, month):
    """Map two station sets on a fixed lat/lon box (40-45.5N, 80-71.5W).

    Parameters
    ----------
    tops : pandas.DataFrame
        Needs ``lat`` / ``lon`` columns; drawn as green triangles.
    bottoms : pandas.DataFrame
        Needs ``lat`` / ``lon`` columns; drawn as orange circles.
    month
        Only interpolated into the figure title.
    """
    projPC = crs.PlateCarree()
    latN = 45.5
    latS = 40
    lonW = -80
    lonE = -71.5

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gridliner = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a Gridliner setting; assigning ad-hoc attributes on
    # the axes (as before) had no effect.
    # NOTE(review): top_labels/right_labels are the cartopy >= 0.18 names —
    # confirm against the installed version.
    gridliner.top_labels = False
    gridliner.right_labels = False

    ax.scatter(
        x=tops["lon"],
        y=tops["lat"],
        c="g",
        s=50,
        marker="v",
        edgecolor="black",
        transform=projPC,
    )
    ax.scatter(
        x=bottoms["lon"],
        y=bottoms["lat"],
        c="orange",
        s=50,
        marker="o",
        edgecolor="black",
        transform=projPC,
    )
    ax.set_title(f"Best/Worst Sites by Error {month}", size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()

In [None]:
def count_plot_top(df):
    """Map the stations in ``df`` as green circles on a fixed lat/lon box.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``lat`` and ``lon`` columns; one marker is drawn per row.
    """
    projPC = crs.PlateCarree()
    latN = 45.5
    latS = 40
    lonW = -80
    lonE = -71.5

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gridliner = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a Gridliner setting; assigning ad-hoc attributes on
    # the axes (as before) had no effect.
    # NOTE(review): top_labels/right_labels are the cartopy >= 0.18 names —
    # confirm against the installed version.
    gridliner.top_labels = False
    gridliner.right_labels = False

    ax.scatter(
        x=df["lon"],
        y=df["lat"],
        c="g",
        s=50,
        marker="o",
        edgecolor="black",
        transform=projPC,
    )
    ax.set_title("Top 10% Sites by t2m Error", size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()

In [None]:
def count_plot_bott(df):
    """Map the stations in ``df`` as red circles on a fixed lat/lon box.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``lat`` and ``lon`` columns; one marker is drawn per row.
    """
    projPC = crs.PlateCarree()
    latN = 45.5
    latS = 40
    lonW = -80
    lonE = -71.5

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gridliner = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a Gridliner setting; assigning ad-hoc attributes on
    # the axes (as before) had no effect.
    # NOTE(review): top_labels/right_labels are the cartopy >= 0.18 names —
    # confirm against the installed version.
    gridliner.top_labels = False
    gridliner.right_labels = False

    ax.scatter(
        x=df["lon"],
        y=df["lat"],
        c="r",
        s=50,
        marker="o",
        edgecolor="black",
        transform=projPC,
    )
    ax.set_title("Bottom 10% Sites by t2m Error", size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()

In [None]:
def plot_model_rmse_heatmap_fhours_clim_divs(
    model, fcast_and_error_df, var, units, minmax, stations
):
    """Draw a station-by-forecast-hour heatmap of RMSE for one model.

    Parameters
    ----------
    model : str
        Model name, used in the title only.
    fcast_and_error_df : pandas.DataFrame
        Needs ``station``, ``lead_time_HOUR``, ``lead_time_DAY`` and ``var``
        columns. It is not modified.
    var : str
        Error column to evaluate.
    units : str
        Units text for the colour-bar label.
    minmax : sequence of two numbers
        ``(vmin, vmax)`` for the colour scale.
    stations : list-like
        Station names to show. Each must be present after filtering, or the
        column selection raises ``KeyError``.
    """
    sq_col = f"{var}_sq"
    errors = fcast_and_error_df.copy()
    errors[sq_col] = errors[var] ** 2

    # Keep rows with squared error <= 18, lead hour in 1..24, lead day 0.
    # The squared-error cut used to reference the literal "t2m_error_sq"
    # column, which is wrong (or missing) for any other `var`.
    # NOTE(review): 18 is in squared units of `var`; presumably tuned for
    # t2m — confirm it suits other variables.
    keep = (
        (errors[sq_col] <= 18)
        & (errors["lead_time_HOUR"] < 25)
        & (errors["lead_time_HOUR"] != 0)
        & (errors["lead_time_DAY"] == 0)
    )
    errors = errors[keep]

    rmse = np.sqrt(errors.groupby(["lead_time_HOUR", "station"])[sq_col].mean())

    # Rows = requested stations, columns = forecast hour.
    rmse_unstacked = rmse.unstack()[stations].T
    # Drops any station whose whole row of values repeats an earlier row.
    rmse_unstacked = rmse_unstacked.drop_duplicates()

    plt.figure(figsize=(20, 5))
    ax = sns.heatmap(
        rmse_unstacked,
        annot=True,
        linewidths=0.5,
        cmap=cm.YlGnBu,
        vmin=minmax[0],
        vmax=minmax[1],
        cbar_kws={"label": f"RMSE [{units}]"},
    )
    ax.set_xlabel("Forecast Hour")
    ax.set_title(f"{model}, {var.replace('_error','')}, Root Mean Square Error")
    plt.tight_layout()

In [None]:
def format_plot_df(tippity_df, keys):
    """Return the station/lat/lon rows of ``tippity_df`` whose station is in ``keys``.

    Parameters
    ----------
    tippity_df : pandas.DataFrame
        Needs ``station``, ``lat`` and ``lon`` columns.
    keys : iterable
        Station identifiers to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``stations`` (note the plural), ``lat`` and ``lon``, with one
        row per matching input row in the original order (repeats are kept)
        and a fresh 0..n-1 index.
    """
    # Vectorized membership test instead of a per-row .iloc loop.
    selected = tippity_df["station"].isin(list(keys))
    plot_df = (
        tippity_df.loc[selected, ["station", "lat", "lon"]]
        .rename(columns={"station": "stations"})
        .reset_index(drop=True)
    )

    return plot_df

In [None]:
# Display units for each error variable (matplotlib mathtext). Raw strings
# keep the backslash in "\circ" literal; in a normal string "\c" is an invalid
# escape sequence that newer Python versions warn about.
var_dict = {
    "d2m_error": r"$^{\circ}$C",
    "u_dir_error": r"$^{\circ}$",
    "u_total_error": r"m s$^{-1}$",
    "t2m_error": r"$^{\circ}$C",
    "new_tp_error": "mm",
    "prmsl_error": "hPa",
}

# [min, max] value range for each error variable; passed as `minmax` to the
# heatmap helper, which uses it as the colour-scale limits.
var_val_range = {
    "d2m_error": [1.0, 4.0],
    "u_dir_error": [40, 100],
    "u_total_error": [1, 3.5],
    "t2m_error": [1.5, 3.5],
    "new_tp_error": [0, 3.5],
    "prmsl_error": [-20, 100],
}

In [None]:
# Initialization hour used in the input and output file names.
init = "12"

# read_data returns the frames in (GFS, NAM, HRRR) order; each one gets its
# existing index moved into ordinary columns via reset_index().
gfs_fcast_and_error_df, nam_fcast_and_error_df, hrrr_fcast_and_error_df = (
    model_df.reset_index() for model_df in read_data(init)
)

In [None]:
gfs_fcast_and_error_df.head()

In [None]:
rmse_to_plot = get_monthly_rmse_data(18, "t2m_error")

In [None]:
# Sort ascending by RMSE. The per-model top/bottom cells below slice rows by
# position ([:13] / [-13:]), so they depend on this ordering.
rmse_to_plot = rmse_to_plot.sort_values(by=["t2m_error_sq"])
rmse_to_plot

In [None]:
plt.scatter(rmse_to_plot["station"], rmse_to_plot["t2m_error_sq"])

In [None]:
rmse_to_plot["t2m_error_sq"].describe()

In [None]:
diction = pd.read_csv("/home/aevans/landtype/notebooks/nysm_coords.csv")

In [None]:
ny_stations = diction["station"]
ny_lats = diction["latitude"]
ny_lons = diction["longitude"]

In [None]:
# station -> latitude / longitude lookups built from the coordinates CSV.
# If a station appears more than once, the last row wins.
latdict = dict(zip(diction.station, diction.latitude))
londict = dict(zip(diction.station, diction.longitude))

In [None]:
# Attach coordinates to every RMSE row; stations missing from the lookup
# dicts get NaN here.
rmse_to_plot["lon"] = rmse_to_plot["station"].map(londict)
rmse_to_plot["lat"] = rmse_to_plot["station"].map(latdict)

In [None]:
rmse_to_plot

In [None]:
rmse_hrrr = rmse_to_plot[rmse_to_plot["NWP Model"] == "HRRR"]
rmse_hrrr

In [None]:
# NOTE(review): these two lists are never appended to in this cell.
tops_ls = []
bottoms_ls = []


# For each calendar month: write the first and last 13 HRRR rows to CSV and map them.
for i in np.arange(1, 13):
    # keep only month i: .where() blanks every other row to NaN and .dropna()
    # removes them (along with any row that has a missing value in any column)
    df_err = rmse_hrrr.where(rmse_hrrr["time"] == i).dropna()

    # first / last 13 rows by position; these are the lowest / highest RMSE
    # only because rmse_to_plot was sorted ascending by t2m_error_sq earlier
    top10 = df_err[:13]
    bottom10 = df_err[-13:]
    top10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_hrrr.csv"
    )
    bottom10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_hrrr.csv"
    )

    # plot (opens one new figure per month)
    count_plot(top10, bottom10, i)

In [None]:
# Read the 12 monthly HRRR "top" CSVs back and stack them. Each new month is
# placed ahead of the accumulated frame, so the result runs from month 12 to 1.
tippity_df = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_hrrr.csv"
    )
    tippity_df = pd.concat([df, tippity_df])

In [None]:
# Read the 12 monthly HRRR "bottom" CSVs back and stack them. Each new month is
# placed ahead of the accumulated frame, so the result runs from month 12 to 1.
rockbott_df = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_hrrr.csv"
    )
    rockbott_df = pd.concat([df, rockbott_df])

In [None]:
rockbott_df

In [None]:
keys = tippity_df["station"].value_counts()[:13].keys()
keys

In [None]:
# Rows of the HRRR "top" table whose station is among `keys`. This cell used
# to hand-roll the same loop that format_plot_df implements; call the shared
# helper so the HRRR path matches the GFS and NAM cells.
plot_df = format_plot_df(tippity_df, keys)

In [None]:
keys = rockbott_df["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_bott = format_plot_df(rockbott_df, keys)

In [None]:
plot_df_bott

In [None]:
count_plot_top(plot_df)

In [None]:
count_plot_bott(plot_df_bott)

# GFS

In [None]:
rmse_gfs = rmse_to_plot[rmse_to_plot["NWP Model"] == "GFS"]
rmse_gfs

In [None]:
# For each calendar month: write the first and last 13 GFS rows to CSV and map them.
for i in np.arange(1, 13):
    # keep only month i: .where() blanks every other row to NaN and .dropna()
    # removes them (along with any row that has a missing value in any column)
    df_err = rmse_gfs.where(rmse_gfs["time"] == i).dropna()

    # first / last 13 rows by position; these are the lowest / highest RMSE
    # only because rmse_to_plot was sorted ascending by t2m_error_sq earlier
    top10 = df_err[:13]
    bottom10 = df_err[-13:]
    top10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_gfs.csv"
    )
    bottom10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_gfs.csv"
    )

    # plot (opens one new figure per month)
    count_plot(top10, bottom10, i)

In [None]:
# Read the 12 monthly GFS "top" CSVs back and stack them. Each new month is
# placed ahead of the accumulated frame, so the result runs from month 12 to 1.
tippity_df_gfs = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_gfs.csv"
    )
    tippity_df_gfs = pd.concat([df, tippity_df_gfs])

In [None]:
# Read the 12 monthly GFS "bottom" CSVs back and stack them. Each new month is
# placed ahead of the accumulated frame, so the result runs from month 12 to 1.
rockbott_df_gfs = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_gfs.csv"
    )
    rockbott_df_gfs = pd.concat([df, rockbott_df_gfs])

In [None]:
keys = tippity_df_gfs["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_top_gfs = format_plot_df(tippity_df_gfs, keys)

In [None]:
keys = rockbott_df_gfs["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_bott_gfs = format_plot_df(rockbott_df_gfs, keys)

In [None]:
count_plot_top(plot_df_top_gfs)

In [None]:
count_plot_bott(plot_df_bott_gfs)

# NAM

In [None]:
rmse_nam = rmse_to_plot[rmse_to_plot["NWP Model"] == "NAM"]
rmse_nam

In [None]:
# For each calendar month: write the first and last 13 NAM rows to CSV and map them.
for i in np.arange(1, 13):
    # keep only month i: .where() blanks every other row to NaN and .dropna()
    # removes them (along with any row that has a missing value in any column)
    df_err = rmse_nam.where(rmse_nam["time"] == i).dropna()

    # first / last 13 rows by position; these are the lowest / highest RMSE
    # only because rmse_to_plot was sorted ascending by t2m_error_sq earlier
    top10 = df_err[:13]
    bottom10 = df_err[-13:]
    top10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_nam.csv"
    )
    bottom10.to_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_nam.csv"
    )

    # plot (opens one new figure per month)
    count_plot(top10, bottom10, i)

In [None]:
# Read the 12 monthly NAM "top" CSVs back and stack them (month 12 first).
# This cell used to read the *_gfs.csv files, so the NAM table was a copy of
# the GFS one; it now reads the *_nam.csv files written by the NAM loop.
tippity_df_nam = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/top/t{init}/ny/t{init}z_topcsv_{i}_nam.csv"
    )
    tippity_df_nam = pd.concat([df, tippity_df_nam])

In [None]:
# Read the 12 monthly NAM "bottom" CSVs back and stack them (month 12 first).
# This cell used to read the *_gfs.csv files, so the NAM table was a copy of
# the GFS one; it now reads the *_nam.csv files written by the NAM loop.
rockbott_df_nam = pd.DataFrame()
for i in np.arange(1, 13):
    df = pd.read_csv(
        f"/home/aevans/nwp_bias/data/unboxing_df/bottom/t{init}/ny/t{init}z_bottcsv_{i}_nam.csv"
    )
    rockbott_df_nam = pd.concat([df, rockbott_df_nam])

In [None]:
keys = tippity_df_nam["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_top_nam = format_plot_df(tippity_df_nam, keys)

In [None]:
keys = rockbott_df_nam["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_bott_nam = format_plot_df(rockbott_df_nam, keys)

In [None]:
count_plot_top(plot_df_top_nam)

In [None]:
count_plot_bott(plot_df_bott_nam)

# Least Error

In [None]:
# Pool the "top" tables of all three models. Each concat puts the new table
# first, so the final row order is NAM, GFS, HRRR.
tops = pd.DataFrame()
tops = pd.concat([tippity_df, tops])
tops = pd.concat([tippity_df_gfs, tops])
tops = pd.concat([tippity_df_nam, tops])

In [None]:
tops.head()

In [None]:
top_keys = tops["station"].value_counts()[:13].keys()
top_keys

In [None]:
# Select the pooled "top" rows by top_keys (the 13 most frequent stations in
# `tops`). This used to pass `keys`, which at this point still holds the NAM
# bottom-station selection from the previous section.
plot_df_tops = format_plot_df(tops, top_keys)

In [None]:
count_plot_top(plot_df_tops)

# Most Error

In [None]:
# Pool the "bottom" tables of all three models. Each concat puts the new table
# first, so the final row order is NAM, GFS, HRRR.
botts = pd.DataFrame()
botts = pd.concat([rockbott_df, botts])
botts = pd.concat([rockbott_df_gfs, botts])
botts = pd.concat([rockbott_df_nam, botts])

In [None]:
keys = botts["station"].value_counts()[:13].keys()
keys

In [None]:
plot_df_botts = format_plot_df(botts, keys)

In [None]:
count_plot_bott(plot_df_botts)

In [None]:
# Recompute the monthly RMSE table (18-hour window) and keep only the stations
# in `keys` (here, the 13 most frequent stations in `botts`).
bott_bott = get_monthly_rmse_data(18, "t2m_error")
bott_bott = bott_bott[bott_bott["station"].isin(keys)]

In [None]:
# Same table again, restricted to the stations in `top_keys`.
# NOTE(review): this repeats the get_monthly_rmse_data call from the previous
# cell with identical arguments; the result could be reused.
top_top = get_monthly_rmse_data(18, "t2m_error")
top_top = top_top[top_top["station"].isin(top_keys)]

In [None]:
bott_bott

In [None]:
plot_rmse_boxplots(bott_bott, 18, "t2m_error")

In [None]:
plot_rmse_boxplots(top_top, 18, "t2m_error")

In [None]:
var = "t2m_error"
# Raw string: "\c" in a normal string literal is an invalid escape sequence.
units = r"$^{\circ}$C"
# `keys` is whatever station selection was bound most recently (the pooled
# "Most Error" stations when the notebook is run top to bottom).
plot_model_rmse_heatmap_fhours_clim_divs(
    "GFS", gfs_fcast_and_error_df, var, units, var_val_range.get(var), keys
)

In [None]:
# Same heatmap for NAM and HRRR, reusing var, units and keys from the GFS cell above.
plot_model_rmse_heatmap_fhours_clim_divs(
    "NAM", nam_fcast_and_error_df, var, units, var_val_range.get(var), keys
)
plot_model_rmse_heatmap_fhours_clim_divs(
    "HRRR", hrrr_fcast_and_error_df, var, units, var_val_range.get(var), keys
)