# Collect validation results for all methods, months, and regions, then summarize with various univariate and multivariate metrics

In [1]:
import sys

sys.path.insert(0, "../source")

import numpy as np
import pandas as pd

from validation import collect_metrics, multivariate_dss


In [2]:
blocks = ["b1", "b2"]
months = ["202102", "202104", "202107", "202110"]

# Read one validation-results table per (block, month) pair. Blocks vary
# slowest, so all months of "b1" come before any month of "b2".
validation_results = [
    pd.read_csv(
        f"../data/intermediate/validation/{month}/validation_results_{block}.csv",
        index_col=0,
    )
    for block in blocks
    for month in months
]

# Stack the tables row-wise and renumber the rows 0..n-1 so the per-file
# indices do not collide.
df_results = pd.concat(validation_results, ignore_index=True)
df_results


Unnamed: 0,Method,Month,Region,lat,lon,data,predictions,rmspe,difference,ratio,INT,DSS
0,Trend surface,202102,b1,40.025,-94.825,-0.232284,0.257301,0.447078,0.489585,1.095075,1.755622,-0.410852
1,Trend surface,202102,b1,40.025,-94.775,-0.010717,0.258267,0.451726,0.268984,0.595459,1.773871,-1.234789
2,Trend surface,202102,b1,40.025,-93.275,0.456127,0.274542,0.399457,-0.181586,-0.454582,1.568618,-1.628654
3,Trend surface,202102,b1,40.025,-93.225,-0.078473,0.274478,0.427185,0.352951,0.826225,1.677501,-1.018430
4,Trend surface,202102,b1,40.025,-91.825,1.154629,0.265364,0.589974,-0.889265,-1.507294,2.316757,1.216584
...,...,...,...,...,...,...,...,...,...,...,...,...
15784,Cokriging,202110,b2,41.425,-101.375,0.418129,0.297223,0.341103,-0.120907,-0.354458,1.339392,-2.025502
15785,Cokriging,202110,b2,41.475,-102.975,-0.184186,0.218746,0.326823,0.402932,1.232875,1.283319,-0.716692
15786,Cokriging,202110,b2,41.475,-102.925,-0.059710,0.217965,0.326224,0.277675,0.851178,1.280969,-1.515836
15787,Cokriging,202110,b2,41.475,-101.425,0.112492,0.293377,0.326452,0.180885,0.554094,1.281864,-1.931923


In [3]:
# Summarize the point-wise validation results with the project helper.
# NOTE(review): judging by the displayed output, this yields one row per
# (Method, Month, Region) with N, BIAS, RASPE, DSS_MEAN and INT_MEAN; confirm
# against validation.collect_metrics.
df_metrics_sub = collect_metrics(df_results)
# Move the index produced by collect_metrics into ordinary columns.
df_metrics_sub = df_metrics_sub.reset_index()
df_metrics_sub


Unnamed: 0,Method,Month,Region,N,BIAS,RASPE,DSS_MEAN,INT_MEAN
0,Cokriging,202102,b1,685,-0.12,0.51,-0.34,2.86
1,Cokriging,202102,b2,554,0.05,0.27,-1.42,1.71
2,Cokriging,202104,b1,563,-0.01,0.37,-1.03,1.97
3,Cokriging,202104,b2,594,0.1,0.37,-0.96,1.97
4,Cokriging,202107,b1,757,-0.06,0.58,-0.13,2.85
5,Cokriging,202107,b2,712,0.01,0.56,-0.22,2.95
6,Cokriging,202110,b1,690,0.03,0.32,-1.34,1.65
7,Cokriging,202110,b2,708,0.06,0.29,-1.44,1.55
8,Kriging,202102,b1,685,-0.12,0.51,-0.33,2.86
9,Kriging,202102,b2,554,0.04,0.27,-1.42,1.71


In [4]:
# Compute one multivariate DSS value per (Method, Month, Region) group from the
# group's observed data and the predictive mean / covariance stored on disk.
multivariate_scores = []
for (method, month, block), df_group in df_results.groupby(["Method", "Month", "Region"]):
    # The mean/covariance files are keyed by the first word of the method name
    # in lower case (e.g. "Trend surface" -> "trend"). Only the file name uses
    # this key; the table keeps the original label so that it matches the
    # "Method" values of the other metric tables exactly.
    method_key = method.split(" ")[0].lower()
    validation_dir = f"../data/intermediate/validation/{month}"

    # Observations as a column vector of shape (n, 1).
    data = np.expand_dims(df_group["data"].values, axis=-1)
    mean = pd.read_csv(f"{validation_dir}/mean_{method_key}_{block}.csv", header=None).values
    covariance = pd.read_csv(
        f"{validation_dir}/covariance_{method_key}_{block}.csv", header=None
    ).values

    # np.round_ was deprecated in NumPy 1.25 and removed in NumPy 2.0; np.round
    # is the supported spelling and behaves identically.
    mDSS = np.round(multivariate_dss(data, mean, covariance), 2)
    multivariate_scores.append(dict(Method=method, Month=month, Region=block, mDSS=mDSS))

# One row per group; naming the columns explicitly keeps the schema stable even
# if no group was found.
df_mDSS = pd.DataFrame(multivariate_scores, columns=["Method", "Month", "Region", "mDSS"])
df_mDSS


Unnamed: 0,Method,Month,Region,mDSS
0,Cokriging,202102,b1,-286.08
1,Cokriging,202102,b2,-823.63
2,Cokriging,202104,b1,-634.6
3,Cokriging,202104,b2,-664.41
4,Cokriging,202107,b1,-311.94
5,Cokriging,202107,b2,-356.18
6,Cokriging,202110,b1,-1078.5
7,Cokriging,202110,b2,-1106.32
8,Kriging,202102,b1,-285.8
9,Kriging,202102,b2,-823.59


In [6]:
# Attach the multivariate score to the univariate summary; rows are matched on
# the (Method, Month, Region) triple with pandas' default merge behaviour.
df_metrics = df_metrics_sub.merge(df_mDSS, on=["Method", "Month", "Region"])

# Shorten the metric names to the labels used in the published tables.
df_metrics = df_metrics.rename(
    columns={"INT_MEAN": "INT", "DSS_MEAN": "DSS", "mDSS": "MDSS"}
)

# Month is compared against the string entries of `months` further down, so
# store it as text.
df_metrics["Month"] = df_metrics["Month"].map(str)

# Metric columns (and their order) shown in the per-month tables.
metrics_keep = ["BIAS", "RASPE", "INT", "DSS", "MDSS"]

df_metrics.to_csv("../data/output/validation_metrics.csv", index=False)


In [8]:
def format_metrics(df: pd.DataFrame, month: str, metrics: list) -> pd.DataFrame:
    """Build the (Block, Method) x metric table for a single month.

    Args:
        df: Metrics table with "Method", "Month" and "Region" columns plus one
            column per metric.
        month: Value to select in the "Month" column. The comparison is an
            exact equality, so it must have the same type as the stored values.
        metrics: Metric column names to keep, in the desired output order.

    Returns:
        A frame indexed by ("Block", "Method") with one column per requested
        metric. The region codes "b1" and "b2" are displayed as "Corn Belt" and
        "Cropland" respectively.
    """
    block_labels = {"b1": "Corn Belt", "b2": "Cropland"}

    # Keep only the requested month; the column is constant afterwards, so drop it.
    is_month = df["Month"] == month
    monthly = df.loc[is_month].drop(columns="Month").rename(columns={"Region": "Block"})

    # Long form (one row per Method/Block/Metric), then back to wide form with
    # the metrics as columns and a sorted (Block, Method) row index.
    long_form = monthly.melt(id_vars=["Method", "Block"], var_name="Metric")
    wide = long_form.pivot(index=["Block", "Method"], columns="Metric", values="value")
    wide = wide.rename(index=block_labels)

    # Remove the "Metric" header label left behind by the pivot.
    wide.columns.name = None
    return wide.loc[:, metrics]


def metrics_to_latex(df: pd.DataFrame, month: str):
    bold_min = lambda col: ["font-weight:bold" if x == col.min() else "" for x in col]
    bold_abs_min = lambda col: [
        "font-weight:bold" if np.abs(x) == np.abs(col).min() else "" for x in col
    ]
    table_tex = (
        df.style.apply(bold_abs_min, subset=pd.IndexSlice[["Corn Belt"], ["BIAS"]])
        .apply(bold_min, subset=pd.IndexSlice[["Corn Belt"], ["RASPE"]])
        .apply(bold_min, subset=pd.IndexSlice[["Corn Belt"], ["INT"]])
        .apply(bold_min, subset=pd.IndexSlice[["Corn Belt"], ["DSS"]])
        .apply(bold_min, subset=pd.IndexSlice[["Corn Belt"], ["MDSS"]])
        .apply(bold_abs_min, subset=pd.IndexSlice[["Cropland"], ["BIAS"]])
        .apply(bold_min, subset=pd.IndexSlice[["Cropland"], ["RASPE"]])
        .apply(bold_min, subset=pd.IndexSlice[["Cropland"], ["INT"]])
        .apply(bold_min, subset=pd.IndexSlice[["Cropland"], ["DSS"]])
        .apply(bold_min, subset=pd.IndexSlice[["Cropland"], ["MDSS"]])
        .format(precision=2, escape="latex")
    ).to_latex(
        position="h!",
        label=f"tab:validation-metrics-{month}",
        caption="Caption here",
        sparse_columns=True,
        multirow_align="t",
        # clines="skip-last;data",
        hrules=True,
        convert_css=True,
    )
    print(table_tex)
    return table_tex


In [9]:
# For every validation month: build the (Block, Method) x metric table, show it
# in the notebook, and print its LaTeX rendering.
for month in months:
    df_metrics_month = format_metrics(df_metrics, month=month, metrics=metrics_keep)
    display(df_metrics_month)
    # The returned LaTeX string is kept in `table`; after the loop it holds the
    # table of the last month.
    table = metrics_to_latex(df_metrics_month, month=month)


Unnamed: 0_level_0,Unnamed: 1_level_0,BIAS,RASPE,INT,DSS,MDSS
Block,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Corn Belt,Cokriging,-0.12,0.51,2.86,-0.34,-286.08
Corn Belt,Kriging,-0.12,0.51,2.86,-0.33,-285.8
Corn Belt,Trend surface,-0.1,0.52,2.83,-0.35,-237.14
Cropland,Cokriging,0.05,0.27,1.71,-1.42,-823.63
Cropland,Kriging,0.04,0.27,1.71,-1.42,-823.59
Cropland,Trend surface,0.07,0.28,1.62,-1.51,-837.88


\begin{table}[h!]
\caption{Caption here}
\label{tab:validation-metrics-202102}
\begin{tabular}{llrrrrr}
\toprule
 &  & BIAS & RASPE & INT & DSS & MDSS \\
Block & Method &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{Corn Belt} & Cokriging & -0.12 & \bfseries 0.51 & 2.86 & -0.34 & \bfseries -286.08 \\
 & Kriging & -0.12 & \bfseries 0.51 & 2.86 & -0.33 & -285.80 \\
 & Trend surface & \bfseries -0.10 & 0.52 & \bfseries 2.83 & \bfseries -0.35 & -237.14 \\
\multirow[t]{3}{*}{Cropland} & Cokriging & 0.05 & \bfseries 0.27 & 1.71 & -1.42 & -823.63 \\
 & Kriging & \bfseries 0.04 & \bfseries 0.27 & 1.71 & -1.42 & -823.59 \\
 & Trend surface & 0.07 & 0.28 & \bfseries 1.62 & \bfseries -1.51 & \bfseries -837.88 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,BIAS,RASPE,INT,DSS,MDSS
Block,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Corn Belt,Cokriging,-0.01,0.37,1.97,-1.03,-634.6
Corn Belt,Kriging,-0.0,0.37,1.97,-1.03,-634.61
Corn Belt,Trend surface,0.03,0.38,1.93,-1.01,-569.48
Cropland,Cokriging,0.1,0.37,1.97,-0.96,-664.41
Cropland,Kriging,0.1,0.37,1.97,-0.96,-664.26
Cropland,Trend surface,0.1,0.37,1.88,-1.01,-599.19


\begin{table}[h!]
\caption{Caption here}
\label{tab:validation-metrics-202104}
\begin{tabular}{llrrrrr}
\toprule
 &  & BIAS & RASPE & INT & DSS & MDSS \\
Block & Method &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{Corn Belt} & Cokriging & -0.01 & \bfseries 0.37 & 1.97 & \bfseries -1.03 & -634.60 \\
 & Kriging & \bfseries -0.00 & \bfseries 0.37 & 1.97 & \bfseries -1.03 & \bfseries -634.61 \\
 & Trend surface & 0.03 & 0.38 & \bfseries 1.93 & -1.01 & -569.48 \\
\multirow[t]{3}{*}{Cropland} & Cokriging & \bfseries 0.10 & \bfseries 0.37 & 1.97 & -0.96 & \bfseries -664.41 \\
 & Kriging & \bfseries 0.10 & \bfseries 0.37 & 1.97 & -0.96 & -664.26 \\
 & Trend surface & \bfseries 0.10 & \bfseries 0.37 & \bfseries 1.88 & \bfseries -1.01 & -599.19 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,BIAS,RASPE,INT,DSS,MDSS
Block,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Corn Belt,Cokriging,-0.06,0.58,2.85,-0.13,-311.94
Corn Belt,Kriging,-0.01,0.59,2.93,-0.12,-308.63
Corn Belt,Trend surface,0.03,0.62,3.03,0.03,21.32
Cropland,Cokriging,0.01,0.56,2.95,-0.22,-356.18
Cropland,Kriging,0.03,0.56,2.93,-0.2,-355.44
Cropland,Trend surface,0.07,0.6,3.05,-0.08,-57.77


\begin{table}[h!]
\caption{Caption here}
\label{tab:validation-metrics-202107}
\begin{tabular}{llrrrrr}
\toprule
 &  & BIAS & RASPE & INT & DSS & MDSS \\
Block & Method &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{Corn Belt} & Cokriging & -0.06 & \bfseries 0.58 & \bfseries 2.85 & \bfseries -0.13 & \bfseries -311.94 \\
 & Kriging & \bfseries -0.01 & 0.59 & 2.93 & -0.12 & -308.63 \\
 & Trend surface & 0.03 & 0.62 & 3.03 & 0.03 & 21.32 \\
\multirow[t]{3}{*}{Cropland} & Cokriging & \bfseries 0.01 & \bfseries 0.56 & 2.95 & \bfseries -0.22 & \bfseries -356.18 \\
 & Kriging & 0.03 & \bfseries 0.56 & \bfseries 2.93 & -0.20 & -355.44 \\
 & Trend surface & 0.07 & 0.60 & 3.05 & -0.08 & -57.77 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,BIAS,RASPE,INT,DSS,MDSS
Block,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Corn Belt,Cokriging,0.03,0.32,1.65,-1.34,-1078.5
Corn Belt,Kriging,0.02,0.32,1.65,-1.34,-1082.24
Corn Belt,Trend surface,0.02,0.32,1.62,-1.38,-949.96
Cropland,Cokriging,0.06,0.29,1.55,-1.44,-1106.32
Cropland,Kriging,0.05,0.29,1.56,-1.44,-1110.2
Cropland,Trend surface,0.05,0.29,1.5,-1.5,-1062.25


\begin{table}[h!]
\caption{Caption here}
\label{tab:validation-metrics-202110}
\begin{tabular}{llrrrrr}
\toprule
 &  & BIAS & RASPE & INT & DSS & MDSS \\
Block & Method &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{Corn Belt} & Cokriging & 0.03 & \bfseries 0.32 & 1.65 & -1.34 & -1078.50 \\
 & Kriging & \bfseries 0.02 & \bfseries 0.32 & 1.65 & -1.34 & \bfseries -1082.24 \\
 & Trend surface & \bfseries 0.02 & \bfseries 0.32 & \bfseries 1.62 & \bfseries -1.38 & -949.96 \\
\multirow[t]{3}{*}{Cropland} & Cokriging & 0.06 & \bfseries 0.29 & 1.55 & -1.44 & -1106.32 \\
 & Kriging & \bfseries 0.05 & \bfseries 0.29 & 1.56 & -1.44 & \bfseries -1110.20 \\
 & Trend surface & \bfseries 0.05 & \bfseries 0.29 & \bfseries 1.50 & \bfseries -1.50 & -1062.25 \\
\bottomrule
\end{tabular}
\end{table}

