# domain_title summary statistics 

In [None]:
# NOTE(review): the bare name on the next line and the right-hand side of the
# assignment after it are not defined anywhere in this notebook; they look like
# template tokens substituted before execution — confirm against the code that
# renders this template.
chunk_start
# `shelf` is the flag the later cells read to choose between the shelf and
# ocean variants of the region map and its caption.
shelf = shelf_mask
from IPython.display import Markdown as md
import glob
import nctoolkit as nc
from mask import mask_all, mask_shelf
from ecoval import tidy_name

In [None]:
%%capture --no-display

# Flag read by the caption cell that follows; it only becomes True once the
# map has actually been produced.
show_map = False

# The map is drawn only when a notebook whose file name contains
# "summary_shelf" exists in the current working directory.
shelf_summary_present = any(
    "summary_shelf" in nb_name for nb_name in glob.glob("*.ipynb")
)

if shelf_summary_present:
    # `data_dir` is expected to be defined before this cell runs (it is not
    # set anywhere in the visible notebook source).
    regions_file = f"{data_dir}/amm7_val_subdomains.nc"
    ds_regions = nc.open_data(regions_file)
    # Keep the two region variables, combine them, and blank out zeros
    # (presumably cells belonging to neither region — confirm against the file).
    ds_regions.subset(variables = ["Shelf", "Ocean"])
    ds_regions.sum_all()
    ds_regions.as_missing(0)
    if shelf:
        mask_shelf(ds_regions)

    ds_plot = ds_regions.pub_plot(legend_position=None, land = "lightgrey")
    show_map = True


In [None]:
# Caption for the region map, emitted only when the map was drawn.
# `i_figure` is expected to be defined before this cell runs (it is not set
# anywhere in the visible notebook source).
if show_map:
    # A bare md(...) expression nested inside an `if` is not rendered under
    # IPython's default "last expression" display mode, so show it explicitly.
    from IPython.display import display

    if shelf:
        display(md(f"**Figure {i_figure}**: Map of the shelf area used for the evaluation."))
    else:
        display(md(f"**Figure {i_figure}**: Map of the ocean area used for the evaluation."))
    # The map has used this figure number; advance the counter so the next
    # figure caption does not repeat it.
    i_figure += 1

## Taylor diagrams

In [None]:
import nctoolkit as nc
import pandas as pd
import geopandas as gpd
from IPython.display import Markdown as md
from IPython.display import display_markdown 
import warnings
warnings.filterwarnings('ignore')
from plotnine import *
import numpy as np
import os
import glob as glob
from mask import mask_all, mask_shelf
%load_ext rpy2.ipython

# Counter interpolated into the "Table N" captions further down.
i_table = 1

# Write an empty marker file named after the nctoolkit session stamp into
# .trackers/ (presumably so the session can be identified from outside the
# notebook — confirm with the code that reads this directory).
stamp = nc.session_info["stamp"]
out = ".trackers/" + stamp
# exist_ok=True replaces the separate exists() check, so the call cannot fail
# if the directory is created between the check and the create.
os.makedirs(".trackers", exist_ok=True)
# save out as empty file
with open(out, 'w') as f:
    f.write("")

In [None]:
# Collect the annual-mean result files; the cells below iterate over
# `ensemble` and open each entry as a NetCDF file path.
ensemble = nc.create_ensemble("../../results/annual_mean")

In [None]:
# Build one long table of paired model/observation values (one row per grid
# cell and variable) that is handed to the R cell drawing the Taylor diagram.
df_taylor = []
for ff in ensemble:
    # The variable name is the second underscore-separated token of the file
    # name, with the ".nc" suffix stripped.
    variable = os.path.basename(ff).split("_")[1].replace(".nc", "")
    ds_ff = nc.open_data(ff)
    # Honour the notebook-level `shelf` flag. This used to be `if True:`,
    # which left the mask_all branch unreachable and always applied the shelf
    # mask, whatever domain the notebook was rendered for.
    if shelf:
        mask_shelf(ds_ff)
    else:
        mask_all(ds_ff)

    df_ff = ds_ff.to_dataframe().reset_index()
    # Coordinate columns are located by substring; the first match is used and
    # an IndexError is raised when there is none.
    lon_name = [col for col in df_ff.columns if "lon" in col][0]
    lat_name = [col for col in df_ff.columns if "lat" in col][0]
    df_taylor.append(
        df_ff
        .loc[:, [lon_name, lat_name, "model", "observation"]]
        .dropna()
        .assign(variable = variable)
    )
df_taylor = pd.concat(df_taylor).reset_index(drop=True)

# fix name of variable
df_taylor = (
    df_taylor
    .assign(variable = lambda x: x["variable"].apply(tidy_name))
)

In [None]:
%%capture --no-display
%%R -i df_taylor

library(plotrix, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)

# Order the variables by normalised standard deviation (sd of model divided by
# sd of observation), largest first; the first one drawn sets up the diagram.
nsd_by_variable <- df_taylor %>%
    group_by(variable) %>%
    summarize(nsd = sd(model)/sd(observation))  %>%
    arrange(desc(nsd))
variables <- nsd_by_variable$variable

# One plotting symbol and one colour per variable.
pch = 1:length(variables)
col = rainbow(length(variables))

# Smallest per-variable correlation between observation and model.
r_by_variable <- df_taylor %>%
    group_by(variable) %>%
    summarise(r = cor(observation, model, use = "complete.obs"))
r_min <- min(r_by_variable$r)

# Passed to taylor.diagram as pos.cor: TRUE unless some correlation is below -0.1.
pos_cor = r_min >= -0.1

i <- 1
for (vv in variables){

    df_var <- filter(df_taylor, variable == vv)

    if (i == 1){
        # Normalised sd of the first variable; used below to place the legend.
        plot_size <- with(df_var, sd(model)/sd(observation))

        # First variable creates the diagram.
        taylor.diagram(df_var$observation, df_var$model, pch = pch[i], col = col[i],
                       add = FALSE, normalize = TRUE, pos.cor = pos_cor, main = NULL)
    } else {
        # Remaining variables are added to the existing diagram.
        taylor.diagram(df_var$observation, df_var$model, pch = pch[i], col = col[i],
                       add = TRUE, normalize = TRUE, pos.cor = pos_cor)
    }

    i <- i + 1
}

legend( plot_size * 1.3, plot_size * 1.9, legend = variables, pch = pch, col = col, bty = "n")





In [None]:
# Distinct variable names in the Taylor table; joined into the figure and
# table captions in the cells below.
variables = df_taylor.variable.unique()

In [None]:
# Caption for the Taylor diagram. It is displayed explicitly because the
# counter update below is the last statement of the cell, so under IPython's
# default "last expression" display mode a bare md(...) call would not be
# rendered.
from IPython.display import display

display(md(f"**Figure {i_figure}**: Taylor diagram for annual mean of {', '.join(variables)}. This diagram compares climatological annual averages of the model and observations across the model's spatial domain. Standard deviation is normalized by the standard deviation of the observations, and a standard deviation below 1 indicates that the model is less variable than the observations. Note: This figure summarizes the overall ability of the model to reproduce climatological spatial patterns, and it does not represent temporal performance."))
i_figure += 1

## Model biases

In [None]:
# Build the bias table: one row per variable with the modelled and observed
# spatial means, their difference, the unit and the percentage bias.
df_bias = []
for ff in ensemble:
    # Variable name: second underscore-separated token of the file name,
    # title-cased, with "sst" special-cased to upper case.
    variable = os.path.basename(ff).split("_")[1].replace(".nc", "").title()
    if variable.lower() == "sst":
        variable = "SST"
    ds_ff = nc.open_data(ff)
    ds_ff.set_precision("F32")
    # Honour the notebook-level `shelf` flag. This used to be `if True:`,
    # which left the mask_all branch unreachable.
    if shelf:
        mask_shelf(ds_ff)
    else:
        mask_all(ds_ff)
    ds_ff.assign(bias = lambda x: x.model - x.observation)
    ds_ff.spatial_mean()
    # Convert to a data frame once and read all three values from it.
    df_ff = ds_ff.to_dataframe().reset_index()
    bias = df_ff.bias.values[0]
    unit = ds_ff.contents.unit[0]
    model = df_ff.model.values[0]
    observation = df_ff.observation.values[0]
    df_bias.append(pd.DataFrame({"Variable": [variable], "Modelled spatial mean": [model], "Observational spatial mean":[observation], "Model bias": [bias], "Unit": [unit]}))
df_bias = pd.concat(df_bias).reset_index(drop=True)
df_bias = df_bias.assign(percentage_bias = lambda x: x["Model bias"]/x["Observational spatial mean"]*100)
# Blank the percentage bias for temperature. The names were title-cased above,
# so the comparison must be case-insensitive; an exact match against
# "temperature" never selected any row.
df_bias.loc[df_bias.Variable.str.lower() == "temperature", "percentage_bias"] = np.nan
df_bias.columns = ["Variable", "Modelled spatial mean", "Observational spatial mean", "Model bias", "Unit", "Percentage bias"]
# tidy Variable
df_bias = df_bias.assign(Variable = lambda x: x["Variable"].apply(tidy_name))
# Last expression of the cell: renders the table without its index column.
df_bias.style.hide(axis="index")

In [None]:
# Caption for the bias table. It is displayed explicitly because the counter
# update below is the last statement of the cell, so under IPython's default
# "last expression" display mode a bare md(...) call would not be rendered.
from IPython.display import display

display(md(f"**Table {i_table}**: Bias of model compared with observations. The bias is calculated as the modelled spatial mean minus the observational spatial mean. The percentage bias is calculated as the model bias divided by the observational spatial mean."))
i_table += 1

## Spatial performance of the model

In [None]:
# Build the spatial-correlation table: one row per variable holding the
# correlation between the "model" and "observation" fields.
df_cor = []
for ff in ensemble:
    # Variable name: second underscore-separated token of the file name,
    # title-cased, with "sst" special-cased to upper case.
    variable = os.path.basename(ff).split("_")[1].replace(".nc", "").title()
    if variable.lower() == "sst":
        variable = "SST"
    ds_ff = nc.open_data(ff)
    ds_ff.set_precision("F32")
    # Honour the notebook-level `shelf` flag. This used to be `if True:`,
    # which left the mask_all branch unreachable.
    if shelf:
        mask_shelf(ds_ff)
    else:
        mask_all(ds_ff)
    ds_ff.cor_space("model", "observation")
    # First non-missing value of the resulting "cor" column.
    ff_cor = (
        ds_ff
        .to_dataframe()
        .dropna()
        .cor
        .values
        [0]
    )
    df_cor.append(pd.DataFrame({"Variable": [variable], "Correlation": [ff_cor]}))
df_cor = pd.concat(df_cor).reset_index(drop=True)
df_cor.columns = ["Variable", "Spatial correlation between model and observations"]
# tidy Variable
df_cor = df_cor.assign(Variable = lambda x: x["Variable"].apply(tidy_name))
# Last expression of the cell: renders the table without its index column.
df_cor.style.hide(axis="index")

In [None]:
# Caption for the spatial-correlation table. It is displayed explicitly
# because the counter update below is the last statement of the cell, so under
# IPython's default "last expression" display mode a bare md(...) call would
# not be rendered.
from IPython.display import display

display(md(f"**Table {i_table}**: Pearson correlation coefficient between model and observations for annual mean of {', '.join(variables)}. This table compares climatological annual averages of the model and observations across the model's spatial domain. Standard deviation is normalized by the standard deviation of the observations, and a standard deviation below 1 indicates that the model is less variable than the observations."))
i_table += 1

## Temporal performance of the model

In [None]:
# Flag the run as "global" when any temporal-correlation file has latitudes
# reaching below -89 and above 89. Every file is inspected; the flag is never
# reset once it has been set.
global_grid = False
for ff in glob.glob("../../results/temporals/*.nc"):
    ds_ff = nc.open_data(ff)
    df_ff = ds_ff.to_dataframe().reset_index()
    # First column whose name contains "lat".
    lat_name = [col for col in df_ff.columns if "lat" in col][0]
    lat_values = df_ff[lat_name].values
    lat_min = lat_values.min()
    lat_max = lat_values.max()
    reaches_both_poles = lat_min < -89 and lat_max > 89
    if reaches_both_poles:
        global_grid = True

In [None]:
# Gather the per-grid-cell temporal correlations of every variable into one
# long table, which is handed to the R cell that draws the faceted map.
df_cor = []
for ff in glob.glob("../../results/temporals/*.nc"):
    ds_ff = nc.open_data(ff)
    if global_grid:
        # Global data are put on a 1-degree lon/lat grid before masking.
        ds_ff.to_latlon(lon = [-179.5, 179.5], lat = [-89.5, 89.5], res = 1)
    # Honour the notebook-level `shelf` flag. This used to be `if True:`,
    # which left the mask_all branch unreachable.
    if shelf:
        mask_shelf(ds_ff)
    else:
        mask_all(ds_ff)
    df_ff = ds_ff.to_dataframe().reset_index().dropna()
    # Here the variable name is the first underscore-separated token of the
    # file name, title-cased, with "sst" special-cased to upper case.
    variable = os.path.basename(ff).split("_")[0].replace(".nc", "").title()
    if variable.lower() == "sst":
        variable = "SST"
    df_ff = df_ff.assign(variable = variable)
    df_cor.append(df_ff)

df_cor = pd.concat(df_cor).reset_index(drop=True)
# tidy variable name
df_cor = df_cor.assign(variable = lambda x: x["variable"].apply(tidy_name))




In [None]:
%%capture --no-display
%%R -i df_cor -i global_grid

library(ggplot2, warn.conflicts = FALSE)
world_map <- map_data("world")

# Plot window: the longitude/latitude extent of the data.
xlim <- range(df_cor$lon)
ylim <- range(df_cor$lat)

# Axis breaks and labels: a fixed global set, or a fixed regional set otherwise.
if (global_grid){
        lon_breaks <- c(-180, -120, -60, 0, 60, 120, 180)
        lon_labels <- c("180°W", "120°W", "60°W", "0°", "60°E", "120°E", "180°E")
        lat_breaks <- c(-60, -30, 0, 30, 60)
        lat_labels <- c("60°S", "30°S", "0°", "30°N", "60°N")
} else {
        lon_breaks <- c(-20, -10, 0, 10)
        lon_labels <- c("20°W", "10°W", "0°", "10°E")
        lat_breaks <- c(40, 50, 60)
        lat_labels <- c("40°N", "50°N", "60°N")
}

# Range of the correlations; decides which fill scale is used below.
cor_range <- range(df_cor$cor)
min_val <- cor_range[1]
max_val <- cor_range[2]

# Colour-bar guide shared by both fill scales.
colourbar_guide <- guide_colorbar(
        title.position = "bottom", title.hjust = 0.5,
        title.theme = element_text(angle = 0, size = 10, family = "Helvetica")
)

# One panel per variable: correlation tiles with land polygons drawn on top.
gg <- ggplot(df_cor)+
        geom_tile(aes(x  = lon,y =   lat, fill = cor))+
        geom_polygon(data = world_map, aes(x = long, y = lat, group = group), fill = "grey", colour = "grey")+
        coord_cartesian(xlim = xlim, ylim = ylim)+
        scale_x_continuous(breaks = lon_breaks, labels = lon_labels)+
        scale_y_continuous(breaks = lat_breaks, labels = lat_labels)+
        # NOTE(review): the theme_bw() call two lines below presumably supersedes
        # this one — confirm, then drop it.
        theme_bw(base_size = 12)+
        facet_wrap(~variable)+
        theme_bw(base_family = "Helvetica", base_size = 8) +
        labs(fill = "Correlation coefficient", x = NULL, y = NULL) +
        theme(
          legend.position = "bottom", legend.direction = "horizontal", legend.box = "horizontal",
          legend.key.width = unit(3.0, "cm"),
          legend.key.height = unit(0.5, "cm"),
          plot.margin = unit(c(2, 0, 2, 0), "mm"),
          plot.title = element_text(hjust = 0.5)
        )

# Diverging scale when correlations straddle zero, viridis when all are
# positive; in any other case the default fill scale is left in place.
if (min_val < 0 && max_val > 0){
        gg <- gg +
                scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
                guide = colourbar_guide,
                breaks = seq(-1, 1, 0.25))
} else if (min_val > 0){
        gg <- gg +
                scale_fill_viridis_c(guide = colourbar_guide)
}
gg


            




In [None]:
# Caption for the correlation map. It is displayed explicitly because the
# counter update below is the last statement of the cell, so under IPython's
# default "last expression" display mode a bare md(...) call would not be
# rendered.
from IPython.display import display

display(md(f"**Figure {i_figure}**: Spatial correlation (Pearson correlation coefficient) between model and observations for annual mean of {', '.join(variables)}. This figure compares climatological monthly averages of the model and observations across the model's spatial domain."))
i_figure += 1

The overall ability of the model to reproduce the seasonality of each variable was estimated by calculating the spatial mean of the Pearson correlation coefficient between the model and the observations. The spatial mean was calculated by averaging the correlation coefficient of each grid cell. 

In [None]:
# One row per temporal-correlation file: the variable name and the spatial
# mean of its "cor" field.
cor_rows = []
for ff in glob.glob("../../results/temporals/*.nc"):
    ds_ff = nc.open_data(ff, checks = False)
    ds_ff.spatial_mean()
    mean_cor = ds_ff.to_dataframe().reset_index().cor.values[0]

    # Variable name: first underscore-separated token of the file name,
    # title-cased, with "sst" special-cased to upper case.
    raw_name = os.path.basename(ff).split("_")[0].replace(".nc", "").title()
    variable = "SST" if raw_name.lower() == "sst" else raw_name

    cor_rows.append(pd.DataFrame({"Variable": [variable], "Correlation": [mean_cor]}))

df_cor = pd.concat(cor_rows).reset_index(drop=True)

# Replace the raw names with their tidied display form.
df_cor = df_cor.assign(Variable = lambda x: x["Variable"].apply(tidy_name))



In [None]:
# Render the correlation table without its index column (last expression of
# the cell, so the styled table is the cell's output).
df_cor.style.hide(axis="index")

In [None]:
# Caption for the temporal-correlation table. It is displayed explicitly
# because the counter update below is the last statement of the cell, so under
# IPython's default "last expression" display mode a bare md(...) call would
# not be rendered.
from IPython.display import display

display(md(f"**Table {i_table}**: Spatial average of the temporal correlation (Pearson correlation coefficient) between model and observations for annual mean of {', '.join(variables)}. The correlation is calculated for each grid cell individually using monthly climatological averages. The spatial average is then calculated for each variable."))
i_table += 1