# Validation of near-bottom ices_variable 

In [None]:
# bin_value using function from r4ecology's github
import numpy as np
def bin_value(x, bin_res):
    """Snap a coordinate to the centre of the bin that contains it.

    Bins are ``bin_res`` wide with edges on multiples of ``bin_res``, so the
    returned values are odd multiples of ``bin_res / 2``.

    Parameters
    ----------
    x : value to bin; passed through ``np.floor``, so scalars and
        numpy-compatible arrays are both handled.
    bin_res : width of a bin, in the same units as ``x``.

    Returns
    -------
    The bin-centre value(s) corresponding to ``x``.
    """
    half_width = bin_res / 2
    # Index of the upper edge of the bin containing x.
    upper_edge_index = np.floor((x + half_width) / bin_res + 0.5)
    # Step back half a bin from the upper edge to land on the centre.
    return upper_edge_index * bin_res - half_width


In [None]:
import pandas as pd
import seawater as sw
import numpy as np
from plotnine import *
from holteandtalley import HolteAndTalley
import nctoolkit as nc
import random
from tqdm import tqdm
from IPython.display import clear_output
# add markdown ability, i.e. md()
from IPython.display import Markdown as md
# Running counter interpolated into the "Figure N" captions of later cells.
i_figure = 1
# Companion counter, presumably for table captions (not used in the cells visible here).
i_table = 1
## add ability to do %%R using rpy2
%load_ext rpy2.ipython


In [None]:
variable = "ices_variable"

# Look up the units of the model variable by opening one example model file.
# The mapping file gives, for each validated variable, a path pattern for the
# model output and the name of the corresponding model variable.
import glob

df = pd.read_csv("../../matched/mapping.csv")
df = df.query("variable == @variable")
if len(df) == 0:
    raise ValueError(f"No entry for {variable} found in ../../matched/mapping.csv")
pattern = list(df.pattern)[0]

# Replace every "**" path component, left to right, with one concrete entry
# found at that level, so that the final pattern points at real files.
# A pattern without any "**" component is used unchanged.
patterns = pattern.split("/")
while "**" in patterns:
    i = patterns.index("**")
    # glob returns matches in arbitrary order; sort so the same entry is
    # picked on every run.
    candidates = sorted(glob.glob("/".join(patterns[0:i]) + "/" + "**"))
    if len(candidates) == 0:
        raise FileNotFoundError(
            "Nothing found under " + "/".join(patterns[0:i]) + " while resolving " + pattern
        )
    patterns[i] = candidates[-1].split("/")[-1]
pattern = "/".join(patterns)

paths = glob.glob(pattern)
if len(paths) == 0:
    raise FileNotFoundError(f"No files match {pattern}")

# Any single file is enough to read the unit from its metadata.
ds = nc.open_data(paths[0])
model_variable = list(df.model_variable)[0]
unit = list(ds.contents.query("variable == @model_variable").unit)[0]

## Read in the data

In [None]:
# Matched model/observation near-bottom values for this variable.
df = pd.read_csv(f"../../matched/ices/bottom/ices_bottom_{variable}.csv")
# Danish part is always dubious
df = df.query("lon < 9")

# Restrict to the shelf: sample the "Shelf" field of the subdomain file at the
# observation locations ("nn" regridding, presumably nearest neighbour) after
# turning zeros into missing values, then keep only the observations that
# still have a grid value (merge is an inner join by default).
ds = nc.open_data("/data/proteus1/scratch/rwi/evaldata/data/amm7_val_subdomains.nc")
ds.subset(variable = "Shelf")
ds.as_missing(0)
ds.regrid(df.loc[:, ["lon", "lat"]], "nn")
df_grid = ds.to_dataframe().reset_index().dropna().drop_duplicates()
df = df.merge(df_grid)

# Unique observation locations, kept at their original (unbinned) positions
# for the location map.
df_locs = df.loc[:, ["lon", "lat"]].drop_duplicates()

# Bin to 0.5 degree resolution and average within each cell, year and month.
# bin_value is applied to the whole column at once rather than per element.
bin_resolution = 0.5
df["lon"] = bin_value(df["lon"], bin_resolution)
df["lat"] = bin_value(df["lat"], bin_resolution)
# numeric_only=True: averaging is only meaningful for numeric columns, and
# recent pandas versions raise on non-numeric ones instead of dropping them.
df = (
    df.groupby(["lon", "lat", "year", "month"])
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# With default IPython settings only the value of a cell's last expression is
# rendered, so the paragraphs are joined into a single Markdown object
# (blank-line separated) instead of being four separate md() expressions.
# NOTE(review): df has already been binned and averaged at this point, so
# len(df) counts binned records rather than raw ICES values -- confirm that
# this is the number the text should report.
md(
    "\n\n".join(
        [
            f"Near-bottom values of {variable} were extracted from ICES bottle and CTD data.",
            "This data was extracted from vertical profiles. The near-bottom value was defined as the value closest to the bottom, that was within 5 m of the bottom. Bathymetry was estimated using GEBCO Bathymetry data.",
            f"In total there were {len(df)} near-bottom values extracted from the  ICES database.",
            "**Note:** this analysis has been restricted to observations on the shelf region.",
        ]
    )
)



In [None]:
# Drop extreme low outliers: rows whose observation lies below the 0.1th
# percentile (quantile 0.001) are removed.
# NOTE(review): the previous comment said "bottom 1%"; confirm 0.001 is intended.
bot_low = df.observation.quantile(0.001)
# Reference the threshold with @ rather than pasting its text into the query.
df = df.query("observation >= @bot_low")

In [None]:
%%capture --no-display
%%R -i df_locs -i variable -i unit -w 1000 -h 1200
library(tidyverse, warn.conflicts = FALSE)

# Country outlines, drawn as the land mask.
world_map <- map_data("world")

# Crop the map to the extent of the observation locations.
xlim <- range(df_locs$lon)
ylim <- range(df_locs$lat)

# Observation locations as points, with land drawn on top in grey.
gg <- ggplot(df_locs) +
    geom_point(aes(lon, lat)) +
    theme_gray(base_size = 24) +
    geom_polygon(data = world_map, aes(long, lat, group = group), fill = "grey60") +
    coord_fixed(xlim = xlim, ylim = ylim, ratio = 1.5)

# Only when the data extend west of 10°W: use degree-formatted axis breaks
# and drop the axis titles.
if (min(df_locs$lon) < -10) {
    gg <- gg +
        scale_x_continuous(breaks = seq(-10, 5, 5), labels = c("10°W", "5°W", "0°", "5°E")) +
        scale_y_continuous(breaks = seq(45, 60, 5), labels = c("45°N", "50°N", "55°N", "60°N")) +
        labs(x = "", y = "")
}

gg

In [None]:
# Build the caption with the current figure number, then advance the counter
# so the next figure does not reuse it. The caption is the cell's last
# expression so that it is the value the notebook renders.
caption = md(f"**Figure {i_figure}:** Map of near-bottom {variable} observations from ICES.")
i_figure += 1
caption

In [None]:
import calendar

# Row counts for every (lon, lat, month) group; the bars below stack these
# per-cell counts, giving one total per month.
# NOTE(review): df was averaged per (lon, lat, year, month) earlier, so the
# counts are of binned records rather than raw observations -- confirm the
# axis label.
df1 = df.groupby(["lon", "lat", "month"]).count().reset_index()

# Label the twelve months Jan ... Dec instead of 1 ... 12.
month_breaks = range(1, 13)
month_labels = list(calendar.month_abbr[1:])

gg = (
    ggplot(df1, aes(x="month", y="observation"))
    + geom_bar(stat="identity")
    + scale_x_continuous(breaks=month_breaks, labels=month_labels)
    + labs(y="Number of observations", x="")
)
# Render to a figure and show it as the cell output.
gg = gg.draw()
gg


In [None]:
# Build the caption before advancing the counter, and leave it as the cell's
# last expression: with default IPython settings an md() call followed by
# another statement is not rendered.
caption = md(f"**Figure {i_figure}:** Number of near-bottom observations per month for {variable}.")
i_figure += 1
caption

In [None]:
%%capture --no-display
%%R -i df -i variable -i unit -w 1000 -h 1200
library(tidyverse, warn.conflicts = FALSE)

# Country outlines, drawn as the land mask.
world_map <- map_data("world")

# Crop every panel to the extent of the binned data.
xlim = c(min(df$lon), max(df$lon))
ylim = c(min(df$lat), max(df$lat))

# Bias is model minus observation.
df <- df %>%
    mutate(bias = model - observation)

# Cap the bias at +/- the 98th percentile of the absolute bias so that a few
# extreme values do not dominate the colour scale.
bias_high <- quantile(abs(df$bias), 0.98)
df$bias[df$bias > bias_high] <- bias_high
df$bias[df$bias < -bias_high] <- -bias_high

# Convert month numbers to abbreviated month names. The levels are given
# explicitly: without them the twelve labels are matched against however many
# distinct months occur in the data, which fails when any month is missing.
df$month <- factor(df$month, levels = 1:12, labels = month.abb)

title <- str_glue("Bias in bottom {variable} ({unit})")

# One panel per month: binned bias as a raster with land drawn on top in grey.
gg <- df %>%
    ggplot()+
    geom_raster(aes(lon, lat, fill = bias))+
    facet_wrap(~month)+
    theme_gray(base_size = 24)+
    geom_polygon(data = world_map, aes(long, lat, group = group), fill = "grey60")+
    coord_fixed(xlim = xlim, ylim = ylim, ratio = 1.5) +
    # diverging blue-to-red scale; legend title placed below the bar and centred
    scale_fill_gradient2(low = "blue", high = "red",
                       guide = guide_colorbar(title.position = "bottom", title.hjust = 0.5, title.theme = element_text(angle = 0, size = 20, family = "Helvetica"))
  )+
    # wide horizontal legend underneath the panels
    theme(
    legend.position = "bottom", legend.direction = "horizontal", legend.box = "horizontal", legend.key.width = unit(6.0, "cm"),
    legend.key.height = unit(1.0, "cm"))+
    # the legend title carries the variable name and unit
    labs(fill = title)

# Only when the data extend west of 10°W: use degree-formatted axis breaks
# and drop the axis titles.
if( min(df$lon) < -10 ){
    gg <- gg +
    scale_x_continuous(breaks = seq(-10, 5, 5), labels = c("10°W", "5°W", "0°", "5°E"))+
    scale_y_continuous(breaks = seq(45, 60, 5), labels = c("45°N", "50°N", "55°N", "60°N"))+
    labs(x = "", y = "")
}

gg

In [None]:
# Build the caption before advancing the counter, and leave it as the cell's
# last expression: with default IPython settings an md() call followed by
# another statement is not rendered.
caption = md(f"**Figure {i_figure}**: Bias in near-bottom {variable}. The bias is calculated as model - observation. The colour scale is from blue (negative bias) to red (positive bias). The colour scale is capped at the 98th percentile of the absolute bias. This is to avoid a few extreme outliers from dominating the colour scale. **Note:** values have been binned and averaged to 0.5 degree resolution.")
i_figure += 1
caption

In [None]:
chunk_end