# template_title validation using ICES point observations

In [None]:
chunk_start
from IPython.display import Markdown as md
import pandas as pd
from plotnine import *
import warnings
warnings.filterwarnings('ignore')
import geopandas as gpd
%load_ext rpy2.ipython
import numpy as np
from mask import mask_all, mask_shelf
# Path of the matchup CSV; "template_file_name" is a placeholder string.
file = "template_file_name"
# The variable name is the second underscore-separated token of `file`,
# with any ".csv" text stripped out of it.
variable = file.split("_")[1]
variable = variable.replace(".csv", "")
# Capitalised form, used where the name starts a sentence.
Variable = variable.capitalize()
# Running figure counter used by the caption cells.
i_figure = 1

In [None]:
# Opening sentence of the report, rendered as Markdown: names the validated
# variable and links to the ICES data portal.
md(
    f"{Variable} was validated using ICES oceanographic data, "
    "which was downloaded from "
    "[ICES](https://www.ices.dk/data/data-portals/Pages/ocean.aspx)."
)

ICES oceanographic data are available from the ICES Data Centre (https://ocean.ices.dk/HydChem/HydChem.aspx). The data are collected by ICES and ICES member countries and are quality controlled by the ICES Data Centre. The data were collected using a variety of sampling methods and instruments, including CTD and bottles. They are available at multiple depths, and we derived depth from pressure in the raw data. The data are available from 1903 to present. The pressure was converted to depth using the Python package [seawater](https://pythonhosted.org/seawater/). Observational data were matched up with model data by regridding the model data to the observational data for each date with both model and observational data. The matchups were made in longitude, latitude and depth.

Citation for the ICES dataset should be as follows:

**ICES Data Portal, Dataset on Ocean HydroChemistry, Extracted March 3, 2023. ICES, Copenhagen**.

The data are a combination of depth-resolved bottle data and low- and high-resolution CTD data.

In [None]:
import numpy as np
# Load the model grid table and collect its distinct coordinate values.
grid = pd.read_csv("../../matched/model_grid.csv")
# Every column whose name contains "lon" / "lat" contributes values.
# np.unique flattens its input and returns the unique values already sorted in
# ascending order, so no explicit sort or second de-duplication pass is needed.
lon = np.unique(grid.loc[:, [x for x in grid.columns if "lon" in x]].values)
lat = np.unique(grid.loc[:, [x for x in grid.columns if "lat" in x]].values)
# Resolution is taken as the gap between the two smallest distinct values
# (assumes a regularly spaced grid with at least two values per axis — TODO confirm).
lon_res = lon[1] - lon[0]
lat_res = lat[1] - lat[0]
def bin_value(x, bin_res):
    """Snap ``x`` to the midpoint of the ``bin_res``-wide bin that contains it.

    Computes ``floor((x + bin_res/2) / bin_res + 0.5) * bin_res - bin_res/2``,
    which is algebraically ``floor(x / bin_res) * bin_res + bin_res / 2``, i.e.
    the centre of the interval ``[k * bin_res, (k + 1) * bin_res)`` holding ``x``.

    Parameters
    ----------
    x : scalar or array-like accepted by ``np.floor``
        Value(s) to bin.
    bin_res : number
        Bin width.

    Returns
    -------
    Same shape as ``x``: the bin midpoint for each value.
    """
    half_bin = bin_res / 2
    bin_index = np.floor((x + half_bin) / bin_res + 0.5)
    return bin_index * bin_res - half_bin
# Number of rows in the model grid table; used as the denominator when
# reporting spatial coverage.
grid_size = grid.shape[0]

In [None]:
# Read the matchup table, then count its rows per year.
df = pd.read_csv(file)
df_size = df.groupby("year").size().reset_index(name="n_matches")

# Bar chart of the yearly counts.
gg = ggplot(df_size)
gg = gg + geom_col(aes(x="year", y="n_matches"))
gg = gg + labs(y="Number of observations per year")

# Render to a figure object and end the cell on it so it is displayed.
gg = gg.draw()
gg


In [None]:
# Build the caption with the current figure number, advance the counter, then
# end the cell on the caption object. By default IPython renders only a cell's
# final expression, so an md(...) call followed by a statement shows nothing.
figure_caption = md(f"**Figure {i_figure}**: Number of matchups between ICES point observations and model data for each year")
i_figure += 1
figure_caption

In [None]:
# Bin every matchup's lon/lat with bin_value at the grid resolutions and keep
# one row per distinct binned location.
df_size = (
    df[["lon", "lat"]]
    .assign(
        lon=lambda d: bin_value(d.lon, lon_res),
        lat=lambda d: bin_value(d.lat, lat_res),
    )
    .drop_duplicates()
)
# Distinct binned locations as a percentage of the grid table's row count.
spatial_coverage = np.round(len(df_size) / grid_size * 100, 2)
md(
    f"In total there are {len(df):,} matchups between the model and observations. "
    f"In total, the observational data covers {spatial_coverage}% of the model grid."
)

Locations with model-observation matchups are shown below.

In [None]:
%%capture --no-display
xlim = [np.min(df.lon), np.max(df.lon)]
ylim = [np.min(df.lat), np.max(df.lat)]
shape = gpd.read_file(f"{data_dir}/mapping/TM_WORLD_BORDERS-0.3.shp")

gg = (
    ggplot( df)+
     geom_point(aes(x  = "lon",y =   "lat"), size = 0.1)+
    geom_map(shape, aes("LON", "LAT"), fill = "grey")+
    coord_cartesian(xlim = xlim, ylim = ylim)
)

gg = gg.draw()
gg

In [None]:
# Build the caption with the current figure number, advance the counter, then
# end the cell on the caption object. By default IPython renders only a cell's
# final expression, so an md(...) call followed by a statement shows nothing.
figure_caption = md(f"**Figure {i_figure}**: The spatial locations of the matchups between the model and observations for {variable}.")
i_figure += 1
figure_caption

In [None]:
%%capture --no-display
gg = (ggplot( df)+
 geom_point(aes(x  = "model",y =   "observation"), size = 0.1)
)

gg = gg.draw()
gg


In [None]:
# Build the caption with the current figure number, advance the counter, then
# end the cell on the caption object. By default IPython renders only a cell's
# final expression, so an md(...) call followed by a statement shows nothing.
figure_caption = md(f"**Figure {i_figure}**: Scatter plot of modelled vs observed {variable}.")
i_figure += 1
figure_caption

In [None]:
chunk_end