# Chlorophyll validation using OCCCI data 

In [None]:
chunk_start

In [None]:
# Open the matched model/observation file and derive the span of years it
# covers; year_range is interpolated into the report text further down.
ff = "../../matched/gridded/occci/occci_model.nc"
ds_model = nc.open_data(ff)
mask_all(ds_model)
years = set(ds_model.years)
year_min, year_max = min(years), max(years)
year_range = "{}-{}".format(year_min, year_max)

In [None]:
# Introductory text for the report: data source and the matched year range.
md(
    "Chlorophyll was validated using data from the Ocean Colour CCI dataset. "
    "The Ocean Colour CCI dataset is a merged dataset of chlorophyll from multiple satellites. "
    "The dataset is available from 1997 to 2023. "
    "The dataset is available at 4km resolution. "
    "The dataset is available from https://esa-oceancolour-cci.org/. "
    "We matched up the model for years with complete records in both the model and observational data, "
    f"resulting in a year range of {year_range}."
)
# Description of how model and observations were brought onto a common grid.
# NOTE(review): the text describes both averaging the model to the 4km
# observation grid and regridding the observations to the model grid -- confirm
# which procedure was actually applied and trim the other sentence.
md(
    "Observational data is available at a spatial resolution of 4km. "
    "The model data is available at a spatial resolution of 1km. "
    "We therefore averaged the model data to 4km resolution to match the observational data. "
    "We then matched the model data to the observational data using a nearest neighbour approach. "
    "The observational data was regridded to match the model data resolution "
    "using bilinear interpolation using the nctoolkit package."
)


In [None]:
# Reduce the model field to a monthly climatology, then derive an annual mean
# from it.
ds_model.subset(variable = "model")  # keep only the "model" variable
ds_model.tmean("month")  # temporal mean grouped by month
ds_model.as_missing(0)  # zeros are converted to missing values
# NOTE(review): run() presumably forces evaluation of the operations queued
# above before the dataset is copied -- confirm against the nctoolkit docs.
ds_model.run()
# Copy first so ds_model keeps its monthly resolution for the later cells.
ds_annual = ds_model.copy()
ds_annual.tmean()  # mean over the remaining time steps
ds_annual.set_longnames({"model": "Chlorophyll-a concentration"})

In [None]:
# Build the observational monthly climatology with the same steps used for the
# model: subset the variable, average by month, convert zeros to missing.
ds_obs = nc.open_data(ff)
ds_obs.subset(variable = "observation")
ds_obs.run()
ds_obs.tmean("month")
ds_obs.as_missing(0)
ds_obs.run()

# Build a common mask from both datasets and apply it to each of them.
# NOTE(review): the comparison and multiplication results below are not
# assigned, so this relies on the dataset operators modifying the left-hand
# operand in place -- confirm against the nctoolkit docs.
obs_mask = ds_obs.copy()
obs_mask > -1e20
mod_mask = ds_model.copy()
mod_mask > -1e20
mod_mask * obs_mask  # combine the model and observation masks
mod_mask.run()
ds_model * mod_mask
ds_obs * mod_mask

In [None]:
chunk_clim

In [None]:
chunk_bias

## Can the model reproduce seasonality of chlorophyll?

The ability of the model to reproduce the seasonality of chlorophyll is assessed by comparing the modelled and observed seasonal cycles of chlorophyll. For each grid cell, the modelled seasonal cycle is calculated by averaging the monthly values of chlorophyll over all available model years, and the observed seasonal cycle is calculated by averaging the observed monthly values of chlorophyll over all available years. The modelled seasonal cycle is then compared to the observed seasonal cycle, grid cell by grid cell, using the correlation coefficient between the two. The correlation coefficient ranges from -1 to 1. A value of 1 indicates perfect agreement between the modelled and observed seasonal cycles of chlorophyll. A value of -1 indicates that the two seasonal cycles are perfectly inversely related, i.e. the modelled cycle peaks when the observed cycle is at its minimum. A value of 0 indicates no linear relationship between the modelled and observed seasonal cycles of chlorophyll.

In [None]:
# Seasonal correlation between the monthly model and observation fields.
# Each field is copied and renamed so that the merged dataset holds two
# variables called "model" and "observation".
ds1 = ds_model.copy()
ds1.cdo_command("setname,model")
ds1.run()
ds2 = ds_obs.copy()
ds2.cdo_command("setname,observation")
ds2.run()
# Open both intermediate files as one dataset and merge them, matching on month.
ds_cor = nc.open_data([ds1.current[0], ds2.current[0]])
ds_cor.merge(match=["month"])
ds_cor.run()
# Keep a copy of the merged (uncorrelated) data before cor_time replaces it.
# NOTE(review): ds_ts and title are not used in the visible part of this
# notebook -- presumably consumed by a later cell; confirm.
ds_ts = ds_cor.copy()
ds_cor.cor_time("model", "observation")
title = f"Seasonal temporal correlation between {variable} for model and observations"
ds_cor.run()

# Write the correlation field to netCDF.
out = f"../../results/temporals/{variable}_cor.nc"
# exist_ok=True creates the directory when needed without a separate existence
# check, which avoids the check-then-create race of the two-step form.
os.makedirs(os.path.dirname(out), exist_ok=True)
ds_cor.to_nc(out, zip = True, overwrite = True)

In [None]:

# Convert the correlation field to a flat data frame and plot the map.
# NOTE(review): df_cor is not used again in the visible part of this notebook
# -- confirm whether the conversion is still needed.
df_cor = ds_cor.to_dataframe().reset_index()
# NOTE(review): an earlier comment here referred to deriving the lon/lat range
# of non-missing correlations from df_cor; no such calculation is present.
ds_cor.pub_plot()

In [None]:
# Caption for the seasonal correlation map; advance the figure counter after.
md(
    f"**Figure {i_figure}**: Seasonal temporal correlation between model and observations for {variable}. "
    f"The correlation is calculated using the model and observational data for the years {year_range}, "
    "with monthly means calculated using all data"
)
i_figure += 1

## Can the model reproduce long-term changes in chlorophyll?

In [None]:
# Long-term (inter-annual) correlation between model and observations.
ds_lt = nc.open_data(ff)
mask_all(ds_lt)
# Build a mask that keeps only cells/time steps where both variables are
# present: flag each variable against a very low threshold, sum the flags
# across variables, and keep positions where the sum equals 2.
# NOTE(review): the comparison and multiplication results are not assigned, so
# this relies on the operators modifying the dataset in place -- confirm
# against the nctoolkit docs.
ds_mask = ds_lt.copy()
ds_mask > -1e20
ds_mask.sum_all()
ds_mask == 2
ds_mask.as_missing(0)  # zeros in the mask become missing values
ds_lt * ds_mask
ds_lt.tmean("year")  # annual means of the masked data
ds_lt.cor_time("model", "observation")
ds_lt.run()
ds_lt.pub_plot()

In [None]:
# Caption for the long-term correlation map; advance the figure counter after.
md(
    f"**Figure {i_figure}**: The temporal correlation between model and observations for chlorophyll-a concentration. "
    f"The correlation is calculated using the model and observational data for the years {year_range}, "
    "with annual means calculated using all data. "
    "Only months with data in both the model and observational data are included in the annual mean calculation."
)
i_figure += 1

In [None]:
# Run the regional summary helper on the masked monthly model and observation
# datasets. It returns the value shown in the next cell and the updated figure
# counter, which is written back to i_figure.
md_result, i_figure = ecoval.global_regionals(ds_model, ds_obs, variable, i_figure)

In [None]:
# Bare expression so the notebook displays the result of the previous cell.
md_result

In [None]:
# Combine the model and observation fields into one annual-mean dataset and
# write it to disk.
ds_annual = ds_model.copy()
ds_annual.rename({ds_annual.variables[0]: "model"})
ds_annual.append(ds_obs)
ds_annual.tmean()  # mean over time for each appended file
ds_annual.merge("variables")
ds_annual.rename({ds_obs.variables[0]: "observation"})
out_dir = "../../results/annual_mean/"
# exist_ok=True creates the directory when needed without a separate existence
# check, which avoids the check-then-create race of the two-step form.
os.makedirs(out_dir, exist_ok=True)
out_file = out_dir + f"annualmean_{variable}.nc"
ds_annual.to_nc(out_file, zip = True, overwrite = True)

In [None]:
chunk_end