# Temporal and spatial correlations between simulations 

In [None]:
# First identify what is available....

import glob
import os
from IPython.display import Markdown as md
# ability to open pickle
import warnings
warnings.filterwarnings('ignore')
import pickle
#pkg_resources
import pkg_resources

import pandas as pd
import nctoolkit as nc
from plotnine import *
import os
import molmass
def get_molar_mass(element):
    """Return the ``mass`` attribute of ``molmass.Formula(element)``.

    ``element`` is passed to ``molmass.Formula`` unchanged.
    """
    from molmass import Formula

    return Formula(element).mass

# Locate the climatology files. Without recursive=True, "**" behaves like "*",
# so this matches files exactly two directory levels below "climatologies".
paths = glob.glob("../../data/climatologies/**/**/*.nc")
# File names are split on "-": field 0 is used as the measure and field 2 as
# the variable.
measures = [os.path.basename(x).split("-")[0] for x in paths]
variables = [os.path.basename(x).split("-")[2] for x in paths]
# Running counters for captions (i_table is used for table numbering below;
# i_figure is not used in the visible cells).
i_table = 1
i_figure = 1

# One row per climatology file.
df_options = pd.DataFrame({"measure": measures, "variable": variables, "path": paths})

# Default simulation labels, overridden when a pickled name dictionary exists.
ff = "../../sim_dict.pkl"
sim_0_name = "simulation 1"
sim_1_name = "simulation 2"
if os.path.exists(ff):
    # NOTE: unpickling can execute arbitrary code; only load a sim_dict.pkl
    # that comes from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(ff, "rb") as f_pickle:
        sim_dict = pickle.load(f_pickle)
    sim_0_name = sim_dict["sim0"]
    sim_1_name = sim_dict["sim1"]

# Keep only the supported measures that were actually found, in a fixed order.
measures = [x for x in ["vertical_integration", "top", "vertical_mean", "bottom"] if x in measures]



107 files were created by nctoolkit in prior or current sessions. Consider running deep_clean!
nctoolkit is using Climate Data Operators version 2.4.4


Temporal correlations were calculated at each grid cell using monthly climatologies: each correlation coefficient is based on 12 pairs of monthly values, one pair per calendar month, with one value taken from each simulation.

In [None]:
# Jupyter only renders the final expression of a cell automatically, so the
# headings and tables produced inside the loop must be passed to display().
from IPython.display import display

# Load the "Shelf" variable of the subdomain file packaged with ecoval and
# turn zeros into missing values so it can be used as a multiplicative mask.
data_path = pkg_resources.resource_filename("ecoval", "data/amm7_val_subdomains.nc")
ds_shelf = nc.open_data(data_path)
ds_shelf.subset(variable = "Shelf")
ds_shelf.as_missing(0)

# Section heading for each supported measure.
temporal_headings = {
    "vertical_integration": "## Temporal correlations of vertically integrated values",
    "top": "## Temporal correlations of sea surface values",
    "bottom": "## Temporal correlations of near-bottom values",
    "vertical_mean": "## Temporal correlations of vertical mean values",
}

# dict.fromkeys removes duplicates while keeping the order of `measures`, so
# the section order and table numbering are the same on every run.
for mm in dict.fromkeys(measures):
    df_all = []

    if mm in temporal_headings:
        display(md(temporal_headings[mm]))

    # Sorted so that variables are processed in a reproducible order.
    mm_variables = sorted(set(df_options[df_options["measure"] == mm]["variable"].values))
    for vv in mm_variables:
        try:
            vv_paths = df_options[(df_options["measure"] == mm) & (df_options["variable"] == vv)]["path"].values
            # One climatology file per simulation, identified by its suffix.
            ff1 = [x for x in vv_paths if x.endswith("sim_0.nc")][0]
            ff2 = [x for x in vv_paths if x.endswith("sim_1.nc")][0]
            ds1 = nc.open_data(ff1)
            ds2 = nc.open_data(ff2)
            # The grid fix is best effort: a failure here is deliberately
            # ignored and the dataset is used as it is.
            for ds in (ds1, ds2):
                try:
                    ds.fix_amm7_grid()
                except Exception:
                    pass
            ds1.as_missing(0)
            ds2.as_missing(0)

            ds_cor = nc.cor_time(ds1, ds2)
            if mm == "bottom":
                # Apply the shelf mask in place; the result of the expression
                # `ds_cor * ds_shelf` was previously discarded.
                ds_cor.multiply(ds_shelf)
            ds_cor.subset(lon = [-19, 9], lat = [42, 63])
            df_cor = ds_cor.to_dataframe().reset_index()
            # Keep the correlation and coordinate columns but drop every
            # bounds column. The parentheses matter: without them the "bnds"
            # test only applied to the "lat" columns.
            columns = [
                x for x in df_cor.columns
                if ("cor" in x or "lon" in x or "lat" in x) and "bnds" not in x
            ]
            df_cor = df_cor[columns]
            # Normalise the coordinate column names to "lon" and "lat".
            lon_name = [x for x in df_cor.columns if "lon" in x][0]
            df_cor = df_cor.rename(columns={lon_name: "lon"})
            lat_name = [x for x in df_cor.columns if "lat" in x][0]
            df_cor = df_cor.rename(columns={lat_name: "lat"})
            # Tag the rows so results can be grouped after concatenation.
            df_cor["variable"] = vv
            df_cor["measure"] = mm
            df_all.append(df_cor)
        except Exception as err:
            # A variable that cannot be processed is skipped, but reported.
            print(f"Skipping {vv} ({mm}): {err}")

    if not df_all:
        # pd.concat raises on an empty list, so there is nothing to show.
        print(f"No temporal correlations could be calculated for {mm}")
        continue
    df_all = pd.concat(df_all)

    # Spatial mean of the per-cell correlation for each variable.
    df_summary = df_all.loc[:,["variable", "cor"]].groupby(["variable"]).mean().reset_index()
    df_summary = df_summary[["variable", "cor"]]
    # Highest correlation first.
    df_summary = df_summary.sort_values("cor", ascending = False)
    # Title-case the variable names for presentation.
    df_summary["variable"] = df_summary["variable"].str.title()
    # Reader-friendly column names.
    df_summary = df_summary.rename(columns={"variable": "Variable", "cor": "Correlation coefficient"})
    # Show the summary table without the index column.
    display(df_summary.style.hide(axis="index"))

    display(md(f"**Table {i_table}**: Spatial average of the temporal correlation for each variable {mm}"))
    i_table = i_table + 1

    gg = (
        ggplot(df_all)+
        geom_raster(aes("lon", "lat", fill = "cor"))+
        facet_wrap("~variable")

    )
    # Use a diverging colour scale centred on zero when negative
    # correlations are present.
    if df_all.cor.min() < 0:
        gg = gg + scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0)

    # NOTE(review): whether draw() renders the figure from inside a loop
    # depends on the plotnine/matplotlib version in use - confirm.
    gg.draw()



            


## Spatial correlation coefficients between simulations

In [None]:
# Jupyter only renders the final expression of a cell automatically, so the
# headings and tables produced inside the loop must be passed to display().
from IPython.display import display

# Load the "Shelf" variable of the subdomain file packaged with ecoval and
# turn zeros into missing values so it can be used as a multiplicative mask.
data_path = pkg_resources.resource_filename("ecoval", "data/amm7_val_subdomains.nc")
ds_shelf = nc.open_data(data_path)
ds_shelf.subset(variable = "Shelf")
ds_shelf.as_missing(0)

# Section heading for each supported measure.
spatial_headings = {
    "vertical_integration": "## Spatial correlations of vertically integrated values",
    "top": "## Spatial correlations of sea surface values",
    "bottom": "## Spatial correlations of near-bottom values",
    "vertical_mean": "## Spatial correlations of vertical mean values",
}

# dict.fromkeys removes duplicates while keeping the order of `measures`, so
# the section order and table numbering are the same on every run.
for mm in dict.fromkeys(measures):
    df_all = []

    if mm in spatial_headings:
        display(md(spatial_headings[mm]))

    # Sorted so that variables are processed in a reproducible order.
    mm_variables = sorted(set(df_options[df_options["measure"] == mm]["variable"].values))
    for vv in mm_variables:
        try:
            vv_paths = df_options[(df_options["measure"] == mm) & (df_options["variable"] == vv)]["path"].values
            # One climatology file per simulation, identified by its suffix.
            ff1 = [x for x in vv_paths if x.endswith("sim_0.nc")][0]
            ff2 = [x for x in vv_paths if x.endswith("sim_1.nc")][0]
            ds1 = nc.open_data(ff1)
            ds2 = nc.open_data(ff2)
            # The grid fix is best effort: a failure here is deliberately
            # ignored and the dataset is used as it is.
            for ds in (ds1, ds2):
                try:
                    ds.fix_amm7_grid()
                except Exception:
                    pass
            for ds in (ds1, ds2):
                ds.as_missing(0)
                # Average over time so that a single field is compared.
                ds.tmean()
                # Apply the shelf mask in place; the result of the expression
                # `ds * ds_shelf` was previously discarded.
                ds.multiply(ds_shelf)

            ds_cor = nc.cor_space(ds1, ds2)
            df_cor = ds_cor.to_dataframe().reset_index()
            # Reduce the result to a single correlation value.
            df_cor = pd.DataFrame({"cor":[float(df_cor.cor.values[0])]})
            # Tag the row so results can be grouped after concatenation.
            df_cor["variable"] = vv
            df_cor["measure"] = mm
            df_all.append(df_cor)
        except Exception as err:
            # A variable that cannot be processed is skipped, but reported.
            print(f"Skipping {vv} ({mm}): {err}")

    if not df_all:
        # pd.concat raises on an empty list, so there is nothing to show.
        print(f"No spatial correlations could be calculated for {mm}")
        continue
    df_all = pd.concat(df_all)

    # One correlation value per variable.
    df_summary = df_all.loc[:,["variable", "cor"]].groupby(["variable"]).mean().reset_index()
    df_summary = df_summary[["variable", "cor"]]
    # Highest correlation first.
    df_summary = df_summary.sort_values("cor", ascending = False)
    # Title-case the variable names for presentation.
    df_summary["variable"] = df_summary["variable"].str.title()
    # Reader-friendly column names.
    df_summary = df_summary.rename(columns={"variable": "Variable", "cor": "Correlation coefficient"})
    # Show the summary table without the index column.
    display(df_summary.style.hide(axis="index"))

    display(md(f"**Table {i_table}**: Spatial correlation coefficient for each variable {mm} using annual average values in each grid cell."))
    i_table = i_table + 1




            
