# Ability of models to reproduce seasonality

In [None]:
chunk_start

In [None]:
# Mapping of model (simulation) name -> root directory of that model's
# validation output. The keys are used as model labels and the values are
# extended with "/results/temporals/" when result files are collected.
# Example of the expected structure:
# model_dict = {
#     "LOCATE":"/data/proteus1/scratch/gle/getmval/locate",
#     "GETM":"/data/proteus1/scratch/rwi/adhoc/getm/3dmn"
# }

# NOTE(review): `model_dict_str` is not defined in this section; presumably it
# is a template placeholder that is replaced by a dict literal before the
# notebook is executed -- TODO confirm.
model_dict = model_dict_str
# Number of models being compared; used as the number of panel columns when
# the figures are laid out.
num_models = len(model_dict)

In [None]:
# Collect the temporal validation result files of every model into a single
# data frame with the columns "path", "model" and "base_name".
# Raises ValueError if a model has no "<root>/results/temporals/" directory.
annual_paths = []
for key, model_root in model_dict.items():
    temporals_dir = model_root + "/results/temporals/"
    if not os.path.exists(temporals_dir):
        raise ValueError("No annual mean folder found for " + key)
    paths = glob.glob(temporals_dir + "*")
    paths = tidy_summary_paths(paths)
    annual_paths.append(
        pd.DataFrame({"path": paths})
        .assign(model = key)
    )

annual_paths = pd.concat(annual_paths)

annual_paths["base_name"] = annual_paths["path"].apply(os.path.basename)
# Only NetCDF files are of interest. ".nc" is matched literally
# (regex = False); as a regular expression the "." would match any character
# and let names such as "sync.txt" through. The explicit copy() avoids
# assigning into a slice of the unfiltered frame on the next line.
annual_paths = annual_paths[
    annual_paths["base_name"].str.contains(".nc", regex = False)
].copy()
annual_paths["base_name"] = annual_paths["base_name"].apply(fix_basename)


# Running figure number used in the figure captions.
i_figure = 1

In [None]:
# Keep only the result files whose base_name has more than one row (counted
# on the non-null "model" entries), i.e. files that can be compared between
# simulations. The inner merge on "base_name" brings back the matching
# "path" and "model" columns.
_rows_per_name = annual_paths.groupby("base_name").count()
_shared_names = (
    _rows_per_name[_rows_per_name["model"] > 1]
    .reset_index()[["base_name"]]
)
annual_paths = _shared_names.merge(annual_paths)

In [None]:
# Distinct result-file names left after filtering; one figure is drawn per name.
base_names = annual_paths["base_name"].unique()

In [None]:
import matplotlib.pyplot as plt

# Build one figure per shared result file, with one map of the "cor" variable
# per model. Results are collected in three containers:
#   output      - Markdown heading object -> matplotlib figure
#   text_output - Markdown heading object -> optional sentence naming the
#                 observational source (None when no source tag is found)
#   df_cor      - list of single-row data frames (model, variable, cor)
output = dict()
text_output = dict()
# list to track data frames with correlation coefficients
df_cor = []
for bb in base_names:
    # The variable name is whatever precedes the first "_" in the file name.
    variable = bb.split("_")[0].replace(".nc", "")
    bb_paths = annual_paths.query("base_name == @bb").reset_index(drop = True)
    n_cols = len(bb_paths)

    # Generate the mask first: start from the first model's "cor" field and
    # fold the field of every other model into it.
    ds_mask = nc.open_data(bb_paths.path[0])
    ds_mask.subset(variable = "cor")
    ds_mask.run()
    for ff in bb_paths.path[1:]:
        ds_ff = nc.open_data(ff)
        ds_ff.subset(variable = "cor")
        ds_ff.regrid(ds_mask, "nn")
        # NOTE(review): the bare operator expressions are kept as statements;
        # they presumably modify ds_mask in place -- confirm against the
        # nctoolkit operator documentation.
        ds_mask * ds_ff
        ds_mask.run()
        ds_mask.abs()
        ds_mask > 0
        ds_mask.run()
    df_mask = (
        ds_mask.to_dataframe()
        .dropna()
        .reset_index()
    )
    # The coordinate columns are located by name fragment and normalised to
    # "lon" / "lat" so the bounding box can be read off directly.
    lon_name = [x for x in df_mask.columns if "lon" in x][0]
    lat_name = [x for x in df_mask.columns if "lat" in x][0]
    df_mask = df_mask.rename(columns = {lon_name: "lon", lat_name: "lat"})
    # Bounding box of the non-missing mask cells, coerced to plain floats.
    lons = [float(df_mask.lon.min()), float(df_mask.lon.max())]
    lats = [float(df_mask.lat.min()), float(df_mask.lat.max())]
    ds_mask.subset(lon = lons, lat = lats)
    ds_mask.run()

    # NOTE(review): this call runs before the figure below is created, so it
    # does not target that figure. Kept unchanged; confirm whether it is
    # needed at all.
    plt.subplots_adjust(wspace=20, hspace=20)

    fig = plt.figure(figsize=(10, 10))

    # Markdown heading for this variable; also used as the dictionary key.
    key = md(f"## Seasonality of sea surface {fix_variable_name(variable)}")

    # One row with one panel column per model.
    # NOTE(review): the column count is num_models, not n_cols (the number of
    # files for this variable) -- confirm that these always agree.
    gs = fig.add_gridspec(nrows=1, ncols=num_models, wspace = 0.45, hspace = 0)

    # First pass: find the minimum and maximum values for the shared colorbar.
    # The start values are the opposite ends of the [-1, 1] correlation range.
    z_max = -1
    z_min = 1
    for path in bb_paths.path:
        ds = nc.open_data(path)
        ds.subset(variable = "cor")
        ds.regrid(ds_mask, "nn")
        ds * ds_mask
        ds.run()
        # Convert to a data frame once and reuse it for both extremes.
        cor_values = ds.to_dataframe().dropna().reset_index().cor
        i_max = cor_values.max()
        i_min = cor_values.min()
        if i_max > z_max:
            z_max = i_max
        if i_min < z_min:
            z_min = i_min

    # Second pass: draw one panel per model and record its spatial mean.
    for i, (path, model_name) in enumerate(zip(bb_paths.path, bb_paths.model)):
        ds = nc.open_data(path)
        ds.subset(variable = "cor")
        ds.regrid(ds_mask, "nn")
        ds * ds_mask
        ds.run()
        ds.to_latlon(lon = lons, lat = lats, res = [0.111, 0.067])
        ds.pub_plot(fig = fig, gs = gs[0,i], title = model_name, limits = [z_min, z_max])
        ds.spatial_mean()
        cor_value = ds.to_dataframe().dropna().reset_index().cor[0]
        # stick this in a data frame
        df_cor.append(
            pd.DataFrame({"model": [model_name], "variable": variable, "cor": cor_value})
        )
    output[key] = fig

    # Every row of bb_paths has base_name == bb, so the trailing "_<source>"
    # tag of the file name is read from bb itself.
    source_tag = bb.split(variable)[1].split("_")[-1].replace(".nc", "")
    comp_text = None
    if source_tag and source_tag != "cor":
        source = source_tag.upper()
        comp_text = f"Correlation coefficients were calculated using monthly averages in the simulations and the observational data from **{source}**."
    text_output[key] = comp_text


In [None]:
# Emit, for every figure: its heading, the optional source sentence, the
# figure itself and a numbered caption.
# NOTE(review): the bare `key` and `md(...)` expression statements are only
# rendered if the notebook displays every expression result (for example
# IPython's ast_node_interactivity set to "all") -- presumably configured
# elsewhere; TODO confirm.
for key, _figure in output.items():
    key
    _source_note = text_output[key]
    if _source_note is not None:
        display(md(_source_note))
    display(_figure)
    # string form of the heading object
    key_str = str(key)
    md(f"**Figure {i_figure}**: Correlation coefficient between models and observations in each grid cell and each climatological month.")
    i_figure += 1

In [None]:
# Turn the per-panel records into a table with one row per variable and one
# column per model, formatted as text for display.
df_cor = pd.concat(df_cor)
df_pivot = (
    df_cor.pivot(index = "variable", columns = "model", values = "cor").reset_index()
)
# The numeric cells are replaced by formatted strings below. Casting to object
# first avoids writing strings into float64 columns, which pandas has
# deprecated (setting an item of incompatible dtype).
df_pivot = df_pivot.astype(object)
for i in range(len(df_pivot)):
    # Highest correlation in this row; column 0 holds the variable name.
    max_value = df_pivot.iloc[i, 1:].max()
    for j in range(1, len(df_pivot.columns)):
        value = df_pivot.iloc[i, j]
        # The best value(s) of each row are marked with a trailing "**";
        # all values are shown with three significant digits.
        marker = "**" if value == max_value else ""
        df_pivot.iloc[i, j] = f"{value:.3g}{marker}"
# make everything a string
df_pivot = df_pivot.astype(str)
df_pivot

## Overall summary of temporal performance of simulations

In [None]:
# Show the summary table (one column per model, built in the previous cell),
# followed by its numbered caption, then advance the table counter.
df_display(df_pivot)
_table_caption = f"**Table {i_table}**: Spatial average of correlation coefficients between models and observations for each simulation and each variable."
md(_table_caption)
i_table += 1