# Ability of models to reproduce spatial patterns 

In [None]:
chunk_start

The ability of the models to reproduce spatial patterns for key variables at the **sea surface** was assessed by comparing the modelled and observed values in each grid cell.
The spatial correlation coefficient was used to quantify the spatial pattern similarity between the modelled and observed values. This was calculated for each variable and each model using the values in each grid cell.

Models were compared by regridding each one to the same grid and ensuring grid cells with missing values in at least one model were excluded.

**Note**: these summaries describe performance across the entire domain. The results are only strictly comparable when model grids are the same.

In [None]:
# running counter used to number the captions shown in this notebook
i_figure = 1
# model_dict is iterated by key below, and each value is used as the prefix of a
# "/results/annual_mean/" directory.
# NOTE(review): model_dict_str is not defined in the visible cells - presumably it
# is injected when the notebook is generated; confirm.
model_dict = model_dict_str 
# number of models being compared
num_models = len(model_dict)

In [None]:
# Collect the annual-mean result files of every model into one data frame
# with the columns "path", "model" and "base_name".
annual_paths = []
for key in model_dict:
    annual_dir = model_dict[key] + "/results/annual_mean/"
    # fail early: a model without annual means cannot take part in the comparison
    if not os.path.exists(annual_dir):
        raise ValueError("No annual mean folder found for " + key)
    paths = glob.glob(annual_dir + "*")
    paths = tidy_summary_paths(paths)
    annual_paths.append(
        pd.DataFrame({"path": paths})
        .assign(model = key)
    )

annual_paths = pd.concat(annual_paths)

annual_paths["base_name"] = annual_paths["path"].apply(lambda x: os.path.basename(x))
# only interested in netcdf file in path
# regex = False matches the literal text ".nc"; as a regular expression the "."
# would match any character, letting names such as "x_nc" through.
# .copy() makes the filtered frame independent, so adding a column below does
# not raise a SettingWithCopyWarning.
annual_paths = annual_paths[annual_paths["base_name"].str.contains(".nc", regex = False)].copy()
annual_paths["base_name"] = annual_paths["base_name"].apply(fix_basename)




In [None]:
# Count the files carrying each base name, keep only the names that occur more
# than once, then merge back to recover the matching path/model rows.
name_counts = annual_paths.groupby("base_name").count()
shared_names = (
    name_counts.loc[name_counts["model"] > 1]
    .reset_index()
    .drop(columns = ["path", "model"])
)
annual_paths = shared_names.merge(annual_paths)

In [None]:
# distinct file names remaining after the filtering above
base_names = annual_paths["base_name"].unique()

In [None]:
# For every shared file name: build one mask common to all models, then compute
# each model's spatial correlation and collect it in df_cor.
# NOTE(review): `nc` is presumably nctoolkit; the comments on dataset methods
# below describe intent as read from this cell, not verified library behaviour.
output = dict()
# list to track data frames with correlation coefficients
df_cor = []
for bb in base_names:
    # variable name = second "_"-separated token of the base name, ".nc" removed
    variable = bb.split("_")[1].replace(".nc", "")
    # rows (path, model) for this file name; index reset so [0], [1], ... work below
    bb_paths = annual_paths.query("base_name == @bb").reset_index(drop = True)
    n_cols = len(bb_paths)
    # generate the mask first

    # start from the first model's file and fold every other model into it
    ds_mask = nc.open_data(bb_paths.path[0])
    ds_mask.run()
    for ff in bb_paths.path[1:]:
        ds_ff = nc.open_data(ff)
        # put this model on the mask's grid ("nn" is presumably nearest neighbour)
        ds_ff.regrid(ds_mask, "nn")
        # NOTE(review): bare expression whose result is discarded - this only has an
        # effect if the dataset operators modify ds_mask in place; confirm
        ds_mask * ds_ff
        ds_mask.run()
        ds_mask.abs()
        # NOTE(review): same in-place assumption as the multiplication above
        ds_mask > 0
        ds_mask.run()
    # cells that survive dropna() define the extent shared by the models
    df_mask = (
        ds_mask.to_dataframe()
        .dropna()
        .reset_index()
    )
    # coordinate columns are located by substring, whatever their exact names
    lon_name = [x for x in df_mask.columns if "lon" in x][0]
    lat_name = [x for x in df_mask.columns if "lat" in x][0]
    # rename
    df_mask = df_mask.rename(columns = {lon_name: "lon", lat_name: "lat"})
    # bounding box of the non-missing cells
    lon_min = df_mask.lon.min()
    lon_max = df_mask.lon.max()
    lat_min = df_mask.lat.min()
    lat_max = df_mask.lat.max()
    lons = [lon_min, lon_max]
    lats = [lat_min, lat_max]
    # crop the mask to that bounding box
    ds_mask.subset(lon = lons, lat = lats)
    ds_mask.run()




    # second pass: apply the common mask to each model and compute its correlation
    for i in range(0, len(bb_paths)):
        ds = nc.open_data(bb_paths.path[i])
        ds.regrid(ds_mask, "nn")
        # NOTE(review): relies on the operator updating ds in place - confirm
        ds * ds_mask
        ds.run()
        #get the model run name
        model_name = bb_paths.model[i]
        # assumes each file holds variables named "model" and "observation" - TODO confirm
        ds.cor_space("model", "observation")
        # .cor is attribute access to the "cor" column; [0] takes its first row
        cor_value = ds.to_dataframe().dropna().reset_index().cor[0]
        # stick this in a dataframe
        df_cor.append(
            pd.DataFrame({"model": [model_name], "variable": variable, "cor": cor_value})
        )


In [None]:
# stack the per-model, per-variable one-row frames into a single long table
df_cor = pd.concat(df_cor, axis = 0)

In [None]:
# Reshape to wide form: one row per variable, one coefficient column per model
cor_table = df_cor.pivot(index = "variable", columns = "model", values = "cor").reset_index()
df_display(cor_table)
# caption for the table, numbered with the running counter
md(f"**Figure {i_figure}**: Correlation coefficients between annual mean sea surface values for each model and the observation data for each variable.")
i_figure = i_figure + 1