## Ability of models to reproduce regional patterns

In [None]:
chunk_start

**Note**: These comparisons are only exact when the model grids are identical. Otherwise, each comparison is based on the overlap between that model's grid and the region of interest, so results for different models may not be directly comparable.

In [None]:
# Mapping of model name -> directory holding that model's validation output.
# The code below reads "<directory>/results/regionals/*" for every entry.
# Expected shape, for example:
#     {"MODEL_A": "/path/to/model_a", "MODEL_B": "/path/to/model_b"}
# NOTE(review): the name on the right-hand side is not defined anywhere in this
# chunk; presumably it is substituted when the notebook is generated - confirm.

model_dict = model_dict_str
# Number of models being compared.
num_models = len(model_dict)

In [None]:

# Step 1: work out which variables can be compared, i.e. which variables have a
# regional summary file in at least two of the models.
# File names are split on "_" and the second token is taken as the variable.
models_per_variable = {}
for dir_name in model_dict.values():
    paths = glob.glob(dir_name + "/results/regionals/*")
    paths = tidy_summary_paths(paths)
    # Use a set so that a model with several files for the same variable only
    # counts once; otherwise a variable present in a single model could pass
    # the "at least two" filter below.
    model_variables = set()
    for path in paths:
        parts = os.path.basename(path).split("_")
        # Ignore files that do not have a second "_"-separated token.
        if len(parts) > 1:
            model_variables.add(parts[1])
    for name in model_variables:
        models_per_variable[name] = models_per_variable.get(name, 0) + 1

# Restrict to variables available for two or more models. Sorting keeps the
# section order and figure numbering identical from one run to the next.
variables = sorted(name for name, n_models in models_per_variable.items() if n_models >= 2)

In [None]:
# Decide whether the regional summaries use the packaged AMM7 validation
# sub-domains. This is detected by looking for "Irish Sea" among the region
# names of the first comparable variable in the first model.
# Start from "no region mask" so the outcome never depends on values left in
# the kernel by earlier cells.
ds_regions = None
regions_contents = None

uses_packaged_regions = False
if len(variables) > 0:
    first_model_dir = next(iter(model_dict.values()))
    paths = glob.glob(first_model_dir + "/results/regionals/*" + variables[0] + "_regionals.csv")
    # An empty match simply means the region map cannot be drawn.
    if len(paths) > 0:
        regionals_sample = pd.read_csv(paths[0])
        # astype(str) keeps the substring test safe if a name is missing (NaN).
        uses_packaged_regions = any(
            "Irish Sea" in name for name in regionals_sample.long_name.astype(str)
        )

if uses_packaged_regions:
    # Pull the region mask in from the package data.
    data_path = pkg_resources.resource_filename("ecoval", "data/amm7_val_subdomains.nc")
    ds_regions = nc.open_data(data_path, checks = False)

    # Turn zeros into missing values and set the fill value before evaluating.
    ds_regions.as_missing(0)
    ds_regions.set_fill(-9999)
    ds_regions.run()
    # Table describing the mask's variables; its "variable" and "long_name"
    # columns are used to label the regions.
    regions_contents = ds_regions.contents

In [None]:
def fix_name(x):
    """Shorten a region's long name for use as a facet label.

    "North East" becomes "NE", a leading/inner "North " becomes "N ", and the
    bare name "Channel" is expanded to "English Channel".
    """
    x = x.replace("North East", "NE")
    x = x.replace("North ", "N ")
    if x == "Channel":
        x = "English Channel"
    return x


# Defaults for the case where no region map can be built. All four names are
# handed to the R plotting cell, so every one of them must exist even when
# `regional` is False; the placeholder values are never plotted in that case.
regional = False
df_mapped = 1
xlim = np.array([0.0, 1.0])
ylim = np.array([0.0, 1.0])

try:
    # Find the longitude/latitude coordinate names used by the region mask.
    coord_names = list(ds_regions.to_xarray().coords)
    lon_name = [x for x in coord_names if "lon" in x][0]
    lat_name = [x for x in coord_names if "lat" in x][0]
    # Regions that are excluded from the map.
    bad = ["Rosa", "Locate Shelf"]
    region_cells = (
        ds_regions
        .to_dataframe()
        .reset_index()
        # standardise the coordinate column names
        .rename(columns = {lon_name: "lon", lat_name: "lat"})
        # one row per (cell, mask variable); missing values are dropped
        .melt(id_vars = ["lon", "lat"])
        .dropna()
        # attach the readable region name to each mask variable
        .merge(regions_contents.loc[:,["variable", "long_name"]])
        .drop(columns = ["value"])
        .loc[lambda d: ~d["long_name"].isin(bad)]
        # assign() returns a new frame, avoiding chained-assignment warnings
        .assign(long_name = lambda d: d["long_name"].map(fix_name))
    )
    # Only switch the map on when there is something to draw.
    if len(region_cells) > 0:
        df_mapped = region_cells
        xlim = np.array([df_mapped.lon.min(), df_mapped.lon.max()])
        ylim = np.array([df_mapped.lat.min(), df_mapped.lat.max()])
        regional = True
except Exception:
    # Best effort: the region mask is only available for some domains, in which
    # case `ds_regions` is missing or unusable. Keep the defaults set above so
    # the map is skipped. Unlike a bare `except`, this lets KeyboardInterrupt
    # and SystemExit through.
    pass




In [None]:
%%capture --no-display

%%R -i regional -i df_mapped -i xlim -i ylim 
# Keep R warnings out of the rendered output.
options(warn=-1)

if (regional){

    library(tidyverse)

    # Land polygons drawn on every panel.
    world_map <- map_data("world")

    # One panel per region; the view is limited to the extent passed in
    # from Python (xlim / ylim).
    gg <- ggplot(df_mapped)+
        geom_tile(aes(x = lon, y = lat))+
        coord_cartesian(xlim = xlim, ylim = ylim)+
        theme_bw(base_size = 10)+
        facet_wrap(~long_name)+
        theme(axis.title.x = element_blank(),
              axis.title.y = element_blank())

    # Format axis values as compass coordinates,
    # i.e. 10 becomes 10 °N and -10 becomes 10 °S on the y axis.
    degree_labels <- function(values, positive, negative){
        ifelse(values >= 0, paste0(values, positive), paste0(abs(values), negative))
    }

    # Reuse the break positions ggplot chose for the plot built so far.
    y_breaks <- as.numeric(na.omit(layer_scales(gg)$y$break_positions()))
    x_breaks <- as.numeric(na.omit(layer_scales(gg)$x$break_positions()))

    y_labels <- degree_labels(y_breaks, "°N", "°S")
    x_labels <- degree_labels(x_breaks, "°E", "°W")

    gg <- gg + scale_y_continuous(breaks = y_breaks, labels = y_labels)+
        scale_x_continuous(breaks = x_breaks, labels = x_labels)+
        geom_polygon(data = world_map, aes(x = long, y = lat, group = group), fill = "grey", color = "grey")

    gg

}

In [None]:
# Caption for the regions map; only emitted when the map was drawn.
if regional:
    caption = md(f"**Figure {chapter}{i_figure}**: Regions used for validation.")
    # An expression inside an `if` is never auto-displayed by Jupyter, so show
    # the caption explicitly when md() hands back an object.
    # NOTE(review): md() is defined outside this chunk; if it renders the text
    # itself and returns None this is a no-op - confirm.
    # `display` is provided by IPython in every notebook kernel.
    if caption is not None:
        display(caption)
    # Only consume a figure number when a figure was actually shown.
    i_figure += 1

In [None]:

def show_output(obj):
    """Display `obj` unless it is None.

    Expression statements inside a loop are not auto-displayed by Jupyter, so
    anything that should appear in the output has to be displayed explicitly.
    `display` is provided by IPython in every notebook kernel.
    """
    if obj is not None:
        display(obj)


def load_regional_series(vv):
    """Read the regional summary of variable `vv` for every model.

    Returns a tuple `(df, sources)`:
    - `df` has the columns variable, month, long_name and value, with `value`
      averaged over each (variable, month, long_name) combination. Rows whose
      `variable` was "model" are relabelled with the model's key in
      `model_dict`.
    - `sources` holds, per model, the first "_"-separated token of the summary
      file name (with ".nc" removed).

    Raises FileNotFoundError if a model has no summary file for `vv`.
    """
    frames = []
    sources = []
    for key, dir_name in model_dict.items():
        paths = glob.glob(dir_name + "/results/regionals/*" + vv + "_regionals.csv")
        paths = tidy_summary_paths(paths)
        if len(paths) == 0:
            raise FileNotFoundError(f"no regional summary for {vv} in {dir_name}")
        sources.append(os.path.basename(paths[0]).split("_")[0].replace(".nc", ""))
        df_model = pd.read_csv(paths[0])
        # Label this model's rows with its key so the models can be told apart.
        df_model["variable"] = df_model["variable"].replace({"model": key})
        frames.append(df_model)

    df = pd.concat(frames).dropna()
    df = df.loc[:, ["variable", "value", "month", "long_name"]]
    df = df.groupby(["variable", "month", "long_name"], as_index = False)["value"].mean()
    return df, sources


def plot_regional_series(df, vv):
    """Build the monthly line plot for `vv`: one line per series, one panel per region."""
    return (
        ggplot(df)+
        geom_line(aes(x = "month", y = "value", color = "variable"))+
        facet_wrap("long_name", scales = "free")+
        # legend at the top, without a title
        theme(legend_title = element_blank())+
        theme(legend_position = "top")+
        expand_limits(y = 0)+
        # label every second month
        scale_x_continuous(breaks = [2, 4, 6, 8, 10, 12 ], labels = ["Feb", "Apr", "Jun", "Aug", "Oct", "Dec"])+
        # rotate the x-axis labels
        theme(axis_text_x = element_text(angle = 45))+
        labs(y = vv, x = "Month")
    )


# Variables that could not be plotted, with the reason. Nothing is printed so
# the rendered output stays clean; inspect this dict when a section is missing.
regional_plot_errors = {}

for vv in variables:
    try:
        df, sources = load_regional_series(vv)

        show_output(md(f"## Can the model reproduce regional sea surface {fix_variable_name(vv)}?"))

        # Name the data source only when every model was compared against the
        # same one. "cor" and an empty token are not treated as source names.
        if len(set(sources)) == 1 and sources[0] not in ("", "cor"):
            source = sources[0].upper()
            show_output(md(f"Regional spatial averages were calculated using monthly averages in the simulations and the observational data from **{source}**."))

        # The figure has to be displayed explicitly: building it inside a loop
        # does not render it.
        show_output(plot_regional_series(df, vv))
        # Same "Figure <chapter><number>" format as the regions-map caption.
        show_output(md(f"**Figure {chapter}{i_figure}**. Spatial average **sea surface** {fix_variable_name(vv)} for each model in each region. The values shown are monthly climatologies."))
        i_figure = i_figure + 1
    except Exception as err:
        # Best effort: a variable that cannot be plotted is skipped, but the
        # reason is recorded rather than silently discarded.
        regional_plot_errors[vv] = repr(err)

