# Differences in phenology 

In [None]:
# First identify what is available....

import glob
import os
from IPython.display import Markdown as md
# add ability to do %%R magic
%load_ext rpy2.ipython


# ability to open pickle
import warnings
warnings.filterwarnings('ignore')
import pickle
#pkg_resources
import pkg_resources
import numpy as np

import pandas as pd
import nctoolkit as nc
from plotnine import *
import os
import molmass
def get_molar_mass(element):
    """Return the ``mass`` attribute of ``molmass.Formula`` built from ``element``.

    Parameters
    ----------
    element : str
        Formula string handed straight to ``molmass.Formula``.

    Returns
    -------
    The value of ``Formula(element).mass`` as reported by molmass.
    """
    from molmass import Formula

    return Formula(element).mass

# Discover the climatology files. Without recursive=True, "**" behaves like "*",
# so this matches files exactly two directory levels below climatologies/.
paths = glob.glob("../../data/climatologies/**/**/*.nc")
# File names are dash-separated: field 0 is the measure, field 2 the variable.
measures = [os.path.basename(x).split("-")[0] for x in paths]
variables = [os.path.basename(x).split("-")[2] for x in paths]
# Running counters used to number tables and figures in the rendered report.
i_table = 1
i_figure = 1

# One row per file, so later cells can look up paths by measure and variable.
df_options = pd.DataFrame({"measure": measures, "variable": variables, "path": paths})

# Human-readable simulation names; the defaults apply when no pickle is present.
ff = "../../sim_dict.pkl"
sim_0_name = "simulation 1"
sim_1_name = "simulation 2"
if os.path.exists(ff):
    # NOTE: unpickling can execute arbitrary code; sim_dict.pkl must come from a
    # trusted source. The context manager closes the file handle promptly.
    with open(ff, "rb") as sim_file:
        sim_dict = pickle.load(sim_file)
    sim_0_name = sim_dict["sim0"]
    sim_1_name = sim_dict["sim1"]

# This notebook only reports on phenology; the list is empty if none was found.
measures = [x for x in ["phenology"] if x in measures]



In [None]:

def extract_df(ds):
    """Flatten the first variable of a dataset into a long-format data frame.

    Parameters
    ----------
    ds : object exposing ``variables`` (a sequence of names) and
        ``to_dataframe()`` (returning a pandas DataFrame).

    Returns
    -------
    pandas.DataFrame
        Rows containing no missing values, reduced to the longitude column, the
        latitude column and the first variable. The variable column is renamed
        to ``day`` and its original name is stored in a ``variable`` column.
    """
    value_col = ds.variables[0]
    table = ds.to_dataframe().reset_index()

    def _first_coord(token):
        # First column whose name contains the token, ignoring bounds columns
        return [c for c in table.columns if token in c and "bnds" not in c][0]

    keep = [_first_coord("lon"), _first_coord("lat"), value_col]
    # Rows are dropped before the column selection, so a missing value in any
    # column (not only the kept ones) removes the row
    table = table.dropna().loc[:, keep]
    return table.rename(columns={value_col: "day"}).assign(variable=value_col)

# Build three tables from the paired climatology files of each variable:
#   df_all  - gridded day-of-maximum values for both simulations
#   df_diff - gridded values of ds1 after the ds1 - ds2 step (see note below)
#   df_ave  - spatial mean per simulation and variable
df_all = []
df_diff = []
df_ave = []
for mm in set(measures):
    mm_variables = list(set(df_options[df_options["measure"] == mm].variable))
    for vv in mm_variables:
        vv_paths = df_options[(df_options["measure"] == mm) & (df_options["variable"] == vv)]["path"].values
        # Each variable needs one file per simulation, identified by its suffix
        ff1 = [x for x in vv_paths if x.endswith("sim_0.nc")][0]
        ff2 = [x for x in vv_paths if x.endswith("sim_1.nc")][0]
        ds1 = nc.open_data(ff1)
        ds2 = nc.open_data(ff2)
        sims = [(ds1, sim_0_name), (ds2, sim_1_name)]

        # Apply the same preparation to both datasets so they share a grid
        for ds, _ in sims:
            ds.top()
            try:
                ds.fix_amm7_grid()
            except Exception:
                # Best effort: the grid fix is allowed to fail, but keyboard
                # interrupts and system exits are no longer swallowed
                pass
            ds.to_latlon(lon = [-18, 9], lat = [42, 63], res = 0.05)

        # Spatial means are computed on copies so the gridded data stay intact.
        # As before, the value column is looked up by the first variable name
        # of the simulation 0 mean dataset for both simulations.
        var = None
        for ds, sim_name in sims:
            ds_mean = ds.copy()
            ds_mean.spatial_mean()
            if var is None:
                var = ds_mean.variables[0]
            mean_value = float(ds_mean.to_dataframe()[var].values[0])
            df_ave.append(
                pd.DataFrame({"simulation": sim_name, "variable": vv, "value": mean_value}, index = [0])
            )

        # Gridded values of both simulations, labelled by simulation name
        frames = []
        for ds, sim_name in sims:
            df_sim = extract_df(ds)
            df_sim["simulation"] = sim_name
            frames.append(df_sim.assign(variable = vv))
        df_all.append(pd.concat(frames))

        # NOTE(review): this bare expression is kept from the original. It is
        # presumably relied on to subtract ds2 from ds1 in place through
        # nctoolkit's operator overloading - confirm; otherwise df_diff would
        # just repeat the simulation 0 values.
        ds1 - ds2
        df_delta = extract_df(ds1).assign(variable = vv)
        df_delta["simulation"] = sim_0_name
        df_diff.append(df_delta)

df_all = pd.concat(df_all).reset_index(drop = True)
df_diff = pd.concat(df_diff).reset_index(drop = True)
df_ave = pd.concat(df_ave).reset_index(drop = True)




In [None]:
%%capture --no-display
%%R -i df_all -i df_diff 
# For every variable, draw (1) a map of the day of maximum faceted by
# simulation and (2) a map of the values in df_diff, save both as PNG files in
# cache/ and stitch them side by side into cache/{vv}_combined.png.
# df_all and df_diff are passed in from the Python session via the -i flags.
# unique variables in df_all
variables <- unique(df_all$variable)
library(tidyverse)
library(ggplot2)
# country outlines drawn on top of the rasters
world_map <- map_data("world")
# create cache directory if non-existent
dir.create("cache", showWarnings = FALSE)
for (vv in variables){
        # NOTE(review): `title` is assigned here but not used later in this cell
        title = str_glue("Day of maximum {vv}")
        # axis tick labels matching the breaks set on the x and y scales below
        lon_label = c("20°W", "10°W", "0°", "10°E")
        lat_label = c("45°N", "50°N", "55°N", "60°N")
        # map of the day of maximum, one facet per simulation
        gg1 <- df_all %>%
            filter(variable == vv) %>%
            ggplot()+
            geom_raster(aes(lon, lat, fill = day))+
            geom_polygon(data = world_map, aes(x = long, y = lat, group = group), fill = "grey60")+
            # the visible extent is the data extent of df_all (all variables)
            coord_fixed(ratio = 1.5, xlim = c(min(df_all$lon), max(df_all$lon)), ylim = c(min(df_all$lat), max(df_all$lat)))+
            theme_bw()+
            # ensure the legend is at the bottom and appropriately sized
            theme(
            legend.position = "bottom", legend.direction = "horizontal", legend.box = "horizontal", legend.key.width = unit(2.0, "cm"),
            legend.key.height = unit(0.3, "cm"))+
            scale_fill_viridis_c(na.value = "white",
                       #breaks = c(0.4, 0.6, 0.8, 1.0), labels = c("0.4", "0.6", "0.8", ">1"),
                       guide = guide_colorbar(title.position = "bottom", title.hjust = 0.5, title.theme = element_text(angle = 0, size = 12 , family = "Helvetica"))
            )+
            # add suitable legend title
            labs(fill = str_glue("Day of maximum {vv}"))+
            facet_wrap(~simulation)

        # degree-formatted axis ticks; axis titles are blanked
        gg1 <- gg1 +
            scale_y_continuous(breaks = c(45, 50, 55, 60), labels = lat_label)+
            scale_x_continuous(breaks = c(-20, -10, 0, 10), labels = lon_label)+
            labs(x = "", y = "")

        # remove some white space using expand limits
        # NOTE(review): expand_limits() asks the scales to include the given
        # values (0 on both axes) rather than trimming space - confirm that
        # this call has the intended effect.
        gg1 <- gg1 + expand_limits(x = c(0, 0), y = c(0, 0))
        

        # save the plot 
        ggplot2::ggsave(str_glue("cache/{vv}_day_max.png"), gg1, width = 16, height = 12, units= "cm")

        # values of df_diff for this variable, used to pick the colour range
        diffs <- df_diff %>%
            filter(variable == vv) %>%
            select(day)
        # figure out the lower 2% and upper 98% quantiles
        lower <- quantile(diffs$day, 0.02)
        upper <- quantile(diffs$day, 0.98)
        # symmetric colour limit: the larger magnitude of the two quantiles
        max_diff <- max(abs(c(lower, upper)))
        # cap the values
        
        # map of the df_diff values, clamped to [-max_diff, max_diff]
        gg_diff <- df_diff %>%
            filter(variable == vv) %>%
            # cap the values
            mutate(day = ifelse(day > max_diff, max_diff, day)) %>%
            mutate(day = ifelse(day < -max_diff, -max_diff, day)) %>%
            ggplot()+
            geom_raster(aes(lon, lat, fill = day))+
            geom_polygon(data = world_map, aes(x = long, y = lat, group = group), fill = "grey60")+
            coord_fixed(ratio = 1.5, xlim = c(min(df_all$lon), max(df_all$lon)), ylim = c(min(df_all$lat), max(df_all$lat)))+
            theme_bw()+
            # ensure the legend is at the bottom and appropriately sized
            theme(
            legend.position = "bottom", legend.direction = "horizontal", legend.box = "horizontal", legend.key.width = unit(1.5, "cm"),
            legend.key.height = unit(0.3, "cm"))+
            # diverging palette centred on zero: blue negative, red positive
            scale_fill_gradient2(
                limits = c(-max_diff, max_diff),
                low = "blue", high = "red", mid = "white", midpoint = 0,
                       #breaks = c(0.4, 0.6, 0.8, 1.0), labels = c("0.4", "0.6", "0.8", ">1"),
                       guide = guide_colorbar(title.position = "bottom", title.hjust = 0.5, title.theme = element_text(angle = 0, size = 12 , family = "Helvetica"))
            )+
            labs(fill = "Difference (days)")

        gg_diff <- gg_diff +
            scale_y_continuous(breaks = c(45, 50, 55, 60), labels = lat_label)+
            scale_x_continuous(breaks = c(-20, -10, 0, 10), labels = lon_label)+
            labs(x = "", y = "")
            
        # save the plot
        # needs to be combinable with gg1 using ImageMagick: both PNGs share
        # the same height (12 cm)
        ggplot2::ggsave(str_glue("cache/{vv}_day_max_diff.png"), gg_diff, width = 9, height = 12, units= "cm") 

        # combine the two using ImageMagick; this shells out to the `convert`
        # command, which must be available on the PATH (+append joins the
        # images horizontally)
        system(str_glue("convert cache/{vv}_day_max.png cache/{vv}_day_max_diff.png +append cache/{vv}_combined.png"))
        

}




In [None]:
# Render one report section per combined figure written by the R cell:
# heading, intro sentence, figure, caption and a summary of the spatial means.
from IPython.display import Image

# The R cell writes each stitched figure as cache/{variable}_combined.png
combined_suffix = "_combined.png"
# sorted() keeps the figure numbering stable between runs (glob order is arbitrary)
paths = sorted(glob.glob(f"cache/*{combined_suffix}"))
for ff in paths:
    # Strip only the known suffix so variable names containing "_" stay intact
    variable = os.path.basename(ff)[: -len(combined_suffix)]
    # Markdown objects created inside a loop are not rendered automatically,
    # so each one is passed to display() explicitly
    display(md(f"## Phenology of {variable}"))
    display(md(f"The day of maximum {variable} is shown for both simulations below."))
    # display the image
    display(Image(filename = ff))
    display(md(f"**Figure {i_figure}**: The day of maximum {variable} for both simulations. Positive differences indicate that the peak occurs later in the {sim_0_name} simulation compared to the {sim_1_name} simulation."))
    i_figure = i_figure + 1

    # Summarise the spatial means. Rows of df_ave are appended in the order
    # simulation 0 then simulation 1, so the previous value is simulation 0's.
    df_var = df_ave.query("variable == @variable")
    text = ""
    previous_value = None
    for sim in list(df_var.simulation):
        value = df_var.query("simulation == @sim")["value"].values[0]
        if previous_value is None:
            text += f"The average day of maximum {variable} for the {sim} simulation is {value:.2f}, "
        else:
            text += f" and the average day of maximum {variable} for the {sim} simulation is {value:.2f}. "
            diff = value - previous_value
            if diff > 0:
                text += f"The peak occurs {diff:.2f} days **later** in the {sim} simulation compared to the {sim_0_name} simulation."
            elif diff < 0:
                text += f"The peak occurs {abs(diff):.2f} days **earlier** in the {sim} simulation compared to the {sim_0_name} simulation."
            else:
                # A zero difference is neither earlier nor later
                text += f"The peak occurs on the same day in the {sim} simulation and the {sim_0_name} simulation."
        previous_value = value

    # A stale PNG in cache/ may have no matching rows in df_ave; skip empty text
    if text:
        display(md(text))

