### Create validation plots using ggplot2

In [None]:
# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)
library(gridExtra)
library(grid)
library(gtable)

# Read and prepare data
# Two CSV files are read from ./comparison_data; they are joined on their
# shared "Name" column below.
file_x <- file.path("comparison_data", "validation_outputs_comb_30_6.csv")
file_y <- file.path("comparison_data", "results_chris.csv")
df_x <- read_csv(file_x)
df_y <- read_csv(file_y)

# Merge and filter data
# Only rows whose Name occurs in both tables survive the inner join.
dams_to_remove <- c("A Laing Ni Dam", "Taninthayi")
res_name_zero_degassing <- "Taninthayi"
# NOTE(review): the reservoir whose degassing value is forced to 0 is also
# listed in `dams_to_remove`, so the override has no effect on `merged_df` as
# long as both settings stay as they are. Confirm which of the two is intended.
merged_df <- inner_join(df_x, df_y, by = "Name") %>%
  mutate(
    CH4degas_gCO2eqm2yr_INT100yr = ifelse(
      Name == res_name_zero_degassing,
      0,
      CH4degas_gCO2eqm2yr_INT100yr
    )
  ) %>%
  filter(!(Name %in% dams_to_remove))

# Save merged data
csv_output_path <- file.path("..", "outputs_and_intermediate", "merged_data.csv")
# Create the output folder on first run; write_csv() does not create missing
# directories and would otherwise fail.
dir.create(dirname(csv_output_path), recursive = TRUE, showWarnings = FALSE)
write_csv(merged_df, csv_output_path)

# Define comparisons and custom labels
# Each entry describes one scatter panel:
#   x, y       - names of the columns plotted on the x and y axis
#   xlab, ylab - plotmath axis labels ("Gres" on x, "ReEmission" on y)
#   title      - panel title
comparisons <- list(
  list(y = "co2_diffusion", x = "CO2yrdiff_gCO2eqm2yr_GROSSINTEG",
       xlab = expression(CO[2]~Diffusion~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       ylab = expression(CO[2]~Diffusion~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       title = "CO₂ Diffusion"),

  list(y = "ch4_diffusion", x = "CH4yrdiff_gCO2eqm2yr_.INTEG",
       xlab = expression(CH[4]~Diffusion~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       ylab = expression(CH[4]~Diffusion~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       title = "CH₄ Diffusion"),

  # Axis labels previously read "Ebulllition"; spelling now matches the title.
  list(y = "ch4_ebullition", x = "CH4ebull_gCO2eqm2yr",
       xlab = expression(CH[4]~Ebullition~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       ylab = expression(CH[4]~Ebullition~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       title = "CH₄ Ebullition"),

  list(y = "ch4_degassing", x = "CH4degas_gCO2eqm2yr_INT100yr",
       xlab = expression(CH[4]~Degassing~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       ylab = expression(CH[4]~Degassing~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1})),
       title = "CH₄ Degassing")
)

# Create a plot function
#
# Builds one scatter panel that compares two columns of `data`, with a
# fitted linear trend, a dashed 1:1 reference line and an R² annotation.
#
# Args:
#   data:        data frame containing the columns named in `cmp$x` and
#                `cmp$y`, plus `type`, `catchment.biogenic_factors.climate`
#                and `reservoir.area`, which are mapped to point aesthetics.
#   cmp:         list with elements `x` and `y` (column names as strings),
#                `xlab` and `ylab` (axis labels) and `title` (panel title).
#   show_legend: if TRUE the legend is placed below the panel, otherwise it
#                is hidden.
#
# Returns:
#   A ggplot object.
create_plot <- function(data, cmp, show_legend = FALSE) {
  x <- data[[cmp$x]]
  y <- data[[cmp$y]]

  # R² of the simple linear regression of y on x, printed in the panel.
  r2_val <- summary(lm(y ~ x))$r.squared
  r2_label <- paste0("R² = ", formatC(r2_val, format = "f", digits = 3))

  # `.data[[name]]` replaces the deprecated aes_string() for selecting columns
  # by string name.
  p <- ggplot(data, aes(x = .data[[cmp$x]], y = .data[[cmp$y]])) +
    # NOTE(review): the constant `shape = 21` below takes precedence over the
    # mapped `shape = factor(type)`, so every point is drawn as a filled
    # circle. The mapping is kept so the function still requires a `type`
    # column exactly as before - confirm whether it is still wanted.
    geom_point(
      aes(shape = factor(type),
          fill = catchment.biogenic_factors.climate,
          size = reservoir.area),
      alpha = 0.9, color = "grey30", shape = 21, stroke = 0.35) +
    # Line width is set with `linewidth` (as elsewhere in this file); the
    # explicit formula is the default for method = "lm" and avoids the
    # per-panel message.
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
                color = "grey30", linewidth = 0.5) +
    # Dashed 1:1 line.
    geom_abline(intercept = 0, slope = 1, colour = "gray17",
                linewidth = 0.7, alpha = 0.6, linetype = 2) +
    # Anchor the R² label at the smallest x / largest y in the data.
    annotate("text", x = min(x, na.rm = TRUE), y = max(y, na.rm = TRUE),
             label = r2_label, hjust = 0, vjust = 1, size = 3) +
    # Draw every legend key at one fixed point size.
    guides(
      fill = guide_legend(override.aes = list(size = 4)),
      shape = guide_legend(override.aes = list(size = 4)),
      size = guide_legend(override.aes = list(size = 4))
    ) +
    scale_shape_discrete(name = "Type") +
    scale_fill_brewer(palette = "RdBu", name = "Climate\nZone") +
    scale_size_continuous(name = "Reservoir\nArea (km²)") +
    labs(title = cmp$title, x = cmp$xlab, y = cmp$ylab) +
    theme_light() +
    theme(
      plot.title = element_text(size = 9, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 8, face = "bold", hjust = 0.5),
      axis.text = element_text(size = 8),
      panel.grid.major = element_line(color = "gray85"),
      panel.grid.minor = element_blank(),
      axis.line = element_line(color = "black"),
      axis.ticks = element_line(color = "black"),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
      legend.position = if (show_legend) "bottom" else "none",
      # Centred legend titles: `hjust` here replaces the deprecated
      # `legend.title.align = 0.5`.
      legend.title = element_text(size = 9, face = "bold", hjust = 0.5),
      legend.text = element_text(size = 8),
      legend.key.size = unit(1.2, "lines"),
      legend.key.width = unit(1.5, "lines"),
      legend.box = "horizontal",
      legend.spacing.x = unit(10, "pt"),
      legend.margin = margin(t = 0, r = 15, b = 0, l = 15, unit = "pt")
    )

  p
}

# Create plots and assemble

# Extract the legend from a ggplot so it can be drawn once for all panels.
#
# Args:
#   a.gplot: a ggplot object that is rendered with a legend.
#
# Returns:
#   The first grob named "guide-box" in the plot's gtable.
#
# Stops with an explicit message when the plot has no such grob, instead of
# failing with a subscript error.
g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  # vapply() always yields a logical vector, whatever the grobs contain.
  is_legend <- vapply(
    tmp$grobs,
    function(grob) identical(grob$name, "guide-box"),
    logical(1)
  )
  if (!any(is_legend)) {
    stop("No legend ('guide-box' grob) found in the supplied plot.",
         call. = FALSE)
  }
  tmp$grobs[[which(is_legend)[1]]]
}

# Render the first comparison with its legend only to harvest that legend.
legend_plot <- create_plot(merged_df, comparisons[[1]], show_legend = TRUE)
legend <- g_legend(legend_plot)

# One legend-free panel per entry of `comparisons`.
plot_list <- lapply(
  comparisons,
  create_plot,
  data = merged_df,
  show_legend = FALSE
)

# Lay the panels out in a single row of four and add gaps between columns.
g <- arrangeGrob(grobs = plot_list, ncol = 4)
g_with_spacing <- gtable_add_col_space(g, unit(0.5, "cm"))

# Stack the panel row above the shared legend (height ratio 6:1).
final_plot <- grid.arrange(
  g_with_spacing,
  legend,
  ncol = 1,
  heights = c(6, 1)
)

# Write the figure as PNG (600 dpi) and SVG with identical dimensions.
comparison_png_file <- file.path(
  "..", "outputs_and_intermediate", "comparison_plot.png"
)
comparison_svg_file <- file.path(
  "..", "outputs_and_intermediate", "comparison_plot.svg"
)

ggsave(
  filename = comparison_png_file,
  plot = final_plot,
  width = 11.7,
  height = 3.3,
  dpi = 600
)
ggsave(
  filename = comparison_svg_file,
  plot = final_plot,
  width = 11.7,
  height = 3.3
)

In [None]:
# Second comparison set: pre-impoundment and net emissions.
# Same entry layout as the first set (y, x, xlab, ylab, title). The CO2 and
# CH4 axis labels are each shared by two panels, so they are defined once
# inside local() to keep the helper names out of the global environment.
comparisons_p2 <- local({
  co2_gres_lab <- expression(CO[2]~Emission~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1}))
  co2_reem_lab <- expression(CO[2]~Emission~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1}))
  ch4_gres_lab <- expression(CH[4]~Emission~(Gres)~(g~CO[2]~eq~m^{-2}~yr^{-1}))
  ch4_reem_lab <- expression(CH[4]~Emission~(ReEmission)~(g~CO[2]~eq~m^{-2}~yr^{-1}))

  list(
    list(
      y = "co2_preimp",
      x = "preimpEM.gCO2eqm2yr",
      xlab = co2_gres_lab,
      ylab = co2_reem_lab,
      title = "Preimpoundment CO₂ emission"
    ),
    list(
      y = "ch4_preimp",
      x = "preimpEM.CH4.gCO2eqm2yr",
      xlab = ch4_gres_lab,
      ylab = ch4_reem_lab,
      title = "Preimpoundment CH₄ emission"
    ),
    list(
      y = "co2_net",
      x = "CO2yrdiff_gCO2eqm2yr_NETINTEG.preimpCor",
      xlab = co2_gres_lab,
      ylab = co2_reem_lab,
      title = "Net CO₂ emission"
    ),
    list(
      y = "ch4_net",
      x = "total_CH4_yr_preimp.cor.gCO2eqm2yr",
      xlab = ch4_gres_lab,
      ylab = ch4_reem_lab,
      title = "Net CH₄ emission"
    )
  )
})

# Assemble and save the second figure (pre-impoundment and net emissions).

# Output locations for the second figure.
net_emissions_svg_file <- file.path(
  "..", "outputs_and_intermediate", "comparison_plot_net_emissions.svg"
)
net_emissions_png_file <- file.path(
  "..", "outputs_and_intermediate", "comparison_plot_net_emissions.png"
)

# Render the first panel with its legend only to extract that legend.
legend_plot_p2 <- create_plot(
  merged_df,
  comparisons_p2[[1]],
  show_legend = TRUE
)
legend_p2 <- g_legend(legend_plot_p2)

# One legend-free panel per entry of `comparisons_p2`.
plot_list_p2 <- lapply(
  comparisons_p2,
  create_plot,
  data = merged_df,
  show_legend = FALSE
)

# Single row of four panels, with wider column gaps (0.8 cm) than the
# first figure.
g_p2 <- arrangeGrob(grobs = plot_list_p2, ncol = 4)
g_with_spacing_p2 <- gtable_add_col_space(g_p2, unit(0.8, "cm"))

# Panel row on top, shared legend underneath (height ratio 6:1).
final_plot_p2 <- grid.arrange(
  g_with_spacing_p2,
  legend_p2,
  ncol = 1,
  heights = c(6, 1)
)

# Save as PNG (600 dpi) and SVG under the net-emissions file names.
ggsave(
  filename = net_emissions_png_file,
  plot = final_plot_p2,
  width = 11.7,
  height = 3.3,
  dpi = 600
)
ggsave(
  filename = net_emissions_svg_file,
  plot = final_plot_p2,
  width = 11.7,
  height = 3.3
)