In [None]:
######################################################################
## 04_ANALYSIS: Calculating CpG oe ratio plots in R
######################################################################

## Run this notebook only after the Python steps have been completed.

### Note: any block whose header starts with "SCRIPT" or "ON COMMAND LINE" must be run in a terminal, not in this notebook.

In [1]:
######################################################################
## BLOCK 1. Setup & Load Merged Files
######################################################################

library(ggplot2)
library(viridis)
library(dplyr)
library(readr)
library(Cairo)

# Load the gene-body and promoter tables and tag each row with its region so
# the two tables can be stacked and faceted later.
# Columns used below: cpg_oe, avg_methylation.
df_gene <- read_tsv("~/Mytilus/genome/cpg_methylation_filtered.tsv", show_col_types = FALSE) %>%
  mutate(region = "Gene Body")
df_prom <- read_tsv("~/Mytilus/genome/promoter_cpg_methylation_filtered.tsv", show_col_types = FALSE) %>%
  mutate(region = "Promoter")

# Pseudocount added before log10 so a methylation value of 0 stays finite.
meth_pseudocount <- 1e-6

# log10 transform of methylation. Defined once so the plotted values and the
# correlations below can never drift apart.
log10_meth <- function(avg_methylation) {
  log10(avg_methylation + meth_pseudocount)
}

# Spearman correlation between CpG o/e and log10 methylation for one table.
# Incomplete pairs (NA/NaN in either column) are dropped instead of turning
# the whole coefficient into NA; if no complete pair exists the result is NA.
spearman_rho <- function(df) {
  cor(
    df$cpg_oe,
    log10_meth(df$avg_methylation),
    method = "spearman",
    use    = "pairwise.complete.obs"
  )
}

# Combine and transform; 'region' becomes a factor to fix the facet order.
combined <- bind_rows(df_gene, df_prom) %>%
  mutate(
    log_meth = log10_meth(avg_methylation),
    region = factor(region, levels = c("Gene Body", "Promoter"))
  )

# Spearman correlations, one per region
rho_gene <- spearman_rho(df_gene)
rho_prom <- spearman_rho(df_prom)

# One label row per region. 'rho_text' is a plotmath expression string
# ("rho == <value>") meant to be drawn with parse = TRUE.
# cpg_oe / log_meth are the default label anchor; Block 2 overrides them
# (rho_df2) so the text falls inside its axis limits.
rho_df <- tibble(
  cpg_oe = 1.5,
  log_meth = 1.8,
  region = c("Gene Body", "Promoter"),
  rho_text = c(
    paste0("rho == ", round(rho_gene, 3)),
    paste0("rho == ", round(rho_prom, 3))
  )
)

Loading required package: viridisLite


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [17]:
######################################################################
## BLOCK 2. Panel Figure – CpG o/e vs Methylation (Gene Body & Promoter)
######################################################################

library(ggplot2)
library(viridis)
library(dplyr)
library(grid)   # for unit() in guide_colorbar

# Label anchor for the rho text. x = 1.25 / y = 1.6 lie inside the
# coord_cartesian() limits used below, so the text is not clipped at the panel
# edge. 'region' is carried over from rho_df so each label lands in its facet.
rho_df2 <- rho_df %>%
  mutate(cpg_oe = 1.25, log_meth = 1.6)

# Two-panel figure (Gene Body | Promoter): 2D kernel density of CpG o/e
# against log10 mean methylation, with the Spearman rho printed in each panel.
p_fig1 <- ggplot(combined, aes(x = cpg_oe, y = log_meth)) +
  # Layer 1: semi-transparent filled density bands (no outline).
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom    = "polygon",
    contour = TRUE,
    n       = 200,
    bins    = 10,
    alpha   = 0.40,
    color   = NA
  ) +
  # Layer 2: contour lines on the same grid/bins so they trace the band edges.
  # 'linewidth' replaces 'size', which is deprecated for lines since
  # ggplot2 3.4.0.
  stat_density_2d(
    aes(color = after_stat(level)),
    n         = 200,
    bins      = 10,
    linewidth = 0.9
  ) +
  # Spearman rho text; rho_text is a plotmath string, hence parse = TRUE.
  geom_text(
    data = rho_df2,
    aes(x = cpg_oe, y = log_meth, label = rho_text),
    inherit.aes = FALSE, parse = TRUE,
    hjust = 1, vjust = 1, size = 6, fontface = "italic", color = "black"
  ) +
  facet_wrap(~ region, nrow = 1) +
  # Zoom without dropping data, so the density estimate uses every point.
  coord_cartesian(xlim = c(0.1, 1.3), ylim = c(-1.6, 1.8)) +

  # Legend: one continuous viridis colourbar for the fill scale. The bar is
  # drawn from the scale's colours, not from the layer's alpha = 0.40, so it is
  # already fully opaque. Do not add a later guides(fill = ...) call: it would
  # take precedence over this guide and discard the bar size/frame/tick
  # settings.
  scale_fill_viridis_c(
    option = "D",
    name   = "Density",
    guide  = guide_colorbar(
      ticks        = TRUE,
      ticks.colour = "black",
      barwidth     = unit(5,  "mm"),
      barheight    = unit(40, "mm"),
      frame.colour = "black"
    )
  ) +
  # Contour lines share the palette; a second legend would be redundant.
  scale_color_viridis_c(option = "D", guide = "none") +

  theme_minimal(base_size = 14) +
  theme(
    strip.text      = element_text(size = 16),
    legend.position = "right",
    panel.border    = element_rect(color = "black", fill = NA, linewidth = 0.5)
  ) +
  # NOTE(review): ylim reaches log10 = 1.8 (about 63 on the raw scale), which
  # suggests avg_methylation may be a percentage rather than a 0-1 proportion;
  # confirm the unit named in the y-axis label.
  labs(
    x = "CpG Observed/Expected (CpG o/e)",
    y = expression("Mean Methylation ("*log[10]*" proportion)")
  )

# Export the panel figure as a 10 x 5 inch vector PDF on a white background,
# rendered through the Cairo PDF device. No dpi is given because resolution
# does not apply to vector output.
ggsave(
  "Fig1.pdf", p_fig1,
  device = grDevices::cairo_pdf,
  bg     = "white",
  units  = "in",
  width  = 10,
  height = 5
)