In [19]:
# Session setup: working directory, packages, and species metadata.
#
# NOTE(review): the working directory is a hard-coded absolute path on one
# machine. The relative paths used further down ("results/data",
# "results/figures/...") are resolved against it, so this line must run first.
setwd("/Users/rebecca/sudmant/analyses/myotis/analysis/exploratory/species_TE_proportion")

# dplyr supplies the data-manipulation verbs, ggplot2 the plots, and data.table
# is attached for fread(); `comma` (used as an axis labeller below) is
# presumably the one exported by scales.
library(dplyr)
library(scales)
library(ggplot2)
library(data.table)

# Species metadata table. The code below reads its `Abbr` column (to build the
# per-species input file names) and its `Field_Name` column (used as the
# species label in the output tables and plots).
myo_meta <- read.csv("/Users/rebecca/sudmant/analyses/myotis/data/myotis_meta.csv")

In [13]:
## Plot proportion of peaks in TEs
##
## Step 1: per-species peak counts by repeat class.
## For every entry of `myo_meta$Abbr`, read
## `results/data/<Abbr>_peaks_TEs.tsv`, collapse the annotation in column V11
## to a coarse repeat class, give every V4 key (treated here as one peak) a
## single class, and count peaks per (TE, Type). The result is a list holding
## one summary table per species.

frac_peaks_list <- lapply(seq_along(myo_meta$Abbr), function(sp_idx) {

  # Ordered pattern -> label rules. Each rule is applied to the vector as
  # already relabelled by the rules before it, so the order is significant.
  class_rules <- c(
    "^DNA" = "DNA",
    "^SINE" = "SINE",
    "^LINE" = "LINE",
    "^LTR" = "LTR",
    "^Simple" = "Simple repeat",
    "Unknown" = "Unknown",
    "RNA" = "rRNA/tRNA/snRNA",
    "^Sat" = "Other",
    "^Retro" = "Other"
  )

  tsv_name <- paste0(myo_meta$Abbr[sp_idx], "_peaks_TEs.tsv")
  overlaps <- fread(file.path("results/data", tsv_name), data.table = FALSE)

  repeat_class <- overlaps$V11
  for (pattern in names(class_rules)) {
    repeat_class[grepl(pattern, repeat_class)] <- class_rules[[pattern]]
  }
  # "." is recoded to the literal string "NA" (not a missing value);
  # presumably it marks peaks without any overlapping repeat -- TODO confirm.
  repeat_class[repeat_class == "."] <- "NA"
  overlaps$Type <- gsub("_", " ", repeat_class)

  # One row per peak key: TE flag plus the distinct classes it overlaps.
  per_peak <- overlaps %>%
    dplyr::mutate(TE = Type != "NA") %>%
    dplyr::group_by(V4) %>%
    dplyr::reframe(
      TE = unique(TE),
      Type = paste(unique(Type), collapse = ", ")
    )

  # A comma means the peak overlapped several classes; those peaks are pooled
  # into the "Unknown" class.
  per_peak$Type <- replace(
    per_peak$Type,
    grepl(",", per_peak$Type),
    "Unknown"
  )

  # Number of peaks per (TE, Type), labelled with the species field name.
  per_peak %>%
    dplyr::group_by(TE, Type) %>%
    dplyr::reframe(No.Peaks = n()) %>%
    dplyr::mutate(Species = myo_meta$Field_Name[sp_idx])

})

In [20]:
## Combine the per-species summaries into one table and compute, within each
## species, the fraction of its peaks that falls in every (TE, Type) category.
##
## Columns after this step:
##   TE         - logical flag; "Simple repeat" rows are forced to FALSE here,
##                so simple repeats are counted as non-TE in the plots below
##   Type       - repeat class label
##   No.Peaks   - number of peaks in that category
##   Species    - species label taken from the metadata
##   Frac_Peaks - No.Peaks divided by the species' total number of peaks
##
## bind_rows() is used instead of do.call(rbind, ...) because it matches
## columns by name, and the result is explicitly ungrouped so that no hidden
## grouping leaks into later steps.
df <- dplyr::bind_rows(frac_peaks_list) %>%
  dplyr::mutate(TE = TE & Type != "Simple repeat") %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(Frac_Peaks = No.Peaks / sum(No.Peaks)) %>%
  dplyr::ungroup()
  
## Figures: peaks in transposable elements.
##
## Three pages are written to results/figures/peak_TE_proportion.pdf:
##   1. per species, the fraction of peaks with TE == TRUE vs TE == FALSE
##   2. per species, the fraction of TE peaks in each repeat class
##   3. per species, the number of TE peaks in each repeat class
##
## All plot objects are built before the PDF device is opened, and the device
## is closed in a `finally` handler, so an error never leaves the device open.

## Page 1: TE vs non-TE ----

# Species ordered by their total fraction of peaks in TEs (descending).
x_order <- df %>%
  dplyr::ungroup() %>%
  dplyr::filter(TE) %>%
  dplyr::group_by(Species) %>%
  dplyr::summarise(n = sum(Frac_Peaks)) %>%
  dplyr::arrange(dplyr::desc(n))

# Species without any TE row are absent from x_order; append them at the end
# so they keep a factor level instead of silently turning into NA.
species_levels <- union(
  as.character(x_order$Species),
  as.character(unique(df$Species))
)
df$Species <- factor(df$Species, levels = species_levels)

p_te <- ggplot(df, aes(x = Species, y = Frac_Peaks, fill = TE, color = TE)) +
  geom_col() +
  theme_minimal() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.y = element_text(margin = margin(r = 15)),
        plot.margin = unit(c(1, 1, 1, 1), "cm")) +
  labs(title = "Proportion of peaks in transposable elements") +
  ylab("Proportion")

## Stratify by TE type:

# Keep TE peaks only and re-normalise so fractions sum to 1 per species.
df_type <- df %>%
  dplyr::ungroup() %>%
  dplyr::filter(TE) %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(Frac_Peaks = No.Peaks / sum(No.Peaks)) %>%
  dplyr::ungroup()

# Layers shared by both facetted per-class plots.
type_layers <- list(
  geom_col(),
  theme_minimal(),
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
        axis.title.y = element_text(margin = margin(r = 15)),
        panel.grid = element_blank(),
        plot.margin = unit(c(1, 3, 1, 3), "cm")),
  facet_wrap(~ Species)
)

## Page 2: classes ordered by their mean fraction across species ----

x_order <- df_type %>%
  dplyr::group_by(Type) %>%
  dplyr::summarise(n = mean(Frac_Peaks)) %>%
  dplyr::arrange(dplyr::desc(n))

df_type$Type <- factor(df_type$Type, levels = x_order$Type)

p_type_frac <- ggplot(df_type, aes(x = Type, y = Frac_Peaks)) +
  type_layers +
  labs(title = "Proportion of peaks in classes of transposable elements") +
  ylab("Proportion")

## Page 3: classes ordered by their total peak count ----

x_order <- df_type %>%
  dplyr::group_by(Type) %>%
  dplyr::summarise(n = sum(No.Peaks)) %>%
  dplyr::arrange(dplyr::desc(n))

df_type$Type <- factor(df_type$Type, levels = x_order$Type)

p_type_n <- ggplot(df_type, aes(x = Type, y = No.Peaks)) +
  type_layers +
  labs(title = "# of peaks in classes of transposable elements") +
  ylab("# peaks") +
  scale_y_continuous(labels = comma)

## Write the PDF ----

pdf("results/figures/peak_TE_proportion.pdf", width = 8, height = 6)
pdf_device <- dev.cur()

invisible(tryCatch(
  {
    print(p_te)
    print(p_type_frac)
    print(p_type_n)
  },
  # Close exactly the device opened above, whether or not printing failed.
  finally = dev.off(pdf_device)
))