In [87]:
# Work from the species_peaks analysis directory: the relative "resources" and
# "results" paths used later in this script resolve against it.
setwd("/Users/rebecca/sudmant/analyses/myotis/analysis/species_peaks")

library(scales)
library(dplyr)
library(ggplot2)
library(data.table)

# Species metadata table; this script reads its `Abbr` and `Field_Name` columns.
myo_meta <- read.csv("/Users/rebecca/sudmant/analyses/myotis/data/myotis_meta.csv")

In [90]:
# Candidate peak/TE annotation files. Each species in `myo_meta` is expected to
# have exactly one file whose path contains its abbreviation (`Abbr`).
peak_files <- list.files(path = "resources", pattern = "TEs", full.names = TRUE)

# Ordered pattern -> label rules that collapse the raw annotation in column V11
# into broad classes. Rules are applied in sequence to the running label, so an
# earlier rule wins (anything starting with "SINE" is relabelled before the
# unanchored "RNA" rule is tried).
te_class_rules <- c(
  "^DNA" = "DNA",
  "^SINE" = "SINE",
  "^LINE" = "LINE",
  "^LTR" = "LTR",
  "^Simple" = "Simple repeat",
  "Unknown" = "Unknown",
  "RNA" = "rRNA/tRNA/snRNA",
  "^Sat" = "Other",
  "^Retro" = "Other"
)

# Collapse a vector of raw TE annotations into broad class labels. Values that
# match no rule are kept, with underscores replaced by spaces.
collapse_te_class <- function(raw_type) {
  te_class <- raw_type
  for (pattern in names(te_class_rules)) {
    te_class[grepl(pattern, te_class)] <- te_class_rules[[pattern]]
  }
  gsub("_", " ", te_class)
}

# Print one bar chart of the number of peaks per `Type` to the current graphics
# device, with bars ordered from the most to the least frequent type.
#   peak_type: data frame with one row per (peak, type) pair and a `Type` column
#   title, subtitle: plot labels
plot_te_counts <- function(peak_type, title, subtitle) {
  type_order <- dplyr::count(peak_type, Type, sort = TRUE)
  peak_type$Type <- factor(peak_type$Type, levels = type_order$Type)

  print(
    ggplot(peak_type, aes(x = Type)) +
      geom_bar() +
      theme_minimal() +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.y = element_blank(),
        plot.margin = unit(c(1, 1, 1, 1), "cm")
      ) +
      labs(title = title, subtitle = subtitle) +
      scale_y_continuous(labels = comma)
  )

  invisible(peak_type)
}

pdf("results/peaks_TE_types.pdf", width = 8, height = 6)
# Remember which device is the PDF so exactly that device is closed afterwards.
pdf_device <- dev.cur()

# For every species: write two pages to the PDF (broad TE classes, then DNA
# subtypes) and return the fraction of peaks that carry a TE annotation.
# The result is a list with one numeric value per row of `myo_meta`.
frac_TEs_list <- tryCatch(
  lapply(seq_len(nrow(myo_meta)), function(i) {

    species_abbr <- as.character(myo_meta$Abbr[i])
    # Match the abbreviation literally; a regex match could pick up unintended
    # files if the abbreviation contained metacharacters.
    species_file <- grep(species_abbr, peak_files, fixed = TRUE, value = TRUE)
    if (length(species_file) != 1L) {
      stop(
        "Expected exactly one TE peak file for '", species_abbr,
        "' but found ", length(species_file),
        call. = FALSE
      )
    }

    # Columns are unnamed (V1, V2, ...). V4 is used as the peak identifier;
    # V10 and V11 hold the TE annotation, with "." meaning no annotation.
    # NOTE(review): presumably a bedtools-intersect-style table -- confirm.
    peak_features <- fread(species_file, data.table = FALSE)
    peak_features$Type <- collapse_te_class(peak_features$V11)

    ## Broad TE classes: each peak counts once per class it overlaps.
    class_per_peak <- peak_features %>%
      dplyr::filter(Type != ".") %>%
      dplyr::distinct(V4, Type)

    plot_te_counts(
      class_per_peak,
      title = myo_meta$Field_Name[i],
      subtitle = "Transposable elements in accessible regions"
    )

    ## Within DNA subtypes: same count, using the uncollapsed V11 annotation.
    dna_subtype_per_peak <- peak_features %>%
      dplyr::filter(grepl("DNA", V11)) %>%
      dplyr::transmute(V4, Type = V11) %>%
      dplyr::distinct()

    plot_te_counts(
      dna_subtype_per_peak,
      title = myo_meta$Field_Name[i],
      subtitle = "DNA transposable elements in accessible regions"
    )

    # One row per peak: TRUE if any of its rows has a TE annotation. Reducing
    # with any() keeps each peak counted once in the denominator even if it
    # has both annotated and "." rows.
    peak_has_te <- peak_features %>%
      dplyr::summarise(TE = any(V10 != "."), .by = V4)

    sum(peak_has_te$TE) / nrow(peak_has_te)
  }),
  # Close the PDF even when an iteration fails, so the device is not leaked
  # and the output file is not left open.
  finally = dev.off(which = pdf_device)
)

In [91]:
# Display the per-species fraction of peaks with a TE annotation
# (one list element per entry of myo_meta$Abbr, in the same order).
frac_TEs_list