In [2]:
# Run from the species_peaks analysis directory: the relative "results/..."
# paths used later in this script are resolved against it.
setwd(
  dir = "/Users/rebecca/sudmant/analyses/myotis/analysis/exploratory/species_peaks"
)

# Packages attached for the rest of the script (order kept as-is, since the
# attach order decides which package's functions mask the others').
library(dplyr)
library(scales)
library(data.table)

# Species metadata table; the Abbr and Field_Name columns are used below.
myo_meta <- read.csv(
  file = "/Users/rebecca/sudmant/analyses/myotis/data/myotis_meta.csv"
)

## Get differential peak status between species in orthologous gene promoters present in ALL species

In [3]:
# Table of shared genes; its Gene column is used to restrict the per-species
# peak tables further down.
shared_genes <- read.csv(file = "results/data/shared_genes.csv")

In [12]:
# Get peak status in promoters of shared genes:
#
# For every species in `myo_meta`, read that species' "exon1_5000 ... orthologs"
# table, restrict it to the symbols listed in `shared_genes$Gene`, and keep the
# symbols that are called as having a peak. The per-species symbol sets are
# then cross-tabulated into `spec_peak_status` (rows = genes, columns =
# `myo_meta$Field_Name`, cell = 1 when that species has a peak for the gene).

peak_ortho_files <- list.files(path = "results/data",
                               pattern = "exon1_5000.*orthologs",
                               full.names = TRUE)

promoter_peak_list <- lapply(seq_along(myo_meta$Abbr), function(i) {
  abbr <- as.character(myo_meta$Abbr[i])

  # Match the abbreviation literally and against the file name only, so that
  # regex metacharacters or the directory part of the path cannot produce
  # spurious hits.
  is_match <- grepl(abbr, basename(peak_ortho_files), fixed = TRUE)
  species_file <- peak_ortho_files[is_match]
  if (length(species_file) != 1) {
    stop("Expected exactly one peak/ortholog file for '", abbr,
         "' but found ", length(species_file), ".", call. = FALSE)
  }

  peak_ortho <- fread(species_file, data.table = FALSE)

  # NOTE(review): a gene is called only when its Peak values sum to MORE than
  # 1. If a single peak row should be enough, the threshold should be `> 0`
  # -- confirm the intent before changing it.
  gene_status <- peak_ortho %>%
    dplyr::filter(SYMBOL %in% shared_genes$Gene) %>%
    dplyr::group_by(SYMBOL) %>%
    dplyr::summarise(Peak = sum(Peak) > 1, .groups = "drop")

  # which() drops genes whose status is NA instead of returning an NA symbol.
  gene_status$SYMBOL[which(gene_status$Peak)]
})
names(promoter_peak_list) <- myo_meta$Field_Name

# NOTE(review): only genes that appear in at least one species' list end up as
# rows here; shared genes without a called peak in any species are absent.
spec_peak_status <- as.data.frame.matrix(table(stack(promoter_peak_list)))

In [14]:
## Cluster species by their peak status:
##
## Writes two PDFs: a species-by-species correlation heatmap of peak status,
## and a gene-by-species heatmap of the peak status table itself.

library(ComplexHeatmap)

# Heatmap() works on matrices; converting explicitly avoids its
# "The input is a data frame-like object, convert it to a matrix." message.
peak_status_mat <- as.matrix(spec_peak_status)

# Pairwise correlation between species (columns). The diagonal is blanked so
# the self-correlations of 1 do not dominate the colour scale.
cor_mat <- cor(peak_status_mat)
diag(cor_mat) <- NA

# NOTE(review): n_genes is the number of rows of spec_peak_status; confirm that
# this is the gene set the "shared orthologous genes" wording refers to.
n_genes <- nrow(peak_status_mat)

plot_title <- paste("Peak promoter status over", comma(n_genes),
                    "shared orthologous genes")

# Draw `heatmap` into a PDF at `file`. The device is closed via on.exit() so
# it is not left open if building or drawing the heatmap fails.
save_heatmap_pdf <- function(heatmap, file, width, height) {
  pdf(file = file, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  ComplexHeatmap::draw(heatmap)
  invisible(file)
}

save_heatmap_pdf(
  ComplexHeatmap::Heatmap(name = "Peak status correlation",
                          column_title = plot_title,
                          matrix = cor_mat,
                          clustering_method_rows = "average",
                          clustering_method_columns = "average",
                          column_names_rot = 45),
  file = "results/figures/promoter_peak_status_correlation_heatmap.pdf",
  width = 11,
  height = 9
)

save_heatmap_pdf(
  ComplexHeatmap::Heatmap(name = "Peak status",
                          column_title = plot_title,
                          matrix = peak_status_mat,
                          clustering_method_rows = "average",
                          clustering_method_columns = "average",
                          show_row_names = FALSE,
                          column_names_rot = 45),
  file = "results/figures/promoter_peak_status_heatmap.pdf",
  width = 9,
  height = 9
)

"The input is a data frame-like object, convert it to a matrix."


In [132]:
## Classify promoters by how many species have a peak, and write one CSV per
## class: peaks in ALL species, in N-1 species, and in exactly 1 species.

status_mat <- as.matrix(spec_peak_status)
n_species <- ncol(status_mat)
species_with_peak <- rowSums(status_mat)

# For each row index in `rows`, return the single species (column name) whose
# peak presence equals `has_peak`. vapply() guarantees a character vector of
# the same length as `rows` (character(0) when `rows` is empty) and fails
# loudly if a row does not have exactly one such species.
odd_species_out <- function(rows, has_peak) {
  vapply(rows, function(i) {
    colnames(status_mat)[(status_mat[i, ] != 0) == has_peak]
  }, character(1), USE.NAMES = FALSE)
}

## What promoters have peaks in ALL species?

shared_peaks <- rownames(status_mat)[species_with_peak == n_species]
shared_peaks_df <- data.frame(Gene = shared_peaks)
write.csv(shared_peaks_df,
          file = "results/data/shared_promoters_shared_peaks.csv",
          row.names = FALSE)

## What genes have peaks in N-1 species?

semi_shared_rows <- which(species_with_peak == (n_species - 1))
semi_shared_peaks <- rownames(status_mat)[semi_shared_rows]
## Which species is missing a peak in these genes?
semi_shared_spec <- setNames(
  odd_species_out(semi_shared_rows, has_peak = FALSE),
  semi_shared_peaks
)
semi_shared_df <- data.frame(Gene = semi_shared_peaks,
                             Species = semi_shared_spec,
                             row.names = NULL)

write.csv(semi_shared_df,
          file = "results/data/shared_promoters_semi_shared_peaks.csv",
          row.names = FALSE)

## What genes have a peak in just 1 species?

private_rows <- which(species_with_peak == 1)
private_peaks <- rownames(status_mat)[private_rows]
## What species is associated with each private gene?
private_spec <- setNames(
  odd_species_out(private_rows, has_peak = TRUE),
  private_peaks
)
private_df <- data.frame(Gene = private_peaks,
                         Species = private_spec,
                         row.names = NULL)

write.csv(private_df,
          file = "results/data/shared_promoters_private_peaks.csv",
          row.names = FALSE)