In [2]:
## Notebook setup.
##
## All relative paths used further down (the `*_RBHs.csv` inputs and the
## `../../../02_UCRs/results/data/` outputs) are resolved against this
## directory, so it has to be set before any other cell runs.
## NOTE(review): this is a machine-specific absolute path; the notebook will
## not run elsewhere without editing it.
setwd("/Users/rebecca/sudmant/analyses/myotis/analysis/reciprocal_best_hits/01_RBHs/results/data")

## Attach order matters: packages attached later mask same-named functions
## from packages attached earlier, so keep GenomicRanges after dplyr.
library(pafr)
library(dplyr)
library(data.table)
library(GenomicRanges)

## Species metadata table; the cells below iterate over its `Abbr` column and
## use each value as a species prefix in file names.
myo_meta <- read.csv("/Users/rebecca/sudmant/analyses/myotis/data/myotis_meta.csv")

In [3]:
## Clip the range(s) in `x` to the part shared with the range(s) in `y`.
##
## x: ranges object supporting `start()`/`end()` and their setters (a GRanges
##    in this notebook). Its sequence names and metadata are carried over to
##    the result.
## y: ranges object parallel to `x` (same length, or length 1).
##
## Returns a copy of `x` whose start is the larger of the two starts and whose
## end is the smaller of the two ends, computed element-wise. The function
## does not check that `x` and `y` actually overlap or sit on the same
## sequence; callers are expected to pass overlapping pairs.
##
## Ref: https://bioinformatics.stackexchange.com/questions/874/intersection-of-two-genomic-ranges-to-keep-metadata
overlap_ranges <- function(x, y) {
  rv <- x
  ## pmax()/pmin() work position by position. For length-1 inputs they give
  ## the same answer as max()/min(), but for longer inputs they no longer
  ## collapse every range to a single global start/end.
  start(rv) <- pmax(start(x), start(y))
  end(rv) <- pmin(end(x), end(y))
  rv
}

In [4]:
## Get UCRs for each species:
##
## For every species in `myo_meta$Abbr`, load its RBH table against each of
## the other species, intersect the RBH coordinates across all of those
## comparisons, and write the regions that survive to
## `../../../02_UCRs/results/data/<species>_UCRs.bed`.

for (i in seq_along(myo_meta$Abbr)) {

  spec1 <- myo_meta$Abbr[i]

  ## Load in all RBHs for the working species (one GRanges per partner):

  spec1_ranges_list <- lapply(myo_meta$Abbr[-i], function(spec2) {

    ## Build the file name explicitly instead of grep()-ing a directory
    ## listing: a regex search for `spec2` can match zero files or several
    ## (e.g. when one abbreviation is a substring of another). This is the
    ## same `<spec1>_vs_<spec2>_RBHs.csv` naming the pairwise-mapping cell
    ## reads from this directory.
    rbhs_file <- paste0(spec1, "_vs_", spec2, "_RBHs.csv")
    if (!file.exists(rbhs_file)) {
      stop("RBH file not found: ", rbhs_file, call. = FALSE)
    }

    spec1_rbhs <- fread(rbhs_file, data.table = FALSE)

    ## Keep only the coordinate columns, renamed to what
    ## makeGRangesFromDataFrame() looks for.
    spec1_rbhs <- spec1_rbhs %>%
      dplyr::mutate(
        seqnames = Spec_Chr,
        start = Spec_Start,
        end = Spec_End
      ) %>%
      dplyr::select(seqnames, start, end)

    makeGRangesFromDataFrame(spec1_rbhs)

  })

  ## Take the intersection of all of these regions:
  ## NOTE(review): suppressWarnings() presumably hides sequence-level
  ## mismatch warnings between comparisons -- confirm nothing else is masked.
  spec1_ucrs <- as.data.frame(suppressWarnings(
    Reduce(GenomicRanges::intersect, spec1_ranges_list)
  ))

  ## Written without a header; the pairwise-mapping cell reads the columns
  ## back as seqnames, start, end, width, strand.
  write.table(spec1_ucrs,
              file = paste0("../../../02_UCRs/results/data/", spec1, "_UCRs.bed"),
              col.names = FALSE, row.names = FALSE, quote = FALSE,
              sep = "\t")

}

In [5]:
## Map UCRs from each species to all other species:
##
## For every unordered pair of species, clip each species' RBH hits to that
## species' own UCRs, join the two clipped tables on the RBH pair ID, and save
## the resulting coordinate mapping under both file-name orders.

## Directory holding the per-species UCR files and the pairwise outputs,
## relative to the working directory.
ucr_dir <- "../../../02_UCRs/results/data/"

## Clip the RBH hits stored in `rbhs_file` to the UCRs stored in `ucrs_file`.
##
## rbhs_file: RBH table providing Spec_Chr, Spec_Start, Spec_End and the four
##            Spec1_/Spec2_ Query_ID / Subj_ID columns.
## ucrs_file: headerless five-column UCR table (seqnames, start, end, width,
##            strand).
## prefix:    label ("Spec1" or "Spec2") used to name the output columns.
##
## Returns a data.frame with columns <prefix>_Chr, <prefix>_Start, ID and
## <prefix>_End, one row per (RBH hit, UCR) overlap; zero rows when nothing
## overlaps or the UCR file is empty.
clip_rbhs_to_ucrs <- function(rbhs_file, ucrs_file, prefix) {

  label_columns <- function(df) {
    colnames(df) <- c(paste0(prefix, "_Chr"), paste0(prefix, "_Start"),
                      "ID", paste0(prefix, "_End"))
    df
  }

  ## A species with no UCRs leaves a zero-byte file behind; there is nothing
  ## to clip against, and assigning five column names to it would fail.
  if (isTRUE(file.size(ucrs_file) == 0)) {
    return(label_columns(data.frame(Chr = character(0), Start = integer(0),
                                    ID = character(0), End = integer(0))))
  }

  rbhs <- fread(rbhs_file, data.table = FALSE) %>%
    dplyr::mutate(
      ## Key used later to pair up the two species' rows for the same hit.
      ID = paste(
        Spec1_Query_ID, Spec1_Subj_ID,
        Spec2_Query_ID, Spec2_Subj_ID
      ),
      seqnames = Spec_Chr,
      start = Spec_Start,
      end = Spec_End
    ) %>%
    dplyr::select(seqnames, start, end, ID)

  ucrs <- fread(ucrs_file, data.table = FALSE)
  colnames(ucrs) <- c("seqnames", "start", "end", "width", "strand")

  rbhs_ranges <- makeGRangesFromDataFrame(rbhs, keep.extra.columns = TRUE,
                                          ignore.strand = TRUE)
  ucrs_ranges <- makeGRangesFromDataFrame(ucrs, ignore.strand = TRUE)

  hits <- findOverlaps(query = rbhs_ranges, subject = ucrs_ranges)
  rbh_hits <- rbhs_ranges[queryHits(hits)]
  ucr_hits <- ucrs_ranges[subjectHits(hits)]

  ## Element-wise clip of every overlapping (RBH, UCR) pair. This gives the
  ## same coordinates as clipping the pairs one at a time, also works when
  ## there are no hits at all, and keeps both coordinates as integers so they
  ## are written out as plain digits.
  label_columns(data.frame(
    Chr = as.character(seqnames(rbh_hits)),
    Start = pmax(start(rbh_hits), start(ucr_hits)),
    ID = mcols(rbh_hits)$ID,
    End = pmin(end(rbh_hits), end(ucr_hits))
  ))
}

for (i in seq_along(myo_meta$Abbr)) {

  for (j in seq_along(myo_meta$Abbr)) {

    ## Each unordered pair is processed once; the result is saved under both
    ## name orders below.
    if (i >= j) next

    spec1 <- myo_meta$Abbr[i]
    spec2 <- myo_meta$Abbr[j]

    ## Intersect RBHs with UCRs from species 1:

    spec1_df <- clip_rbhs_to_ucrs(
      rbhs_file = paste0(spec1, "_vs_", spec2, "_RBHs.csv"),
      ucrs_file = paste0(ucr_dir, spec1, "_UCRs.bed"),
      prefix = "Spec1"
    )

    ## Intersect RBHs with UCRs from species 2:

    spec2_df <- clip_rbhs_to_ucrs(
      rbhs_file = paste0(spec2, "_vs_", spec1, "_RBHs.csv"),
      ucrs_file = paste0(ucr_dir, spec2, "_UCRs.bed"),
      prefix = "Spec2"
    )

    ## Get mapping between UCRs for species 1 and species 2:
    ## NOTE(review): an ID that occurs several times on both sides (a hit
    ## spanning more than one UCR) yields every combination of those rows --
    ## confirm that is the intended mapping.

    pairwise_ucrs <- merge(spec1_df, spec2_df, by = "ID")

    if (nrow(pairwise_ucrs) > 0) {

      colnames(pairwise_ucrs) <- gsub("Spec1", spec1, colnames(pairwise_ucrs))
      colnames(pairwise_ucrs) <- gsub("Spec2", spec2, colnames(pairwise_ucrs))

      ## Save (merge() puts the key first, so `[, -1]` drops the ID column)
      fwrite(pairwise_ucrs[, -1],
             file = paste0(ucr_dir, spec1, "_vs_", spec2, "_UCRs.csv"))
      fwrite(pairwise_ucrs[, -1],
             file = paste0(ucr_dir, spec2, "_vs_", spec1, "_UCRs.csv"))

    }

  }

}
