In [None]:
library(data.table)
library(ggplot2)
library(ggsci)
library(magrittr) 
library(ggrepel)
library(dplyr)
library(ggpubr)


# Express x as a percentage of the distance from CDS_start to CDS_end:
# x == CDS_start gives 0 and x == CDS_end gives 100. Vectorized over x.
rescale <- function(x, CDS_start, CDS_end) {
  offset_from_start <- x - CDS_start
  cds_span <- CDS_end - CDS_start
  offset_from_start / cds_span * 100
}
# Upper bound of the two-sided t-based confidence interval for the mean.
#
# vector:   numeric vector; missing values are ignored.
# interval: confidence level of the interval (default 0.95).
#
# Returns a single number: mean + t-quantile * sd / sqrt(n), where n is the
# number of non-missing observations.
confidence_interval_upper <- function(vector, interval = 0.95) {
  # Drop missing values once so that the sd, the mean and the sample size all
  # describe the same set of observations (length(vector) would count NAs).
  observed <- vector[!is.na(vector)]
  # Sample size
  n <- length(observed)
  # Standard deviation of sample
  vec_sd <- sd(observed)
  # Mean of sample
  vec_mean <- mean(observed)
  # Half-width of the interval according to the t distribution
  error <- qt((interval + 1) / 2, df = n - 1) * vec_sd / sqrt(n)
  vec_mean + error
}
# Lower bound of the two-sided t-based confidence interval for the mean.
#
# vector:   numeric vector; missing values are ignored.
# interval: confidence level of the interval (default 0.95).
#
# Returns a single number: mean - t-quantile * sd / sqrt(n), where n is the
# number of non-missing observations.
confidence_interval_lower <- function(vector, interval = 0.95) {
  # Drop missing values once so that the sd, the mean and the sample size all
  # describe the same set of observations (length(vector) would count NAs).
  observed <- vector[!is.na(vector)]
  # Sample size
  n <- length(observed)
  # Standard deviation of sample
  vec_sd <- sd(observed)
  # Mean of sample
  vec_mean <- mean(observed)
  # Half-width of the interval according to the t distribution
  error <- qt((interval + 1) / 2, df = n - 1) * vec_sd / sqrt(n)
  vec_mean - error
}
# Scatter plot of RPFs (x axis) against mRNA (y axis) for a single sample,
# with marginal density bar plots to the right (mRNA) and on top (RPFs) and
# the correlation test result (p-value and R) written inside the scatter panel.
#
# Arguments:
#   TE            list containing the elements "RPFs", "mRNA" and "TE"; each
#                 is subset as [, sample].
#   sample        column index, or column name(s) looked up in the column
#                 names of TE$TE. Only the first selected column is plotted.
#   alpha         point colour used when color_TE is FALSE.
#   cex           character expansion for the correlation label.
#   color_TE      if TRUE, colour points on a 10-step red-to-blue ramp
#                 according to their (min-max normalised) TE value.
#   removeZero    if TRUE, keep only entries where both RPFs and mRNA are > 0.
#   log2          if TRUE, log2-transform mRNA, RPFs and TE before plotting.
#   breaks.length number of break points used for the marginal histograms.
#   ...           forwarded to plot(), text() and barplot().
#
# Returns the value of the last barplot() call. The graphical parameters
# changed through par() are restored when the function exits.
plotRPFsmRNA <- function(TE, sample, alpha, cex, color_TE=FALSE,
                   removeZero=TRUE, log2=TRUE, breaks.length=50, ...){
  if (!is.list(TE)) {
    stop("TE must be output of translationalEfficiency.")
  }
  # All three components are read below, so every one of them must be present
  # (the previous any() check let a list with a single component through).
  if (!all(c("RPFs", "mRNA", "TE") %in% names(TE))) {
    stop("TE must be output of translationalEfficiency.")
  }
  if (missing(sample)) {
    stop("sample is required.")
  }
  # Min-max normalisation based on the finite values only, so that a single
  # NA or infinite TE value does not blank the colour of every point.
  norm01 <- function(x) {
    rng <- range(x, finite = TRUE)
    (x - rng[1]) / (rng[2] - rng[1])
  }
  rbPal <- colorRampPalette(c("red", "blue"))

  mRNA <- TE$mRNA
  RPFs <- TE$RPFs
  TE <- TE$TE
  if (!is.numeric(sample)) {
    sample <- which(colnames(TE) %in% sample)
  }
  if (length(sample) < 1) {
    stop("sample not found in TE.")
  }
  if (length(sample) > 1) {
    sample <- sample[1]
    message("Only first sample will be plotted.")
  }
  TE <- TE[, sample]
  RPFs <- RPFs[, sample]
  mRNA <- mRNA[, sample]
  if (removeZero) {
    keep <- RPFs > 0 & mRNA > 0
    # Missing counts can be neither plotted nor log-transformed; drop them.
    keep[is.na(keep)] <- FALSE
    if (sum(keep) < 1) {
      stop("No data available for plotting.")
    }
    mRNA <- mRNA[keep]
    RPFs <- RPFs[keep]
    TE <- TE[keep]
  }
  if (log2) {
    mRNA <- log2(mRNA)
    RPFs <- log2(RPFs)
    TE <- log2(TE)
  }
  # cor.test() drops incomplete pairs itself and has no na.rm argument.
  test <- cor.test(mRNA, RPFs)

  # Main panel: lower-left three quarters of the device.
  opar <- par(fig=c(0, .75, 0, .75), new=FALSE, mar=c(5.1, 4.1, 0, 0))
  on.exit(par(opar), add = TRUE)
  dots <- list(...)
  args <- dots
  args$x <- RPFs
  args$y <- mRNA
  if (!color_TE) {
    args$col <- alpha
  } else {
    args$col <- rbPal(10)[as.numeric(cut(norm01(TE), breaks = 10))]
  }
  if (length(args$xlab) == 0) args$xlab <- "RPFs level"
  if (length(args$ylab) == 0) args$ylab <- "mRNA level"
  do.call(plot, args)

  # Correlation label, anchored at the 15% quantile of RPFs and the 99.9%
  # quantile of mRNA so it sits near the top-left of the scatter.
  par(fig=c(0, .75, 0, .75), new=TRUE, mar=c(5.1, 4.1, 0, 0))
  args <- dots
  args$x <- quantile(RPFs, probs = seq(0, 1, 0.01), na.rm = TRUE)["15%"]
  args$y <- quantile(mRNA, probs = seq(0, 1, 0.001), na.rm = TRUE)["99.9%"]
  args$col <- "black"
  args$cex <- cex
  args$labels <- paste0(ifelse(test$p.value == 0, "p<2.2e-16",format(test$p.value, digit=2)),", R=",format(test$estimate, digit=2))
  do.call(text, args)

  # Axis limits of the scatter panel; the marginals use the same range so
  # their bars line up with the scatter axes.
  ylim <- par("usr")[3:4]
  xlim <- par("usr")[1:2]

  # Draw one marginal density as a bar plot in the current figure region.
  draw_marginal <- function(values, lim, horiz) {
    h <- hist(values, breaks = seq(lim[1], lim[2], length.out = breaks.length),
              plot = FALSE)
    bar_args <- dots
    bar_args$height <- h$density
    bar_args$axes <- FALSE
    bar_args$space <- 0
    bar_args$horiz <- horiz
    bar_args$cex <- NULL
    do.call(barplot, bar_args)
  }

  # Right panel: mRNA density, drawn horizontally.
  par(fig=c(.75, 1, 0, .75), new=TRUE, mar=c(5.1, 0, 4.1, 5.1))
  draw_marginal(mRNA, ylim, horiz = TRUE)
  # Top panel: RPFs density.
  par(fig=c(0, .75, 0.75, 1), new=TRUE, mar=c(0, 4.1, 4.1, 0))
  draw_marginal(RPFs, xlim, horiz = FALSE)
}



In [None]:
# Work from the Figures directory so that the relative data path used by the
# load() call below resolves.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
setwd("/Users/inamojun/TMDU-LR_isoform_atlas/Figures")

In [None]:
# Load the saved workspace for this figure (path is relative to the working
# directory). The following cells use the objects `TE90` and `te`, which are
# presumably restored from this file — TODO confirm the .RData contents.
load("../data/data_Fig04.RData")

In [None]:
# Quick inspection of the loaded objects: a preview of TE90 and the
# structure (column names and types) of te.
head(TE90)
str(te)

In [None]:
# Preview the first entries of te and report its dimensions.
head(te)
dim(te)

In [None]:
# Set the notebook plot size (repr.plot.* options) to 4 x 4.
options(repr.plot.width=4, repr.plot.height=4)
# RPFs-vs-mRNA scatter with marginal densities for sample column 5 of TE90,
# on a log2 scale. Points are coloured by TE because color_TE=TRUE; the
# `alpha` colour is only used by plotRPFsmRNA when color_TE is FALSE.
plotRPFsmRNA(TE90, sample=5, log2=TRUE, pch=20, cex=1.2, alpha=c(scales::alpha('darkblue', 0.05)), color_TE=TRUE)

In [None]:
# Spearman rank correlation tests of te$med_te against the raw columns:
# 5' UTR length, 3' UTR length, CDS length, med_RNA and med_ribo.
cor.test(te$med_te,te$five_utr_length,method="spearman")
cor.test(te$med_te,te$three_utr_length,method="spearman")
cor.test(te$med_te,te$CDS_length,method="spearman")
cor.test(te$med_te,te$med_RNA,method="spearman")
cor.test(te$med_te,te$med_ribo,method="spearman")
# The same tests using the normalized_* columns, plus the Kozak score.
cor.test(te$normalized_med_te,te$normalized_five_utr_length,method="spearman")
cor.test(te$normalized_med_te,te$normalized_three_utr_length,method="spearman")
cor.test(te$normalized_med_te,te$normalized_CDS_length,method="spearman")
cor.test(te$normalized_med_te,te$kozak_score,method="spearman")
# Median 5' UTR length within each te_rank group.
te %>%
  dplyr::group_by(te_rank) %>%
  dplyr::summarize(median(five_utr_length))
# Median 3' UTR length within each te_rank group.
te %>%
  dplyr::group_by(te_rank) %>%
  dplyr::summarize(median(three_utr_length))
# Median CDS length within each te_rank group.
te %>%
  dplyr::group_by(te_rank) %>%
  dplyr::summarize(median(CDS_length))

In [None]:
# Set the notebook plot size (repr.plot.* options) to 4 x 4.
options(repr.plot.width=4, repr.plot.height=4)
# For each transcript feature column of te, draw a scatter of the feature (x)
# against te$normalized_med_te (y), overlay the least-squares line, and
# annotate the panel with the Spearman p-value and rho.
for (i in c("normalized_length", "normalized_five_utr_length",
            "normalized_CDS_length", "normalized_three_utr_length",
            "kozak_score", "avg_codon_freq", "min_codon_freq",
            "au_element_count", "au_element_frac", "max_au_length",
            "MFE", "centroid", "MEA", "gc_content")) {
  # Look the column up by name directly instead of building and evaluating
  # code text with eval(parse(...)).
  x <- te[[i]]
  y <- te$normalized_med_te
  if (i == "kozak_score") {
    # something with a UTR that is less than 6nt is not likely to follow kozak
    # behavior, so the score is set to -1 to indicate oddball status; exclude
    # those entries from both vectors.
    keep <- x != -1
    y <- y[keep]
    x <- x[keep]
  }
  test <- cor.test(x, y, method = "spearman")
  plot(x, y, las = 1, col = c(scales::alpha('black', 0.1)), pch = 16,
       xlab = i, ylab = "Translation Efficiency")
  abline(lm(y ~ x), col = 4, lwd = 3)
  # p-values below 2.2e-16 are reported as a bound rather than a number.
  legend("topright", col=rgb(1,0,0), legend = paste0(ifelse(test$p.value < 2.2e-16, "p < 2.2e-16",paste0("p = ",format(test$p.value, digit=2))),", rho = ",format(test$estimate, digit=2)),
         bty = "n")
}
