# Riborex Analysis T0, T1, T24 all interaction


In [1]:
suppressMessages(library(riborex))
suppressMessages(library(fdrtool))


annotations <- read.table(file.path('..', 'data', 'annotations', 'hg38_gene_names_stripped.tsv'), 
                          header=F, 
                          col.names = c('gene_id', 'gene_name', 'gene_type'))
rownames(annotations) <- annotations$gene_id

readcounts.dir <- file.path('..', 'data', 'read_counts', 'byCDS')
results.dir <- file.path('..', 'results', 'translation_efficiency')
rna.design.file <- file.path('..', 'data', 'design_files', 'rna_seq_design.tsv')
ribo.design.file <- file.path('..', 'data', 'design_files', 'ribo_seq_design.tsv')

## Suffix of htseq-count output
counts.suffix <- '.CDS.counts.tsv'

rna.design.info <- read.table(rna.design.file, header=T, stringsAsFactors=FALSE)
ribo.design.info <- read.table(ribo.design.file, header=T, stringsAsFactors=FALSE)
histone.genes.df <- read.table(file.path('..', 'data', 'annotations', 
                                         'histone_genes.tsv'), 
                               header = T, 
                               stringsAsFactors = F,
                               sep = '\t') 


histone.genes.1 <- annotations[grep('HIST', annotations$gene_name), ]$gene_id
histone.genes.2 <-annotations[annotations$gene_name %in% 
                                histone.genes.df$Approved.Symbol, ]$gene_id
# Add RP1-34B50.21 separately since symbol doesn't have HIST in it.
histone.genes <- unique(c(histone.genes.1, histone.genes.2, 'ENSG00000282988'))

count.reads <- function(design.info, dirname){
 files <- paste(design.info$SampleFile, counts.suffix, sep='')
 sampleName <- design.info$SampleName
 sampleTable <- data.frame(sampleName=sampleName, fileName=files)
 ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
          directory = file.path(readcounts.dir, dirname),
          design = ~ 1)
 rownames(ddsHTSeq) <- gsub('\\.[0-9]+', '', rownames(ddsHTSeq))
 ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
 dds <- DESeq(ddsHTSeq)

     # Remove histones
 ddsHTSeq <- ddsHTSeq[!(rownames(ddsHTSeq) %in% histone.genes),]
 ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
 return(counts(dds))
}

write_results <- function(df, prefix){
 df<- as.data.frame(df)
 df <- df[order(df$padj),]
 df$gene_name <- annotations[rownames(df),]$gene_name
 df.sig <- subset(df, padj<0.05)
 df.sig.up <- subset(df.sig, log2FoldChange>0)
 df.sig.down <- subset(df.sig, log2FoldChange<0)
 write.table(df, 
             file = file.path(results.dir, paste(prefix, 'all', 'tsv', sep='.')),
             sep = '\t')
 write.table(df.sig, 
             file = file.path(results.dir, paste(prefix, 'sig', 'tsv', sep='.')),
             sep = '\t')
 write.table(df.sig.up, 
             file = file.path(results.dir, paste(prefix, 'sig', 'up', 'tsv', sep='.')),
             sep = '\t')
 write.table(df.sig.down, 
             file = file.path(results.dir, paste(prefix, 'sig', 'down', 'tsv', sep='.')),
             sep = '\t'
             )
 return (df.sig)
}

riborex.for.cellline <- function(rna.read.counts, ribo.read.counts, cell.line, contrast, merge.T1=FALSE){
 rna.read.counts <- rna.read.counts[, grepl(cell.line, colnames(rna.read.counts))]
 ribo.read.counts <- ribo.read.counts[, grepl(cell.line, colnames(ribo.read.counts))]
 rna.conditions.time <- as.factor(as.vector(sapply(colnames(rna.read.counts), function(x) unlist(strsplit(x, '_'))[3])))
 ribo.conditions.time <- as.factor(as.vector(sapply(colnames(ribo.read.counts), function(x) unlist(strsplit(x, '_'))[4])))
 rna.conditions.cell <- as.factor(as.vector(sapply(colnames(rna.read.counts), function(x) unlist(strsplit(x, '_'))[1])))
 ribo.conditions.cell <- as.factor(as.vector(sapply(colnames(ribo.read.counts), function(x) unlist(strsplit(x, '_'))[1])))
 
 if (merge.T1){
   levels(rna.conditions.time)[levels(rna.conditions.time)=='T0'] <- 'T0T1'
   levels(rna.conditions.time)[levels(rna.conditions.time)=='T1'] <- 'T0T1'
   levels(ribo.conditions.time)[levels(ribo.conditions.time)=='T0'] <- 'T0T1'
   levels(ribo.conditions.time)[levels(ribo.conditions.time)=='T1'] <- 'T0T1'
 }
 rna.conditions <- data.frame('time' = rna.conditions.time)
 ribo.conditions <- data.frame('time' = ribo.conditions.time)
 common.genes <- intersect(rownames(rna.read.counts), rownames(ribo.read.counts)) 
 rna.read.counts <- rna.read.counts[common.genes,]
 ribo.read.counts <- ribo.read.counts[common.genes,]
 colnames(rna.read.counts) <- paste(colnames(rna.read.counts), 'RNA', sep='_')
 colnames(ribo.read.counts) <- paste(colnames(ribo.read.counts), 'Ribo', sep='_')
 res <- riborex(rna.read.counts, 
        ribo.read.counts, 
        rna.conditions, 
        ribo.conditions, 
        contrast = contrast)
 return (res)
}
                                                    
                                                    
doPvalueAdjustment <- function(results){
  hist(results$pvalue,  main = 'DESeq2 unadjusted p-values', 
       xlab = 'Unadjusted p-values')
  results <- results[ !is.na(results$padj), ]
  results <- results[ !is.na(results$pvalue), ]
  results <- results[, -which(names(results) == 'padj')]
  resultsFDR <- fdrtool(results$stat, 
                        statistic= 'normal', 
                        plot = T)
  results[,'padj']  <- p.adjust(resultsFDR$pval,
                                method = 'BH')
  hist(resultsFDR$pval, 
       main = 'DESeq2 corrected p-values | Empirical null', 
       xlab = 'Corrected p-values')
  return (results)
}

In [1]:
                                                    


rna.read.counts.all <- count.reads(rna.design.info, 'RNA_seq')
ribo.read.counts.all <- count.reads(ribo.design.info, 'Ribo_seq')

rna.conditions.time.all <- as.factor(as.vector(sapply(colnames(rna.read.counts.all), function(x) unlist(strsplit(x, '_'))[3])))
ribo.conditions.time.all <- as.factor(as.vector(sapply(colnames(ribo.read.counts.all), function(x) unlist(strsplit(x, '_'))[4])))

rna.conditions.time.all.t0t1 <- rna.conditions.time.all
ribo.conditions.time.all.t0t1 <- ribo.conditions.time.all

levels(rna.conditions.time.all.t0t1)[levels(rna.conditions.time.all.t0t1)=='T0'] <- 'T0T1'
levels(rna.conditions.time.all.t0t1)[levels(rna.conditions.time.all.t0t1)=='T1'] <- 'T0T1'

levels(ribo.conditions.time.all.t0t1)[levels(ribo.conditions.time.all.t0t1)=='T0'] <- 'T0T1'
levels(ribo.conditions.time.all.t0t1)[levels(ribo.conditions.time.all.t0t1)=='T1'] <- 'T0T1'

rna.conditions.cell.all <- as.factor(as.vector(sapply(colnames(rna.read.counts.all), function(x) unlist(strsplit(x, '_'))[1])))
ribo.conditions.cell.all <- as.factor(as.vector(sapply(colnames(ribo.read.counts.all), function(x) unlist(strsplit(x, '_'))[1])))

rna.conditions.all <- data.frame('cell.type' = rna.conditions.cell.all, 
                                 'time' = rna.conditions.time.all)
ribo.conditions.all <- data.frame('cell.type' = ribo.conditions.cell.all, 
                                  'time' = ribo.conditions.time.all)

rna.conditions.all.t0t1 <- data.frame('cell.type' = rna.conditions.cell.all, 
                                      'time' = rna.conditions.time.all.t0t1)
ribo.conditions.all.t0t1 <- data.frame('cell.type' = ribo.conditions.cell.all, 
                                       'time' = ribo.conditions.time.all.t0t1)

common.genes <- intersect(rownames(rna.read.counts.all), rownames(ribo.read.counts.all)) 

rna.read.counts.all <- rna.read.counts.all[common.genes,]
ribo.read.counts.all <- ribo.read.counts.all[common.genes,]

colnames(rna.read.counts.all) <- paste(colnames(rna.read.counts.all), 'RNA', sep='_')
colnames(ribo.read.counts.all) <- paste(colnames(ribo.read.counts.all), 'Ribo', sep='_')

“the design is ~ 1 (just an intercept). is this intended?”estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 20 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
“the design is ~ 1 (just an intercept). is this intended?”estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 26 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing


In [2]:
contrast.t1vst0 <- c('time', 'T1', 'T0')
contrast.t24vst0 <- c('time', 'T24', 'T0')
contrast.t24vst1 <- c('time', 'T24', 'T1')

contrast.t24vst0t1 <- c('time', 'T24', 'T0T1')

# All T1 vs T0

In [3]:
res.t1vst0 <- riborex(rna.read.counts.all, 
                      ribo.read.counts.all, 
                      rna.conditions.all, 
                      ribo.conditions.all, 
                      contrast = contrast.t1vst0)
write_results(res.t1vst0, 'All.Riborex.T1vsT0')

DESeq2 mode selected
combining design matrix
applying DESeq2 to modified design matrix
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
“cannot open file '../results/translation_efficiency/with_histones/All.Riborex.T1vsT0.all.tsv': No such file or directory”

ERROR: Error in file(file, ifelse(append, "a", "w")): cannot open the connection


# All T24 vs T0

In [None]:
res.t24vst0 <- riborex(rna.read.counts.all, 
                       ribo.read.counts.all, 
                       rna.conditions.all, 
                       ribo.conditions.all, 
                       contrast = contrast.t24vst0)
write_results(res.t24vst0, prefix='All.Riborex.T24vsT0')

# All T24 vs T1 

In [None]:
res.t24vst1 <- riborex(rna.read.counts.all, 
                       ribo.read.counts.all, 
                       rna.conditions.all, 
                       ribo.conditions.all, 
                       contrast = contrast.t24vst1)
write_results(res.t24vst0, prefix='All.Riborex.T24vsT1')

# All T24 vs (T1+T0)

In [None]:
res.t24vst0t1 <- riborex(rna.read.counts.all, 
                         ribo.read.counts.all, 
                         rna.conditions.all.t0t1, 
                         ribo.conditions.all.t0t1, 
                         contrast = contrast.t24vst0t1)
write_results(res.t24vst0t1, prefix='All.Riborex.T24vsT0T1')