In [2]:
suppressMessages(library(readr))
suppressMessages(library(DESeq2))
suppressMessages(library(RColorBrewer))
suppressMessages(library(ggplot2))
suppressMessages(library(BiocParallel))
suppressMessages(library(pheatmap))
suppressMessages(library(sva))
suppressMessages(library(reshape2))
suppressMessages(library(edgeR))
suppressMessages(library(cowplot))
suppressMessages(library(riborex))
suppressMessages(library(ggrepel))
suppressMessages(library(fdrtool))
suppressMessages(library(grid))

# Negated membership test: for each element of `x`, TRUE when it does not
# occur in `y` (the element-wise complement of `%in%`).
`%notin%` <- function(x, y) {
  !(x %in% y)
}
# Eight-entry base palette; the annotation colours below pick entries by index.
cbbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Named colour vectors keyed by annotation name (species, assay); each vector
# maps an annotation value to a palette entry.
colors <- list(
  species = setNames(cbbPalette[5:7], c("GRCh38", "Mmul8", "panTro3")),
  assay = setNames(cbbPalette[3:4], c("ribo", "rna"))
)

# Write a results table to disk as tab-separated files and return the
# significant subset.
#
# Arguments:
#   df          Results table, coercible with as.data.frame(); must contain
#               the columns `padj` and `log2FoldChange`.
#   results.dir Output directory. Created (recursively) if it does not exist.
#   prefix      File-name prefix shared by all four output files.
#
# Files written into `results.dir`:
#   <prefix>.tsv           all rows, ordered by ascending padj
#   <prefix>.sig.tsv       rows with padj < 0.05
#   <prefix>.sig.up.tsv    significant rows with log2FoldChange > 0
#   <prefix>.sig.down.tsv  significant rows with log2FoldChange < 0
#
# Returns:
#   The rows with padj < 0.05 as a data.frame (rows whose padj is NA are
#   never counted as significant).
write_results <- function(df, results.dir, prefix){
  df <- as.data.frame(df)

  # Fail early with a clear message instead of a cryptic error further down.
  missing.cols <- setdiff(c("padj", "log2FoldChange"), colnames(df))
  if (length(missing.cols) > 0) {
    stop("write_results: missing required column(s): ",
         paste(missing.cols, collapse = ", "), call. = FALSE)
  }

  # write.table() cannot create directories; make sure the target exists.
  if (!dir.exists(results.dir)) {
    dir.create(results.dir, recursive = TRUE)
  }

  df <- df[order(df$padj), , drop = FALSE]

  # which() drops NA comparisons, matching the behaviour of subset().
  df.sig <- df[which(df$padj < 0.05), , drop = FALSE]
  df.sig.up <- df.sig[which(df.sig$log2FoldChange > 0), , drop = FALSE]
  df.sig.down <- df.sig[which(df.sig$log2FoldChange < 0), , drop = FALSE]

  # Single writer so every output shares the same naming scheme and format.
  write.tsv <- function(tbl, ...) {
    out.path <- file.path(results.dir, paste(prefix, ..., 'tsv', sep = '.'))
    write.table(tbl, file = out.path, sep = '\t')
  }

  write.tsv(df)
  write.tsv(df.sig, 'sig')
  write.tsv(df.sig.up, 'sig', 'up')
  write.tsv(df.sig.down, 'sig', 'down')

  df.sig
}

# Draw a sample-to-sample distance heatmap and optionally save it to a file.
#
# Arguments:
#   rlogdist  Object for which assay() returns a matrix with one column per
#             sample (e.g. the result of rlogTransformation()); its column
#             names are used as row labels.
#   filename  Optional output path. When not NULL the heatmap is rendered a
#             second time, with larger cells, into this file.
#
# Distances are computed with dist() between the columns of assay(rlogdist);
# the same distance object drives both row and column clustering.
plotHeatMap <- function(rlogdist, filename=NULL){
  sampleDists <- dist(t(assay(rlogdist)))
  sampleDistMatrix <- as.matrix(sampleDists)
  rownames(sampleDistMatrix) <- colnames(rlogdist)
  colnames(sampleDistMatrix) <- NULL

  # Reversed "Blues" ramp. Named `blues` so the file-level `colors`
  # annotation list is not shadowed inside this function.
  blues <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

  # On-screen version. `color` is spelled out in full (no partial argument
  # matching) and the call no longer ends with a stray empty argument.
  pheatmap(sampleDistMatrix,
           clustering_distance_rows = sampleDists,
           clustering_distance_cols = sampleDists,
           cellwidth = 10,
           cellheight = 10,
           color = blues)

  # File version, drawn with larger cells.
  if (!is.null(filename)) {
    pheatmap(sampleDistMatrix,
             clustering_distance_rows = sampleDists,
             clustering_distance_cols = sampleDists,
             cellwidth = 20,
             cellheight = 20,
             color = blues,
             filename = filename)
  }
}

# ---- panTro3 CDS counts: ribo vs rna ----------------------------------------

# Sample sheet, restricted to the panTro3 samples.
metadata <- read.csv('../../re-ribo-smk/data/ortho-datasets-metadata/rna_ribo_metadata.tsv', sep='\t', stringsAsFactors=FALSE)
metadata.panTro3 <- subset(metadata, metadata$species == 'panTro3')
# Explicit level order: the first level ("male" / "rna") is the reference level.
# NOTE: `assay` (the factor) shares its name with the assay() accessor used
# below; R still resolves assay(...) calls to the function.
sex <- factor(metadata.panTro3$sex, levels=c("male", "female"))
assay <- factor(metadata.panTro3$assay, levels=c("rna", "ribo"))

panTro3.cds.counts <- read.csv('../../re-ribo-smk/data/ortho-datasets-counts/panTro3_cds.tsv', sep='\t', row.names=1)
# Fail with a readable message if the count table lacks a sample from the sheet.
missing.samples <- setdiff(metadata.panTro3$experiment_accession, colnames(panTro3.cds.counts))
if (length(missing.samples) > 0) {
  stop("panTro3_cds.tsv has no column for: ", paste(missing.samples, collapse=", "))
}
# Put the count columns in metadata row order so they line up with `sex`/`assay`.
panTro3.cds.counts <- panTro3.cds.counts[, metadata.panTro3$experiment_accession]


coldata <- data.frame(row.names=colnames(panTro3.cds.counts), sex, assay)
dds <- DESeqDataSetFromMatrix(countData=panTro3.cds.counts,
                              colData=coldata,
                              design=~assay)
dds <- DESeq(dds)
# Keep only genes with a non-zero count in every sample. This happens after
# DESeq(), so the model was fitted on the unfiltered gene set.
dds <- dds[apply(counts(dds), 1, function(row) all(row != 0)),]
rld <- rlogTransformation(dds, blind=TRUE)


# Column annotations (assay, sex) for the expression heatmap.
annotation.df <- as.data.frame(colData(dds)[, c("assay", "sex")])
rownames(annotation.df) <- rownames(colData(dds))
colnames(annotation.df) <- c("assay", "sex")
# `filename` is spelled out in full rather than relying on partial matching of `file`.
pheatmap(assay(rld), show_rownames = FALSE, cluster_rows = FALSE, annotation_col=annotation.df, annotation_colors = colors, filename='../../re-ribo-smk/plots/panTro3/panTro3_cds_rna_ribo_heatmap.pdf')

data <- plotPCA(rld, intgroup = c("assay", "sex"), returnData=TRUE)
percentVar <- round(100 * attr(data, "percentVar"))

pca.plot <- ggplot(data, aes(PC1, PC2, color=assay, shape=sex, label = rownames(data))) +
  scale_color_manual("assay", values=colors$assay) +
  geom_text_repel() +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  coord_fixed() + #ggtitle('PCA -- all samples') +
  theme(text = element_text(size=12))
print(pca.plot)
# Pass the plot explicitly so the saved figure does not depend on last_plot().
ggsave('../../re-ribo-smk/plots/panTro3/panTro3_cds_rna_ribo_pca.pdf', plot = pca.plot)

results.panTro3.TE <- results(dds)
write_results(results.panTro3.TE,
              "/home/cmb-panasas2/skchoudh/github_projects/re-ribo-smk/data/ortho-datasets-translational-efficiency/",
              "panTro3_TE")


# ---- panTro3 uORF counts: ribo vs rna ---------------------------------------
# Uses `metadata.panTro3`, `sex` and `assay` defined earlier in this file.

panTro3.uORF.counts <- read.csv('../../re-ribo-smk/data/ortho-datasets-counts/panTro3_uORF.tsv', sep='\t', row.names=1)
# Fail with a readable message if the count table lacks a sample from the sheet.
missing.samples <- setdiff(metadata.panTro3$experiment_accession, colnames(panTro3.uORF.counts))
if (length(missing.samples) > 0) {
  stop("panTro3_uORF.tsv has no column for: ", paste(missing.samples, collapse=", "))
}
# Put the count columns in metadata row order so they line up with `sex`/`assay`.
panTro3.uORF.counts <- panTro3.uORF.counts[, metadata.panTro3$experiment_accession]

coldata <- data.frame(row.names=colnames(panTro3.uORF.counts), sex, assay)
dds <- DESeqDataSetFromMatrix(countData=panTro3.uORF.counts,
                              colData=coldata,
                              design=~assay)
dds <- DESeq(dds)
# Keep only features with a non-zero count in every sample. This happens after
# DESeq(), so the model was fitted on the unfiltered set.
dds <- dds[apply(counts(dds), 1, function(row) all(row != 0)),]
rld <- rlogTransformation(dds, blind=TRUE)


# Column annotations (assay, sex) for the expression heatmap.
annotation.df <- as.data.frame(colData(dds)[, c("assay", "sex")])
rownames(annotation.df) <- rownames(colData(dds))
colnames(annotation.df) <- c("assay", "sex")
# Draw once on the active device, then once more into the PDF.
# `filename` is spelled out in full rather than relying on partial matching of `file`.
pheatmap(assay(rld), show_rownames = FALSE, cluster_rows = FALSE, annotation_col=annotation.df, annotation_colors = colors)
pheatmap(assay(rld), show_rownames = FALSE, cluster_rows = FALSE, annotation_col=annotation.df, annotation_colors = colors, filename='../../re-ribo-smk/plots/panTro3/panTro3_uORF_rna_ribo_heatmap.pdf')

data <- plotPCA(rld, intgroup = c("assay", "sex"), returnData=TRUE)
percentVar <- round(100 * attr(data, "percentVar"))

pca.plot <- ggplot(data, aes(PC1, PC2, color=assay, shape=sex, label = rownames(data))) +
  scale_color_manual("assay", values=colors$assay) +
  geom_text_repel() +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  coord_fixed() + #ggtitle('PCA -- all samples') +
  theme(text = element_text(size=12))
print(pca.plot)
# Pass the plot explicitly so the saved figure does not depend on last_plot().
ggsave('../../re-ribo-smk/plots/panTro3/panTro3_uORF_rna_ribo_pca.pdf', plot = pca.plot)

# These results come from the uORF count table, so they are stored and written
# under a uORF label (previously mislabelled "dORF", which did not match the
# input file or the plot names above).
results.panTro3.uORF <- results(dds)
# Backward-compatible alias for any later code that still uses the old name.
results.panTro3.dORF <- results.panTro3.uORF
write_results(results.panTro3.uORF,
              "/home/cmb-panasas2/skchoudh/github_projects/re-ribo-smk/data/ortho-datasets-translational-efficiency/",
              "panTro3_uORF")


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
Saving 7 x 7 in image


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
ENSPTRG00000001238,14557.9286,10.285848,0.2462130,41.77623,0.000000e+00,0.000000e+00
ENSPTRG00000051784,4943.3752,9.565784,0.2670627,35.81849,5.693032e-281,2.815774e-277
ENSPTRG00000052585,3597.6217,8.819081,0.2472084,35.67468,9.763496e-279,3.219350e-275
ENSPTRG00000052551,8074.3628,11.669247,0.3573702,32.65311,7.239056e-234,1.790219e-230
ENSPTRG00000042706,7677.2689,10.827130,0.3366482,32.16155,6.089732e-227,1.204793e-223
ENSPTRG00000048013,8753.1078,10.418803,0.3299261,31.57920,7.126380e-219,1.174903e-215
ENSPTRG00000047137,5540.5392,10.908557,0.3533290,30.87365,2.697742e-209,3.812295e-206
ENSPTRG00000043682,5281.9546,8.146660,0.2690314,30.28145,2.012099e-201,2.487960e-198
ENSPTRG00000004722,6648.2416,11.222235,0.3817613,29.39594,6.188051e-190,6.801356e-187
ENSPTRG00000047324,3646.1549,8.621888,0.3021371,28.53634,4.149928e-179,4.105109e-176


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
Saving 7 x 7 in image


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
ENSPTRG00000017560,279.90138,-5.350642,0.4058629,-13.183373,1.093885e-39,2.074007e-36
ENSPTRG00000021307,183.63000,-4.184111,0.3610846,-11.587622,4.761696e-31,4.514088e-28
ENSPTRG00000013221,117.82757,4.474337,0.3993672,11.203566,3.916437e-29,1.856391e-26
ENSPTRG00000016989,180.91716,-3.130960,0.2794493,-11.204037,3.895668e-29,1.856391e-26
ENSPTRG00000039581,393.03286,3.106161,0.3054798,10.168138,2.751187e-24,1.043250e-21
ENSPTRG00000012227,600.46460,2.665438,0.2627385,10.144831,3.493809e-24,1.104044e-21
ENSPTRG00000046892,1028.88034,-2.361917,0.2430980,-9.715905,2.579478e-22,6.986700e-20
ENSPTRG00000011268,951.23127,-5.790654,0.5975515,-9.690636,3.304642e-22,7.832001e-20
ENSPTRG00000021229,230.53161,2.635395,0.2736155,9.631747,5.872270e-22,1.237092e-19
ENSPTRG00000008899,157.93034,-4.198067,0.4405129,-9.529951,1.573576e-21,2.983500e-19
