# load functions

In [271]:
suppressMessages(suppressWarnings(source("./jupyter_common.R")))

# common parameters

In [272]:
type <- "limma-voom.mrna"
level <- "htseq_gene"

rundate_appendix <- ".rdna_rn18s"
pattern_remove_gene <- NULL

th_adj_pvalue <- 0.05
th_log2fc <- log2(1.2)

list_genes <- list()
strdir <- type

dir_limma_output <- "out/limma-voom.mrna"
dir_rnaseq <- sprintf('./%s/170224.rdna_rn18s/NMuMG', dir_limma_output) 
dir_riboseq <- sprintf("./%s/161021.rdna_rn18s/NMuMG", dir_limma_output)

f_write_table <- FALSE
dir_rnaseq
dir_riboseq
strdir

# read gtf

In [273]:
library('rtracklayer')
my_file <- "data/mouse/Mus_musculus.GRCm39.104.rdna_rn18s.gtf.gz"
show(my_file)
granges_gtf <- import(my_file)
head(granges_gtf)
length(granges_gtf)

[1] "data/mouse/Mus_musculus.GRCm39.104.rdna_rn18s.gtf.gz"


GRanges object with 6 ranges and 22 metadata columns:
      seqnames              ranges strand |   source       type     score
         <Rle>           <IRanges>  <Rle> | <factor>   <factor> <numeric>
  [1]        1 150956201-150958296      + |   havana gene              NA
  [2]        1 150956201-150958296      + |   havana transcript        NA
  [3]        1 150956201-150958296      + |   havana exon              NA
  [4]        1 150983666-150984611      + |   havana gene              NA
  [5]        1 150983666-150984611      + |   havana transcript        NA
  [6]        1 150983666-150984611      + |   havana exon              NA
          phase            gene_id gene_version   gene_name gene_source
      <integer>        <character>  <character> <character> <character>
  [1]      <NA> ENSMUSG00000102628            2     Gm37671      havana
  [2]      <NA> ENSMUSG00000102628            2     Gm37671      havana
  [3]      <NA> ENSMUSG00000102628            2     Gm37671      h

In [275]:
df_gtf0 <- as.data.frame(granges_gtf)
if (grepl("isoform", level)) {
    df_gtf <- df_gtf0[df_gtf0$type=='transcript',]
} else {
    df_gtf <- df_gtf0[df_gtf0$type=='gene',]
}
          
#head(df_gtf)
t(df_gtf[c(1,2,3),])
dim(df_gtf)

length(unique(df_gtf$gene_id))

gene_name <- unique(df_gtf$gene_name)
any(is.na(gene_name))
any(gene_name == 'NA')
any(nchar(gene_name)==0)
length(gene_name)


Unnamed: 0,1,4,7
seqnames,1,1,1
start,150956201,150983666,151012258
end,150958296,150984611,151013531
width,2096,946,1274
strand,+,+,+
source,havana,havana,havana
type,gene,gene,gene
score,,,
phase,,,
gene_id,ENSMUSG00000102628,ENSMUSG00000100595,ENSMUSG00000097426


## gene_id

In [276]:
length(unique(df_gtf$gene_id))

In [277]:
f <- grepl('^ucsc', df_gtf$gene_id)
gene_id_ucsc <- unique(df_gtf$gene_id[f])
head(gene_id_ucsc)
length(gene_id_ucsc)

In [278]:
length(setdiff(df_gtf$gene_id, gene_id_ucsc))

## gene_name

In [279]:
length(unique(df_gtf$gene_name))

In [280]:
f <- grepl('^ucsc', df_gtf$gene_name)
gene_name_ucsc <- unique(df_gtf$gene_name[f])
head(gene_name_ucsc)
length(gene_name_ucsc)

In [281]:
length(setdiff(df_gtf$gene_name, gene_name_ucsc))
56634-1181

## gene_biotype

In [282]:
f <- df_gtf$gene_biotype == 'protein_coding'
length(unique(df_gtf[f,'gene_name']))

In [283]:
f <- df_gtf$gene_biotype == 'rRNA'
f <- f & !grepl("n-R5s", df_gtf$gene_name)
sort(unique(df_gtf[f,'gene_name']))

## transcript_id

In [284]:
length(unique(df_gtf$transcript_id))

## transcript_name

In [285]:
length(unique(df_gtf$transcript_name))

## transcript_biotype

In [286]:
f <- df_gtf$transcript_biotype == 'protein_coding'
length(unique(df_gtf[f,'transcript_name']))

In [287]:
idx <- which(df_gtf0$transcript_biotype == "rRNA" & df_gtf0$exon_number > 1)
if (length(idx) > 0) {
    head(df_gtf0[idx, ])
    dim(df_gtf0[idx, ])
} else {
    f <- df_gtf0$transcript_biotype == "rRNA"
    unique(df_gtf[f, 'exon_number'])
}

# df_count_mrna

In [288]:
# Jake quick lysis totalRNA 

fname_in <- sprintf("%s/blancgrp_211613_RNAseq_total_stranded.counts.raw.txt.gz", dir_rnaseq)
verb('%s\n', fname_in)
df_count_mrna <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)

# add seqnames, start, end, witdth, strand, source
sym <- rownames(df_count_mrna)
if (grepl("isoform", level)) {
    idx <- match(rownames(df_count_mrna), df_gtf$transcript_name)
    df_count_mrna <- cbind(df_gtf[idx, 1:6], df_count_mrna)
    df_count_mrna$biotype <- df_gtf[idx, "transcript_biotype"]
    
} else {    
    idx <- match(rownames(df_count_mrna), df_gtf$gene_name)
    df_count_mrna <- cbind(df_gtf[idx, 1:6], df_count_mrna)    
    df_count_mrna$biotype <- df_gtf[idx, "gene_biotype"]
}

rownames(df_count_mrna) <- sym
head(df_count_mrna)

./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/blancgrp_211613_RNAseq_total_stranded.counts.raw.txt.gz


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,JBQS001,JBQS002,JBQS003,JBQS004,JBQS005,JBQS006,JBQS007,JBQS008,JBQS009,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>
rDNA_promoter,BK000964.3,45306,45446,141,+,stjude,0,0,0,0,0,0,0,0,0,rDNA
rDNA_promoter1,BK000964.3,1,141,141,+,stjude,0,0,0,0,0,0,0,0,0,rDNA
0610005C13Rik,7,45217218,45224751,7534,-,havana,1,0,0,1,0,0,0,0,0,lncRNA
0610006L08Rik,7,74468566,74503561,34996,-,havana,0,0,0,0,0,0,0,0,0,lncRNA
0610009B22Rik,11,51576213,51579701,3489,-,ensembl_havana,50,47,48,40,52,68,28,85,41,protein_coding
0610009E02Rik,2,26335708,26349402,13695,+,havana,8,7,6,2,2,4,3,4,3,lncRNA


# df_count_ribo

In [289]:
fname_in <- sprintf("%s/blancgrp_161021_Riboseq.counts.raw.txt.gz", dir_riboseq)
verb('%s\n', fname_in)
df_count_ribo <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)

sym <- rownames(df_count_ribo)
if (grepl("isoform", level)) {
    idx <- match(rownames(df_count_ribo), df_gtf$transcript_name)
    df_count_ribo <- cbind(df_gtf[idx, 1:6], df_count_ribo)
    df_count_ribo$biotype <- df_gtf[idx, "transcript_biotype"]
    
} else {    
    idx <- match(rownames(df_count_ribo), df_gtf$gene_name)
    df_count_ribo <- cbind(df_gtf[idx, 1:6], df_count_ribo)    
    df_count_ribo$biotype <- df_gtf[idx, "gene_biotype"]
}

rownames(df_count_ribo) <- sym
head(df_count_ribo)

./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/blancgrp_161021_Riboseq.counts.raw.txt.gz


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,RD0175,RD0176,RD0177,RD0178,RD0179,RD0180,RD0184,RD0185,RD0186,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>
rDNA_promoter,BK000964.3,45306,45446,141,+,stjude,0,0,0,0,0,0,0,0,0,rDNA
rDNA_promoter1,BK000964.3,1,141,141,+,stjude,0,0,0,0,0,0,0,0,0,rDNA
0610005C13Rik,7,45217218,45224751,7534,-,havana,0,0,0,0,0,0,0,0,0,lncRNA
0610006L08Rik,7,74468566,74503561,34996,-,havana,0,0,0,0,0,0,0,0,0,lncRNA
0610009B22Rik,11,51576213,51579701,3489,-,ensembl_havana,32,34,28,12,12,19,35,38,31,protein_coding
0610009E02Rik,2,26335708,26349402,13695,+,havana,0,0,0,0,0,1,0,0,0,lncRNA


# df_transcription

In [290]:
# Randall totalRNA 
# log2cpm
fname_in <- sprintf("%s/blancgrp_211613_RNAseq_total_stranded.log2cpm.txt.gz", dir_rnaseq)
verb('%s\n', fname_in)
df_log2cpm <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_log2cpm) <- c('unt.transcription','tgfb.transcription','tgfbCX5461.transcription')

if (!is.null(pattern_remove_gene)) {
  dim(df_log2cpm)
  df_log2cpm <- df_log2cpm[!grepl(pattern_remove_gene, rownames(df_log2cpm)),]
}
head(df_log2cpm)
dim(df_log2cpm)

fname_in <- sprintf("%s/blancgrp_211613_RNAseq_total_stranded.unt--vs--tgfb.all.txt.gz", dir_rnaseq)
verb('%s\n', fname_in)
df_mrna <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_mrna) <- c('unt.transcription','tgfb.transcription',
                       'log2FCuntVStgfb.transcription','FDRuntVStgfb.transcription',
                       'PuntVStgfb.transcription')

if (!is.null(pattern_remove_gene)) {
  dim(df_mrna)
  df_mrna <- df_mrna[!grepl(pattern_remove_gene, rownames(df_mrna)),]
}
head(df_mrna)
dim(df_mrna)

fname_in <- sprintf("%s/blancgrp_211613_RNAseq_total_stranded.tgfb--vs--tgfbCX.all.txt.gz", dir_rnaseq)
verb('%s\n', fname_in)
df_mrna_cx <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_mrna_cx) <- c('tgfb.transcription','tgfbCX5461.transcription',
                          'log2FCtgfbVStgfbCX5461.transcription','FDRtgfbVStgfbCX5461.transcription',
                          'PtgfbVStgfbCX5461.transcription')

if (!is.null(pattern_remove_gene)) {
  dim(df_mrna_cx)
  df_mrna_cx <- df_mrna_cx[!grepl(pattern_remove_gene, rownames(df_mrna_cx)),]
}
head(df_mrna_cx)
dim(df_mrna_cx)

# https://adairama.wordpress.com/2017/11/22/how-to-merge-multiple-datasets-in-r-based-on-row-names/
mylist <- list(df_log2cpm[,c('unt.transcription','tgfb.transcription','tgfbCX5461.transcription')],
                df_mrna[,c('log2FCuntVStgfb.transcription',
                           'FDRuntVStgfb.transcription',
                           'PuntVStgfb.transcription')],
                df_mrna_cx[,c('log2FCtgfbVStgfbCX5461.transcription',
                           'FDRtgfbVStgfbCX5461.transcription',
                           'PtgfbVStgfbCX5461.transcription')])
for(i in 1:length(mylist)){
  #colnames(mylist[[i]]) <- paste0( names(mylist)[i], "_", colnames(mylist[[i]]) )
  mylist[[i]]$ROWNAMES  <- rownames(mylist[[i]])
}
df_transcription <- plyr::join_all(mylist, by="ROWNAMES", type="full")
rownames(df_transcription) <- df_transcription$ROWNAMES; df_transcription$ROWNAMES <- NULL


df_transcription$unt.tgfb.DEtranscription <- 'notSig'
df_transcription$tgfb.tgfbCX5461.DEtranscription <- 'notSig'
df_transcription$reversible.transcription <- 'notSig'

f_up <- df_transcription$log2FCuntVStgfb.transcription > th_log2fc & df_transcription$FDRuntVStgfb.transcription < th_adj_pvalue
f_dn <- df_transcription$log2FCtgfbVStgfbCX5461.transcription < -th_log2fc & df_transcription$FDRtgfbVStgfbCX5461.transcription < th_adj_pvalue
df_transcription$unt.tgfb.DEtranscription[f_up] <- 'up'
df_transcription$tgfb.tgfbCX5461.DEtranscription[f_dn] <- 'down'
df_transcription$reversible.transcription[f_up & f_dn] <- 'upDown'

f_dn <- df_transcription$log2FCuntVStgfb.transcription < -th_log2fc & df_transcription$FDRuntVStgfb.transcription < th_adj_pvalue
f_up <- df_transcription$log2FCtgfbVStgfbCX5461.transcription > th_log2fc & df_transcription$FDRtgfbVStgfbCX5461.transcription < th_adj_pvalue
df_transcription$unt.tgfb.DEtranscription[f_dn] <- 'down'
df_transcription$tgfb.tgfbCX5461.DEtranscription[f_up] <- 'up'
df_transcription$reversible.transcription[f_dn & f_up] <- 'downUp'

#tail(df_transcription)
df_transcription[c('Abcc2'),]
dim(df_transcription)

./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/blancgrp_211613_RNAseq_total_stranded.log2cpm.txt.gz


Unnamed: 0_level_0,unt.transcription,tgfb.transcription,tgfbCX5461.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
0610009B22Rik,-1.7361927,-1.77777696,-1.52337076
0610009E02Rik,-4.4542278,-5.89842646,-5.22659361
0610009L18Rik,-5.7348513,-5.04982074,-5.3231791
0610010F05Rik,-0.5443124,-0.1829955,-0.57995446
0610010K14Rik,-1.0822802,-1.73978964,-1.85796767
0610012G03Rik,0.1094487,0.09824498,0.09778162


./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/blancgrp_211613_RNAseq_total_stranded.unt--vs--tgfb.all.txt.gz


Unnamed: 0_level_0,unt.transcription,tgfb.transcription,log2FCuntVStgfb.transcription,FDRuntVStgfb.transcription,PuntVStgfb.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
4930402H24Rik,1.6107994,2.4726014,0.861802,0,0
Abcc2,-0.3106197,-3.832616,-3.521996,0,0
Abcc3,1.8096731,0.6774089,-1.132264,0,0
Abcc4,2.6411045,1.032585,-1.60852,0,0
Abcd3,1.8514176,0.3012823,-1.550135,0,0
Abhd17a,0.9001339,1.9114717,1.011338,0,0


./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/blancgrp_211613_RNAseq_total_stranded.tgfb--vs--tgfbCX.all.txt.gz


Unnamed: 0_level_0,tgfb.transcription,tgfbCX5461.transcription,log2FCtgfbVStgfbCX5461.transcription,FDRtgfbVStgfbCX5461.transcription,PtgfbVStgfbCX5461.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Ccng1,2.024061,3.005395,0.9813342,0.0,0
Cdkn1a,1.201262,2.903427,1.7021649,0.0,0
Ctsd,5.208313,5.777468,0.5691555,1e-11,0
Lce1g,2.005627,2.834313,0.8286863,1e-11,0
Slco2a1,2.261261,3.028303,0.7670417,3e-11,0
Itgb4,2.102091,2.894961,0.7928702,5e-11,0


Unnamed: 0_level_0,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,FDRuntVStgfb.transcription,PuntVStgfb.transcription,log2FCtgfbVStgfbCX5461.transcription,FDRtgfbVStgfbCX5461.transcription,PtgfbVStgfbCX5461.transcription,unt.tgfb.DEtranscription,tgfb.tgfbCX5461.DEtranscription,reversible.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Abcc2,-0.3106197,-3.832616,-3.871377,-3.521996,0,0,-0.03876149,0.9709617,0.9282336,down,notSig,notSig


# df_translation

In [291]:

fname_in <- sprintf("%s/blancgrp_161021_Riboseq.log2cpm.txt.gz", dir_riboseq)
verb('%s\n', fname_in)
df_log2cpm <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_log2cpm) <- c('unt.translation','tgfb.translation','tgfbCX5461.translation')

if (!is.null(pattern_remove_gene)) {
  dim(df_log2cpm)
  df_log2cpm <- df_log2cpm[!grepl(pattern_remove_gene, rownames(df_log2cpm)),]
}
head(df_log2cpm)
dim(df_log2cpm)

fname_in <- sprintf("%s/blancgrp_161021_Riboseq.unt48--vs--tgfb48.all.txt.gz", dir_riboseq)
verb('%s\n', fname_in)
df_ribo <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_ribo) <- c('unt.translation','tgfb.translation',
                       'log2FCuntVStgfb.translation','FDRuntVStgfb.translation',
                       'PuntVStgfb.translation')

if (!is.null(pattern_remove_gene)) {
  dim(df_ribo)
  df_ribo <- df_ribo[!grepl(pattern_remove_gene, rownames(df_ribo)),]
}
head(df_ribo)
dim(df_ribo)

fname_in <- sprintf("%s/blancgrp_161021_Riboseq.tgfb48--vs--tgfbCX5461.all.txt.gz", dir_riboseq)
verb('%s\n', fname_in)
df_ribo_cx <- read.table(file=fname_in, 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)
colnames(df_ribo_cx) <- c('tgfb.translation','tgfbCX5461.translation',
                          'log2FCtgfbVStgfbCX5461.translation','FDRtgfbVStgfbCX5461.translation',
                          'PtgfbVStgfbCX5461.translation')

if (!is.null(pattern_remove_gene)) {
  dim(df_ribo_cx)
  df_ribo_cx <- df_ribo_cx[!grepl(pattern_remove_gene, rownames(df_ribo_cx)),]
}
head(df_ribo_cx)
dim(df_ribo_cx)

# https://adairama.wordpress.com/2017/11/22/how-to-merge-multiple-datasets-in-r-based-on-row-names/
mylist <- list(df_log2cpm[,c('unt.translation','tgfb.translation','tgfbCX5461.translation')],
                df_ribo[,c('log2FCuntVStgfb.translation',
                           'FDRuntVStgfb.translation',
                           'PuntVStgfb.translation')],
                df_ribo_cx[,c('log2FCtgfbVStgfbCX5461.translation',
                           'FDRtgfbVStgfbCX5461.translation',
                           'PtgfbVStgfbCX5461.translation')])
for(i in 1:length(mylist)){
  #colnames(mylist[[i]]) <- paste0( names(mylist)[i], "_", colnames(mylist[[i]]) )
  mylist[[i]]$ROWNAMES  <- rownames(mylist[[i]])
}
df_translation <- plyr::join_all(mylist, by="ROWNAMES", type="full")
rownames(df_translation) <- df_translation$ROWNAMES; df_translation$ROWNAMES <- NULL

df_translation$unt.tgfb.DEtranslation <- 'notSig'
df_translation$tgfb.tgfbCX5461.DEtranslation <- 'notSig'
df_translation$reversible.translation <- 'notSig'

f_up <- df_translation$log2FCuntVStgfb.translation > th_log2fc & df_translation$FDRuntVStgfb.translation < th_adj_pvalue
f_dn <- df_translation$log2FCtgfbVStgfbCX5461.translation < -th_log2fc & df_translation$FDRtgfbVStgfbCX5461.translation < th_adj_pvalue
df_translation$unt.tgfb.DEtranslation[f_up] <- 'up'
df_translation$tgfb.tgfbCX5461.DEtranslation[f_dn] <- 'down'
df_translation$reversible.translation[f_up & f_dn] <- 'upDown'

f_dn <- df_translation$log2FCuntVStgfb.translation < -th_log2fc & df_translation$FDRuntVStgfb.translation < th_adj_pvalue
f_up <- df_translation$log2FCtgfbVStgfbCX5461.translation > th_log2fc & df_translation$FDRtgfbVStgfbCX5461.translation < th_adj_pvalue
df_translation$unt.tgfb.DEtranslation[f_dn] <- 'down'
df_translation$tgfb.tgfbCX5461.DEtranslation[f_up] <- 'up'
df_translation$reversible.translation[f_dn & f_up] <- 'downUp'

head(df_translation)
dim(df_translation)

./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/blancgrp_161021_Riboseq.log2cpm.txt.gz


Unnamed: 0_level_0,unt.translation,tgfb.translation,tgfbCX5461.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
0610009B22Rik,-0.1209301,-0.5623582,0.4593551
0610010F05Rik,-1.439017,-1.1130376,-1.8999308
0610010K14Rik,-0.7062234,-2.1409318,-0.6268219
0610012G03Rik,0.2537944,-0.2992177,-0.2840107
0610040J01Rik,-2.6671038,-3.6173387,-3.7617905
1110002E22Rik,-1.7847344,-2.3942601,-5.1504356


./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/blancgrp_161021_Riboseq.unt48--vs--tgfb48.all.txt.gz


Unnamed: 0_level_0,unt.translation,tgfb.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,PuntVStgfb.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Actb,8.246082,8.87654,0.6304577,0,0
Actn1,3.156675,4.771428,1.6147534,0,0
Aldoa,6.209498,5.230513,-0.9789853,0,0
Ankrd1,4.963155,6.810945,1.8477894,0,0
Anxa4,4.540031,2.988093,-1.5519384,0,0
Apob,5.403923,6.689152,1.2852282,0,0


./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/blancgrp_161021_Riboseq.tgfb48--vs--tgfbCX5461.all.txt.gz


Unnamed: 0_level_0,tgfb.translation,tgfbCX5461.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,PtgfbVStgfbCX5461.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Atf3,3.385307,5.081134,1.695826,0,0
Ccn2,7.709552,6.45164,-1.257912,0,0
Gm10616,5.937916,3.850963,-2.086953,0,0
Gm29371,5.548829,2.816191,-2.732638,0,0
Gm49450,2.970223,4.93098,1.960757,0,0
Krt8,7.227771,6.438771,-0.789,0,0


Unnamed: 0_level_0,unt.translation,tgfb.translation,tgfbCX5461.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,PuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,PtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0610009B22Rik,-0.1209301,-0.5623582,0.4593551,-0.4414281,0.30541555,0.14036813,1.02171324,0.02919136,0.002929355,notSig,up,notSig
0610010F05Rik,-1.439017,-1.1130376,-1.8999308,0.3259794,0.64123254,0.45306626,-0.78689319,0.41800085,0.183428443,notSig,notSig,notSig
0610010K14Rik,-0.7062234,-2.1409318,-0.6268219,-1.4347083,0.05167993,0.01144092,1.51410985,0.07704533,0.012849451,notSig,notSig,notSig
0610012G03Rik,0.2537944,-0.2992177,-0.2840107,-0.5530121,0.1382324,0.04346982,0.01520704,0.98121037,0.964044006,notSig,notSig,notSig
0610040J01Rik,-2.6671038,-3.6173387,-3.7617905,-0.9502349,0.49047068,0.29624483,-0.14445185,0.95563779,0.908020836,notSig,notSig,notSig
1110002E22Rik,-1.7847344,-2.3942601,-5.1504356,-0.6095257,0.493384,0.29970843,-2.75617555,0.17439999,0.043534588,notSig,notSig,notSig


# df_all

In [292]:
t(df_gtf[1:3,])

Unnamed: 0,1,4,7
seqnames,1,1,1
start,150956201,150983666,151012258
end,150958296,150984611,151013531
width,2096,946,1274
strand,+,+,+
source,havana,havana,havana
type,gene,gene,gene
score,,,
phase,,,
gene_id,ENSMUSG00000102628,ENSMUSG00000100595,ENSMUSG00000097426


In [293]:
df_all <- merge(df_transcription, df_translation, by = 0, all = T)
sym <- df_all$Row.names
rownames(df_all) <- sym
df_all$Row.names <- NULL

if (grepl("isoform", level)) {
    idx <- match(rownames(df_all), df_gtf$transcript_name)
    df_all <- cbind(df_gtf[idx, 1:6], df_all)
    df_all$biotype <- df_gtf[idx, "transcript_biotype"]
    
} else {    
    idx <- match(rownames(df_all), df_gtf$gene_name)
    df_all <- cbind(df_gtf[idx, 1:6], df_all)    
    df_all$biotype <- df_gtf[idx, "gene_biotype"]
}

rownames(df_all) <- sym

head(df_all)
dim(df_all)


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,PuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,PtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
0610009B22Rik,11,51576213,51579701,3489,-,ensembl_havana,-1.7361927,-1.77777696,-1.52337076,-0.04158431,...,-0.4414281,0.30541555,0.14036813,1.02171324,0.02919136,0.002929355,notSig,up,notSig,protein_coding
0610009E02Rik,2,26335708,26349402,13695,+,havana,-4.4542278,-5.89842646,-5.22659361,-1.44419866,...,,,,,,,,,,lncRNA
0610009L18Rik,11,120239504,120242016,2513,+,ensembl_havana,-5.7348513,-5.04982074,-5.3231791,0.68503055,...,,,,,,,,,,lncRNA
0610010F05Rik,11,23514961,23583639,68679,-,ensembl_havana,-0.5443124,-0.1829955,-0.57995446,0.36131693,...,0.3259794,0.64123254,0.45306626,-0.78689319,0.41800085,0.183428443,notSig,notSig,notSig,protein_coding
0610010K14Rik,11,70126032,70128740,2709,-,ensembl_havana,-1.0822802,-1.73978964,-1.85796767,-0.65750941,...,-1.4347083,0.05167993,0.01144092,1.51410985,0.07704533,0.012849451,notSig,notSig,notSig,protein_coding
0610012G03Rik,16,31765868,31767312,1445,-,havana,0.1094487,0.09824498,0.09778162,-0.01120375,...,-0.5530121,0.1382324,0.04346982,0.01520704,0.98121037,0.964044006,notSig,notSig,notSig,protein_coding


## fix biotype

In [294]:
# Snord43 ENSMUSG00000105167 was annotated as miRNA 
idx <- which(rownames(df_all)=="Snord43")

if (length(idx)==1) {
    df_all[idx, "biotype"] <- "snoRNA"
}
idx

## write df_all

In [295]:
fname_out <- sprintf("./data/emt.tables.%s.comprehensive_170224%s_161021%s.txt",
                     strdir, rundate_appendix, rundate_appendix)
fname_out
getwd()
write.table(df_all, file = fname_out,
            row.names = TRUE , col.names = NA ,  sep="\t"  , quote = FALSE  )

# unt48 vs. tgfb48

## detected gene symbols

### rnaseq detection

In [296]:

fname_rnaseq <- sprintf("blancgrp_211613_RNAseq_total_stranded.unt--vs--tgfb.all.txt.gz")
df_rnaseq <- read.table(file=sprintf("%s/%s", dir_rnaseq, fname_rnaseq), 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)

if (!is.null(pattern_remove_gene)) {
  dim(df_rnaseq)
  df_rnaseq <- df_rnaseq[!grepl(pattern_remove_gene, rownames(df_rnaseq)),]
}

# df_mrna$biotype
idx <- match(rownames(df_rnaseq), rownames(df_all))
any(is.na(idx)) # should be FALSE
df_rnaseq$biotype <- df_all[idx,'biotype']
biotype_rnaseq <- unique(df_rnaseq$biotype)
biotype_rnaseq

head(df_rnaseq)
dim(df_rnaseq)
sym_rnaseq <- rownames(df_rnaseq)
length(sym_rnaseq)


Unnamed: 0_level_0,unt,tgfb,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
4930402H24Rik,1.6107994,2.4726014,0.861802,0,0,protein_coding
Abcc2,-0.3106197,-3.832616,-3.521996,0,0,protein_coding
Abcc3,1.8096731,0.6774089,-1.132264,0,0,protein_coding
Abcc4,2.6411045,1.032585,-1.60852,0,0,protein_coding
Abcd3,1.8514176,0.3012823,-1.550135,0,0,protein_coding
Abhd17a,0.9001339,1.9114717,1.011338,0,0,protein_coding


### riboseq detection

In [297]:

fname_riboseq <- sprintf("blancgrp_161021_Riboseq.unt48--vs--tgfb48.all.txt.gz")
df_riboseq <- read.table(file=sprintf("%s/%s", dir_riboseq, fname_riboseq), 
                header=TRUE, sep="\t", row.names=1,
                quote="", comment.char="#", stringsAsFactors=F)

if (!is.null(pattern_remove_gene)) {
  dim(df_riboseq)
  df_riboseq <- df_riboseq[!grepl(pattern_remove_gene, rownames(df_riboseq)),]
}

# df_mrna$biotype
idx <- match(rownames(df_riboseq), rownames(df_all))
any(is.na(idx)) # should be FALSE
df_riboseq$biotype <- df_all[idx,'biotype']
biotype_riboseq <- unique(df_riboseq$biotype)
biotype_riboseq


head(df_riboseq)
dim(df_riboseq)
sym_riboseq <- rownames(df_riboseq)
length(sym_riboseq)
sym_riboseq <- unique(sym_riboseq)
length(sym_riboseq)


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Actb,8.246082,8.87654,0.6304577,0,0,protein_coding
Actn1,3.156675,4.771428,1.6147534,0,0,protein_coding
Aldoa,6.209498,5.230513,-0.9789853,0,0,protein_coding
Ankrd1,4.963155,6.810945,1.8477894,0,0,protein_coding
Anxa4,4.540031,2.988093,-1.5519384,0,0,protein_coding
Apob,5.403923,6.689152,1.2852282,0,0,protein_coding


#### protein_coding only

In [298]:
f <- grepl('^protein_coding$', df_riboseq$biotype)
df_ribo_wo_pseudo <- df_riboseq[f,]
head(df_ribo_wo_pseudo)
dim(df_ribo_wo_pseudo)

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Actb,8.246082,8.87654,0.6304577,0,0,protein_coding
Actn1,3.156675,4.771428,1.6147534,0,0,protein_coding
Aldoa,6.209498,5.230513,-0.9789853,0,0,protein_coding
Ankrd1,4.963155,6.810945,1.8477894,0,0,protein_coding
Anxa4,4.540031,2.988093,-1.5519384,0,0,protein_coding
Apob,5.403923,6.689152,1.2852282,0,0,protein_coding


### sym_detected_both

In [299]:
sym_detected_both <- intersect(sym_rnaseq, sym_riboseq)
length(sym_detected_both)

# total genes detected in shared between both datasets
# with GRCm38.97.gtf RSEM: 10212
# with GRCm38.97.gtf HTSEQ: 10150
# with GRCm38.97.rRNA.gtf: 10235

### sym_detected_rnaseq_only

In [300]:
sym_detected_rnaseq_only <- setdiff(sym_rnaseq, sym_riboseq)
length(sym_detected_rnaseq_only)

### sym_detected_riboseq_only

In [301]:
sym_detected_riboseq_only <- setdiff(sym_riboseq, sym_rnaseq)
length(sym_detected_riboseq_only)


## df_mrna

In [302]:
fname_mrna <- sprintf("blancgrp_211613_RNAseq_total_stranded.unt--vs--tgfb.diff-all.txt.gz")
df_mrna <- read.table(file = sprintf("%s/%s", dir_rnaseq, fname_mrna), header = TRUE, 
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = F)

if (!is.null(pattern_remove_gene)) {
    dim(df_mrna)
    df_mrna <- df_mrna[!grepl(pattern_remove_gene, rownames(df_mrna)), ]
}
head(df_mrna)
dim(df_mrna)

colnames(df_all)
idx <- which(df_all[, "unt.tgfb.DEtranscription"] != "notSig")
df_mrna <- df_mrna[rownames(df_all[idx, ]), ]

# df_mrna$biotype
idx <- match(rownames(df_mrna), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_mrna$biotype <- df_all[idx, "biotype"]

head(df_mrna)
dim(df_mrna)

if (f_write_table) {
    fname_out <- sprintf("./data/table/170224%s.%s.unt48--vs--tgfb48.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_mrna, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

vec_log2FC <- df_mrna$log2FC
vec_fdr <- df_mrna$FDR

Unnamed: 0_level_0,unt,tgfb,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Serpina1c,0.8882039,-8.561984,-9.450188,4e-10,1e-10
Apoc2,1.3537993,-7.987147,-9.340946,0.0,0.0
Ugt2b34,1.4160637,-7.903516,-9.31958,0.0,0.0
Dio3os,0.3207386,-8.561984,-8.882723,1.2283e-07,2.41e-08
Dio3,0.1231443,-8.561984,-8.685128,0.0,0.0
Itih2,-0.1366035,-8.561984,-8.425381,0.0,0.0


Unnamed: 0_level_0,unt,tgfb,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610009E02Rik,-4.4542278,-5.8984265,-1.4441987,0.02199116,0.0114521064,lncRNA
0610010F05Rik,-0.5443124,-0.1829955,0.3613169,0.008558582,0.0040252843,protein_coding
0610010K14Rik,-1.0822802,-1.7397896,-0.6575094,0.001465,0.0005896719,protein_coding
0610040J01Rik,-1.1236084,-4.7394168,-3.6158084,1.1e-10,0.0,protein_coding
1110002E22Rik,-0.7498065,-0.3413013,0.4085052,0.01645896,0.0082826295,protein_coding
1110002L01Rik,-1.6505374,-0.71763,0.9329074,5.682e-06,1.4535e-06,lncRNA


### sym_mrna_up

In [303]:
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_mrna_up <- df_mrna[f_up,]
sym_mrna_up <- rownames(df_mrna_up)
dim(df_mrna_up)

list_genes[['sym_mrna_up']] <- sym_mrna_up

### sym_mrna_dn

In [304]:
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_mrna_dn <- df_mrna[f_dn,]
sym_mrna_dn <- rownames(df_mrna_dn)
dim(df_mrna_dn)

list_genes[['sym_mrna_dn']] <- sym_mrna_dn

### sym_mrna_dn3x

In [305]:
f_dn3x <- (vec_fdr < th_adj_pvalue) & 
         (vec_log2FC > -log2(3.15) & vec_log2FC < -log2(2.75))
df_mrna_dn3x <- df_mrna[f_dn3x,]
sym_mrna_dn3x <- rownames(df_mrna_dn3x)
head(df_mrna_dn3x)
dim(df_mrna_dn3x)

list_genes[['sym_mrna_dn3x']] <- sym_mrna_dn3x

Unnamed: 0_level_0,unt,tgfb,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
9530085L11Rik,-4.220872,-5.76984,-1.548968,0.0211031647,0.0109409995,TEC
Abcc4,2.641104,1.032585,-1.60852,0.0,0.0,protein_coding
Abcd3,1.851418,0.3012823,-1.550135,0.0,0.0,protein_coding
Acaa1b,-2.754737,-4.2869754,-1.532238,0.0006501193,0.0002427825,protein_coding
Acox2,0.520881,-0.9882475,-1.509129,0.0,0.0,protein_coding
Adhfe1,-4.589387,-6.1869391,-1.597552,0.0312734336,0.016944786,protein_coding


### sym_mrna

In [306]:
sym_mrna <- union(sym_mrna_up, sym_mrna_dn)
length(sym_mrna)

df_mrna <- df_mrna[sym_mrna,,drop=F]

### sym_mrna_no

In [307]:
sym_mrna_no <- setdiff(sym_rnaseq, sym_mrna)
length(sym_mrna_no)

# Not DE in total RNA-seq for control vs. tgfb48
# with GRCm38.97.gtf RSEM: 7732
# with GRCm38.97.gtf HTSEQ: 7630
# with GRCm38.97.rRNA.gtf: 7703

### sym_mrna_de_detected_both

In [308]:
sym_mrna_de_detected_both <- intersect(sym_mrna, sym_detected_both)
length(sym_mrna_de_detected_both)


## df_ribo

In [309]:
# dir_riboseq <- '../data/limma-voom.mrna/161021/NMuMG' fname_ribo <-
# 'emt.161021.limma-voom.mrna.limma.gene.unt48--vs--tgfb48.diff-all.txt'
fname_ribo <- sprintf("blancgrp_161021_Riboseq.unt48--vs--tgfb48.diff-all.txt.gz")
df_ribo <- read.table(file = sprintf("%s/%s", dir_riboseq, fname_ribo), header = TRUE, 
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = F)

if (!is.null(pattern_remove_gene)) {
    dim(df_ribo)
    df_ribo <- df_ribo[!grepl(pattern_remove_gene, rownames(df_ribo)), ]
}
head(df_ribo)
dim(df_ribo)

idx <- which(df_all[, "unt.tgfb.DEtranslation"] != "notSig")
df_ribo <- df_ribo[rownames(df_all[idx, ]), ]

# df_mrna$biotype
idx <- match(rownames(df_ribo), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_ribo$biotype <- df_all[idx, "biotype"]

head(df_ribo)
dim(df_ribo)


if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.%s.unt48--vs--tgfb48.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_ribo, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

vec_log2FC <- df_ribo$log2FC
vec_fdr <- df_ribo$FDR


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Akr1c19,1.2798329,-5.394223,-6.674056,3.23176e-06,1.196e-07
Gm22973,0.4065654,-5.394223,-5.800788,0.0002505955,1.79206e-05
Lgals4,4.0592875,-1.190004,-5.249291,0.0,0.0
Gm10076,1.67979,-3.459413,-5.139203,0.0006104391,5.02679e-05
Apoc2,-0.3649858,-5.394223,-5.029237,0.0002057103,1.41371e-05
Ugt2b34,2.0156044,-2.992556,-5.008161,6.0056e-07,1.75e-08


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1110038B12Rik,2.6703556,2.3172192,-0.3531364,0.02700556,0.0049270853,lncRNA
1110059E24Rik,0.1306316,-0.8647351,-0.9953667,0.02004643,0.0034012561,protein_coding
1500011B03Rik,-0.639762,-2.0752952,-1.4355331,0.02352269,0.0041348011,protein_coding
1700015I17Rik,-3.3048711,-1.0700091,2.2348621,0.008306292,0.0011381517,lncRNA
1700017B05Rik,0.9365901,2.0120754,1.0754853,1.137532e-05,4.984e-07,protein_coding
1700037H04Rik,0.156265,-1.1485832,-1.3048481,0.01614225,0.0025791538,protein_coding


### sym_ribo_up

In [310]:
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_ribo_up <- df_ribo[f_up,]
sym_ribo_up <- rownames(df_ribo_up)
dim(df_ribo_up)

list_genes[['sym_ribo_up']] <- sym_ribo_up

### sym_ribo_dn

In [311]:
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_ribo_dn <- df_ribo[f_dn,]
sym_ribo_dn <- rownames(df_ribo_dn)
dim(df_ribo_dn)

list_genes[['sym_ribo_dn']] <- sym_ribo_dn

### sym_ribo_dn3x

In [312]:
f_dn3x <- (vec_fdr < th_adj_pvalue) &
         (vec_log2FC > -log2(3.15) & vec_log2FC < -log2(2.75))
df_ribo_dn3x <- df_ribo[f_dn3x,]
sym_ribo_dn3x <- rownames(df_ribo_dn3x)
head(df_ribo_dn3x)
dim(df_ribo_dn3x)

list_genes[['sym_ribo_dn3x']] <- sym_ribo_dn3x

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Abcc3,1.654898,0.04332324,-1.611575,1.186433e-05,5.251e-07,protein_coding
Agpat1,-0.4103686,-1.89449402,-1.484125,0.037707,0.0075080874,protein_coding
Aldoc,0.4498243,-1.161534,-1.611358,0.0007964878,6.80759e-05,protein_coding
Angpt2,-0.7769103,-2.41493036,-1.63802,0.02014007,0.0034210259,protein_coding
Anxa4,4.5400311,2.98809266,-1.551938,0.0,0.0,protein_coding
Arpc5l,1.2752314,-0.26209684,-1.537328,5.123173e-05,2.7798e-06,protein_coding


### sym_ribo

In [313]:
sym_ribo <- union(sym_ribo_up, sym_ribo_dn)
length(sym_ribo)

df_ribo <- df_ribo[sym_ribo,,drop=F]

### sym_ribo_no

In [314]:
sym_ribo_no <- setdiff(sym_riboseq, sym_ribo)
length(sym_ribo_no)

### sym_ribo_de_detected_both

In [315]:
sym_ribo_de_detected_both <- intersect(sym_ribo, sym_detected_both)
length(sym_ribo_de_detected_both)

## biotype

In [316]:
unique(df_ribo$biotype)
length(which(df_ribo$biotype=='processed_pseudogene'))

## protein_coding only

In [317]:
f <- grepl('^protein_coding$', df_ribo$biotype)
df_ribo_wo_pseudo <- df_ribo[f,]
head(df_ribo_wo_pseudo)
dim(df_ribo_wo_pseudo)

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1700017B05Rik,0.9365901,2.0120754,1.0754853,1.137532e-05,4.984e-07,protein_coding
4930402H24Rik,0.3611133,1.72813,1.3670168,4.40685e-06,1.697e-07,protein_coding
9130019O22Rik,-4.6965296,-1.7175273,2.9790024,0.04080761,0.008374356,protein_coding
9930111J21Rik1,-3.2764441,1.2721796,4.5486236,9.3665e-07,2.97e-08,protein_coding
9930111J21Rik2,-4.2238439,-0.4650922,3.7587516,0.001088954,9.91936e-05,protein_coding
Abcf3,0.3664803,1.0211312,0.6546509,0.02767077,0.0050876573,protein_coding


## shared between both rnaseq and riboseq

### sym_mrna_ribo_no

In [318]:
sym_mrna_ribo_no <- intersect(sym_mrna_no, sym_ribo_no)
length(sym_mrna_ribo_no)


### sym_mrna_up_ribo_up

In [319]:
# detected in both platforms
sym_mrna_up_ribo_up <- intersect(sym_mrna_up, sym_ribo_up)
length(sym_mrna_up_ribo_up)

list_genes[['sym_mrna_up_ribo_up']] <- sym_mrna_up_ribo_up

### sym_mrna_dn_ribo_dn

In [320]:
# detected in both platforms
sym_mrna_dn_ribo_dn <- intersect(sym_mrna_dn, sym_ribo_dn)
length(sym_mrna_dn_ribo_dn)

list_genes[['sym_mrna_dn_ribo_dn']] <- sym_mrna_dn_ribo_dn

### sym_mrna_up_ribo_dn

In [321]:
# detected in both platforms
sym_mrna_up_ribo_dn <- intersect(sym_mrna_up, sym_ribo_dn)
sym_mrna_up_ribo_dn
length(sym_mrna_up_ribo_dn)

list_genes[['sym_mrna_up_ribo_dn']] <- sym_mrna_up_ribo_dn

### sym_mrna_dn_ribo_up

In [322]:
# detected in both platforms
sym_mrna_dn_ribo_up <- intersect(sym_mrna_dn, sym_ribo_up)
sym_mrna_dn_ribo_up
length(sym_mrna_dn_ribo_up)

list_genes[['sym_mrna_dn_ribo_up']] <- sym_mrna_dn_ribo_up

## transcription & translation total

In [323]:
length(sym_mrna)
length(sym_ribo)

# detected in both platforms
sym_mrna_de_ribo_de <- intersect(sym_mrna, sym_ribo)
length(sym_mrna_de_ribo_de)

In [324]:
# detected in both platforms
length(sym_mrna_de_detected_both)
length(sym_ribo_de_detected_both)
sym_mrna_de_ribo_de <- intersect(sym_mrna_de_detected_both, sym_ribo_de_detected_both)
length(sym_mrna_de_ribo_de)

### validation

In [325]:
length(sym_mrna_up_ribo_up) + length(sym_mrna_dn_ribo_dn) +
length(sym_mrna_up_ribo_dn) + length(sym_mrna_dn_ribo_up)

## df_mrna_only

In [326]:
sym_mrna_only <- setdiff(sym_mrna, sym_ribo)
length(sym_mrna_only)

# subsetting df_mrna
df_mrna_only <- df_mrna[sym_mrna_only,]
head(df_mrna_only)
dim(df_mrna_only)

Unnamed: 0_level_0,unt,tgfb,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610010F05Rik,-0.5443124,-0.1829955,0.3613169,0.008558582,0.0040252843,protein_coding
1110002E22Rik,-0.7498065,-0.3413013,0.4085052,0.01645896,0.0082826295,protein_coding
1110002L01Rik,-1.6505374,-0.71763,0.9329074,5.682e-06,1.4535e-06,lncRNA
1110046J04Rik,-4.4554433,-2.6479122,1.8075311,7.382651e-05,2.30857e-05,lncRNA
1600012H06Rik,-0.4952647,0.155596,0.6508607,2.93835e-06,7.218e-07,protein_coding
1700001C19Rik,-5.6609668,-4.4226956,1.2382712,0.04557368,0.0258838147,protein_coding


### check riboseq FDR

In [327]:
f <- rownames(df_riboseq) %in% rownames(df_mrna_only)
head(df_riboseq[f,])

any(abs(df_riboseq[f,'log2FC']) > th_log2fc & df_riboseq[f,'FDR'] < th_adj_pvalue)

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
H1f2,5.276187,5.057592,-0.21859501,0.005336126,0.0006568977,protein_coding
Ndufs7,8.026788,8.149887,0.12309913,0.013974819,0.002161799,protein_coding
Eef1a1,8.187685,8.253094,0.06540837,0.019934998,0.0033728993,protein_coding
Ppia,4.605999,4.369546,-0.23645393,0.027579985,0.0050686671,protein_coding
Lmna,4.150486,4.358623,0.20813748,0.035081546,0.0068595856,protein_coding
Fth1,4.412744,4.175521,-0.23722277,0.04294812,0.0089561757,protein_coding


In [328]:
vec_log2FC <- df_mrna_only$log2FC
vec_fdr <- df_mrna_only$FDR

### sym_mrna_up_ribo_no

In [329]:
f_up <- vec_fdr < th_adj_pvalue & vec_log2FC > th_log2fc
df_mrna_only_up <- df_mrna_only[f_up,]
sym_mrna_up_ribo_no <- rownames(df_mrna_only_up)
dim(df_mrna_only_up)

# detected in both platforms
sym_mrna_up_ribo_no <- intersect(sym_mrna_up_ribo_no, sym_detected_both)
length(sym_mrna_up_ribo_no)

list_genes[['sym_mrna_up_ribo_no']] <- sym_mrna_up_ribo_no

### sym_mrna_dn_ribo_no

In [330]:
f_dn <- vec_fdr < th_adj_pvalue & vec_log2FC < -th_log2fc
df_mrna_only_dn <- df_mrna_only[f_dn,]
sym_mrna_dn_ribo_no <- rownames(df_mrna_only_dn)
dim(df_mrna_only_dn)

# detected in both platforms
sym_mrna_dn_ribo_no <- intersect(sym_mrna_dn_ribo_no, sym_detected_both)
length(sym_mrna_dn_ribo_no)

list_genes[['sym_mrna_dn_ribo_no']] <- sym_mrna_dn_ribo_no

### sym_mrna_only (=sym_mrna_de_ribo_no)

In [331]:
# detected in both platforms
sym_mrna_de_ribo_no <- union(sym_mrna_up_ribo_no, sym_mrna_dn_ribo_no)
length(sym_mrna_de_ribo_no)

sym_mrna_only <- sym_mrna_de_ribo_no
length(sym_mrna_only)

df_mrna_only <- df_mrna[sym_mrna_only, ]
if (f_write_table) {
    fname_out <- sprintf("./data/table/170224%s.%s.unt48--vs--tgfb48.diff-all.transcriptionONLY.txt", 
        rundate_appendix, strdir)
    write.table(df_mrna_only, file = fname_out, row.names = TRUE, col.names = NA, 
        sep = "\t", quote = FALSE)
}

##  df_ribo_only

In [332]:
sym_ribo_only <- setdiff(sym_ribo, sym_mrna)
length(sym_ribo_only)

if (!is.null(pattern_remove_gene)) {
  any(grepl(pattern_remove_gene, sym_ribo_only)) # should be FALSE
}

# subsetting df_ribo
df_ribo_only <- df_ribo[sym_ribo_only,]

head(df_ribo_only)
dim(df_ribo_only)


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1700015I17Rik,-3.3048711,-1.0700091,2.2348621,0.008306292,0.0011381517,lncRNA
4931419H13Rik,-1.7459926,-0.2932365,1.4527561,0.006859988,0.0008930906,lncRNA
5S_rRNA,10.5330843,11.3966448,0.8635606,0.012771123,0.0019380505,rRNA
9130019O22Rik,-4.6965296,-1.7175273,2.9790024,0.04080761,0.008374356,protein_coding
Acadl,1.6545088,2.443806,0.7892972,0.000104799,6.4549e-06,protein_coding
Acbd3,0.6329265,1.2734654,0.6405389,0.017595871,0.0028759378,protein_coding


In [333]:
length(intersect(sym_ribo_only, sym_detected_both))

In [334]:
vec_log2FC <- df_ribo_only$log2FC
vec_fdr <- df_ribo_only$FDR

### sym_mrna_no_ribo_up

In [335]:
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_ribo_only_up <- df_ribo_only[f_up,]
sym_mrna_no_ribo_up <- rownames(df_ribo_only_up)
dim(df_ribo_only_up)

# detected in both platforms
sym_mrna_no_ribo_up <- intersect(sym_mrna_no_ribo_up, sym_detected_both)
length(sym_mrna_no_ribo_up)

list_genes[['sym_mrna_no_ribo_up']] <- sym_mrna_no_ribo_up

### sym_mrna_no_ribo_dn

In [336]:
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_ribo_only_dn <- df_ribo_only[f_dn,]
sym_mrna_no_ribo_dn <- rownames(df_ribo_only_dn)
dim(df_ribo_only_dn)

# detected in both platforms
sym_mrna_no_ribo_dn <- intersect(sym_mrna_no_ribo_dn, sym_detected_both)
length(sym_mrna_no_ribo_dn)

list_genes[['sym_mrna_no_ribo_dn']] <- sym_mrna_no_ribo_dn

### sym_ribo_only (=sym_mrna_no_ribo_de)

In [337]:
# detected in both platforms
sym_mrna_no_ribo_de <- union(sym_mrna_no_ribo_up, sym_mrna_no_ribo_dn)
length(sym_mrna_no_ribo_de)

# here, sym_ribo_only was redefined.
# sym_ribo_only were detected in both platforms.
sym_ribo_only <- sym_mrna_no_ribo_de
length(sym_ribo_only)

df_ribo_only <- df_ribo[sym_ribo_only, ]

if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.NMuMG.%s.unt48--vs--tgfb48.diff-all.translationONLY.txt", 
        rundate_appendix, strdir)
    write.table(df_ribo_only, file = fname_out, row.names = TRUE, col.names = NA, 
        sep = "\t", quote = FALSE)
}

## biotype

In [338]:
unique(df_all$biotype)
f_pseudo <- grepl('pseudo', df_all$biotype)
sym_pseudo <- rownames(df_all[f_pseudo,])

length(setdiff(sym_mrna_no_ribo_up, sym_pseudo))
length(setdiff(sym_mrna_no_ribo_dn, sym_pseudo))

## no DE

In [339]:
n_detected <- length(sym_detected_both)
n_detected

sym_de_both <- union(sym_mrna_de_detected_both, sym_ribo_de_detected_both)

sym_no_de_both <- setdiff(sym_detected_both, sym_de_both)
length(sym_no_de_both)

### validation

In [340]:
# de both & dectected in both platforms
intersect(sym_mrna_only, sym_mrna_de_ribo_de)
intersect(sym_ribo_only, sym_mrna_de_ribo_de)
sym_de <- union(sym_mrna_only, sym_ribo_only)
sym_de <- union(sym_de, sym_mrna_de_ribo_de)
n_de <- length(sym_de)
n_de

sym_no_de_both1 <- setdiff(sym_detected_both, sym_de)
length(sym_no_de_both1)

setequal(sym_no_de_both, sym_no_de_both1)

# tgfb48 vs tgfbCX5461

## df_mrna2

In [341]:
fname_in <- sprintf("blancgrp_211613_RNAseq_total_stranded.tgfb--vs--tgfbCX.diff-all.txt.gz")
df_mrna2 <- read.table(file = sprintf("%s/%s", dir_rnaseq, fname_in), header = TRUE, 
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = F)

if (!is.null(pattern_remove_gene)) {
    dim(df_mrna2)
    df_mrna2 <- df_mrna2[!grepl(pattern_remove_gene, rownames(df_mrna2)), ]
}
head(df_mrna2)
dim(df_mrna2)

idx <- which(df_all[, "tgfb.tgfbCX5461.DEtranscription"] != "notSig")
df_mrna2 <- df_mrna2[rownames(df_all[idx, ]), ]

# df_mrna$biotype
idx <- match(rownames(df_mrna2), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_mrna2$biotype <- df_all[idx, "biotype"]

head(df_mrna2)
dim(df_mrna2)

if (f_write_table) {
    fname_out <- sprintf("./data/table/170224%s.%s.tgfb48--vs--tgfb48cx5461100nm.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_mrna2, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

Unnamed: 0_level_0,tgfb,tgfbCX,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Gm48048,-4.665601,-8.134377,-3.468776,0.004385847,0.0001915553
Gm12435,-4.397995,-7.534412,-3.136417,0.017733015,0.0012461922
Serpina1e,-4.575426,-7.534598,-2.959172,0.022591396,0.001775212
Etohd2,-5.232751,-8.134377,-2.901626,0.037200411,0.0037040237
Samd5,-4.041869,-6.646292,-2.604423,0.015702795,0.0010528093
Kcnk10,-3.891947,-6.099245,-2.207298,0.037499985,0.0037539439


Unnamed: 0_level_0,tgfb,tgfbCX,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610010F05Rik,-0.1829955,-0.5799545,-0.396959,0.041485181,0.0043446887,protein_coding
1110002L01Rik,-0.71763,-0.2905577,0.4270723,0.031212722,0.0028870448,lncRNA
1700017B05Rik,2.4528345,2.7315583,0.2787238,0.003380659,0.0001369835,protein_coding
1810013L24Rik,1.247026,1.5439815,0.2969555,0.006595667,0.0003321669,protein_coding
2210418O10Rik,-2.6406196,-4.1794167,-1.5387971,0.045975213,0.0050478705,protein_coding
2510039O18Rik,0.9820496,1.2884085,0.3063589,0.012466718,0.0007706629,protein_coding


In [342]:
vec_log2FC <- df_mrna2$log2FC
vec_fdr <- df_mrna2$FDR


### sym_mrna2_up

In [343]:
f_up <- vec_fdr < th_adj_pvalue & vec_log2FC > th_log2fc
df_mrna2_up <- df_mrna2[f_up,]
sym_mrna2_up <- rownames(df_mrna2_up)
dim(df_mrna2_up)

list_genes[['sym_mrna2_up']] <- sym_mrna2_up

### sym_mrna2_dn

In [344]:

f_dn <- vec_fdr < th_adj_pvalue & vec_log2FC < -th_log2fc
df_mrna2_dn <- df_mrna2[f_dn,]
sym_mrna2_dn <- rownames(df_mrna2_dn)
dim(df_mrna2_dn)

list_genes[['sym_mrna2_dn']] <- sym_mrna2_dn

### sym_mrna2

In [345]:
sym_mrna2 <- union(sym_mrna2_up, sym_mrna2_dn)
length(sym_mrna2)

df_mrna2 <- df_mrna2[sym_mrna2,,drop=F]

### sym_mrna2_no

In [346]:
sym_mrna2_no <- setdiff(sym_rnaseq, sym_mrna2)
length(sym_mrna2_no)

# Not DE in total RNA-seq for tgfb48 vs. tgfbCX
# with GRCm38.97.gtf RSEM: 10447
# with GRCm38.97.gtf HTSEQ: 10125
# with GRCm38.97.rRNA.gtf: 10456

## df_ribo2

In [347]:
# dir_riboseq <- '../data/limma-voom.mrna/161021/NMuMG' fname_ribo <-
# 'emt.161021.limma-voom.mrna.limma.gene.tgfb48--vs--tgfbCX5461100nm.diff-all.txt'
fname_ribo <- sprintf("blancgrp_161021_Riboseq.tgfb48--vs--tgfbCX5461.diff-all.txt.gz")

df_ribo2 <- read.table(file = sprintf("%s/%s", dir_riboseq, fname_ribo), header = TRUE, 
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = F)

if (!is.null(pattern_remove_gene)) {
    dim(df_ribo2)
    df_ribo2 <- df_ribo2[!grepl(pattern_remove_gene, rownames(df_ribo2)), ]
}
head(df_ribo2)
dim(df_ribo2)

idx <- which(df_all[, "tgfb.tgfbCX5461.DEtranslation"] != "notSig")
df_ribo2 <- df_ribo2[rownames(df_all[idx, ]), ]

# df_mrna$biotype
idx <- match(rownames(df_ribo2), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_ribo2$biotype <- df_all[idx, "biotype"]

head(df_ribo2)
dim(df_ribo2)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, rownames(df_ribo2)))  # should be FALSE
}

if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.%s.tgfb48--vs--tgfbCX5461100nm.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_ribo2, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

Unnamed: 0_level_0,tgfb48,tgfbCX5461,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Gm23238,3.9645862,-4.513182,-8.477768,1.004305e-05,1.482e-07
Gm5526,2.2208786,-5.645228,-7.866107,5.819154e-05,1.104e-06
Ccdc7a,0.7046368,-5.645228,-6.349865,0.001484941,5.68085e-05
Gm9512,1.9202386,-4.266225,-6.186464,0.0002785005,7.6833e-06
Gm22739,1.4669296,-4.513182,-5.980111,0.04614975,0.0056650787
B230208H11Rik,8.0881136,2.141498,-5.946615,1e-11,0.0


Unnamed: 0_level_0,tgfb48,tgfbCX5461,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610009B22Rik,-0.5623582,0.4593551,1.0217132,0.0291913586,0.0029293545,protein_coding
1700015I17Rik,-1.0700091,-4.6043968,-3.5343878,0.0375198024,0.0041560108,lncRNA
1810013L24Rik,1.98121,2.8701991,0.8889891,0.0001134758,2.4969e-06,protein_coding
2200002D01Rik,1.5131712,2.3211691,0.8079979,0.0017129703,6.91013e-05,protein_coding
2310030G06Rik,-0.1652,0.9174211,1.0826212,0.0145442189,0.0011224291,protein_coding
2610307P16Rik,-2.1443849,-0.3826178,1.7617671,0.0325815452,0.003467028,lncRNA


In [348]:
vec_log2FC <- df_ribo2$log2FC
vec_fdr <- df_ribo2$FDR


### sym_ribo2_up

In [349]:
f_up <- vec_fdr < th_adj_pvalue & vec_log2FC > th_log2fc
df_ribo2_up <- df_ribo2[f_up,]
sym_ribo2_up <- rownames(df_ribo2_up)
dim(df_ribo2_up)

list_genes[['sym_ribo2_up']] <- sym_ribo2_up

### sym_ribo2_dn

In [350]:
f_dn <- vec_fdr < th_adj_pvalue & vec_log2FC < -th_log2fc
df_ribo2_dn <- df_ribo2[f_dn,]
sym_ribo2_dn <- rownames(df_ribo2_dn)
dim(df_ribo2_dn)

list_genes[['sym_ribo2_dn']] <- sym_ribo2_dn

### sym_ribo2

In [351]:
sym_ribo2 <- union(sym_ribo2_up, sym_ribo2_dn)
length(sym_ribo2)

df_ribo2 <- df_ribo2[sym_ribo2,,drop=F]

### sym_ribo2_no

In [352]:
sym_ribo2_no <- setdiff(sym_riboseq, sym_ribo2)
length(sym_ribo2_no)

# Not DE in ribosome profiling for tgfb48 vs. tgfbCX
# with GRCm38.97.gtf RSEM: 10009
# with GRCm38.97.gtf HTSEQ: 8958
# with GRCm38.97.rRNA.gtf: 9984

## shared between both rnaseq and riboseq

### sym_mrna2_no_ribo2_no

In [353]:
sym_mrna2_no_ribo2_no <- intersect(sym_mrna2_no, sym_ribo2_no)
length(sym_mrna2_no_ribo2_no)
sym_mrna2_no_ribo2_no <- intersect(sym_mrna2_no_ribo2_no, sym_detected_both)
length(sym_mrna2_no_ribo2_no)

# Not DE in both platform for tgfb48 vs. tgfbCX
# with GRCm38.97.gtf RSEM: 6920
# with GRCm38.97.gtf HTSEQ: 6379
# with GRCm38.97.rRNA.gtf: 6917

### sym_mrna2_up_ribo2_no

In [354]:
sym_mrna2_up_ribo2_no <- setdiff(sym_mrna2_up, sym_ribo2)
length(sym_mrna2_up_ribo2_no)
sym_mrna2_up_ribo2_no <- intersect(sym_mrna2_up_ribo2_no, sym_detected_both)
length(sym_mrna2_up_ribo2_no)

list_genes[['sym_mrna2_up_ribo2_no']] <- sym_mrna2_up_ribo2_no

### sym_mrna2_dn_ribo2_no

In [355]:
sym_mrna2_dn_ribo2_no <- setdiff(sym_mrna2_dn, sym_ribo2)
length(sym_mrna2_dn_ribo2_no)
sym_mrna2_dn_ribo2_no <- intersect(sym_mrna2_dn_ribo2_no, sym_detected_both)
length(sym_mrna2_dn_ribo2_no)

list_genes[['sym_mrna2_dn_ribo2_no']] <- sym_mrna2_dn_ribo2_no

### sym_mrna2_no_ribo2_up

In [356]:
sym_mrna2_no_ribo2_up <- setdiff(sym_ribo2_up, sym_mrna2)
length(sym_mrna2_no_ribo2_up)
sym_mrna2_no_ribo2_up <- intersect(sym_mrna2_no_ribo2_up, sym_detected_both)
length(sym_mrna2_no_ribo2_up)

list_genes[['sym_mrna2_no_ribo2_up']] <- sym_mrna2_no_ribo2_up

### sym_mrna2_no_ribo2_dn

In [357]:
sym_mrna2_no_ribo2_dn <- setdiff(sym_ribo2_dn, sym_mrna2)
length(sym_mrna2_no_ribo2_dn)
sym_mrna2_no_ribo2_dn <- intersect(sym_mrna2_no_ribo2_dn, sym_detected_both)
length(sym_mrna2_no_ribo2_dn)

list_genes[['sym_mrna2_no_ribo2_dn']] <- sym_mrna2_no_ribo2_dn

### sym_mrna2_up_ribo2_up

In [358]:
sym_mrna2_up_ribo2_up <- intersect(sym_mrna2_up, sym_ribo2_up)
sym_mrna2_up_ribo2_up
length(sym_mrna2_up_ribo2_up)

list_genes[['sym_mrna2_up_ribo2_up']] <- sym_mrna2_up_ribo2_up

### sym_mrna2_dn_ribo2_dn


In [359]:
sym_mrna2_dn_ribo2_dn <- intersect(sym_mrna2_dn, sym_ribo2_dn)
sym_mrna2_dn_ribo2_dn
length(sym_mrna2_dn_ribo2_dn)

list_genes[['sym_mrna2_dn_ribo2_dn']] <- sym_mrna2_dn_ribo2_dn

### sym_mrna2_up_ribo2_dn

In [360]:
sym_mrna2_up_ribo2_dn <- intersect(sym_mrna2_up, sym_ribo2_dn)
sym_mrna2_up_ribo2_dn
length(sym_mrna2_up_ribo2_dn)

list_genes[['sym_mrna2_up_ribo2_dn']] <- sym_mrna2_up_ribo2_dn

### sym_mrna2_dn_ribo2_up

In [361]:
sym_mrna2_dn_ribo2_up <- intersect(sym_mrna2_dn, sym_ribo2_up)
sym_mrna2_dn_ribo2_up
length(sym_mrna2_dn_ribo2_up)

list_genes[['sym_mrna2_dn_ribo2_up']] <- sym_mrna2_dn_ribo2_up

# unt48 vs. tgfb48 vs. tgfbCX5461

## sym_mrna1_no_mrna2_no

In [362]:
sym_mrna1_no_mrna2_no <- intersect(sym_mrna_no, sym_mrna2_no)
length(sym_mrna1_no_mrna2_no)

# No DE; No DE in total RNA-seq
# with GRCm38.97.gtf RSEM: 6507
# with GRCm38.97.gtf HTSEQ: 6244
# with GRCm38.97.rRNA.gtf: 6487

## sym_ribo1_no_ribo2_no

In [363]:
sym_ribo1_no_ribo2_no <- intersect(sym_ribo_no, sym_ribo2_no)
length(sym_ribo1_no_ribo2_no)

# No DE; No DE in ribosome profiling
# with GRCm38.97.gtf RSEM: 8726
# with GRCm38.97.gtf HTSEQ: 7228
# with GRCm38.97.rRNA.gtf: 8720

## sym_mrna_no_ribo_no

In [364]:
sym_mrna_no_ribo_no <- intersect(sym_mrna1_no_mrna2_no, sym_ribo1_no_ribo2_no)
length(sym_mrna_no_ribo_no)

# No DE; No De in both platform
# with GRCm38.97.gtf RSEM: 4019
# with GRCm38.97.gtf HTSEQ: 3516
# with GRCm38.97.rRNA.gtf: 4015

## more complex intersections


### up/dn/only ; no DE

In [365]:
vec1_var <- c('mrna_up_ribo_up', 'mrna_dn_ribo_dn', 'mrna_up_ribo_dn', 'mrna_dn_ribo_up',
              'mrna_up_ribo_no', 'mrna_dn_ribo_no', 'mrna_no_ribo_up', 'mrna_no_ribo_dn')

vec2_var <- c('ribo2_no')

max_ngenes <- 50
for (var1 in vec1_var) {
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      list_genes[[strvar]] <- eval(parse(text=sprintf("intersect(sym_%s, sym_%s)", var1, var2)))
      list_genes[[strvar]] <- intersect(list_genes[[strvar]], sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]],max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          strgene <- paste0(strgene,',...')
      }
      verb('%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

sym_mrna_up_ribo_up_ribo2_no: 598: 1700017B05Rik,4930402H24Rik,9930111J21Rik2,Abcg2,Abl1,Abr,Abracl,Acadvl,Acot9,Acta2,Actn4,Actr1a,Acvr1,Adam15,Adam8,Adarb1,Add2,Adgra1,Adssl1,Afap1,Aff1,Aif1l,Aldh1l1,Anapc10,Angptl2,Ankrd1,Ankrd13b,Ankrd44,Ano1,Anxa8,Ap2a1,Ap2b1,Ap5b1,Apbb2,Apol9b,Arc,Arfgef2,Arhgap23,Arhgap42,Arhgef18,Arhgef19,Arhgef5,Armc9,Arsb,ARVCF,Ascc3,Atg7,Atp10d,Atp1a3,Atp2b4,...

sym_mrna_dn_ribo_dn_ribo2_no: 560: 2310009B15Rik,Abcc2,Abcc3,Abcc4,Abcc5,Abcd3,Abhd6,Acadm,Acbd5,Acot2,Acsl4,Acsl5,Acy3,Adh5,Adh7,Adora1,Adsl,Adss,Afp,Agfg1,Agmo,Agpat2,Agpat5,Ak3,Akr1b7,Akr1c12,Akr1c13,Akr1c19,Alad,Alcam,Aldh4a1,Alkbh7,Anapc13,Angpt2,Ank,Anks4b,Antxr2,Anxa11,Anxa13,Anxa4,Apoc2,Arf6,Arhgap18,Arhgap40,Asns,Atf5,Atp1a1,Atp5a1,Atp5e,Atp5o,...

sym_mrna_up_ribo_dn_ribo2_no: 33: 2410006H16Rik,Adam9,Agpat1,Akt1,Aph1a,Atox1,Atp6v0a1,Bola2,Cmtm6,Cyb5r4,Dusp1,Eipr1,Fam13b,Fbxo42,Fgfr1op2,Gch1,Gpr137b-ps,Hsdl1,Jund,Leprotl1,Lsm14b,Msln,Ndrg1,Nfe2l1,Ptpra,Senp3,Serf2,Tmsb10,Trim8,Trir,Ywhag,Zf

### no DE ; up/dn/only

In [366]:
vec1_var <- c('mrna_no')

vec2_var <- c('mrna2_up_ribo2_up', 'mrna2_dn_ribo2_dn', 'mrna2_up_ribo2_dn', 'mrna2_dn_ribo2_up',
              'mrna2_up_ribo2_no', 'mrna2_dn_ribo2_no', 'mrna2_no_ribo2_up', 'mrna2_no_ribo2_dn')

max_ngenes <- 50
for (var1 in vec1_var) {
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      list_genes[[strvar]] <- eval(parse(text=sprintf("intersect(sym_%s, sym_%s)", var1, var2)))
      list_genes[[strvar]] <- intersect(list_genes[[strvar]], sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]],max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          strgene <- paste0(strgene,',...')
      }
      verb('%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

sym_mrna_no_mrna2_up_ribo2_up: 9: Hcfc1r1,Rps27l,AI837181,Traf4,Ciao3,Btg2,Sox9,Areg,Ndufa7

sym_mrna_no_mrna2_dn_ribo2_dn: 9: Cldn2,mt-Cytb,Dpysl2,Ddx21,Spon2,Cct4,Klhl9,Hnrnpa0,Hsp90b1

sym_mrna_no_mrna2_up_ribo2_dn: 5: Agrn,Lamb2,Malat1,Tom1l2,Fcsk

sym_mrna_no_mrna2_dn_ribo2_up: 7: Cks1b,Srsf3,Casp6,E2f3,Snrpd1,Snx5,Smarce1

sym_mrna_no_mrna2_up_ribo2_no: 103: Pxn,Ei24,Drg2,Itgb4,Foxo3,Zbtb7b,Lgals3,Pgls,Itga3,Uck1,Tecpr1,Hps1,Cul7,Hspbp1,Usp20,Map2k2,Tmem199,Spns1,Gm5884,Cmtm7,Uqcrc1,Zc3h3,Abca7,Man2b1,Commd9,Ccs,Lrsam1,Mien1,Apeh,Hip1r,Mthfr,Mrps12,Rab4b,Zfp622,Pla2g6,Cdip1,Vps11,Asl,Tjap1,Ubxn6,Chmp1a,Etfb,Surf1,Exoc4,Cav1,Arap1,AA986860,Polk,Rnf10,Taf1c,...

sym_mrna_no_mrna2_dn_ribo2_no: 113: Set,Nop56,Oaz1,Tmem109,Ufc1,Acaca,Heatr1,Nup153,Nup85,Ppid,Oaz2,Snrpf,Abcb10,Baz1a,Ola1,Srsf7,Mthfd1l,Rrp1b,Cdca7,BC005537,Rfwd3,Naa50,Las1l,Poglut2,Prkd3,Ric8b,Prr14l,Nolc1,D430020J02Rik,Trim59,Prkar2a,SPATA5L1,Lxn,Supt16,Spred1,Hace1,Rpp14,Gm20008,Ncbp2,Pask,Hnrnpdl,Syne2,Gm12435,Spast,

### other combinations

In [367]:
vec1_var <- c('mrna_up_ribo_up', 'mrna_dn_ribo_dn', 'mrna_up_ribo_dn', 'mrna_dn_ribo_up',
              'mrna_up_ribo_no', 'mrna_dn_ribo_no', 'mrna_no_ribo_up', 'mrna_no_ribo_dn')

vec2_var <- c('mrna2_up_ribo2_up', 'mrna2_dn_ribo2_dn', 
              'mrna2_up_ribo2_no', 'mrna2_dn_ribo2_no', 'mrna2_no_ribo2_up', 'mrna2_no_ribo2_dn',
              'mrna2_up_ribo2_dn', 'mrna2_dn_ribo2_up')

max_ngenes <- 50
for (var1 in vec1_var) {
    verb('-------------------------------------\n%s\n\n', var1)
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      list_genes[[strvar]] <- eval(parse(text=sprintf("intersect(sym_%s, sym_%s)", var1, var2)))
      list_genes[[strvar]] <- intersect(list_genes[[strvar]], sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]],max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          strgene <- paste0(strgene,',...')
      }
      verb('\t%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

-------------------------------------
mrna_up_ribo_up

	sym_mrna_up_ribo_up_mrna2_up_ribo2_up: 15: Arl2,Bhlhe40,Ccl2,Clcf1,Gadd45b,Gadd45g,Gla,Glipr1,Lce1g,Lif,Rras,Stk17b,Tfr2,Tspan7,Zkscan5

	sym_mrna_up_ribo_up_mrna2_dn_ribo2_dn: 13: Adam12,Arid3a,Col1a1,Cpd,Fkbp5,Mcam,Nt5e,Pdia6,Pxdn,Ror1,Spp1,Stk3,Tagln

	sym_mrna_up_ribo_up_mrna2_up_ribo2_no: 62: 1700017B05Rik,Adam8,Adssl1,Aif1l,Ankrd13b,Ap5b1,Arc,Arhgap23,Armc9,ARVCF,Atg7,Atp6ap1,Atp6v0d1,Bcar1,Bmp1,Castor1,Ccl7,Cep170b,Cotl1,Creb3,Csf1,Csrnp1,Csrp1,Dcbld2,Ddhd1,Ecm1,Edn1,Ehd1,Fam20c,Flnb,Fosl2,Fst,Glmp,Grn,Gsn,Hmox1,Hyal1,Inf2,Itpr3,Jun,Klhl26,Lbp,Lgmn,Lims2,Lpcat4,Maged2,Man2b2,Mus81,Nectin2,Nfkbia,...

	sym_mrna_up_ribo_up_mrna2_dn_ribo2_no: 30: Adarb1,Adgra1,Angptl2,Atp2b4,Ckap4,Csdc2,Ctsw,Daam1,Dennd5b,Denr,Erap1,Foxf2,Gldc,Il11,Lgr6,Nav1,Nrp2,Olfml2b,Pdgfrb,Ppa1,Rflnb,Rrbp1,Serpinf1,Smarcc1,Spcs3,St3gal5,Tln2,Tmem119,Ulbp1,Zfp36l2

	sym_mrna_up_ribo_up_mrna2_no_ribo2_up: 40: Ak1,Arl6ip5,B2m,Coq10b,Cstb,Ctsl,Cyb5r3,Dusp5,F1

# define frequently used variables

## sym_mrna_up_mrna2_dn

In [368]:
# relying on emt.tables.mrna.comprehensive.txt the first column of "reversible.translation.CX"
# translation tgfb48 up and tgfb27h+cx down
idx <- which(df_all[,'reversible.transcription']=='upDown')
sym_mrna_up_mrna2_dn <- rownames(df_all[idx,])
length(sym_mrna_up_mrna2_dn)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, sym_mrna_up_mrna2_dn)) # should be FALSE
}

sym_mrna_up_mrna2_dn <- intersect(sym_mrna_up, sym_mrna2_dn)
length(sym_mrna_up_mrna2_dn)


## sym_mrna_dn_mrna2_up

In [369]:
# translation tgfb48 down and tgfb27h+cx up
idx <- which(df_all[,'reversible.translation']=='downUp')
sym_mrna_dn_mrna2_up <- rownames(df_all[idx,])
length(sym_mrna_dn_mrna2_up)

sym_mrna_dn_mrna2_up <- intersect(sym_mrna_dn, sym_mrna2_up)
length(sym_mrna_dn_mrna2_up)

## sym_mrna_dn3x_mrna2_no

In [370]:
sym_mrna_dn3x_mrna2_no <- intersect(sym_mrna_dn3x, sym_mrna2_no)
length(sym_mrna_dn3x_mrna2_no)

## sym_ribo_up_ribo2_dn

In [371]:
# relying on emt.tables.mrna.comprehensive.txt the first column of "reversible.translation.CX"
# translation tgfb48 up and tgfb27h+cx down
idx <- which(df_all[,'reversible.translation']=='upDown')
sym_ribo_up_ribo2_dn <- rownames(df_all[idx,])
length(sym_ribo_up_ribo2_dn)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, sym_ribo_up_ribo2_dn)) # should be FALSE
}

sym_ribo_up_ribo2_dn <- intersect(sym_ribo_up, sym_ribo2_dn)
length(sym_ribo_up_ribo2_dn)


In [372]:
# translation tgfb48, tgfb48+27h+cx
sym_mrna_up_cx_dn <- sym_mrna_up_mrna2_dn
sym_mrna_dn_cx_up <- sym_mrna_dn_mrna2_up
sym_mrna_dn3x_cx_no <- sym_mrna_dn3x_mrna2_no
sym_mrna_up_cx_up <- intersect(sym_mrna_up, sym_mrna2_up)
sym_mrna_dn_cx_dn <- intersect(sym_mrna_dn, sym_mrna2_dn)

## sym_ribo_dn_ribo2_up

In [373]:
# translation tgfb48 down and tgfb27h+cx up
idx <- which(df_all[,'reversible.translation']=='downUp')
sym_ribo_dn_ribo2_up <- rownames(df_all[idx,])
length(sym_ribo_dn_ribo2_up)

sym_ribo_dn_ribo2_up <- intersect(sym_ribo_dn, sym_ribo2_up)
length(sym_ribo_dn_ribo2_up)


## sym_ribo_dn3x_ribo2_no

In [374]:
sym_ribo_dn3x_ribo2_no <- intersect(sym_ribo_dn3x, sym_ribo2_no)
length(sym_ribo_dn3x_ribo2_no)

In [375]:
# translation tgfb48, tgfb48+27h+cx
sym_ribo_up_cx_dn <- sym_ribo_up_ribo2_dn
sym_ribo_dn_cx_up <- sym_ribo_dn_ribo2_up
sym_ribo_dn3x_cx_no <- sym_ribo_dn3x_ribo2_no
sym_ribo_up_cx_up <- intersect(sym_ribo_up, sym_ribo2_up)
sym_ribo_dn_cx_dn <- intersect(sym_ribo_dn, sym_ribo2_dn)


# translation tgfb48, tgfb48+27h+cx, no effect on transcription tgfb48
# detected in both data sets
sym_mrna_no_ribo_up_cx_dn <- intersect(sym_mrna_no_ribo_up, sym_ribo2_dn)
sym_mrna_no_ribo_up_cx_dn <- intersect(sym_mrna_no_ribo_up_cx_dn, sym_detected_both)

sym_mrna_no_ribo_dn_cx_up <- intersect(sym_mrna_no_ribo_dn, sym_ribo2_up)
sym_mrna_no_ribo_dn_cx_up <- intersect(sym_mrna_no_ribo_dn_cx_up, sym_detected_both)

sym_mrna_no_ribo_up_cx_up <- intersect(sym_mrna_no_ribo_up, sym_ribo2_up)
sym_mrna_no_ribo_up_cx_up <- intersect(sym_mrna_no_ribo_up_cx_up, sym_detected_both)

sym_mrna_no_ribo_dn_cx_dn <- intersect(sym_mrna_no_ribo_dn, sym_ribo2_dn)
sym_mrna_no_ribo_dn_cx_dn <- intersect(sym_mrna_no_ribo_dn_cx_dn, sym_detected_both)

## sym_ribo_cx_exclusively_affected

In [376]:
length(sym_ribo2)
length(sym_ribo)
sym_ribo_cx_exclusively_affected <- setdiff(sym_ribo2, sym_ribo)
length(sym_ribo_cx_exclusively_affected)

In [377]:
sym_ribo_cx_exclusively_affected_up <- intersect(sym_ribo_cx_exclusively_affected,
                                                 sym_ribo2_up)
length(sym_ribo_cx_exclusively_affected_up)

In [378]:
sym_ribo_cx_exclusively_affected_dn <- intersect(sym_ribo_cx_exclusively_affected,
                                                 sym_ribo2_dn)
length(sym_ribo_cx_exclusively_affected_dn)

## sym_ribo_cx_unaffected

In [379]:
length(sym_riboseq)  # 13827

sym_ribo_cx_affected <- sym_ribo2
length(sym_ribo_cx_affected)

sym_ribo_cx_unaffected <- setdiff(sym_riboseq, sym_ribo_cx_affected)
length(sym_ribo_cx_unaffected)

## sym_mrna_no_ribo_de_cx_unaffected

In [380]:
sym_mrna_no_ribo_de_cx_unaffected <- intersect(sym_ribo_only, sym_ribo_cx_unaffected)
length(sym_mrna_no_ribo_de_cx_unaffected)

length(intersect(sym_mrna_no_ribo_de_cx_unaffected, sym_rnaseq))
length(intersect(sym_mrna_no_ribo_de_cx_unaffected, sym_riboseq))

# df_protein

In [381]:
## Proteomics part is not using at this point
# https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-019-0256-y
df_protein <- read.table(file="../nmumg_proteomics_mass_spec/table_s1a_protein_groups.txt", 
                header=TRUE, sep="\t", row.names=NULL,
                quote="", comment.char="#", stringsAsFactors=F)
t(df_protein[1:3,])
dim(df_protein)


condstr <- 'proteomics.unt0.tgfb48'
cols <- c('Majority.Gene.name','log2FC.2.days.0.min','X.log2.p..2.days.0.min')
df <- df_protein[,cols]
idx <- which((df[,2] > log2(1.2)) & (2^-df[,3] < 0.05))
sym_protein_up <- df[idx,1]

idx <- which((df[,2] < -log2(1.2)) & (2^-df[,3] < 0.05))
sym_protein_dn <- df[idx,1]

"cannot open file '../nmumg_proteomics_mass_spec/table_s1a_protein_groups.txt': No such file or directory"


ERROR: Error in file(file, "rt"): cannot open the connection


## sym_up_up2, sym_dn_dn2

In [382]:
length(sym_mrna_up_ribo_up)
f <- grepl('Rp', sym_mrna_up_ribo_up)
sym_mrna_up_ribo_up[f]



In [383]:
f <- grepl('Rp', sym_protein_up)
sym_protein_up[f]


ERROR: Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grepl': object 'sym_protein_up' not found


In [384]:
f <- grepl('Rp', sym_mrna_dn_ribo_dn)
sym_mrna_dn_ribo_dn[f]
length(sym_mrna_dn_ribo_dn[f])

In [385]:

sym_up_up2 <- intersect(sym_mrna_up_ribo_up, sym_protein_up)
sym_up_up2
length(sym_up_up2)

sym_dn_dn2 <- intersect(sym_mrna_dn_ribo_dn, sym_protein_dn)
sym_dn_dn2
length(sym_dn_dn2)


ERROR: Error in h(simpleError(msg, call)): error in evaluating the argument 'y' in selecting a method for function 'intersect': object 'sym_protein_up' not found


In [386]:
sym_ribosomal_proteins <- c('Rpl15', 'Rpl32', 'Rplp1', 'Rps28', 'Rps3', 'Rpsa')

df_all[sym_ribosomal_proteins,]

Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,PuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,PtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Rpl15,14,4198305,4201873,3569,+,ensembl_havana,4.144202,3.852056,3.92176,-0.29214638,...,-1.1376928,0.0002175378,1.51939e-05,0.71923131,0.04374918,0.0052288986,down,up,downUp,protein_coding
Rpl32,6,115782466,115785708,3243,-,ensembl_havana,4.397618,4.124774,4.118941,-0.27284405,...,-0.6114659,9.48e-08,2e-09,0.7204888,1.5279e-07,1e-09,down,up,downUp,protein_coding
Rplp1,9,61820566,61821824,1259,-,ensembl_havana,5.219518,5.223095,5.23772,0.00357701,...,-0.9682169,1.03e-09,0.0,0.06527635,0.7483032,0.5380572933,down,notSig,notSig,protein_coding
Rps28,17,34038001,34043536,5536,-,ensembl_havana,4.384985,4.094191,4.261039,-0.29079309,...,-0.6379383,0.000730636,6.14446e-05,-0.23310234,0.3723709,0.1515931764,down,notSig,notSig,protein_coding
Rps3,7,99127103,99132945,5843,-,ensembl_havana,5.166973,4.930413,4.989184,-0.23656,...,-0.6712653,1.44e-09,0.0,0.60189674,3.541e-07,2.9e-09,down,up,downUp,protein_coding
Rpsa,9,119956755,119961435,4681,+,ensembl_havana,6.373183,6.062804,6.010222,-0.3103784,...,-0.2809227,0.002570434,0.000277439,0.32771627,0.005305566,0.0002984192,down,up,downUp,protein_coding


## sym_up_up2_dn, sym_dn_dn2_up

In [387]:
sym_up_up_dn <- intersect(sym_mrna_up_ribo_up, sym_ribo2_dn)
#sym_up_up2_dn <- intersect(sym_up_up_dn, sym_protein_up)
#sym_up_up2_dn
#length(sym_up_up2_dn)

sym_dn_dn_up <- intersect(sym_mrna_dn_ribo_dn, sym_ribo2_up)
#sym_dn_dn2_up <- intersect(sym_dn_dn_up, sym_protein_dn)
#sym_dn_dn2_up
#length(sym_dn_dn2_up)


# save riboprof_170224_161021.rdata

In [388]:

fname_out <- sprintf("./rdata/riboprof_170224%s_161021%s_%s.rdata",
                     rundate_appendix, rundate_appendix, strdir)
verb('%s\n', fname_out)
save(
     # raw counts
     df_count_mrna, df_count_ribo,
     # dectected
     sym_rnaseq, sym_riboseq,
     # detected in the both platform or only one of them. 
     sym_detected_both, sym_detected_rnaseq_only, sym_detected_riboseq_only,
     # comprehensive tables 
     df_all, 
     #df_protein,
     # transcription tfgb48
     sym_mrna, sym_mrna_up, sym_mrna_dn, sym_mrna_no,
     # transcription tgfb48+27h+cx
     sym_mrna2, sym_mrna2_up, sym_mrna2_dn, sym_mrna2_no,
     # translation tgfb48
     sym_ribo, sym_ribo_up, sym_ribo_dn, sym_ribo_no,
     # translation tgfb48+27h+cx
     sym_ribo2, sym_ribo2_up, sym_ribo2_dn, sym_ribo2_no,
    
     # transcription tfgb48, no effect on translation tgfb48
     # detected in both data sets
     sym_mrna_up_ribo_no, sym_mrna_dn_ribo_no,
     sym_mrna_de_ribo_no, sym_mrna_only,
     # translation tfgb48, no effect on transcription tgfb48
     # detected in both data sets 
     sym_mrna_no_ribo_up, sym_mrna_no_ribo_dn,
     sym_mrna_no_ribo_de, sym_ribo_only,
     
     # transcription tfgb48, translation tfgb48
     # detected in both data sets
     sym_mrna_up_ribo_up, sym_mrna_dn_ribo_dn,
     sym_mrna_up_ribo_dn, sym_mrna_dn_ribo_up,
    
     ###### CX
     # transcription tgfb48, tgfb48+27h+cx
     sym_mrna_up_cx_dn, sym_mrna_dn_cx_up, sym_mrna_dn3x_cx_no,
     sym_mrna_up_cx_up, sym_mrna_dn_cx_dn,
     # translation tgfb48, tgfb48+27h+cx
     sym_ribo_up_cx_dn, sym_ribo_dn_cx_up, sym_ribo_dn3x_cx_no,
     sym_ribo_up_cx_up, sym_ribo_dn_cx_dn,
     sym_ribo_cx_exclusively_affected,
     sym_ribo_cx_exclusively_affected_up,
     sym_ribo_cx_exclusively_affected_dn,
     sym_ribo_cx_unaffected,
     # translation tgfb48, tgfb48+27h+cx, no effect on transcription tgfb48
     # detected in both data sets
     sym_mrna_no_ribo_up_cx_dn, sym_mrna_no_ribo_dn_cx_up,
     sym_mrna_no_ribo_up_cx_up, sym_mrna_no_ribo_dn_cx_dn,
     sym_mrna_no_ribo_de_cx_unaffected, 
     # transcription tfgb48, translation tgfb48, tgfb48+27h+cx
     # detected in both platforms 
     sym_up_up_dn, sym_dn_dn_up,
    
     ###### protein mass spec.    
     # mass spec. tgfb48
     #sym_protein_up, sym_protein_dn,
     # transcription tfgb48, translation tgfb48, mass spec. tgfb48, tgfb48+27h+cx up
     # detected in three platforms
     #sym_up_up2_dn, sym_dn_dn2_up,
     
     ## list_genes
     list_genes,
    
     file=fname_out)

./rdata/riboprof_170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna.rdata


# additional check

In [389]:
c('Marc2','March2') %in% sym_mrna_dn
c('Marc2','March2') %in% sym_ribo_dn
c('Marc2','March2') %in% sym_mrna_dn_ribo_dn

In [390]:
c('Marc2','March2') %in% sym_mrna_dn_ribo_no
c('Marc2','March2') %in% sym_mrna_no_ribo_dn

In [391]:
t(df_all[c('Marc2','March2'),])

Unnamed: 0,NA,NA.1
seqnames,,
start,,
end,,
width,,
strand,,
source,,
unt.transcription,,
tgfb.transcription,,
tgfbCX5461.transcription,,
log2FCuntVStgfb.transcription,,


# write xlsx

In [392]:
colnames(df_all)

In [393]:
list_df <- list()
cols <- colnames(df_all)
cols <- cols[!(cols %in% c("HomoloGene.ID","mouse.sym","mouse.eid","human.sym","human.eid"))]
cols <- cols[!grepl('^P',cols)]
       
sheet_names <- names(list_genes)
for (sname in sheet_names) {
    #verb('%s\n', sname)
    list_df[[sname]] <- df_all[list_genes[[sname]], cols]
}
titles <- sheet_names
subtitles <- rep(NULL,length(sheet_names))
nv_column_width <- rep(11,ncol(df_all))
names(nv_column_width) <- colnames(df_all)
#write_xlsx(list_df, 'xlsx/170224_161021.xlsx', titles, subtitles, nv_column_width)

dir_xlsx <- sprintf('xlsx/170224%s_161021%s_%s',
                        rundate_appendix, rundate_appendix, strdir)
dir.create(dir_xlsx ,  recursive = TRUE , showWarnings = FALSE)
filename_xlsx <- sprintf('%s/170224%s_161021%s_%s.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir)

require(openxlsx)
wb <- createWorkbook()

#hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
#options("openxlsx.borderColour" = "#4F80BD")
#options("openxlsx.borderStyle" = "thin")
#modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

addWorksheet(wb, sheetName='all', gridLines=TRUE)
writeDataTable(wb, sheet=1, x=df_all[,cols],
        colNames=TRUE, rowNames=TRUE)
setColWidths(wb, sheet=1, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )

sheet_names <- gsub('sym_','',names(list_df))
sheet_names <- gsub('mrna_','11_', sheet_names)
sheet_names <- gsub('ribo_','21_', sheet_names)
sheet_names <- gsub('mrna2_','12_', sheet_names)
sheet_names <- gsub('ribo2_','22_', sheet_names)
for (i in 1:length(list_df)) {
     verb('%s\n', sheet_names[i]) 
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     writeDataTable(wb, sheet=1+i, x=list_df[[i]],
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=1+i, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )
}
# save xlsx
saveWorkbook(wb, filename_xlsx, overwrite=TRUE)

11_up
11_dn
11_dn3x
21_up
21_dn
21_dn3x
11_up_21_up
11_dn_21_dn
11_up_21_dn
11_dn_21_up
11_up_21_no
11_dn_21_no
11_no_21_up
11_no_21_dn
12_up
12_dn
22_up
22_dn
12_up_22_no
12_dn_22_no
12_no_22_up
12_no_22_dn
12_up_22_up
12_dn_22_dn
12_up_22_dn
12_dn_22_up
11_up_21_up_22_no
11_dn_21_dn_22_no
11_up_21_dn_22_no
11_dn_21_up_22_no
11_up_21_no_22_no
11_dn_21_no_22_no
11_no_21_up_22_no
11_no_21_dn_22_no
11_no_12_up_22_up
11_no_12_dn_22_dn
11_no_12_up_22_dn
11_no_12_dn_22_up
11_no_12_up_22_no
11_no_12_dn_22_no
11_no_12_no_22_up
11_no_12_no_22_dn
11_up_21_up_12_up_22_up
11_up_21_up_12_dn_22_dn
11_up_21_up_12_up_22_no
11_up_21_up_12_dn_22_no
11_up_21_up_12_no_22_up
11_up_21_up_12_no_22_dn
11_up_21_up_12_up_22_dn
11_up_21_up_12_dn_22_up
11_dn_21_dn_12_up_22_up
11_dn_21_dn_12_dn_22_dn
11_dn_21_dn_12_up_22_no
11_dn_21_dn_12_dn_22_no
11_dn_21_dn_12_no_22_up
11_dn_21_dn_12_no_22_dn
11_dn_21_dn_12_up_22_dn
11_dn_21_dn_12_dn_22_up
11_up_21_dn_12_up_22_up
11_up_21_dn_12_dn_22_dn
11_up_21_dn_12_up_22_no


## biotype

In [394]:
biotypes <- unique(df_all$biotype)
biotypes

In [395]:

for (biotype in biotypes) {
  verb('%s\n', biotype)  
  filename_xlsx <- sprintf('%s/170224%s_161021%s_%s_%s.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir, biotype)

  require(openxlsx)
  wb <- createWorkbook()

  #hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
  #options("openxlsx.borderColour" = "#4F80BD")
  #options("openxlsx.borderStyle" = "thin")
  #modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

  addWorksheet(wb, sheetName='all', gridLines=TRUE)
  df1 <- df_all
  idx <- which(df1$biotype == biotype)
  #verb('\t%all: %d\n', length(idx))  
  if (length(idx)==0) next    
  writeDataTable(wb, sheet=1, x=df_all[idx,!grepl('^P',colnames(df_all))],
        colNames=TRUE, rowNames=TRUE)
  setColWidths(wb, sheet=1, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )    
    
  sheet_names <- gsub('sym_','',names(list_df))
  sheet_names <- gsub('mrna_','11_', sheet_names)
  sheet_names <- gsub('ribo_','21_', sheet_names)
  sheet_names <- gsub('mrna2_','12_', sheet_names)
  sheet_names <- gsub('ribo2_','22_', sheet_names)
  sheet_num <- 1  
  for (i in 1:length(list_df)) {
     df1 <- list_df[[i]]
     idx <- which(df1$biotype == biotype)
     #verb('\t%s: %d\n', sheet_names[i], length(idx))  
     if (length(idx)==0) next
    
     sheet_num <- sheet_num+1 
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     writeDataTable(wb, sheet=sheet_num, x=df1[idx,!grepl('^P',colnames(df1)),drop=F],
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=sheet_num, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )
  }
  # save xlsx
  saveWorkbook(wb, filename_xlsx, overwrite=TRUE)
}



protein_coding
lncRNA
TEC
transcribed_unprocessed_pseudogene
transcribed_processed_pseudogene
rRNA
unprocessed_pseudogene
misc_RNA
translated_unprocessed_pseudogene
polymorphic_pseudogene
processed_pseudogene
snoRNA
pseudogene
transcribed_unitary_pseudogene
scaRNA
snRNA
miRNA
IG_C_gene
Mt_rRNA
Mt_tRNA
rDNA
ribozyme
TR_C_gene
TR_V_gene


# write xlsx for validation

## filter_rows_with_log2fc_fdr

In [396]:
filter_rows_with_log2fc_fdr <- function(df, col) {
    
    items <- strsplit(col, "\\.")[[1]]
    items[1] <- gsub("tgfb48", "tgfb", items[1])
    items[3] <- gsub("DE", '', items[3])
    col_log2fc <- sprintf("log2FC%sVS%s.%s", items[1], items[2], items[3])
    col_fdr <- sprintf("FDR%sVS%s.%s", items[1], items[2], items[3])
    
    f_out <- rep(FALSE, nrow(df))
    # not sig
    f_notsig <- df[,col] == "notSig"
    idx <- which(f_notsig)
    
    f <- (abs(df[idx, col_log2fc]) < 0.1) & (df[idx, col_fdr] > 0.25)
    f_out[idx[f]] <- TRUE
    
    # up/dn
    idx <- which(!f_notsig)
    f <- (abs(df[idx, col_log2fc]) > log2(1.5)) & (df[idx, col_fdr] < 0.05)
    f_out[idx[f]] <- TRUE    
    
    f_out
}



## filter_rows_for_validation

In [397]:
filter_rows_for_validation <- function(sheet_name, df, th_log2cpm=2) {
    
    # large cpm for unt48 vs. tgfb48
    f1 <- (df$unt.transcription >= th_log2cpm) | (df$tgfb.transcription >= th_log2cpm)
    # large cpm for tgfb48 vs. CX
    f2 <- (df$tgfb.transcription >= th_log2cpm) | (df$tgfbCX5461.transcription >= th_log2cpm)
    f <- f1 & f2
    
    f <- f & filter_rows_with_log2fc_fdr(df, "unt.tgfb.DEtranscription")
    f <- f & filter_rows_with_log2fc_fdr(df, "tgfb.tgfbCX5461.DEtranscription")
    
    # large cpm for unt48 vs. tgfb48
    f1 <- (df$unt.translation >= th_log2cpm) | (df$tgfb.translation >= th_log2cpm)
    # large cpm for tgfb48 vs. CX
    f2 <- (df$tgfb.translation >= th_log2cpm) | (df$tgfbCX5461.translation >= th_log2cpm)
    f <- f & (f1 & f2)
    
    f <- f & filter_rows_with_log2fc_fdr(df, "unt.tgfb.DEtranslation")
    f <- f & filter_rows_with_log2fc_fdr(df, "tgfb.tgfbCX5461.DEtranslation")
    
    df[f, !grepl('^P',colnames(df)), drop=F]
}

In [398]:
list_df <- list()
cols <- colnames(df_all)
cols <- cols[!(cols %in% c("HomoloGene.ID","mouse.sym","mouse.eid","human.sym","human.eid"))]
cols <- cols[!grepl('^P',cols)]

sheet_names <- names(list_genes)
for (sname in sheet_names) {
    #verb('%s\n', sname)
    list_df[[sname]] <- df_all[list_genes[[sname]], cols]
}
titles <- sheet_names
subtitles <- rep(NULL,length(sheet_names))
nv_column_width <- rep(11,ncol(df_all))
names(nv_column_width) <- colnames(df_all)
#write_xlsx(list_df, 'xlsx/170224_161021.xlsx', titles, subtitles, nv_column_width)

filename_xlsx <- sprintf('%s/170224%s_161021%s_%s_for_validation.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir)
verb("%s\n", filename_xlsx)

require(openxlsx)
wb <- createWorkbook()

#hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
#options("openxlsx.borderColour" = "#4F80BD")
#options("openxlsx.borderStyle" = "thin")
#modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

addWorksheet(wb, sheetName='all', gridLines=TRUE)
writeDataTable(wb, sheet=1, x=filter_rows_for_validation("all", df_all[, cols]),
        colNames=TRUE, rowNames=TRUE)
setColWidths(wb, sheet=1, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )

sheet_names <- gsub('sym_','',names(list_df))
sheet_names <- gsub('mrna_','11_', sheet_names)
sheet_names <- gsub('ribo_','21_', sheet_names)
sheet_names <- gsub('mrna2_','12_', sheet_names)
sheet_names <- gsub('ribo2_','22_', sheet_names)
for (i in 1:length(list_df)) {
     verb('%s\n', sheet_names[i]) 
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     writeDataTable(wb, sheet=1+i, x=filter_rows_for_validation(sheet_names[i], list_df[[i]]),
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=1+i, cols=1:(ncol(df_all)+1),
        widths=c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20) )
}
# save xlsx
saveWorkbook(wb, filename_xlsx, overwrite=TRUE)

xlsx/170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna/170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna_for_validation.xlsx
11_up
11_dn
11_dn3x
21_up
21_dn
21_dn3x
11_up_21_up
11_dn_21_dn
11_up_21_dn
11_dn_21_up
11_up_21_no
11_dn_21_no
11_no_21_up
11_no_21_dn
12_up
12_dn
22_up
22_dn
12_up_22_no
12_dn_22_no
12_no_22_up
12_no_22_dn
12_up_22_up
12_dn_22_dn
12_up_22_dn
12_dn_22_up
11_up_21_up_22_no
11_dn_21_dn_22_no
11_up_21_dn_22_no
11_dn_21_up_22_no
11_up_21_no_22_no
11_dn_21_no_22_no
11_no_21_up_22_no
11_no_21_dn_22_no
11_no_12_up_22_up
11_no_12_dn_22_dn
11_no_12_up_22_dn
11_no_12_dn_22_up
11_no_12_up_22_no
11_no_12_dn_22_no
11_no_12_no_22_up
11_no_12_no_22_dn
11_up_21_up_12_up_22_up
11_up_21_up_12_dn_22_dn
11_up_21_up_12_up_22_no
11_up_21_up_12_dn_22_no
11_up_21_up_12_no_22_up
11_up_21_up_12_no_22_dn
11_up_21_up_12_up_22_dn
11_up_21_up_12_dn_22_up
11_dn_21_dn_12_up_22_up
11_dn_21_dn_12_dn_22_dn
11_dn_21_dn_12_up_22_no
11_dn_21_dn_12_dn_22_no
11_dn_21_dn_12_no_22_up
11_dn_21_dn_12_

## 11_up_21_no

In [399]:
# 11_up_21_no: up for unt48 vs. tgfb48 only in rna-seq
name1 <- "11_up_21_no"
idx <- which(sheet_names==name1)
# DE3: 1340
# DE4: 1272
dim(list_df[[idx]])

# DE3: Ap2a1, Atp11a, Bcl9l, Ctxn1, Neat1, Ptpn13, Stx6
# DE4: Bcl9l, Neat1
filter_rows_for_validation(name1, list_df[[idx]])

Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,tgfb.translation,tgfbCX5461.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Timp2,11,118191895,118246566,54672,-,ensembl_havana,3.702578,4.61324,4.684573,0.9106621,...,3.152708,3.166464,0.03675525,0.8477296,0.01375588,0.9588233,notSig,notSig,notSig,protein_coding


## 11_no_21_up

In [400]:
# 11_no_21_up
# 11_no: no change for unt48 vs. tgfb48 only in RNA-seq
# 21_up: up for unt48 vs. tgfb48 only in RPF-seq
name1 <- "11_no_21_up"
idx <- which(sheet_names==name1)
# DE3: 180
# DE4: 161
dim(list_df[[idx]])

# DE3: Abcf3, Cd14, Elovl1, Gm25360, Hsph1, Nrg1, Pot1b, Pxylp1, Rnf169, Uba6, Usp5
# DE4: Abcf3, Cd14, Elovl1, Hexim1, Hsph1, Nrg1, Rnf169, Usp5
df_out <- filter_rows_for_validation(name1, list_df[[idx]])
t(df_out)

Unnamed: 0,mt-Nd1
seqnames,MT
start,2751
end,3707
width,957
strand,+
source,RefSeq
unt.transcription,6.095172
tgfb.transcription,6.006088
tgfbCX5461.transcription,5.933917
log2FCuntVStgfb.transcription,-0.08908384


## 11_up_21_up_12_no_22_dn

In [401]:
# 11_up_21_up_12_no_22_dn: up for unt48 vs. tgfb48 in both platform, down for tgfb48 vs. CX only in ribo-seq (e.g. Zeb1)

name1 <- "11_up_21_up_12_no_22_dn"
idx <- which(sheet_names==name1)

# DE3: 182
# DE4: 182
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,tgfb.translation,tgfbCX5461.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Akr1b3,6,34279369,34294413,15045,-,ensembl_havana,1.5021399,3.40496,3.491553,1.9028201,...,2.728316,1.3137932,2.4870701,0.0002249105,-1.4145226,0.01821304,up,down,upDown,protein_coding
Col4a1,8,11248423,11362826,114404,-,ensembl_havana,3.2556386,5.325969,5.390736,2.0703301,...,4.948415,2.9929565,2.6071701,0.0,-1.9554584,1e-11,up,down,upDown,protein_coding
Colgalt1,8,72063642,72077555,13914,+,ensembl_havana,2.9508621,4.418117,4.46739,1.4672553,...,4.062421,3.4009698,1.9474767,0.0,-0.6614512,5.247929e-05,up,down,upDown,protein_coding
D630003M21Rik,2,158024453,158071142,46690,-,ensembl_havana,0.2401005,2.755146,2.802101,2.5150454,...,2.30761,0.8531208,3.0001153,1.1e-10,-1.4544888,7.40993e-06,up,down,upDown,protein_coding
Efemp2,19,5523982,5532545,8564,+,ensembl_havana,2.3539001,3.536365,3.609116,1.1824646,...,2.24261,1.6014558,1.297012,5.9623e-07,-0.6411542,0.01374074,up,down,upDown,protein_coding
Enc1,13,97377613,97389542,11930,+,ensembl_havana,1.1909819,2.437358,2.388628,1.2463757,...,2.283187,1.0844962,0.9283433,1.096159e-05,-1.1986908,4.375509e-05,up,down,upDown,protein_coding
Fndc3b,3,27470311,27765456,295146,-,ensembl_havana,1.2177858,2.960893,3.00854,1.7431069,...,2.2001,1.1877016,1.3213026,4.0338e-07,-1.0123985,0.0003866486,up,down,upDown,protein_coding
Hk2,6,82702006,82751435,49430,-,ensembl_havana,0.7010763,2.085114,2.152475,1.3840378,...,2.910086,1.9109113,1.3979067,2.295e-08,-0.9991744,8.654022e-05,up,down,upDown,protein_coding
Jup,11,100259784,100288589,28806,-,ensembl_havana,3.3563116,4.332686,4.402641,0.9763748,...,4.708209,3.9693213,1.3962396,8.4e-10,-0.7388882,9.735087e-05,up,down,upDown,protein_coding
Lmcd1,6,112250719,112307386,56668,+,ensembl_havana,-2.2793554,2.262028,2.301087,4.5413839,...,3.902408,3.1311039,3.1744348,0.0,-0.7713039,1.170752e-05,up,down,upDown,protein_coding


## 11_dn_21_dn_12_no_22_up

In [402]:
# down for unt48 vs. tgfb48 in both platform, up for tgfb48 vs. CX only in Ribo-seq (e.g. Eif3k)

name1 <- "11_dn_21_dn_12_no_22_up"
idx <- which(sheet_names==name1)

# DE3: 83
# DE4: 83
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,tgfb.translation,tgfbCX5461.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Gstp1,19,4085407,4087985,2579,-,ensembl_havana,4.584731,3.298379,3.286578,-1.286352,...,4.017704,4.73958,-1.269607,0,0.7218763,5.212e-07,down,up,downUp,protein_coding


## 11_no_21_up_12_no_22_dn

In [403]:
# 11_no_21_up_12_no_22_dn: up for unt48 vs. tgfb48 only in ribo-seq, down for tgfb48 vs. CX only in ribo-seq (e.g. Fanca)

name1 <- "11_no_21_up_12_no_22_dn"
idx <- which(sheet_names==name1)

# DE3: 57
# DE4: 53
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt.transcription,tgfb.transcription,tgfbCX5461.transcription,log2FCuntVStgfb.transcription,...,tgfb.translation,tgfbCX5461.translation,log2FCuntVStgfb.translation,FDRuntVStgfb.translation,log2FCtgfbVStgfbCX5461.translation,FDRtgfbVStgfbCX5461.translation,unt.tgfb.DEtranslation,tgfb.tgfbCX5461.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
mt-Nd1,MT,2751,3707,957,+,RefSeq,6.095172,6.006088,5.933917,-0.08908384,...,3.083192,2.049343,1.165125,6.528e-08,-1.03385,1.99832e-05,up,down,upDown,protein_coding


## 11_no_21_dn_12_no_22_up