# load functions

In [4]:
suppressMessages(suppressWarnings(source("./jupyter_common.R")))


# common parameters

In [5]:
# Analysis identifiers: the limma-voom flavour and the quantification level.
# `level` selects transcript- vs gene-level annotation in later cells
# (any value containing "isoform" is treated as transcript level).
type <- "limma-voom.mrna"
level <- "htseq_gene"

# Suffix identifying the annotation/run variant; it is appended to the run
# dates in the input directories, the GTF file name and the output file names.
rundate_appendix <- ".rdna_rn18s"
# Optional regular expression of gene names to drop after reading tables
# (NULL disables the filter).
pattern_remove_gene <- NULL

# Differential-expression thresholds: adjusted p-value and |log2 fold change|.
th_adj_pvalue <- 0.05
th_log2fc <- log2(1.2)

# Accumulator for named gene-symbol sets filled in by later cells.
list_genes <- list()
strdir <- type

# Input directories. The run directories are derived from rundate_appendix so
# they stay in sync with the GTF path and the output file names, which use the
# same suffix.
dir_limma_output <- "out/limma-voom.mrna"
dir_rnaseq <- sprintf("./%s/170224%s/NMuMG", dir_limma_output, rundate_appendix)
dir_riboseq <- sprintf("./%s/161021%s/NMuMG", dir_limma_output, rundate_appendix)

# When TRUE, later cells also write the per-assay DE tables to ./data/table.
f_write_table <- FALSE
getwd()

# read gtf

In [6]:
# Import the GTF annotation whose file name carries the rundate_appendix
# suffix, then show the first ranges and the total number of records.
library("rtracklayer")

my_file <- sprintf("./data/mouse/Mus_musculus.GRCm38.97%s.gtf.gz", rundate_appendix)
show(my_file)

granges_gtf <- import(my_file)
head(granges_gtf)
length(granges_gtf)

"package 'rtracklayer' was built under R version 4.1.1"
Loading required package: GenomicRanges

"package 'GenomicRanges' was built under R version 4.1.2"


[1] "./data/mouse/Mus_musculus.GRCm38.97.rdna_rn18s.gtf.gz"


GRanges object with 6 ranges and 22 metadata columns:
      seqnames          ranges strand |   source       type     score     phase
         <Rle>       <IRanges>  <Rle> | <factor>   <factor> <numeric> <integer>
  [1]        1 3073253-3074322      + |  havana  gene              NA      <NA>
  [2]        1 3073253-3074322      + |  havana  transcript        NA      <NA>
  [3]        1 3073253-3074322      + |  havana  exon              NA      <NA>
  [4]        1 3102016-3102125      + |  ensembl gene              NA      <NA>
  [5]        1 3102016-3102125      + |  ensembl transcript        NA      <NA>
  [6]        1 3102016-3102125      + |  ensembl exon              NA      <NA>
                 gene_id gene_version     gene_name gene_source gene_biotype
             <character>  <character>   <character> <character>  <character>
  [1] ENSMUSG00000102693            1 4933401J01Rik      havana          TEC
  [2] ENSMUSG00000102693            1 4933401J01Rik      havana          TE

In [7]:
# Flatten the GTF ranges into a data frame (df_gtf0) and keep a single feature
# type in df_gtf: 'transcript' rows for isoform-level runs, 'gene' rows
# otherwise.
df_gtf0 <- as.data.frame(granges_gtf)
df_gtf <- if (grepl("isoform", level)) {
    df_gtf0[df_gtf0$type == 'transcript', ]
} else {
    df_gtf0[df_gtf0$type == 'gene', ]
}

# Transposed view of the first three rows, then the table dimensions.
t(df_gtf[1:3, ])
dim(df_gtf)

# Number of distinct gene identifiers.
length(unique(df_gtf[["gene_id"]]))

# Sanity checks on the distinct gene names: missing values, the literal
# string 'NA', empty strings, and finally how many names there are.
gene_name <- unique(df_gtf[["gene_name"]])
any(is.na(gene_name))
any(gene_name == 'NA')
any(nchar(gene_name) == 0)
length(gene_name)


Unnamed: 0,1,4,7
seqnames,1,1,1
start,3073253,3102016,3205901
end,3074322,3102125,3671498
width,1070,110,465598
strand,+,+,-
source,havana,ensembl,ensembl_havana
type,gene,gene,gene
score,,,
phase,,,
gene_id,ENSMUSG00000102693,ENSMUSG00000064842,ENSMUSG00000051951


## gene_id

In [62]:
# Number of distinct gene_id values in the filtered annotation.
length(unique(df_gtf[["gene_id"]]))

In [63]:
# Distinct gene_id values that start with "ucsc": preview and count.
f <- grepl('^ucsc', df_gtf[["gene_id"]])
gene_id_ucsc <- unique(df_gtf[["gene_id"]][f])
head(gene_id_ucsc)
length(gene_id_ucsc)

In [64]:
# Count of gene_id values outside the ucsc-prefixed set, followed by a
# hand-entered subtraction for comparison with that count.
length(setdiff(df_gtf[["gene_id"]], gene_id_ucsc))
56754 - 1181

## gene_name

In [65]:
# Number of distinct gene_name values in the filtered annotation.
length(unique(df_gtf[["gene_name"]]))

In [66]:
# Distinct gene_name values that start with "ucsc": preview and count.
f <- grepl('^ucsc', df_gtf[["gene_name"]])
gene_name_ucsc <- unique(df_gtf[["gene_name"]][f])
head(gene_name_ucsc)
length(gene_name_ucsc)

In [67]:
# Count of gene_name values outside the ucsc-prefixed set, followed by a
# hand-entered subtraction for comparison with that count.
length(setdiff(df_gtf[["gene_name"]], gene_name_ucsc))
56634 - 1181

## gene_biotype

In [68]:
# Number of distinct gene names among rows whose gene_biotype is
# 'protein_coding'.
f <- df_gtf[["gene_biotype"]] == 'protein_coding'
length(unique(df_gtf[["gene_name"]][f]))

In [69]:
# Sorted distinct names of rRNA-biotype genes, excluding names that contain
# "n-R5s".
f <- df_gtf[["gene_biotype"]] == 'rRNA' & !grepl("n-R5s", df_gtf[["gene_name"]])
sort(unique(df_gtf[["gene_name"]][f]))

## transcript_id

In [70]:
# Number of distinct transcript_id values in the filtered annotation.
length(unique(df_gtf[["transcript_id"]]))

## transcript_name

In [71]:
# Number of distinct transcript_name values in the filtered annotation.
length(unique(df_gtf[["transcript_name"]]))

## transcript_biotype

In [72]:
# Number of distinct transcript names among rows whose transcript_biotype is
# 'protein_coding'.
f <- df_gtf[["transcript_biotype"]] == 'protein_coding'
length(unique(df_gtf[["transcript_name"]][f]))

In [73]:
# Check whether any rRNA transcript record in the full annotation (df_gtf0)
# has an exon number above 1. If so, show the first such rows and how many
# there are; otherwise list the exon numbers seen on rRNA records.
#
# exon_number is converted to integer so the comparison is numeric rather than
# a string comparison; which() drops rows where either field is NA.
exon_no <- as.integer(as.character(df_gtf0$exon_number))
idx <- which(df_gtf0$transcript_biotype == "rRNA" & exon_no > 1)
if (length(idx) > 0) {
    # Only the last value of a braced block is auto-displayed, so the preview
    # has to be printed explicitly.
    print(head(df_gtf0[idx, ]))
    dim(df_gtf0[idx, ])
} else {
    # The mask is built from df_gtf0, so it must index df_gtf0 (not the
    # feature-filtered df_gtf, which has a different number of rows).
    f <- !is.na(df_gtf0$transcript_biotype) & df_gtf0$transcript_biotype == "rRNA"
    unique(df_gtf0[f, 'exon_number'])
}

# df_count_mrna

In [3]:
# Randall totalRNA
# Read the raw RNA-seq count table and prepend annotation columns from df_gtf.

fname_in <- sprintf("%s/170224.NMuMG.%s.counts.raw.txt.gz", dir_rnaseq, strdir)
verb('%s\n', fname_in)
df_count_mrna <- read.table(fname_in,
                            header = TRUE, sep = "\t", row.names = 1,
                            quote = "", comment.char = "#",
                            stringsAsFactors = FALSE)

# Add seqnames, start, end, width, strand, source (the first six df_gtf
# columns) plus the biotype. Rows are matched on transcript name for
# isoform-level runs and on gene name otherwise; row names without a match
# receive NA annotation.
sym <- rownames(df_count_mrna)
is_isoform <- grepl("isoform", level)
idx <- match(sym, df_gtf[[if (is_isoform) "transcript_name" else "gene_name"]])
df_count_mrna <- cbind(df_gtf[idx, 1:6], df_count_mrna)
df_count_mrna$biotype <- df_gtf[idx, if (is_isoform) "transcript_biotype" else "gene_biotype"]

# cbind() takes its row names from the df_gtf slice; restore the count-table
# row names.
rownames(df_count_mrna) <- sym
head(df_count_mrna)

./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/170224.NMuMG.limma-voom.mrna.counts.raw.txt.gz


"cannot open file './out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/170224.NMuMG.limma-voom.mrna.counts.raw.txt.gz': No such file or directory"


ERROR: Error in file(file, "rt"): cannot open the connection


# df_count_ribo

In [75]:
# Read the raw ribosome-profiling count table and prepend annotation columns
# from df_gtf.
fname_in <- sprintf("%s/161021.NMuMG.%s.counts.raw.txt.gz", dir_riboseq, strdir)
verb('%s\n', fname_in)
df_count_ribo <- read.table(fname_in,
                            header = TRUE, sep = "\t", row.names = 1,
                            quote = "", comment.char = "#",
                            stringsAsFactors = FALSE)

# Match rows on transcript name for isoform-level runs and on gene name
# otherwise; row names without a match receive NA annotation. The first six
# df_gtf columns and the biotype are added.
sym <- rownames(df_count_ribo)
is_isoform <- grepl("isoform", level)
idx <- match(sym, df_gtf[[if (is_isoform) "transcript_name" else "gene_name"]])
df_count_ribo <- cbind(df_gtf[idx, 1:6], df_count_ribo)
df_count_ribo$biotype <- df_gtf[idx, if (is_isoform) "transcript_biotype" else "gene_biotype"]

# cbind() takes its row names from the df_gtf slice; restore the count-table
# row names.
rownames(df_count_ribo) <- sym
head(df_count_ribo)


./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/161021.NMuMG.limma-voom.mrna.counts.raw.txt.gz


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,RD0175,RD0176,RD0177,RD0178,RD0179,RD0180,RD0184,RD0185,RD0186,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>
__alignment_not_unique,,,,,,,0,0,0,0,0,0,0,0,0,
__ambiguous,,,,,,,31331,36394,33298,17428,23539,21878,13380,15968,10757,
__no_feature,,,,,,,433684,593196,638550,430982,592788,614080,258134,277100,186826,
__not_aligned,,,,,,,35064519,43483167,46348710,28888575,39393642,40654786,27907812,32865359,21965755,
__too_low_aQual,,,,,,,0,0,0,0,0,0,0,0,0,
rDNA_promoter,BK000964.3,45306.0,45446.0,141.0,+,stjude,0,0,0,0,0,0,0,0,0,rDNA


# df_transcription

In [76]:
# Randall totalRNA
# Build df_transcription from the 170224 RNA-seq limma-voom tables: the
# per-condition log2cpm values plus the log2FC / FDR / P columns of the two
# contrasts (unt48 vs tgfb48 and tgfb48 vs tgfbCX5461100nm), followed by DE
# calls derived from th_log2fc and th_adj_pvalue.

# Read the table "<dir_rnaseq>/170224.NMuMG.<strdir>.<suffix>", rename its
# columns to col_names and, when pattern_remove_gene is not NULL, drop the rows
# whose names match it. Returns the resulting data frame.
read_rnaseq_table <- function(suffix, col_names) {
  fname_in <- sprintf("%s/170224.NMuMG.%s.%s", dir_rnaseq, strdir, suffix)
  verb('%s\n', fname_in)
  df <- read.table(file = fname_in,
                   header = TRUE, sep = "\t", row.names = 1,
                   quote = "", comment.char = "#", stringsAsFactors = FALSE)
  colnames(df) <- col_names
  if (!is.null(pattern_remove_gene)) {
    df <- df[!grepl(pattern_remove_gene, rownames(df)), ]
  }
  df
}

# Column groups carried into the merged table.
cols_log2cpm <- c('unt48.transcription', 'tgfb48.transcription',
                  'tgfbCX5461100nm.transcription')
cols_tgfb <- c('log2FCunt48VStgfb48.transcription',
               'FDRunt48VStgfb48.transcription',
               'Punt48VStgfb48.transcription')
cols_cx <- c('log2FCtgfbVStgfbCX5461100nm.transcription',
             'FDRtgfbVStgfbCX5461100nm.transcription',
             'PtgfbVStgfbCX5461100nm.transcription')

# log2cpm
df_log2cpm <- read_rnaseq_table("log2cpm.txt.gz", cols_log2cpm)
head(df_log2cpm)
dim(df_log2cpm)

# unt48 vs tgfb48
df_mrna <- read_rnaseq_table(
  "unt48--vs--tgfb48.all.txt.gz",
  c('unt48.transcription', 'tgfb48.transcription', cols_tgfb)
)
head(df_mrna)
dim(df_mrna)

# tgfb48 vs tgfbCX5461100nm
df_mrna_cx <- read_rnaseq_table(
  "tgfb48--vs--tgfbCX5461100nm.all.txt.gz",
  c('tgfb48.transcription', 'tgfbCX5461100nm.transcription', cols_cx)
)
head(df_mrna_cx)
dim(df_mrna_cx)

# Full join of the three tables on their row names.
# https://adairama.wordpress.com/2017/11/22/how-to-merge-multiple-datasets-in-r-based-on-row-names/
mylist <- lapply(
  list(df_log2cpm[, cols_log2cpm], df_mrna[, cols_tgfb], df_mrna_cx[, cols_cx]),
  function(d) {
    d$ROWNAMES <- rownames(d)
    d
  }
)
df_transcription <- plyr::join_all(mylist, by = "ROWNAMES", type = "full")
rownames(df_transcription) <- df_transcription$ROWNAMES
df_transcription$ROWNAMES <- NULL

# DE calls. All three label columns start as 'notSig'.
df_transcription$unt48.tgfb48.DEtranscription <- 'notSig'
df_transcription$tgfb48.tgfbCX5461100nm.DEtranscription <- 'notSig'
df_transcription$reversible.transcription <- 'notSig'

lfc_tgfb <- df_transcription$log2FCunt48VStgfb48.transcription
sig_tgfb <- df_transcription$FDRunt48VStgfb48.transcription < th_adj_pvalue
lfc_cx <- df_transcription$log2FCtgfbVStgfbCX5461100nm.transcription
sig_cx <- df_transcription$FDRtgfbVStgfbCX5461100nm.transcription < th_adj_pvalue

up_tgfb <- sig_tgfb & lfc_tgfb > th_log2fc
dn_tgfb <- sig_tgfb & lfc_tgfb < -th_log2fc
up_cx <- sig_cx & lfc_cx > th_log2fc
dn_cx <- sig_cx & lfc_cx < -th_log2fc

# which() drops NA comparisons, so rows lacking values for a contrast keep
# 'notSig'.
df_transcription$unt48.tgfb48.DEtranscription[which(up_tgfb)] <- 'up'
df_transcription$unt48.tgfb48.DEtranscription[which(dn_tgfb)] <- 'down'
df_transcription$tgfb48.tgfbCX5461100nm.DEtranscription[which(up_cx)] <- 'up'
df_transcription$tgfb48.tgfbCX5461100nm.DEtranscription[which(dn_cx)] <- 'down'

# Reversible: the change in the first contrast is undone in the second.
df_transcription$reversible.transcription[which(up_tgfb & dn_cx)] <- 'upDown'
df_transcription$reversible.transcription[which(dn_tgfb & up_cx)] <- 'downUp'

#tail(df_transcription)
df_transcription[c('Abcc2'),]
dim(df_transcription)

./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/170224.NMuMG.limma-voom.mrna.log2cpm.txt.gz


Unnamed: 0_level_0,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
0610009B22Rik,1.378095,0.558629,0.8116783
0610009L18Rik,-2.252284,-3.1533015,-3.7140633
0610010F05Rik,3.071791,3.7414762,3.6247997
0610012G03Rik,2.057036,1.8223029,1.6342654
0610030E20Rik,1.809939,1.771047,1.8676255
0610038B21Rik,-1.771257,-0.9942376,-1.9819725


./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/170224.NMuMG.limma-voom.mrna.unt48--vs--tgfb48.all.txt.gz


Unnamed: 0_level_0,unt48.transcription,tgfb48.transcription,log2FCunt48VStgfb48.transcription,FDRunt48VStgfb48.transcription,Punt48VStgfb48.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1700017B05Rik,5.13742,6.2813516,1.1439316,0,0
4930402H24Rik,4.24696,5.314564,1.0676044,0,0
4931406P16Rik,3.317556,4.8805024,1.5629466,0,0
Abcc2,3.268253,0.3278268,-2.9404266,0,0
Abcc4,5.253194,4.1491558,-1.1040378,0,0
Abhd2,7.013178,6.071242,-0.9419359,0,0


./out/limma-voom.mrna/170224.rdna_rn18s/NMuMG/170224.NMuMG.limma-voom.mrna.tgfb48--vs--tgfbCX5461100nm.all.txt.gz


Unnamed: 0_level_0,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCtgfbVStgfbCX5461100nm.transcription,FDRtgfbVStgfbCX5461100nm.transcription,PtgfbVStgfbCX5461100nm.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Acox1,5.293247,6.37457,1.081322,0,0
Arhgap23,4.985406,6.017128,1.031722,0,0
Arid3a,3.813112,5.110447,1.297335,0,0
Atf3,6.785541,7.736118,0.950577,0,0
Bcl6,3.075916,4.695217,1.619301,0,0
Cdkn1a,4.881347,6.464424,1.583077,0,0


Unnamed: 0_level_0,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,FDRunt48VStgfb48.transcription,Punt48VStgfb48.transcription,log2FCtgfbVStgfbCX5461100nm.transcription,FDRtgfbVStgfbCX5461100nm.transcription,PtgfbVStgfbCX5461100nm.transcription,unt48.tgfb48.DEtranscription,tgfb48.tgfbCX5461100nm.DEtranscription,reversible.transcription
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Abcc2,3.268253,0.3278268,1.535872,-2.940427,0,0,1.208045,0.0001150507,5.6576e-06,down,up,downUp


# df_translation

In [77]:

# Build df_translation from the 161021 ribosome-profiling limma-voom tables:
# the per-condition log2cpm values plus the log2FC / FDR / P columns of the two
# contrasts (unt48 vs tgfb48 and tgfb48 vs tgfbCX5461100nm), followed by DE
# calls derived from th_log2fc and th_adj_pvalue.

# Read the table "<dir_riboseq>/161021.NMuMG.<strdir>.<suffix>", rename its
# columns to col_names and, when pattern_remove_gene is not NULL, drop the rows
# whose names match it. Returns the resulting data frame.
read_riboseq_table <- function(suffix, col_names) {
  fname_in <- sprintf("%s/161021.NMuMG.%s.%s", dir_riboseq, strdir, suffix)
  verb('%s\n', fname_in)
  df <- read.table(file = fname_in,
                   header = TRUE, sep = "\t", row.names = 1,
                   quote = "", comment.char = "#", stringsAsFactors = FALSE)
  colnames(df) <- col_names
  if (!is.null(pattern_remove_gene)) {
    df <- df[!grepl(pattern_remove_gene, rownames(df)), ]
  }
  df
}

# Column groups carried into the merged table.
cols_log2cpm <- c('unt48.translation', 'tgfb48.translation',
                  'tgfbCX5461100nm.translation')
cols_tgfb <- c('log2FCunt48VStgfb48.translation',
               'FDRunt48VStgfb48.translation',
               'Punt48VStgfb48.translation')
cols_cx <- c('log2FCtgfbVStgfbCX5461100nm.translation',
             'FDRtgfbVStgfbCX5461100nm.translation',
             'PtgfbVStgfbCX5461100nm.translation')

# log2cpm
df_log2cpm <- read_riboseq_table("log2cpm.txt.gz", cols_log2cpm)
head(df_log2cpm)
dim(df_log2cpm)

# unt48 vs tgfb48
df_ribo <- read_riboseq_table(
  "unt48--vs--tgfb48.all.txt.gz",
  c('unt48.translation', 'tgfb48.translation', cols_tgfb)
)
head(df_ribo)
dim(df_ribo)

# tgfb48 vs tgfbCX5461100nm
df_ribo_cx <- read_riboseq_table(
  "tgfb48--vs--tgfbCX5461100nm.all.txt.gz",
  c('tgfb48.translation', 'tgfbCX5461100nm.translation', cols_cx)
)
head(df_ribo_cx)
dim(df_ribo_cx)

# Full join of the three tables on their row names.
# https://adairama.wordpress.com/2017/11/22/how-to-merge-multiple-datasets-in-r-based-on-row-names/
mylist <- lapply(
  list(df_log2cpm[, cols_log2cpm], df_ribo[, cols_tgfb], df_ribo_cx[, cols_cx]),
  function(d) {
    d$ROWNAMES <- rownames(d)
    d
  }
)
df_translation <- plyr::join_all(mylist, by = "ROWNAMES", type = "full")
rownames(df_translation) <- df_translation$ROWNAMES
df_translation$ROWNAMES <- NULL

# DE calls. All three label columns start as 'notSig'.
df_translation$unt48.tgfb48.DEtranslation <- 'notSig'
df_translation$tgfb48.tgfbCX5461100nm.DEtranslation <- 'notSig'
df_translation$reversible.translation <- 'notSig'

lfc_tgfb <- df_translation$log2FCunt48VStgfb48.translation
sig_tgfb <- df_translation$FDRunt48VStgfb48.translation < th_adj_pvalue
lfc_cx <- df_translation$log2FCtgfbVStgfbCX5461100nm.translation
sig_cx <- df_translation$FDRtgfbVStgfbCX5461100nm.translation < th_adj_pvalue

up_tgfb <- sig_tgfb & lfc_tgfb > th_log2fc
dn_tgfb <- sig_tgfb & lfc_tgfb < -th_log2fc
up_cx <- sig_cx & lfc_cx > th_log2fc
dn_cx <- sig_cx & lfc_cx < -th_log2fc

# which() drops NA comparisons, so rows lacking values for a contrast keep
# 'notSig'.
df_translation$unt48.tgfb48.DEtranslation[which(up_tgfb)] <- 'up'
df_translation$unt48.tgfb48.DEtranslation[which(dn_tgfb)] <- 'down'
df_translation$tgfb48.tgfbCX5461100nm.DEtranslation[which(up_cx)] <- 'up'
df_translation$tgfb48.tgfbCX5461100nm.DEtranslation[which(dn_cx)] <- 'down'

# Reversible: the change in the first contrast is undone in the second.
df_translation$reversible.translation[which(up_tgfb & dn_cx)] <- 'upDown'
df_translation$reversible.translation[which(dn_tgfb & up_cx)] <- 'downUp'

head(df_translation)
dim(df_translation)

./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/161021.NMuMG.limma-voom.mrna.log2cpm.txt.gz


Unnamed: 0_level_0,unt48.translation,tgfb48.translation,tgfbCX5461100nm.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
0610009B22Rik,1.4699579,1.1791783,2.2156391
0610010F05Rik,0.383754,0.6242094,-0.1382724
0610012G03Rik,2.0402057,1.5075815,1.5199931
0610040J01Rik,-0.9933001,-1.8930848,-1.9545279
1110002E22Rik,-0.1409433,-0.5588888,-3.3332046
1110002L01Rik,-0.6420859,-0.8959827,-0.7116839


./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/161021.NMuMG.limma-voom.mrna.unt48--vs--tgfb48.all.txt.gz


Unnamed: 0_level_0,unt48.translation,tgfb48.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,Punt48VStgfb48.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Actb,9.0693462,9.471076,0.40173,0,0
Actn1,4.7529615,6.419199,1.666237,0,0
Adam8,1.3825386,3.730572,2.348033,0,0
Adgrg1,3.5475901,4.875399,1.327808,0,0
Ak1,-0.6852402,4.533343,5.218583,0,0
Aldoa,6.7594436,5.63877,-1.120674,0,0


./out/limma-voom.mrna/161021.rdna_rn18s/NMuMG/161021.NMuMG.limma-voom.mrna.tgfb48--vs--tgfbCX5461100nm.all.txt.gz


Unnamed: 0_level_0,tgfb48.translation,tgfbCX5461100nm.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,PtgfbVStgfbCX5461100nm.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Actb,9.471076,9.155017,-0.3160591,0,0
Atf3,5.118301,6.856082,1.737781,0,0
B230208H11Rik,6.059496,4.778324,-1.2811722,0,0
B2m,6.272014,6.994009,0.721995,0,0
Btg2,5.509405,6.353397,0.8439911,0,0
Ccl2,5.869266,6.877332,1.0080662,0,0


Unnamed: 0_level_0,unt48.translation,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,Punt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,PtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0610009B22Rik,1.4699579,1.1791783,2.2156391,-0.2907796,0.5964922,0.38680006,1.03646083,0.02002003,0.002253052,notSig,up,notSig
0610010F05Rik,0.383754,0.6242094,-0.1382724,0.2404554,0.76246,0.59687073,-0.76248178,0.36051466,0.15580591,notSig,notSig,notSig
0610012G03Rik,2.0402057,1.5075815,1.5199931,-0.5326242,0.2004935,0.07022991,0.01241152,0.98882154,0.970293622,notSig,notSig,notSig
0610040J01Rik,-0.9933001,-1.8930848,-1.9545279,-0.8997847,0.5090293,0.29465928,-0.06144307,0.97795512,0.950913146,notSig,notSig,notSig
1110002E22Rik,-0.1409433,-0.5588888,-3.3332046,-0.4179455,0.6805423,0.48861134,-2.77431585,0.04079412,0.00615284,notSig,down,notSig
1110002L01Rik,-0.6420859,-0.8959827,-0.7116839,-0.2538969,0.8454341,0.71661232,0.18429884,0.90322779,0.803521505,notSig,notSig,notSig


# df_all

In [78]:
# Transposed view of the first three annotation rows.
t(df_gtf[seq_len(3), ])

Unnamed: 0,1,4,7
seqnames,1,1,1
start,3073253,3102016,3205901
end,3074322,3102125,3671498
width,1070,110,465598
strand,+,+,-
source,havana,ensembl,ensembl_havana
type,gene,gene,gene
score,,,
phase,,,
gene_id,ENSMUSG00000102693,ENSMUSG00000064842,ENSMUSG00000051951


In [79]:
# Outer-join the transcription and translation tables on their row names.
# merge(by = 0) returns the key in a 'Row.names' column, which is moved back
# into the row names.
df_all <- merge(df_transcription, df_translation, by = 0, all = TRUE)
sym <- df_all[["Row.names"]]
rownames(df_all) <- sym
df_all[["Row.names"]] <- NULL

# Prepend the first six df_gtf columns and append the biotype, matching on
# transcript name for isoform-level runs and on gene name otherwise.
is_isoform <- grepl("isoform", level)
idx <- match(sym, df_gtf[[if (is_isoform) "transcript_name" else "gene_name"]])
df_all <- cbind(df_gtf[idx, 1:6], df_all)
df_all$biotype <- df_gtf[idx, if (is_isoform) "transcript_biotype" else "gene_biotype"]

# cbind() takes its row names from the df_gtf slice; restore the symbols.
rownames(df_all) <- sym

head(df_all)
dim(df_all)


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,Punt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,PtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
0610009B22Rik,11,51685386,51688874,3489,-,ensembl_havana,1.378095,0.558629,0.8116783,-0.81946578,⋯,-0.2907796,0.5964922,0.38680006,1.03646083,0.02002003,0.002253052,notSig,up,notSig,protein_coding
0610009L18Rik,11,120348678,120351190,2513,+,ensembl_havana,-2.252284,-3.1533015,-3.7140633,-0.90101716,⋯,,,,,,,,,,lncRNA
0610010F05Rik,11,23564961,23633639,68679,-,ensembl_havana,3.071791,3.7414762,3.6247997,0.66968552,⋯,0.2404554,0.76246,0.59687073,-0.76248178,0.36051466,0.15580591,notSig,notSig,notSig,protein_coding
0610012G03Rik,16,31947050,31948494,1445,-,havana,2.057036,1.8223029,1.6342654,-0.23473296,⋯,-0.5326242,0.2004935,0.07022991,0.01241152,0.98882154,0.970293622,notSig,notSig,notSig,protein_coding
0610030E20Rik,6,72347317,72353148,5832,+,ensembl_havana,1.809939,1.771047,1.8676255,-0.03889221,⋯,,,,,,,,,,protein_coding
0610038B21Rik,8,77517056,77523898,6843,+,havana,-1.771257,-0.9942376,-1.9819725,0.7770189,⋯,,,,,,,,,,lncRNA


## fix biotype

In [80]:
# Snord43 (ENSMUSG00000105167) was annotated as miRNA; relabel it as snoRNA
# when exactly one row carries that name.
idx <- which(rownames(df_all) %in% "Snord43")

if (length(idx) == 1) {
    df_all$biotype[idx] <- "snoRNA"
}


## write df_all

In [81]:
# Write the combined table as tab-separated text. col.names = NA together with
# row.names = TRUE emits an empty header cell above the row-name column.
fname_out <- sprintf(
    "./data/emt.tables.%s.comprehensive_170224%s_161021%s.txt",
    strdir, rundate_appendix, rundate_appendix
)
write.table(
    df_all,
    file = fname_out,
    sep = "\t",
    quote = FALSE,
    row.names = TRUE,
    col.names = NA
)

# unt48 vs. tgfb48

## detected gene symbols

### rnaseq detection

In [82]:

# Read the full unt48-vs-tgfb48 RNA-seq table; its row names are the gene
# symbols treated as detected in total RNA-seq.
fname_rnaseq <- sprintf("170224.NMuMG.%s.unt48--vs--tgfb48.all.txt.gz", strdir)
df_rnaseq <- read.table(sprintf("%s/%s", dir_rnaseq, fname_rnaseq),
                        header = TRUE, sep = "\t", row.names = 1,
                        quote = "", comment.char = "#",
                        stringsAsFactors = FALSE)

# Optional removal of rows whose names match pattern_remove_gene.
if (!is.null(pattern_remove_gene)) {
  df_rnaseq <- df_rnaseq[!grepl(pattern_remove_gene, rownames(df_rnaseq)),]
}

# df_rnaseq$biotype: looked up from df_all by row name.
idx <- match(rownames(df_rnaseq), rownames(df_all))
anyNA(idx) # should be FALSE
df_rnaseq$biotype <- df_all$biotype[idx]
biotype_rnaseq <- unique(df_rnaseq$biotype)
biotype_rnaseq

head(df_rnaseq)
dim(df_rnaseq)
sym_rnaseq <- rownames(df_rnaseq)
length(sym_rnaseq)

# total genes detected in total RNA-seq
# with GRCm38.97.gtf RSEM: 13013
# with GRCm38.97.gtf HTSEQ: 12914
# with GRCm38.97.rRNA.gtf: 13007


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1700017B05Rik,5.13742,6.2813516,1.1439316,0,0,protein_coding
4930402H24Rik,4.24696,5.314564,1.0676044,0,0,protein_coding
4931406P16Rik,3.317556,4.8805024,1.5629466,0,0,protein_coding
Abcc2,3.268253,0.3278268,-2.9404266,0,0,protein_coding
Abcc4,5.253194,4.1491558,-1.1040378,0,0,protein_coding
Abhd2,7.013178,6.071242,-0.9419359,0,0,protein_coding


### riboseq detection

In [83]:

# Read the full unt48-vs-tgfb48 ribosome-profiling table; its row names are
# the gene symbols treated as detected in ribosome profiling.
fname_riboseq <- sprintf("161021.NMuMG.%s.unt48--vs--tgfb48.all.txt.gz", strdir)
df_riboseq <- read.table(sprintf("%s/%s", dir_riboseq, fname_riboseq),
                         header = TRUE, sep = "\t", row.names = 1,
                         quote = "", comment.char = "#",
                         stringsAsFactors = FALSE)

# Optional removal of rows whose names match pattern_remove_gene.
if (!is.null(pattern_remove_gene)) {
  df_riboseq <- df_riboseq[!grepl(pattern_remove_gene, rownames(df_riboseq)),]
}

# df_riboseq$biotype: looked up from df_all by row name.
idx <- match(rownames(df_riboseq), rownames(df_all))
anyNA(idx) # should be FALSE
df_riboseq$biotype <- df_all$biotype[idx]
biotype_riboseq <- unique(df_riboseq$biotype)
biotype_riboseq


head(df_riboseq)
dim(df_riboseq)

# Symbol count before and after de-duplication.
sym_riboseq <- rownames(df_riboseq)
length(sym_riboseq)
sym_riboseq <- unique(sym_riboseq)
length(sym_riboseq)

# total genes detected in ribosome profiling
# with GRCm38.97.gtf RSEM: 11779
# with GRCm38.97.gtf HTSEQ: 11264
# with GRCm38.97.rRNA.gtf: 11811


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Actb,9.0693462,9.471076,0.40173,0,0,protein_coding
Actn1,4.7529615,6.419199,1.666237,0,0,protein_coding
Adam8,1.3825386,3.730572,2.348033,0,0,protein_coding
Adgrg1,3.5475901,4.875399,1.327808,0,0,protein_coding
Ak1,-0.6852402,4.533343,5.218583,0,0,protein_coding
Aldoa,6.7594436,5.63877,-1.120674,0,0,protein_coding


#### protein_coding only

In [84]:
# Keep only the ribosome-profiling rows whose biotype is exactly
# 'protein_coding'.
f <- grepl('^protein_coding$', df_riboseq[["biotype"]])
df_ribo_wo_pseudo <- df_riboseq[f, ]

head(df_ribo_wo_pseudo)
dim(df_ribo_wo_pseudo)

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Actb,9.0693462,9.471076,0.40173,0,0,protein_coding
Actn1,4.7529615,6.419199,1.666237,0,0,protein_coding
Adam8,1.3825386,3.730572,2.348033,0,0,protein_coding
Adgrg1,3.5475901,4.875399,1.327808,0,0,protein_coding
Ak1,-0.6852402,4.533343,5.218583,0,0,protein_coding
Aldoa,6.7594436,5.63877,-1.120674,0,0,protein_coding


### sym_detected_both

In [85]:
# Symbols present in both the RNA-seq and the ribosome-profiling tables,
# de-duplicated and in RNA-seq order.
sym_detected_both <- unique(sym_rnaseq[sym_rnaseq %in% sym_riboseq])
length(sym_detected_both)

# total genes detected in shared between both datasets
# with GRCm38.97.gtf RSEM: 10212
# with GRCm38.97.gtf HTSEQ: 10150
# with GRCm38.97.rRNA.gtf: 10235

### sym_detected_rnaseq_only

In [86]:
# Symbols found in the RNA-seq table but not in the ribosome-profiling table.
sym_detected_rnaseq_only <- unique(sym_rnaseq[!(sym_rnaseq %in% sym_riboseq)])
length(sym_detected_rnaseq_only)

### sym_detected_riboseq_only

In [87]:
# Symbols found in the ribosome-profiling table but not in the RNA-seq table.
sym_detected_riboseq_only <- unique(sym_riboseq[!(sym_riboseq %in% sym_rnaseq)])
length(sym_detected_riboseq_only)


## df_mrna

In [88]:
# Read the RNA-seq 'diff-all' table for unt48 vs tgfb48, restrict it to the
# genes that df_all labels as DE at the transcription level, and attach the
# biotype.
fname_mrna <- sprintf("170224.NMuMG.%s.unt48--vs--tgfb48.diff-all.txt.gz", strdir)
df_mrna <- read.table(sprintf("%s/%s", dir_rnaseq, fname_mrna),
                      header = TRUE, sep = "\t", row.names = 1,
                      quote = "", comment.char = "#",
                      stringsAsFactors = FALSE)

# Optional removal of rows whose names match pattern_remove_gene.
if (!is.null(pattern_remove_gene)) {
    df_mrna <- df_mrna[!grepl(pattern_remove_gene, rownames(df_mrna)), ]
}
head(df_mrna)
dim(df_mrna)

# Keep the rows named after the df_all entries whose DE call is not 'notSig'.
idx <- which(df_all[["unt48.tgfb48.DEtranscription"]] != "notSig")
df_mrna <- df_mrna[rownames(df_all)[idx], ]

# df_mrna$biotype: looked up from df_all by row name.
idx <- match(rownames(df_mrna), rownames(df_all))
anyNA(idx)  # should be FALSE
df_mrna$biotype <- df_all$biotype[idx]

head(df_mrna)
dim(df_mrna)

# Optional export, controlled by f_write_table.
if (f_write_table) {
    fname_out <- sprintf("./data/table/170224%s.NMuMG.%s.unt48--vs--tgfb48.diff-all.txt",
                         rundate_appendix, strdir)
    write.table(df_mrna, file = fname_out, sep = "\t", quote = FALSE,
                row.names = TRUE, col.names = NA)
}

# Vectors consumed by the up / down / dn3x selections that follow.
vec_log2FC <- df_mrna[["log2FC"]]
vec_fdr <- df_mrna[["FDR"]]


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Itih2,1.640282,-5.128863,-6.769146,2.07e-09,1e-10
Ugt2b34,4.614077,-1.923804,-6.537881,0.0,0.0
Ngef,1.25944,-5.128863,-6.388304,2.34e-08,1.9e-09
Mep1a,2.151093,-4.114984,-6.266077,1.167e-08,9e-10
Serpina1b,4.318216,-1.818996,-6.137213,0.0,0.0
Dio3os,1.667966,-4.212294,-5.88026,8.484e-08,7.8e-09


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610009B22Rik,1.3780948,0.558629,-0.8194658,0.005547618,0.0016759278,protein_coding
0610010F05Rik,3.0717907,3.74147621,0.6696855,3.829709e-05,6.2934e-06,protein_coding
0610040J01Rik,2.0375611,-0.1021995,-2.1397606,3.797e-08,3.2e-09,protein_coding
1110002L01Rik,0.6359643,-0.08954396,-0.7255082,0.03581027,0.0147339873,lncRNA
1110012L19Rik,2.120301,1.74798983,-0.3723112,0.04394907,0.0186986901,protein_coding
1110046J04Rik,-2.5207668,-0.82482908,1.6959377,0.01196216,0.0040565545,lncRNA


### sym_mrna_up

In [89]:
# Up-regulated in RNA-seq: log2FC above th_log2fc with FDR below
# th_adj_pvalue.
f_up <- (vec_log2FC > th_log2fc) & (vec_fdr < th_adj_pvalue)
df_mrna_up <- df_mrna[f_up, ]
dim(df_mrna_up)

sym_mrna_up <- rownames(df_mrna_up)
list_genes$sym_mrna_up <- sym_mrna_up

### sym_mrna_dn

In [90]:
# Down-regulated in RNA-seq: log2FC below -th_log2fc with FDR below
# th_adj_pvalue.
f_dn <- (vec_log2FC < -th_log2fc) & (vec_fdr < th_adj_pvalue)
df_mrna_dn <- df_mrna[f_dn, ]
dim(df_mrna_dn)

sym_mrna_dn <- rownames(df_mrna_dn)
list_genes$sym_mrna_dn <- sym_mrna_dn

### sym_mrna_dn3x

In [91]:
# Roughly three-fold down in RNA-seq: log2FC strictly between -log2(3.15) and
# -log2(2.75), with FDR below th_adj_pvalue.
f_dn3x <- (vec_log2FC < -log2(2.75)) &
          (vec_log2FC > -log2(3.15)) &
          (vec_fdr < th_adj_pvalue)
df_mrna_dn3x <- df_mrna[f_dn3x, ]
sym_mrna_dn3x <- rownames(df_mrna_dn3x)

head(df_mrna_dn3x)
dim(df_mrna_dn3x)

list_genes$sym_mrna_dn3x <- sym_mrna_dn3x

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Areg,4.73056994,3.1890182,-1.541552,0.0,0.0,protein_coding
Arl4a,1.77397773,0.2083347,-1.565643,2.44473e-06,3.042e-07,protein_coding
Arrb1,2.30361308,0.7528758,-1.550737,1.0986e-07,1.03e-08,protein_coding
Bcam,0.04162516,-1.4695714,-1.511197,0.005507664,0.0016617252,protein_coding
Bicd1,2.50891352,1.0085863,-1.500327,1.7096e-07,1.67e-08,protein_coding
Bmp4,1.29905203,-0.2520544,-1.551106,4.041069e-05,6.6657e-06,protein_coding


### sym_mrna

In [92]:
# All RNA-seq DE symbols (up first, then down), and df_mrna restricted to
# them.
sym_mrna <- unique(c(sym_mrna_up, sym_mrna_dn))
length(sym_mrna)

df_mrna <- df_mrna[sym_mrna, , drop = FALSE]

### sym_mrna_no

In [93]:
# Symbols detected in RNA-seq that are not in the DE set.
sym_mrna_no <- unique(sym_rnaseq[!(sym_rnaseq %in% sym_mrna)])
length(sym_mrna_no)

# Not DE in total RNA-seq for control vs. tgfb48
# with GRCm38.97.gtf RSEM: 7732
# with GRCm38.97.gtf HTSEQ: 7630
# with GRCm38.97.rRNA.gtf: 7703

### sym_mrna_de_detected_both

In [94]:
# RNA-seq DE symbols that are also detected in both assays.
sym_mrna_de_detected_both <- unique(sym_mrna[sym_mrna %in% sym_detected_both])
length(sym_mrna_de_detected_both)


## df_ribo

In [95]:
# Read the ribosome-profiling 'diff-all' table for unt48 vs tgfb48, restrict
# it to the genes that df_all labels as DE at the translation level, and
# attach the biotype.
# Earlier location, kept for reference:
# dir_riboseq <- '../data/limma-voom.mrna/161021/NMuMG' fname_ribo <-
# 'emt.161021.limma-voom.mrna.limma.gene.unt48--vs--tgfb48.diff-all.txt'
fname_ribo <- sprintf("161021.NMuMG.%s.unt48--vs--tgfb48.diff-all.txt.gz", strdir)
df_ribo <- read.table(sprintf("%s/%s", dir_riboseq, fname_ribo),
                      header = TRUE, sep = "\t", row.names = 1,
                      quote = "", comment.char = "#",
                      stringsAsFactors = FALSE)

# Optional removal of rows whose names match pattern_remove_gene.
if (!is.null(pattern_remove_gene)) {
    df_ribo <- df_ribo[!grepl(pattern_remove_gene, rownames(df_ribo)), ]
}
head(df_ribo)
dim(df_ribo)

# Keep the rows named after the df_all entries whose DE call is not 'notSig'.
idx <- which(df_all[["unt48.tgfb48.DEtranslation"]] != "notSig")
df_ribo <- df_ribo[rownames(df_all)[idx], ]

# df_ribo$biotype: looked up from df_all by row name.
idx <- match(rownames(df_ribo), rownames(df_all))
anyNA(idx)  # should be FALSE
df_ribo$biotype <- df_all$biotype[idx]

head(df_ribo)
dim(df_ribo)


# Optional export, controlled by f_write_table.
if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.NMuMG.%s.unt48--vs--tgfb48.diff-all.txt",
                         rundate_appendix, strdir)
    write.table(df_ribo, file = fname_out, sep = "\t", quote = FALSE,
                row.names = TRUE, col.names = NA)
}

# Vectors consumed by the up / down / dn3x selections that follow.
vec_log2FC <- df_ribo[["log2FC"]]
vec_fdr <- df_ribo[["FDR"]]


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Akr1c19,2.972913,-3.1463625,-6.119275,3.17e-08,1.1e-09
Lgals4,5.783977,0.4568861,-5.327091,0.0,0.0
Ugt2b34,3.582636,-1.4148791,-4.997515,3.14e-09,1e-10
Itih2,1.16793,-3.6746833,-4.842613,1.734998e-05,1.0367e-06
Tigit,2.073585,-2.5762724,-4.649857,2.40448e-05,1.5041e-06
Ermp1,4.282262,-0.1966184,-4.47888,2e-11,0.0


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1500011B03Rik,0.9749769,-0.639274,-1.6142509,0.02899104,0.0052723729,protein_coding
1700017B05Rik,2.6078233,3.7538244,1.1460011,1.9384e-07,7.8e-09,protein_coding
1700021F05Rik,1.5161538,0.2188761,-1.2972777,0.02097997,0.003539596,protein_coding
1700037H04Rik,1.9977438,0.6242094,-1.3735344,0.007937892,0.0010867306,protein_coding
1810022K09Rik,1.2220342,-0.2602219,-1.4822561,0.03427127,0.0065308584,protein_coding
2200002D01Rik,3.634076,3.0558296,-0.5782464,0.01088532,0.0015660208,protein_coding


### sym_ribo_up

In [96]:
# Rows of df_ribo with FDR below th_adj_pvalue and log2FC above th_log2fc.
f_up <- vec_fdr < th_adj_pvalue & vec_log2FC > th_log2fc
df_ribo_up <- df_ribo[f_up, ]
sym_ribo_up <- rownames(df_ribo_up)
dim(df_ribo_up)

# register the gene set under its own name
list_genes$sym_ribo_up <- sym_ribo_up

### sym_ribo_dn

In [97]:
# Rows of df_ribo with FDR below th_adj_pvalue and log2FC below -th_log2fc.
f_dn <- vec_fdr < th_adj_pvalue & vec_log2FC < -th_log2fc
df_ribo_dn <- df_ribo[f_dn, ]
sym_ribo_dn <- rownames(df_ribo_dn)
dim(df_ribo_dn)

# register the gene set under its own name
list_genes$sym_ribo_dn <- sym_ribo_dn

### sym_ribo_dn3x

In [98]:
# Rows of df_ribo with FDR below th_adj_pvalue and log2FC strictly between
# -log2(3.15) and -log2(2.75), i.e. a decrease of roughly three-fold.
f_dn3x <- vec_fdr < th_adj_pvalue &
    vec_log2FC > -log2(3.15) &
    vec_log2FC < -log2(2.75)
df_ribo_dn3x <- df_ribo[f_dn3x, ]
sym_ribo_dn3x <- rownames(df_ribo_dn3x)
head(df_ribo_dn3x)
dim(df_ribo_dn3x)

list_genes$sym_ribo_dn3x <- sym_ribo_dn3x

Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1500011B03Rik,0.9749769,-0.63927403,-1.614251,0.02899104,0.0052723729,protein_coding
1810022K09Rik,1.2220342,-0.26022186,-1.482256,0.03427127,0.0065308584,protein_coding
Abcc3,3.3512283,1.76084801,-1.59038,1.07649e-06,4.91e-08,protein_coding
Alkbh7,1.485699,0.01557098,-1.470128,0.02775712,0.0050023459,protein_coding
Angpt2,0.8902749,-0.63865979,-1.528935,0.03914492,0.0077623734,protein_coding
Anxa4,6.2716177,4.73604843,-1.535569,0.0,0.0,protein_coding


### sym_ribo

In [99]:
# All ribo DE genes (up or down); df_ribo is then narrowed to exactly those rows.
sym_ribo <- union(sym_ribo_up, sym_ribo_dn)
length(sym_ribo)

# drop = FALSE keeps a data frame; spelled out because `F` is an ordinary,
# reassignable variable rather than a reserved word.
df_ribo <- df_ribo[sym_ribo, , drop = FALSE]

### sym_ribo_no

In [100]:
# Members of sym_riboseq that are not in the ribo DE set sym_ribo.
sym_ribo_no <- setdiff(sym_riboseq, sym_ribo)
length(sym_ribo_no)

# Counts noted for "not DE in ribosome profiling, control vs. tgfb48":
# with GRCm38.97.gtf RSEM: 9633
# with GRCm38.97.gtf HTSEQ: 8497
# with GRCm38.97.rRNA.gtf: 9659

### sym_ribo_de_detected_both

In [101]:
# Members of sym_ribo that are also in sym_detected_both, and how many there are.
sym_ribo_de_detected_both <- intersect(sym_ribo, sym_detected_both)
length(sym_ribo_de_detected_both)

## biotype

In [102]:
# Distinct biotypes present in df_ribo, then the number of processed pseudogenes
# (missing biotypes are not counted).
unique(df_ribo$biotype)
sum(df_ribo$biotype == "processed_pseudogene", na.rm = TRUE)

## protein_coding only

In [103]:
# Keep only rows whose biotype is exactly "protein_coding"
# (a missing biotype counts as no match).
f <- df_ribo$biotype %in% "protein_coding"
df_ribo_wo_pseudo <- df_ribo[f, ]
head(df_ribo_wo_pseudo)
dim(df_ribo_wo_pseudo)


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1700017B05Rik,2.607823,3.753824,1.1460011,1.9384e-07,7.8e-09,protein_coding
4930402H24Rik,2.075533,3.447984,1.3724509,1.995e-07,8.1e-09,protein_coding
4931406P16Rik,1.359496,2.584558,1.2250627,0.0002122674,1.69313e-05,protein_coding
Abcf3,2.063664,2.720818,0.6571541,0.03310276,0.006246804,protein_coding
Abcg2,1.241131,2.26315,1.0220188,0.004737738,0.0005936487,protein_coding
Abl1,2.151249,2.763326,0.612077,0.03859921,0.0076093772,protein_coding


## shared between both rnaseq and riboseq

### sym_mrna_ribo_no

In [104]:
# Genes present in both sym_mrna_no and sym_ribo_no.
sym_mrna_ribo_no <- intersect(sym_mrna_no, sym_ribo_no)
length(sym_mrna_ribo_no)

# Counts noted for "not DE in both platforms, control vs. tgfb48":
# with GRCm38.97.gtf RSEM: 5351
# with GRCm38.97.gtf HTSEQ: 5027
# with GRCm38.97.rRNA.gtf: 5351

### sym_mrna_up_ribo_up

In [105]:
# Genes present in both sym_mrna_up and sym_ribo_up; stored in list_genes.
# NOTE(review): there is no explicit intersect with sym_detected_both here.
sym_mrna_up_ribo_up <- intersect(sym_mrna_up, sym_ribo_up)
length(sym_mrna_up_ribo_up)

list_genes[['sym_mrna_up_ribo_up']] <- sym_mrna_up_ribo_up

### sym_mrna_dn_ribo_dn

In [106]:
# Genes present in both sym_mrna_dn and sym_ribo_dn; stored in list_genes.
# NOTE(review): there is no explicit intersect with sym_detected_both here.
sym_mrna_dn_ribo_dn <- intersect(sym_mrna_dn, sym_ribo_dn)
length(sym_mrna_dn_ribo_dn)

list_genes[['sym_mrna_dn_ribo_dn']] <- sym_mrna_dn_ribo_dn

### sym_mrna_up_ribo_dn

In [107]:
# Genes present in both sym_mrna_up and sym_ribo_dn (opposite directions);
# the genes and their count are displayed, then stored in list_genes.
sym_mrna_up_ribo_dn <- intersect(sym_mrna_up, sym_ribo_dn)
sym_mrna_up_ribo_dn
length(sym_mrna_up_ribo_dn)

list_genes[['sym_mrna_up_ribo_dn']] <- sym_mrna_up_ribo_dn

### sym_mrna_dn_ribo_up

In [108]:
# Genes present in both sym_mrna_dn and sym_ribo_up (opposite directions);
# the genes and their count are displayed, then stored in list_genes.
sym_mrna_dn_ribo_up <- intersect(sym_mrna_dn, sym_ribo_up)
sym_mrna_dn_ribo_up
length(sym_mrna_dn_ribo_up)

list_genes[['sym_mrna_dn_ribo_up']] <- sym_mrna_dn_ribo_up

## transcription & translation total

In [109]:

# Sizes of the two DE sets, then their overlap.
length(sym_mrna)
length(sym_ribo)

# genes present in both sym_mrna and sym_ribo
# (sym_mrna_de_ribo_de is assigned again in the next cell)
sym_mrna_de_ribo_de <- intersect(sym_mrna, sym_ribo)
length(sym_mrna_de_ribo_de)


In [110]:
# Same overlap, computed from the two sets that were already intersected
# with sym_detected_both; this overwrites sym_mrna_de_ribo_de.
length(sym_mrna_de_detected_both)
length(sym_ribo_de_detected_both)
sym_mrna_de_ribo_de <- intersect(sym_mrna_de_detected_both, sym_ribo_de_detected_both)
length(sym_mrna_de_ribo_de)


### validation

In [111]:
# Total size of the four direction-specific overlaps; for comparison with
# length(sym_mrna_de_ribo_de).
sum(lengths(list(sym_mrna_up_ribo_up, sym_mrna_dn_ribo_dn,
                 sym_mrna_up_ribo_dn, sym_mrna_dn_ribo_up)))

## df_mrna_only

In [112]:
# Genes in sym_mrna that are not in sym_ribo.
sym_mrna_only <- setdiff(sym_mrna, sym_ribo)
length(sym_mrna_only)

# subsetting df_mrna to those genes (rows are selected by row name)
df_mrna_only <- df_mrna[sym_mrna_only,]
head(df_mrna_only)
dim(df_mrna_only)



Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610010F05Rik,3.0717907,3.74147621,0.6696855,3.829709e-05,6.2934e-06,protein_coding
1110046J04Rik,-2.5207668,-0.82482908,1.6959377,0.01196216,0.0040565545,lncRNA
1500009C09Rik,-5.1302054,-2.15308879,2.9771166,0.003631782,0.001029696,protein_coding
1700001C19Rik,-4.6018846,-1.59908837,3.0027962,0.00434829,0.0012624286,protein_coding
1700006J14Rik,-0.9045477,-0.07321008,0.8313376,0.04730304,0.0204150721,lncRNA
1700018A04Rik,-2.1593318,0.44183829,2.6011701,6.289988e-05,1.09988e-05,lncRNA


### check riboseq FDR

In [113]:
# Look up the mRNA-only genes in the full riboseq table (df_riboseq).
f <- rownames(df_riboseq) %in% rownames(df_mrna_only)
head(df_riboseq[f, ])

# TRUE if any of those rows passes both the log2FC and the FDR threshold
any(with(df_riboseq[f, ], abs(log2FC) > th_log2fc & FDR < th_adj_pvalue))


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Atp5a1,6.543008,6.299365,-0.2436438,0.004166421,0.0005099767,protein_coding
Krt8,6.465768,6.655029,0.1892606,0.01798466,0.0028881872,protein_coding
Eef1b2,5.699159,5.448913,-0.2502463,0.037826085,0.0073801643,protein_coding
Fam160b1,2.483063,3.033937,0.5508746,0.050046192,0.0106354983,protein_coding
Mrpl21,3.06311,2.487265,-0.5758453,0.050046192,0.0106401988,protein_coding
Phip,1.741018,2.417732,0.6767138,0.050046192,0.0106329403,protein_coding


In [114]:
# Re-point the shared vectors at df_mrna_only; the next cells threshold on them.
vec_log2FC <- df_mrna_only$log2FC
vec_fdr <- df_mrna_only$FDR


### sym_mrna_up_ribo_no

In [115]:
# Rows of df_mrna_only with FDR below th_adj_pvalue and log2FC above th_log2fc.
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_mrna_only_up <- df_mrna_only[f_up, ]
sym_mrna_up_ribo_no <- rownames(df_mrna_only_up)
dim(df_mrna_only_up)

# keep only genes that are also in sym_detected_both
sym_mrna_up_ribo_no <- intersect(sym_mrna_up_ribo_no, sym_detected_both)
length(sym_mrna_up_ribo_no)

list_genes$sym_mrna_up_ribo_no <- sym_mrna_up_ribo_no

### sym_mrna_dn_ribo_no

In [116]:
# Rows of df_mrna_only with FDR below th_adj_pvalue and log2FC below -th_log2fc.
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_mrna_only_dn <- df_mrna_only[f_dn, ]
sym_mrna_dn_ribo_no <- rownames(df_mrna_only_dn)
dim(df_mrna_only_dn)

# keep only genes that are also in sym_detected_both
sym_mrna_dn_ribo_no <- intersect(sym_mrna_dn_ribo_no, sym_detected_both)
length(sym_mrna_dn_ribo_no)

list_genes$sym_mrna_dn_ribo_no <- sym_mrna_dn_ribo_no

### sym_mrna_only (=sym_mrna_de_ribo_no)

In [117]:
# Union of the up and down mRNA-only gene sets.
sym_mrna_de_ribo_no <- union(sym_mrna_up_ribo_no, sym_mrna_dn_ribo_no)
length(sym_mrna_de_ribo_no)

# here, sym_mrna_only is redefined to that union (it was previously
# setdiff(sym_mrna, sym_ribo)).
sym_mrna_only <- sym_mrna_de_ribo_no
length(sym_mrna_only)

# rebuild df_mrna_only for the redefined gene set; optionally write it to disk
df_mrna_only <- df_mrna[sym_mrna_only, ]
if (f_write_table) {
    fname_out <- sprintf("./data/table/170224%s.NMuMG.%s.unt48--vs--tgfb48.diff-all.transcriptionONLY.txt", 
        rundate_appendix, strdir)
    write.table(df_mrna_only, file = fname_out, row.names = TRUE, col.names = NA, 
        sep = "\t", quote = FALSE)
}

##  df_ribo_only

In [118]:
# Genes in sym_ribo that are not in sym_mrna.
sym_ribo_only <- setdiff(sym_ribo, sym_mrna)
length(sym_ribo_only)

# sanity check: no gene matching pattern_remove_gene should remain
if (!is.null(pattern_remove_gene)) {
  any(grepl(pattern_remove_gene, sym_ribo_only)) # should be FALSE
}

# subsetting df_ribo to those genes (rows are selected by row name)
df_ribo_only <- df_ribo[sym_ribo_only,]

head(df_ribo_only)
dim(df_ribo_only)


Unnamed: 0_level_0,unt48,tgfb48,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
4931419H13Rik,-0.2645954,1.0148059,1.2794013,0.0396473,0.0079040669,lncRNA
Abcf3,2.0636635,2.7208177,0.6571541,0.03310276,0.006246804,protein_coding
Acbd3,2.3254237,2.971784,0.6463603,0.02154767,0.0036626302,protein_coding
Acta2,-2.7982033,0.5463877,3.344591,0.001598918,0.000167884,protein_coding
Adamts15,-2.9268356,-0.5595413,2.3672943,0.04458295,0.0091424583,protein_coding
Agrn,5.6241352,6.0133807,0.3892455,7.707763e-05,5.4326e-06,protein_coding


In [119]:
# How many of the ribo-only genes are also in sym_detected_both.
length(intersect(sym_ribo_only, sym_detected_both))


In [120]:
# Re-point the shared vectors at df_ribo_only; the next cells threshold on them.
vec_log2FC <- df_ribo_only$log2FC
vec_fdr <- df_ribo_only$FDR


### sym_mrna_no_ribo_up

In [121]:
# Rows of df_ribo_only with FDR below th_adj_pvalue and log2FC above th_log2fc.
f_up <- vec_fdr < th_adj_pvalue & vec_log2FC > th_log2fc
df_ribo_only_up <- df_ribo_only[f_up, ]
sym_mrna_no_ribo_up <- rownames(df_ribo_only_up)
dim(df_ribo_only_up)

# keep only genes that are also in sym_detected_both
sym_mrna_no_ribo_up <- intersect(sym_mrna_no_ribo_up, sym_detected_both)
length(sym_mrna_no_ribo_up)

list_genes$sym_mrna_no_ribo_up <- sym_mrna_no_ribo_up

### sym_mrna_no_ribo_dn

In [122]:
# Rows of df_ribo_only with FDR below th_adj_pvalue and log2FC below -th_log2fc.
f_dn <- vec_fdr < th_adj_pvalue & vec_log2FC < -th_log2fc
df_ribo_only_dn <- df_ribo_only[f_dn, ]
sym_mrna_no_ribo_dn <- rownames(df_ribo_only_dn)
dim(df_ribo_only_dn)

# keep only genes that are also in sym_detected_both
sym_mrna_no_ribo_dn <- intersect(sym_mrna_no_ribo_dn, sym_detected_both)
length(sym_mrna_no_ribo_dn)

list_genes$sym_mrna_no_ribo_dn <- sym_mrna_no_ribo_dn

### sym_ribo_only (=sym_mrna_no_ribo_de)

In [123]:
# Union of the up and down ribo-only gene sets.
sym_mrna_no_ribo_de <- union(sym_mrna_no_ribo_up, sym_mrna_no_ribo_dn)
length(sym_mrna_no_ribo_de)

# here, sym_ribo_only is redefined to that union (it was previously
# setdiff(sym_ribo, sym_mrna)); both inputs were intersected with
# sym_detected_both when they were built.
sym_ribo_only <- sym_mrna_no_ribo_de
length(sym_ribo_only)

# rebuild df_ribo_only for the redefined gene set
df_ribo_only <- df_ribo[sym_ribo_only, ]

# optionally write the translation-only table to disk
if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.NMuMG.%s.unt48--vs--tgfb48.diff-all.translationONLY.txt", 
        rundate_appendix, strdir)
    write.table(df_ribo_only, file = fname_out, row.names = TRUE, col.names = NA, 
        sep = "\t", quote = FALSE)
}

## biotype

In [124]:
# Every biotype in df_all, then the genes whose biotype contains "pseudo".
unique(df_all$biotype)
f_pseudo <- grepl("pseudo", df_all$biotype)
sym_pseudo <- rownames(df_all)[f_pseudo]

# sizes of the ribo-only up / down sets once those genes are excluded
length(setdiff(sym_mrna_no_ribo_up, sym_pseudo))
length(setdiff(sym_mrna_no_ribo_dn, sym_pseudo))

## no DE

In [125]:
# Size of sym_detected_both.
n_detected <- length(sym_detected_both)
n_detected

# genes that are DE in at least one of the two (detected-both) DE sets
sym_de_both <- union(sym_mrna_de_detected_both, sym_ribo_de_detected_both)

# members of sym_detected_both that are in neither DE set
sym_no_de_both <- setdiff(sym_detected_both, sym_de_both)
length(sym_no_de_both)


### validation

In [126]:
# DE in both & detected in both platforms: these two overlaps are displayed so
# they can be checked by eye.
intersect(sym_mrna_only, sym_mrna_de_ribo_de)
intersect(sym_ribo_only, sym_mrna_de_ribo_de)

# all DE genes = mRNA-only + ribo-only + DE in both
sym_de <- union(sym_mrna_only, sym_ribo_only)
sym_de <- Reduce(union, list(sym_de, sym_mrna_de_ribo_de))
n_de <- length(sym_de)
n_de

# second route to the "no DE" set
sym_no_de_both1 <- setdiff(sym_detected_both, sym_de)
length(sym_no_de_both1)

# TRUE when both routes give the same set
setequal(sym_no_de_both, sym_no_de_both1)

# tgfb48 vs tgfbCX5461

## df_mrna2

In [127]:
# Load the RNA-seq tgfb48-vs-tgfbCX5461100nm table, keep the rows whose
# "tgfb48.tgfbCX5461100nm.DEtranscription" call in df_all is not "notSig",
# and attach the biotype.
fname_in <- sprintf("170224.NMuMG.%s.tgfb48--vs--tgfbCX5461100nm.diff-all.txt.gz", 
    strdir)
df_mrna2 <- read.table(file = sprintf("%s/%s", dir_rnaseq, fname_in), header = TRUE,
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = FALSE)

if (!is.null(pattern_remove_gene)) {
    # print() is needed here: a bare dim() that is not the last expression of a
    # braced block is evaluated and thrown away, so nothing would be shown.
    print(dim(df_mrna2))
    df_mrna2 <- df_mrna2[!grepl(pattern_remove_gene, rownames(df_mrna2)), ]
}
head(df_mrna2)
dim(df_mrna2)

# restrict to the genes that df_all does not call "notSig" for transcription
idx <- which(df_all[, "tgfb48.tgfbCX5461100nm.DEtranscription"] != "notSig")
df_mrna2 <- df_mrna2[rownames(df_all[idx, ]), ]

# df_mrna2$biotype
idx <- match(rownames(df_mrna2), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_mrna2$biotype <- df_all[idx, "biotype"]

head(df_mrna2)
dim(df_mrna2)

if (f_write_table) {
    # NOTE(review): this output name says "tgfb48cx5461100nm" while the input file
    # and the riboseq table use "tgfbCX5461100nm"; left unchanged in case other
    # code reads this exact path. Confirm and align.
    fname_out <- sprintf("./data/table/170224%s.NMuMG.%s.tgfb48--vs--tgfb48cx5461100nm.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_mrna2, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

Unnamed: 0_level_0,tgfb48,tgfbCX5461100nm,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Hist1h2ai,-1.823194,-4.604358,-2.781164,0.008573554,0.0011957694
Thsd7a,-1.218782,-3.664414,-2.445632,0.01237485,0.0019252754
Stc1,-1.763874,-4.162247,-2.398372,0.01494062,0.0024697456
Pard3b,1.261556,-1.081571,-2.343127,2.7206e-07,4.3e-09
Spr-ps1,-2.117083,-4.434833,-2.31775,0.03347868,0.0070919507
Cdk14,-1.258622,-3.525566,-2.266943,0.009614947,0.0013871793


Unnamed: 0_level_0,tgfb48,tgfbCX5461100nm,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1110002E22Rik,2.72632752,3.308517,0.5821899,0.0001237559,6.1635e-06,protein_coding
1110008L16Rik,1.89353341,1.480044,-0.4134898,0.04614794,0.0110929453,protein_coding
1110059E24Rik,2.54356606,1.975447,-0.5681191,0.00101522,7.92489e-05,protein_coding
1600014C10Rik,1.75873637,1.097187,-0.6615492,0.0089124,0.001261664,protein_coding
1700006J14Rik,-0.07321008,1.258784,1.3319937,7.054605e-05,3.1461e-06,lncRNA
1700007J10Rik,-1.74280455,-0.264305,1.4784996,0.007614453,0.0010272112,lncRNA


In [128]:
# Re-point the shared vectors at df_mrna2; the next cells threshold on them.
vec_log2FC <- df_mrna2$log2FC
vec_fdr <- df_mrna2$FDR


### sym_mrna2_up

In [129]:
# Rows of df_mrna2 with FDR below th_adj_pvalue and log2FC above th_log2fc.
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_mrna2_up <- df_mrna2[f_up, ]
sym_mrna2_up <- rownames(df_mrna2_up)
dim(df_mrna2_up)

list_genes$sym_mrna2_up <- sym_mrna2_up

### sym_mrna2_dn

In [130]:

# Rows of df_mrna2 with FDR below th_adj_pvalue and log2FC below -th_log2fc.
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_mrna2_dn <- df_mrna2[f_dn, ]
sym_mrna2_dn <- rownames(df_mrna2_dn)
dim(df_mrna2_dn)

list_genes$sym_mrna2_dn <- sym_mrna2_dn

### sym_mrna2

In [131]:
# All mRNA DE genes for the second comparison (up or down); df_mrna2 is then
# narrowed to exactly those rows.
sym_mrna2 <- union(sym_mrna2_up, sym_mrna2_dn)
length(sym_mrna2)

# drop = FALSE keeps a data frame; spelled out because `F` is an ordinary,
# reassignable variable rather than a reserved word.
df_mrna2 <- df_mrna2[sym_mrna2, , drop = FALSE]

### sym_mrna2_no

In [132]:
# Members of sym_rnaseq that are not in the mRNA DE set sym_mrna2.
sym_mrna2_no <- setdiff(sym_rnaseq, sym_mrna2)
length(sym_mrna2_no)

# Counts noted for "not DE in total RNA-seq, tgfb48 vs. tgfbCX":
# with GRCm38.97.gtf RSEM: 10447
# with GRCm38.97.gtf HTSEQ: 10125
# with GRCm38.97.rRNA.gtf: 10456

## df_ribo2

In [133]:
# Load the ribosome-profiling tgfb48-vs-tgfbCX5461100nm table, keep the rows whose
# "tgfb48.tgfbCX5461100nm.DEtranslation" call in df_all is not "notSig",
# and attach the biotype.
#
# Previous input location (kept for reference):
#   dir_riboseq <- '../data/limma-voom.mrna/161021/NMuMG'
#   fname_ribo <- 'emt.161021.limma-voom.mrna.limma.gene.tgfb48--vs--tgfbCX5461100nm.diff-all.txt'
fname_ribo <- sprintf("161021.NMuMG.%s.tgfb48--vs--tgfbCX5461100nm.diff-all.txt.gz", 
    strdir)

df_ribo2 <- read.table(file = sprintf("%s/%s", dir_riboseq, fname_ribo), header = TRUE,
    sep = "\t", row.names = 1, quote = "", comment.char = "#", stringsAsFactors = FALSE)

if (!is.null(pattern_remove_gene)) {
    # print() is needed here: a bare dim() that is not the last expression of a
    # braced block is evaluated and thrown away, so nothing would be shown.
    print(dim(df_ribo2))
    df_ribo2 <- df_ribo2[!grepl(pattern_remove_gene, rownames(df_ribo2)), ]
}
head(df_ribo2)
dim(df_ribo2)

# restrict to the genes that df_all does not call "notSig" for translation
idx <- which(df_all[, "tgfb48.tgfbCX5461100nm.DEtranslation"] != "notSig")
df_ribo2 <- df_ribo2[rownames(df_all[idx, ]), ]

# df_ribo2$biotype
idx <- match(rownames(df_ribo2), rownames(df_all))
any(is.na(idx))  # should be FALSE
df_ribo2$biotype <- df_all[idx, "biotype"]

head(df_ribo2)
dim(df_ribo2)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, rownames(df_ribo2)))  # should be FALSE
}

if (f_write_table) {
    fname_out <- sprintf("./data/table/161021%s.NMuMG.%s.tgfb48--vs--tgfbCX5461100nm.diff-all.txt", 
        rundate_appendix, strdir)
    write.table(df_ribo2, file = fname_out, row.names = TRUE, col.names = NA, sep = "\t", 
        quote = FALSE)
}

Unnamed: 0_level_0,tgfb48,tgfbCX5461100nm,log2FC,FDR,p.value
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Gm23238,4.609661,-3.1367895,-7.74645,6e-11,0.0
Gm15564,5.859788,-0.8713795,-6.731167,0.0,0.0
Gm23540,4.635973,-1.5286723,-6.164646,5e-11,0.0
Gm44456,2.297554,-3.8670543,-6.164608,7.847e-08,1.1e-09
Mirlet7c-1,1.713583,-3.8670543,-5.580638,1.56063e-06,2.96e-08
Megf6,2.205295,-3.3447488,-5.550044,6.3011e-07,1.07e-08


Unnamed: 0_level_0,tgfb48,tgfbCX5461100nm,log2FC,FDR,p.value,biotype
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0610009B22Rik,1.1791783,2.2156391,1.0364608,0.02002003,0.0022530517,protein_coding
1110002E22Rik,-0.5588888,-3.3332046,-2.7743158,0.04079412,0.00615284,protein_coding
1700021F05Rik,0.2188761,1.738751,1.5198749,0.01005345,0.0009107691,protein_coding
1810013L24Rik,3.7417428,4.6583561,0.9166133,9.3442e-07,1.63e-08,protein_coding
1810026B05Rik,2.5782959,3.3488717,0.7705758,0.007621297,0.0006249224,lncRNA
1810037I17Rik,1.3561817,-0.3633797,-1.7195614,0.02539849,0.0031625955,protein_coding


In [134]:
# Re-point the shared vectors at df_ribo2; the next cells threshold on them.
vec_log2FC <- df_ribo2$log2FC
vec_fdr <- df_ribo2$FDR


### sym_ribo2_up

In [135]:
# Rows of df_ribo2 with FDR below th_adj_pvalue and log2FC above th_log2fc.
f_up <- (vec_fdr < th_adj_pvalue) & (vec_log2FC > th_log2fc)
df_ribo2_up <- df_ribo2[f_up, ]
sym_ribo2_up <- rownames(df_ribo2_up)
dim(df_ribo2_up)

list_genes$sym_ribo2_up <- sym_ribo2_up

### sym_ribo2_dn

In [136]:
# Rows of df_ribo2 with FDR below th_adj_pvalue and log2FC below -th_log2fc.
f_dn <- (vec_fdr < th_adj_pvalue) & (vec_log2FC < -th_log2fc)
df_ribo2_dn <- df_ribo2[f_dn, ]
sym_ribo2_dn <- rownames(df_ribo2_dn)
dim(df_ribo2_dn)

list_genes$sym_ribo2_dn <- sym_ribo2_dn

### sym_ribo2

In [137]:
# All ribo DE genes for the second comparison (up or down); df_ribo2 is then
# narrowed to exactly those rows.
sym_ribo2 <- union(sym_ribo2_up, sym_ribo2_dn)
length(sym_ribo2)

# drop = FALSE keeps a data frame; spelled out because `F` is an ordinary,
# reassignable variable rather than a reserved word.
df_ribo2 <- df_ribo2[sym_ribo2, , drop = FALSE]

### sym_ribo2_no

In [138]:
# Members of sym_riboseq that are not in the ribo DE set sym_ribo2.
sym_ribo2_no <- setdiff(sym_riboseq, sym_ribo2)
length(sym_ribo2_no)

# Counts noted for "not DE in ribosome profiling, tgfb48 vs. tgfbCX":
# with GRCm38.97.gtf RSEM: 10009
# with GRCm38.97.gtf HTSEQ: 8958
# with GRCm38.97.rRNA.gtf: 9984

## shared between both rnaseq and riboseq

### sym_mrna2_no_ribo2_no

In [139]:
# Genes in both sym_mrna2_no and sym_ribo2_no; the count is shown before and
# after restricting to sym_detected_both.
sym_mrna2_no_ribo2_no <- intersect(sym_mrna2_no, sym_ribo2_no)
length(sym_mrna2_no_ribo2_no)
sym_mrna2_no_ribo2_no <- intersect(sym_mrna2_no_ribo2_no, sym_detected_both)
length(sym_mrna2_no_ribo2_no)

# Counts noted for "not DE in both platforms, tgfb48 vs. tgfbCX":
# with GRCm38.97.gtf RSEM: 6920
# with GRCm38.97.gtf HTSEQ: 6379
# with GRCm38.97.rRNA.gtf: 6917

### sym_mrna2_up_ribo2_no

In [140]:
# Genes in sym_mrna2_up that are not in sym_ribo2; the count is shown before and
# after restricting to sym_detected_both, then the set is stored in list_genes.
sym_mrna2_up_ribo2_no <- setdiff(sym_mrna2_up, sym_ribo2)
length(sym_mrna2_up_ribo2_no)
sym_mrna2_up_ribo2_no <- intersect(sym_mrna2_up_ribo2_no, sym_detected_both)
length(sym_mrna2_up_ribo2_no)

list_genes[['sym_mrna2_up_ribo2_no']] <- sym_mrna2_up_ribo2_no

### sym_mrna2_dn_ribo2_no

In [141]:
# Genes in sym_mrna2_dn that are not in sym_ribo2; the count is shown before and
# after restricting to sym_detected_both, then the set is stored in list_genes.
sym_mrna2_dn_ribo2_no <- setdiff(sym_mrna2_dn, sym_ribo2)
length(sym_mrna2_dn_ribo2_no)
sym_mrna2_dn_ribo2_no <- intersect(sym_mrna2_dn_ribo2_no, sym_detected_both)
length(sym_mrna2_dn_ribo2_no)

list_genes[['sym_mrna2_dn_ribo2_no']] <- sym_mrna2_dn_ribo2_no

### sym_mrna2_no_ribo2_up

In [142]:
# Genes in sym_ribo2_up that are not in sym_mrna2; the count is shown before and
# after restricting to sym_detected_both, then the set is stored in list_genes.
sym_mrna2_no_ribo2_up <- setdiff(sym_ribo2_up, sym_mrna2)
length(sym_mrna2_no_ribo2_up)
sym_mrna2_no_ribo2_up <- intersect(sym_mrna2_no_ribo2_up, sym_detected_both)
length(sym_mrna2_no_ribo2_up)

list_genes[['sym_mrna2_no_ribo2_up']] <- sym_mrna2_no_ribo2_up

### sym_mrna2_no_ribo2_dn

In [143]:
# Genes in sym_ribo2_dn that are not in sym_mrna2; the count is shown before and
# after restricting to sym_detected_both, then the set is stored in list_genes.
sym_mrna2_no_ribo2_dn <- setdiff(sym_ribo2_dn, sym_mrna2)
length(sym_mrna2_no_ribo2_dn)
sym_mrna2_no_ribo2_dn <- intersect(sym_mrna2_no_ribo2_dn, sym_detected_both)
length(sym_mrna2_no_ribo2_dn)

list_genes[['sym_mrna2_no_ribo2_dn']] <- sym_mrna2_no_ribo2_dn

### sym_mrna2_up_ribo2_up

In [144]:
# Genes in both sym_mrna2_up and sym_ribo2_up; shown, counted, stored in list_genes.
sym_mrna2_up_ribo2_up <- intersect(sym_mrna2_up, sym_ribo2_up)
sym_mrna2_up_ribo2_up
length(sym_mrna2_up_ribo2_up)

list_genes[['sym_mrna2_up_ribo2_up']] <- sym_mrna2_up_ribo2_up

### sym_mrna2_dn_ribo2_dn


In [145]:
# Genes in both sym_mrna2_dn and sym_ribo2_dn; shown, counted, stored in list_genes.
sym_mrna2_dn_ribo2_dn <- intersect(sym_mrna2_dn, sym_ribo2_dn)
sym_mrna2_dn_ribo2_dn
length(sym_mrna2_dn_ribo2_dn)

list_genes[['sym_mrna2_dn_ribo2_dn']] <- sym_mrna2_dn_ribo2_dn

### sym_mrna2_up_ribo2_dn

In [146]:
# Genes in both sym_mrna2_up and sym_ribo2_dn; shown, counted, stored in list_genes.
sym_mrna2_up_ribo2_dn <- intersect(sym_mrna2_up, sym_ribo2_dn)
sym_mrna2_up_ribo2_dn
length(sym_mrna2_up_ribo2_dn)

list_genes[['sym_mrna2_up_ribo2_dn']] <- sym_mrna2_up_ribo2_dn

### sym_mrna2_dn_ribo2_up

In [147]:
# Genes in both sym_mrna2_dn and sym_ribo2_up; shown, counted, stored in list_genes.
sym_mrna2_dn_ribo2_up <- intersect(sym_mrna2_dn, sym_ribo2_up)
sym_mrna2_dn_ribo2_up
length(sym_mrna2_dn_ribo2_up)

list_genes[['sym_mrna2_dn_ribo2_up']] <- sym_mrna2_dn_ribo2_up

# unt48 vs. tgfb48 vs. tgfbCX5461

## sym_mrna1_no_mrna2_no

In [148]:
# Genes in both sym_mrna_no and sym_mrna2_no.
sym_mrna1_no_mrna2_no <- intersect(sym_mrna_no, sym_mrna2_no)
length(sym_mrna1_no_mrna2_no)

# Counts noted for "no DE in either comparison, total RNA-seq":
# with GRCm38.97.gtf RSEM: 6507
# with GRCm38.97.gtf HTSEQ: 6244
# with GRCm38.97.rRNA.gtf: 6487

## sym_ribo1_no_ribo2_no

In [149]:
# Genes in both sym_ribo_no and sym_ribo2_no.
sym_ribo1_no_ribo2_no <- intersect(sym_ribo_no, sym_ribo2_no)
length(sym_ribo1_no_ribo2_no)

# Counts noted for "no DE in either comparison, ribosome profiling":
# with GRCm38.97.gtf RSEM: 8726
# with GRCm38.97.gtf HTSEQ: 7228
# with GRCm38.97.rRNA.gtf: 8720

## sym_mrna_no_ribo_no

In [150]:
# Genes in both sym_mrna1_no_mrna2_no and sym_ribo1_no_ribo2_no.
sym_mrna_no_ribo_no <- intersect(sym_mrna1_no_mrna2_no, sym_ribo1_no_ribo2_no)
length(sym_mrna_no_ribo_no)

# Counts noted for "no DE in either comparison, on both platforms":
# with GRCm38.97.gtf RSEM: 4019
# with GRCm38.97.gtf HTSEQ: 3516
# with GRCm38.97.rRNA.gtf: 4015

## more complex intersections


### up/dn/only ; no DE

In [151]:
# For every pair (var1, var2), intersect the global vectors sym_<var1> and
# sym_<var2>, restrict the result to sym_detected_both, store it in list_genes
# under 'sym_<var1>_<var2>', and report its size plus the first max_ngenes genes.
vec1_var <- c('mrna_up_ribo_up', 'mrna_dn_ribo_dn', 'mrna_up_ribo_dn', 'mrna_dn_ribo_up',
              'mrna_up_ribo_no', 'mrna_dn_ribo_no', 'mrna_no_ribo_up', 'mrna_no_ribo_dn')

vec2_var <- c('ribo2_no')

max_ngenes <- 50
for (var1 in vec1_var) {
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      # look the two vectors up by name with get() rather than eval(parse(text = ...))
      list_genes[[strvar]] <- intersect(
          intersect(get(sprintf('sym_%s', var1)), get(sprintf('sym_%s', var2))),
          sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]], max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          # mark that the printed gene list was truncated
          strgene <- paste0(strgene, ',...')
      }
      # verb() is defined outside this section (presumably a printf-style logger)
      verb('%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

sym_mrna_up_ribo_up_ribo2_no: 599: 1700017B05Rik,4930402H24Rik,4931406P16Rik,Abl1,Abr,Abracl,Acadl,Acadvl,Actn4,Actr1a,Acvr1,Adam12,Adam15,Adarb1,Add2,Adgra1,Adgrl1,Adssl1,Afap1,Aff1,Aif1l,Aldh1l1,Alms1,Angptl2,Ankrd1,Ankrd44,Ano1,Anp32a,Anxa5,Anxa8,Aopep,Ap2a1,Ap2b1,Ap5b1,Ap5z1,Apbb2,Apob,Arf4,Arfgef2,Arhgap23,Arhgap42,Arhgef5,Arvcf,Ascc3,Atg7,Atp10a,Atp10d,Atp1a3,Atp6ap1,Atp6v1e1,...

sym_mrna_dn_ribo_dn_ribo2_no: 583: 1700037H04Rik,2210011C24Rik,2410006H16Rik,Abcc2,Abcc3,Abcc4,Abcc5,Abcd3,Abce1,Abhd6,Acot2,Acsl4,Acsl5,Acy3,Adh5,Adh7,Adora1,Adsl,Adss,Afm,Afp,Agfg1,Agmo,Agpat2,Agpat5,Ak3,Akr1b7,Akr1c12,Akr1c13,Akr1c19,Alad,Alcam,Aldh2,Aldoa,Alkbh5,Alkbh7,Anapc13,Angpt2,Anks4b,Antxr2,Anxa11,Anxa13,Anxa2,Anxa3,Anxa4,Ap2m1,Api5,Arf6,Arhgap12,Arhgap18,...

sym_mrna_up_ribo_dn_ribo2_no: 2: Hist1h4n,Sh3kbp1

sym_mrna_dn_ribo_up_ribo2_no: 5: Gm24265,Osgin1,Plk2,rDNA_Rn5-8s,Umps

sym_mrna_up_ribo_no_ribo2_no: 1179: 0610010F05Rik,1700088E04Rik,1810058I24Rik,2310022B05Rik,2700081O15Rik,2900026A

### no DE ; up/dn/only

In [152]:
# For every pair (var1, var2), intersect the global vectors sym_<var1> and
# sym_<var2>, restrict the result to sym_detected_both, store it in list_genes
# under 'sym_<var1>_<var2>', and report its size plus the first max_ngenes genes.
vec1_var <- c('mrna_no')

vec2_var <- c('mrna2_up_ribo2_up', 'mrna2_dn_ribo2_dn', 'mrna2_up_ribo2_dn', 'mrna2_dn_ribo2_up',
              'mrna2_up_ribo2_no', 'mrna2_dn_ribo2_no', 'mrna2_no_ribo2_up', 'mrna2_no_ribo2_dn')

max_ngenes <- 50
for (var1 in vec1_var) {
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      # look the two vectors up by name with get() rather than eval(parse(text = ...))
      list_genes[[strvar]] <- intersect(
          intersect(get(sprintf('sym_%s', var1)), get(sprintf('sym_%s', var2))),
          sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]], max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          # mark that the printed gene list was truncated
          strgene <- paste0(strgene, ',...')
      }
      # verb() is defined outside this section (presumably a printf-style logger)
      verb('%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

sym_mrna_no_mrna2_up_ribo2_up: 68: Atf3,Tcim,Sdc4,Dnajc5,Klf6,Tuba4a,Tnfrsf12a,Srsf11,Brd2,Ier5,Mafk,Ninj1,Prc1,Rars,Rbm38,Sra1,Vps37b,Hist1h4c,Zfp280d,Rcan1,Rnd1,Snhg17,Rchy1,Prrc2c,Ccnt2,Atp6v1d,Rhod,Egr1,Eif5,Prrg4,2610035D17Rik,Orc4,Rnf185,Zfx,Jmjd6,Zfp36l1,Bcl3,Otud1,Pip4p1,Chuk,Pnrc1,Hist1h4a,Hist2h4,Arl5b,Fam133b,Snhg15,Stx3,Necap1,Zfr,Vhl,...

sym_mrna_no_mrna2_dn_ribo2_dn: 34: Snd1,Ncl,Sel1l,Rtn3,Slc6a6,Mtap,Shq1,Kif21b,rDNA_Rn18s,Fzd7,Pfas,Cldn2,Atm,Dcaf7,Lman1,G3bp2,Mlec,Tomm40l,Gm7232,Fjx1,Fto,Dhcr24,Ogfod1,Alg2,Rcc2,Tubb5,Ddah1,Khdrbs1,Hspa8,Rab6a,Lad1,Focad,Dpysl2,Trappc9

sym_mrna_no_mrna2_up_ribo2_dn: 26: Rbbp6,Nufip2,Nbeal2,Upf1,Acin1,Nat10,Tmem214,Baiap2l1,Rbm14,Vamp2,Gmip,Ankrd11,Cbx4,Nrg1,Smpd3,Cpped1,Thoc6,Ilvbl,1110002E22Rik,Por,Haus8,Fat1,Crb2,Slc10a3,C77080,Irs2

sym_mrna_no_mrna2_dn_ribo2_up: 38: Gars,Ssr3,C1qbp,Afg3l2,Copz1,Prxl2c,Cetn3,Ppp1ca,Manf,Ap1s1,Hunk,Gatad1,Swi5,Cks1b,Rexo2,Sppl2a,Cd9,Psma7,Stub1,Decr1,Dnajc10,Bcap29,Atp5k,Ergic1,Arpc3,Rtraf,Snrpb2,Cn

### other combinations

In [153]:
# For every pair (var1, var2), intersect the global vectors sym_<var1> and
# sym_<var2>, restrict the result to sym_detected_both, store it in list_genes
# under 'sym_<var1>_<var2>', and report its size plus the first max_ngenes genes.
# A separator line with var1 is printed before each group.
vec1_var <- c('mrna_up_ribo_up', 'mrna_dn_ribo_dn', 'mrna_up_ribo_dn', 'mrna_dn_ribo_up',
              'mrna_up_ribo_no', 'mrna_dn_ribo_no', 'mrna_no_ribo_up', 'mrna_no_ribo_dn')

vec2_var <- c('mrna2_up_ribo2_up', 'mrna2_dn_ribo2_dn', 
              'mrna2_up_ribo2_no', 'mrna2_dn_ribo2_no', 'mrna2_no_ribo2_up', 'mrna2_no_ribo2_dn',
              'mrna2_up_ribo2_dn', 'mrna2_dn_ribo2_up')

max_ngenes <- 50
for (var1 in vec1_var) {
    # verb() is defined outside this section (presumably a printf-style logger)
    verb('-------------------------------------\n%s\n\n', var1)
    for (var2 in vec2_var) {
      strvar <- sprintf('sym_%s_%s', var1, var2)
      # look the two vectors up by name with get() rather than eval(parse(text = ...))
      list_genes[[strvar]] <- intersect(
          intersect(get(sprintf('sym_%s', var1)), get(sprintf('sym_%s', var2))),
          sym_detected_both)
      strgene <- paste(head(list_genes[[strvar]], max_ngenes), collapse=",")
      if (length(list_genes[[strvar]]) > max_ngenes) {
          # mark that the printed gene list was truncated
          strgene <- paste0(strgene, ',...')
      }
      verb('\t%s: %d: %s\n\n', strvar, length(list_genes[[strvar]]), strgene)
    }
}

-------------------------------------
mrna_up_ribo_up

	sym_mrna_up_ribo_up_mrna2_up_ribo2_up: 36: Arl4d,Arl6ip5,Atp6v0d1,Bhlhe40,Ccl2,Clcf1,Coq10b,Csf2,Cyp1b1,Dusp5,Fosb,Gadd45b,Gadd45g,Gla,Hes1,Hmox1,Kctd11,Klf10,Lif,Maff,Micall1,Noct,Nsmce3,Nudt18,Pdlim7,Pfkfb3,Rabgef1,Rassf1,Sh3bp2,Sox9,Stk17b,Tgif1,Tiparp,Tubb6,Zfp655,Zkscan5

	sym_mrna_up_ribo_up_mrna2_dn_ribo2_dn: 54: Actb,Ahr,Anxa1,Anxa6,Atl3,Atp1b1,Atp2b4,Bdnf,Bicd2,Cacna2d1,Ccn2,Ckap4,Col1a1,Col4a1,Col4a2,Col5a1,Cp,D630003M21Rik,Ddit4,Dlc1,Fbxo32,Fkbp5,Glg1,Has2,Inhba,Itgb3,Krt7,Lpcat1,Lpp,Lrrc32,Marcks,Megf8,Msn,Mvp,Myh9,Nt5e,Olr1,Pdgfrb,Pla2g15,Plxnd1,Ptprk,Pxdn,rDNA_ETS5_chr17,Rtn4,Slco4a1,Spp1,Tapbp,Thbs1,Tln2,Tmem119,...

	sym_mrna_up_ribo_up_mrna2_up_ribo2_no: 138: 1700017B05Rik,4931406P16Rik,Aff1,Ap5b1,Ap5z1,Arfgef2,Arhgap23,Arhgef5,Ascc3,Atp6v1e1,Azin1,Bag3,Baz1a,Bcar1,Bcar3,Bnc1,Ccl7,Ccn4,Cdc42ep1,Cipc,Cish,Cited2,Cnnm4,Csf1,Csrnp1,Ctps,Cttnbp2nl,Daam1,Ddhd1,Dennd5b,Dot1l,Dusp7,Dyrk3,Dzip1l,Ecm1,Edn1,Eif2ak3,Epb41l1,

# define frequently used variables

## sym_mrna_up_mrna2_dn

In [154]:
# Two ways of getting the "mRNA up, then mRNA down" gene set.
# (1) From df_all: rows whose "reversible.transcription" value is "upDown"
#     (the original note refers to emt.tables.mrna.comprehensive.txt as the source).
idx <- which(df_all[, "reversible.transcription"] == "upDown")
sym_mrna_up_mrna2_dn <- rownames(df_all)[idx]
length(sym_mrna_up_mrna2_dn)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, sym_mrna_up_mrna2_dn))  # should be FALSE
}

# (2) From the DE gene sets; this overwrites the value from (1), so the two
#     printed lengths can be compared.
sym_mrna_up_mrna2_dn <- intersect(sym_mrna_up, sym_mrna2_dn)
length(sym_mrna_up_mrna2_dn)


## sym_mrna_dn_mrna2_up

In [155]:
# Two ways of getting the "mRNA down, then mRNA up" gene set.
# (1) From df_all: rows whose "reversible.transcription" value is "downUp".
#     This used to read the "reversible.translation" column, so the first length
#     printed below described the ribo set rather than the mRNA set.
idx <- which(df_all[, 'reversible.transcription'] == 'downUp')
sym_mrna_dn_mrna2_up <- rownames(df_all[idx,])
length(sym_mrna_dn_mrna2_up)

# (2) From the DE gene sets; this overwrites the value from (1), so the two
#     printed lengths can be compared.
sym_mrna_dn_mrna2_up <- intersect(sym_mrna_dn, sym_mrna2_up)
length(sym_mrna_dn_mrna2_up)

## sym_mrna_dn3x_mrna2_no

In [156]:
# Genes in both sym_mrna_dn3x and sym_mrna2_no.
sym_mrna_dn3x_mrna2_no <- intersect(sym_mrna_dn3x, sym_mrna2_no)
length(sym_mrna_dn3x_mrna2_no)




## sym_ribo_up_ribo2_dn

In [157]:
# Two ways of getting the "ribo up, then ribo down" gene set.
# (1) From df_all: rows whose "reversible.translation" value is "upDown"
#     (the original note refers to emt.tables.mrna.comprehensive.txt as the source).
idx <- which(df_all[, "reversible.translation"] == "upDown")
sym_ribo_up_ribo2_dn <- rownames(df_all)[idx]
length(sym_ribo_up_ribo2_dn)

if (!is.null(pattern_remove_gene)) {
    any(grepl(pattern_remove_gene, sym_ribo_up_ribo2_dn))  # should be FALSE
}

# (2) From the DE gene sets; this overwrites the value from (1), so the two
#     printed lengths can be compared.
sym_ribo_up_ribo2_dn <- intersect(sym_ribo_up, sym_ribo2_dn)
length(sym_ribo_up_ribo2_dn)


In [158]:
# mRNA-level (sym_mrna*) gene sets under shorter "cx" names:
# the first three are plain aliases, the last two are same-direction overlaps
# between the first (sym_mrna_*) and second (sym_mrna2_*) comparison.
sym_mrna_up_cx_dn <- sym_mrna_up_mrna2_dn
sym_mrna_dn_cx_up <- sym_mrna_dn_mrna2_up
sym_mrna_dn3x_cx_no <- sym_mrna_dn3x_mrna2_no
sym_mrna_up_cx_up <- intersect(sym_mrna_up, sym_mrna2_up)
sym_mrna_dn_cx_dn <- intersect(sym_mrna_dn, sym_mrna2_dn)

## sym_ribo_dn_ribo2_up

In [159]:
# Two ways of getting the "ribo down, then ribo up" gene set.
# (1) From df_all: rows whose "reversible.translation" value is "downUp".
idx <- which(df_all[, "reversible.translation"] == "downUp")
sym_ribo_dn_ribo2_up <- rownames(df_all)[idx]
length(sym_ribo_dn_ribo2_up)

# (2) From the DE gene sets; this overwrites the value from (1), so the two
#     printed lengths can be compared.
sym_ribo_dn_ribo2_up <- intersect(sym_ribo_dn, sym_ribo2_up)
length(sym_ribo_dn_ribo2_up)


## sym_ribo_dn3x_ribo2_no

In [160]:
# Genes in both sym_ribo_dn3x and sym_ribo2_no.
sym_ribo_dn3x_ribo2_no <- intersect(sym_ribo_dn3x, sym_ribo2_no)
length(sym_ribo_dn3x_ribo2_no)



In [161]:
# Ribo-level gene sets under shorter "cx" names: three plain aliases followed by
# the two same-direction overlaps between the first and second comparison.
sym_ribo_up_cx_dn <- sym_ribo_up_ribo2_dn
sym_ribo_dn_cx_up <- sym_ribo_dn_ribo2_up
sym_ribo_dn3x_cx_no <- sym_ribo_dn3x_ribo2_no
sym_ribo_up_cx_up <- intersect(sym_ribo_up, sym_ribo2_up)
sym_ribo_dn_cx_dn <- intersect(sym_ribo_dn, sym_ribo2_dn)


# Ribo-only sets from the first comparison (sym_mrna_no_ribo_up / _dn) crossed
# with the ribo direction in the second comparison; each result is restricted
# to sym_detected_both.
sym_mrna_no_ribo_up_cx_dn <- intersect(
    intersect(sym_mrna_no_ribo_up, sym_ribo2_dn), sym_detected_both)

sym_mrna_no_ribo_dn_cx_up <- intersect(
    intersect(sym_mrna_no_ribo_dn, sym_ribo2_up), sym_detected_both)

sym_mrna_no_ribo_up_cx_up <- intersect(
    intersect(sym_mrna_no_ribo_up, sym_ribo2_up), sym_detected_both)

sym_mrna_no_ribo_dn_cx_dn <- intersect(
    intersect(sym_mrna_no_ribo_dn, sym_ribo2_dn), sym_detected_both)

## sym_ribo_cx_exclusively_affected

In [162]:
# Sizes of the two ribo DE sets, then the genes that are in sym_ribo2 but not in
# sym_ribo.
length(sym_ribo2)
length(sym_ribo)
sym_ribo_cx_exclusively_affected <- setdiff(sym_ribo2, sym_ribo)
length(sym_ribo_cx_exclusively_affected)


In [163]:
# The subset of sym_ribo_cx_exclusively_affected that is in sym_ribo2_up.
sym_ribo_cx_exclusively_affected_up <- intersect(sym_ribo_cx_exclusively_affected,
                                                 sym_ribo2_up)
length(sym_ribo_cx_exclusively_affected_up)

In [164]:
# The subset of sym_ribo_cx_exclusively_affected that is in sym_ribo2_dn.
sym_ribo_cx_exclusively_affected_dn <- intersect(sym_ribo_cx_exclusively_affected,
                                                 sym_ribo2_dn)
length(sym_ribo_cx_exclusively_affected_dn)

## sym_ribo_cx_unaffected

In [165]:
# Size of sym_riboseq (the trailing number is the count noted from an earlier run).
length(sym_riboseq)  # 13827

# "affected" is simply an alias for the second-comparison ribo DE set
sym_ribo_cx_affected <- sym_ribo2
length(sym_ribo_cx_affected)

# everything in sym_riboseq that is not in that set
sym_ribo_cx_unaffected <- setdiff(sym_riboseq, sym_ribo_cx_affected)
length(sym_ribo_cx_unaffected)


## sym_mrna_no_ribo_de_cx_unaffected

In [166]:
# Ribo-only genes (sym_ribo_only) that are also in sym_ribo_cx_unaffected.
sym_mrna_no_ribo_de_cx_unaffected <- intersect(sym_ribo_only, sym_ribo_cx_unaffected)
length(sym_mrna_no_ribo_de_cx_unaffected)

# how many of them are in sym_rnaseq and in sym_riboseq, respectively
length(intersect(sym_mrna_no_ribo_de_cx_unaffected, sym_rnaseq))
length(intersect(sym_mrna_no_ribo_de_cx_unaffected, sym_riboseq))


# df_protein

In [167]:
# Read the protein-groups table and derive the up / down protein gene lists.
# Data source:
# https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-019-0256-y
df_protein <- read.table(file = "../nmumg_proteomics_mass_spec/table_s1a_protein_groups.txt",
                header = TRUE, sep = "\t", row.names = NULL,
                quote = "", comment.char = "#", stringsAsFactors = FALSE)
t(df_protein[1:3,])
dim(df_protein)


condstr <- 'proteomics.unt0.tgfb48'
cols <- c('Majority.Gene.name','log2FC.2.days.0.min','X.log2.p..2.days.0.min')
df <- df_protein[, cols]

# Columns are addressed through `cols` rather than by position.
# 2^(-x) < 0.05 is applied to the third column.
# NOTE(review): presumably that column holds -log2(p) -- confirm against the table.
idx <- which((df[[cols[2]]] > log2(1.2)) & (2^(-df[[cols[3]]]) < 0.05))
sym_protein_up <- df[idx, cols[1]]

idx <- which((df[[cols[2]]] < -log2(1.2)) & (2^(-df[[cols[3]]]) < 0.05))
sym_protein_dn <- df[idx, cols[1]]


Unnamed: 0,1,2,3
Majority.Protein.ID,A2A432,A2A5R2,A2A690
Majority.Protein.name,Cullin-4B,Brefeldin A-inhibited guanine nucleotide-exchange protein 2,Protein TANC2
Majority.Gene.name,Cul4b,Arfgef2,Tanc2
Protein.IDs,A2A432,A2A5R2,A2A690
Protein.names,Cullin-4B,Brefeldin A-inhibited guanine nucleotide-exchange protein 2,Protein TANC2
Gene.names,Cul4b,Arfgef2,Tanc2
Peptide.counts,10,3,4
Sequence.coverage....,15.8,3.6,2.7
Mol..weight..kDa.,110.70,202.24,220.26
Sequence.length,970,1792,1994


## sym_up_up2, sym_dn_dn2

In [168]:
# Show the size of sym_mrna_up_ribo_up, then list its symbols containing 'Rp'.
# NOTE(review): the pattern is unanchored and matches 'Rp' anywhere in the
# symbol; if only Rpl*/Rps* genes are wanted, an anchored pattern is needed.
length(sym_mrna_up_ribo_up)
f <- grepl('Rp', sym_mrna_up_ribo_up)
sym_mrna_up_ribo_up[f]



In [169]:
# List the symbols in sym_protein_up that contain 'Rp' (unanchored match).
f <- grepl('Rp', sym_protein_up)
sym_protein_up[f]


In [170]:
# List and count the symbols in sym_mrna_dn_ribo_dn that contain 'Rp'
# (unanchored match).
f <- grepl('Rp', sym_mrna_dn_ribo_dn)
sym_mrna_dn_ribo_dn[f]
length(sym_mrna_dn_ribo_dn[f])

In [171]:

# Genes in sym_mrna_up_ribo_up that are also in sym_protein_up.
sym_up_up2 <- intersect(sym_mrna_up_ribo_up, sym_protein_up)
sym_up_up2
length(sym_up_up2)

# Genes in sym_mrna_dn_ribo_dn that are also in sym_protein_dn.
sym_dn_dn2 <- intersect(sym_mrna_dn_ribo_dn, sym_protein_dn)
sym_dn_dn2
length(sym_dn_dn2)


In [172]:
# Hand-picked gene symbols; display their rows of df_all (rows are looked up
# by row name, so a symbol absent from df_all would show as an all-NA row).
sym_ribosomal_proteins <- c('Rpl15', 'Rpl32', 'Rplp1', 'Rps28', 'Rps3', 'Rpsa')

df_all[sym_ribosomal_proteins,]

Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,Punt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,PtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Rpl15,14,18267823,18271391,3569,-,ensembl_havana,5.459518,4.991497,4.771384,-0.4680213,⋯,-1.2487127,0.3021658,0.1271681281,-0.5452805,0.77800948,0.5947925848,notSig,notSig,notSig,protein_coding
Rpl32,6,115805505,115808747,3243,-,ensembl_havana,6.734843,6.097129,5.72614,-0.6377142,⋯,-0.6060058,0.002357606,0.0002635011,0.5770278,0.01015831,0.0009291078,down,up,downUp,protein_coding
Rplp1,9,61913284,61914542,1259,-,ensembl_havana,7.702973,7.016074,6.660871,-0.6868989,⋯,-0.9418643,3.994e-08,1.4e-09,-0.3538803,0.14140751,0.0369959115,down,notSig,notSig,protein_coding
Rps28,17,33819027,33824562,5536,-,ensembl_havana,6.231407,5.831708,5.65645,-0.399699,⋯,,,,,,,,,,protein_coding
Rps3,7,99477896,99483738,5843,-,ensembl_havana,7.702639,7.053267,6.729972,-0.6493718,⋯,-0.7147533,0.0,0.0,0.6959434,0.0,0.0,down,up,downUp,protein_coding
Rpsa,9,120127689,120132369,4681,+,ensembl_havana,8.605925,8.030752,7.561042,-0.5751733,⋯,,,,,,,,,,protein_coding


## sym_up_up2_dn, sym_dn_dn2_up

In [173]:
# sym_mrna_up_ribo_up genes that are also in sym_ribo2_dn, then narrowed to
# those also in sym_protein_up.
sym_up_up_dn <- intersect(sym_mrna_up_ribo_up, sym_ribo2_dn)
sym_up_up2_dn <- intersect(sym_up_up_dn, sym_protein_up)
sym_up_up2_dn
length(sym_up_up2_dn)

# Mirror case: sym_mrna_dn_ribo_dn genes that are also in sym_ribo2_up, then
# narrowed to those also in sym_protein_dn.
sym_dn_dn_up <- intersect(sym_mrna_dn_ribo_dn, sym_ribo2_up)
sym_dn_dn2_up <- intersect(sym_dn_dn_up, sym_protein_dn)
sym_dn_dn2_up
length(sym_dn_dn2_up)


# save riboprof_170224_161021.rdata

In [174]:

# Persist the count tables, summary tables and all derived gene sets to a
# single .rdata file whose name is built from rundate_appendix and strdir.
fname_out <- sprintf("./rdata/riboprof_170224%s_161021%s_%s.rdata",
                     rundate_appendix, rundate_appendix, strdir)
# verb() is not defined in this section (presumably a printf-style helper
# from jupyter_common.R); here it prints the output path.
verb('%s\n', fname_out)
save(
     # raw counts
     df_count_mrna, df_count_ribo,
     # detected
     sym_rnaseq, sym_riboseq,
     # detected in both platforms or in only one of them
     sym_detected_both, sym_detected_rnaseq_only, sym_detected_riboseq_only,
     # comprehensive tables 
     df_all, df_protein,
     # transcription tgfb48
     sym_mrna, sym_mrna_up, sym_mrna_dn, sym_mrna_no,
     # transcription tgfb48+27h+cx
     sym_mrna2, sym_mrna2_up, sym_mrna2_dn, sym_mrna2_no,
     # translation tgfb48
     sym_ribo, sym_ribo_up, sym_ribo_dn, sym_ribo_no,
     # translation tgfb48+27h+cx
     sym_ribo2, sym_ribo2_up, sym_ribo2_dn, sym_ribo2_no,
    
     # transcription tgfb48, no effect on translation tgfb48
     # detected in both data sets
     sym_mrna_up_ribo_no, sym_mrna_dn_ribo_no,
     sym_mrna_de_ribo_no, sym_mrna_only,
     # translation tgfb48, no effect on transcription tgfb48
     # detected in both data sets 
     sym_mrna_no_ribo_up, sym_mrna_no_ribo_dn,
     sym_mrna_no_ribo_de, sym_ribo_only,
     
     # transcription tgfb48, translation tgfb48
     # detected in both data sets
     sym_mrna_up_ribo_up, sym_mrna_dn_ribo_dn,
     sym_mrna_up_ribo_dn, sym_mrna_dn_ribo_up,
    
     ###### CX
     # transcription tgfb48, tgfb48+27h+cx
     sym_mrna_up_cx_dn, sym_mrna_dn_cx_up, sym_mrna_dn3x_cx_no,
     sym_mrna_up_cx_up, sym_mrna_dn_cx_dn,
     # translation tgfb48, tgfb48+27h+cx
     sym_ribo_up_cx_dn, sym_ribo_dn_cx_up, sym_ribo_dn3x_cx_no,
     sym_ribo_up_cx_up, sym_ribo_dn_cx_dn,
     sym_ribo_cx_exclusively_affected,
     sym_ribo_cx_exclusively_affected_up,
     sym_ribo_cx_exclusively_affected_dn,
     sym_ribo_cx_unaffected,
     # translation tgfb48, tgfb48+27h+cx, no effect on transcription tgfb48
     # detected in both data sets
     sym_mrna_no_ribo_up_cx_dn, sym_mrna_no_ribo_dn_cx_up,
     sym_mrna_no_ribo_up_cx_up, sym_mrna_no_ribo_dn_cx_dn,
     sym_mrna_no_ribo_de_cx_unaffected, 
     # transcription tgfb48, translation tgfb48, tgfb48+27h+cx
     # detected in both platforms 
     sym_up_up_dn, sym_dn_dn_up,
    
     ###### protein mass spec.    
     # mass spec. tgfb48
     sym_protein_up, sym_protein_dn,
     # transcription tgfb48, translation tgfb48, mass spec. tgfb48, tgfb48+27h+cx up
     # detected in three platforms
     sym_up_up2_dn, sym_dn_dn2_up,
     
     ## list_genes
     list_genes,
    
     file=fname_out)

./rdata/riboprof_170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna.rdata


# additional check

In [175]:
# Membership of the two similarly named symbols 'Marc2' and 'March2' in the
# down-regulated sets (one TRUE/FALSE per symbol, in that order).
c('Marc2','March2') %in% sym_mrna_dn
c('Marc2','March2') %in% sym_ribo_dn
c('Marc2','March2') %in% sym_mrna_dn_ribo_dn

In [176]:
# Same membership check against the two single-platform "down" sets.
c('Marc2','March2') %in% sym_mrna_dn_ribo_no
c('Marc2','March2') %in% sym_mrna_no_ribo_dn

In [177]:
# Show the df_all rows of both symbols side by side (transposed: one column per gene).
t(df_all[c('Marc2','March2'),])

Unnamed: 0,Marc2,March2
seqnames,1,17
start,184813068,33685692
end,184846451,33718670
width,33384,32979
strand,-,-
source,ensembl_havana,ensembl_havana
unt48.transcription,3.580933,2.787924
tgfb48.transcription,3.140785,2.933485
tgfbCX5461100nm.transcription,3.036474,2.649384
log2FCunt48VStgfb48.transcription,-0.4401479,0.1455606


# write xlsx

In [178]:
# List the columns of df_all before selecting which ones to export.
colnames(df_all)

In [179]:
# Export df_all plus one worksheet per gene list in list_genes to a single
# xlsx workbook.

# Columns to export: drop the homolog-annotation columns and every column
# whose name starts with 'P'.
list_df <- list()
cols <- colnames(df_all)
cols <- cols[!(cols %in% c("HomoloGene.ID","mouse.sym","mouse.eid","human.sym","human.eid"))]
cols <- cols[!grepl('^P',cols)]

# One data frame per gene list; rows are selected from df_all by row name.
sheet_names <- names(list_genes)
for (sname in sheet_names) {
    #verb('%s\n', sname)
    list_df[[sname]] <- df_all[list_genes[[sname]], cols]
}
# The next four assignments only feed the commented-out write_xlsx() call.
titles <- sheet_names
subtitles <- rep(NULL,length(sheet_names))  # note: rep(NULL, n) is just NULL
nv_column_width <- rep(11,ncol(df_all))
names(nv_column_width) <- colnames(df_all)
#write_xlsx(list_df, 'xlsx/170224_161021.xlsx', titles, subtitles, nv_column_width)

dir_xlsx <- sprintf('xlsx/170224%s_161021%s_%s',
                        rundate_appendix, rundate_appendix, strdir)
dir.create(dir_xlsx ,  recursive = TRUE , showWarnings = FALSE)
filename_xlsx <- sprintf('%s/170224%s_161021%s_%s.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir)

# library() fails immediately with a clear error if openxlsx is missing;
# require() would only return FALSE and let createWorkbook() fail later.
library(openxlsx)
wb <- createWorkbook()

#hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
#options("openxlsx.borderColour" = "#4F80BD")
#options("openxlsx.borderStyle" = "thin")
#modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

# Column layout shared by every sheet (first column holds the row names).
xlsx_col_idx <- seq_len(ncol(df_all) + 1)
xlsx_col_widths <- c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20)

addWorksheet(wb, sheetName='all', gridLines=TRUE)
writeDataTable(wb, sheet=1, x=df_all[,cols],
        colNames=TRUE, rowNames=TRUE)
setColWidths(wb, sheet=1, cols=xlsx_col_idx, widths=xlsx_col_widths)

# Shorten the list names into sheet labels:
# mrna -> 11, ribo -> 21, mrna2 -> 12, ribo2 -> 22.
sheet_names <- gsub('sym_','',names(list_df))
sheet_names <- gsub('mrna_','11_', sheet_names)
sheet_names <- gsub('ribo_','21_', sheet_names)
sheet_names <- gsub('mrna2_','12_', sheet_names)
sheet_names <- gsub('ribo2_','22_', sheet_names)
# seq_along() keeps the loop from running when list_df is empty
# (1:length(list_df) would iterate over c(1, 0)).
for (i in seq_along(list_df)) {
     verb('%s\n', sheet_names[i])
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     # Sheet 1 is 'all', so gene list i lands on sheet 1+i.
     writeDataTable(wb, sheet=1+i, x=list_df[[i]],
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=1+i, cols=xlsx_col_idx, widths=xlsx_col_widths)
}
# save xlsx
saveWorkbook(wb, filename_xlsx, overwrite=TRUE)

Loading required package: openxlsx



11_up
11_dn
11_dn3x
21_up
21_dn
21_dn3x
11_up_21_up
11_dn_21_dn
11_up_21_dn
11_dn_21_up
11_up_21_no
11_dn_21_no
11_no_21_up
11_no_21_dn
12_up
12_dn
22_up
22_dn
12_up_22_no
12_dn_22_no
12_no_22_up
12_no_22_dn
12_up_22_up
12_dn_22_dn
12_up_22_dn
12_dn_22_up
11_up_21_up_22_no
11_dn_21_dn_22_no
11_up_21_dn_22_no
11_dn_21_up_22_no
11_up_21_no_22_no
11_dn_21_no_22_no
11_no_21_up_22_no
11_no_21_dn_22_no
11_no_12_up_22_up
11_no_12_dn_22_dn
11_no_12_up_22_dn
11_no_12_dn_22_up
11_no_12_up_22_no
11_no_12_dn_22_no
11_no_12_no_22_up
11_no_12_no_22_dn
11_up_21_up_12_up_22_up
11_up_21_up_12_dn_22_dn
11_up_21_up_12_up_22_no
11_up_21_up_12_dn_22_no
11_up_21_up_12_no_22_up
11_up_21_up_12_no_22_dn
11_up_21_up_12_up_22_dn
11_up_21_up_12_dn_22_up
11_dn_21_dn_12_up_22_up
11_dn_21_dn_12_dn_22_dn
11_dn_21_dn_12_up_22_no
11_dn_21_dn_12_dn_22_no
11_dn_21_dn_12_no_22_up
11_dn_21_dn_12_no_22_dn
11_dn_21_dn_12_up_22_dn
11_dn_21_dn_12_dn_22_up
11_up_21_dn_12_up_22_up
11_up_21_dn_12_dn_22_dn
11_up_21_dn_12_up_22_no


## biotype

In [180]:
# Distinct values of the biotype column of df_all, in order of first appearance.
biotypes <- unique(df_all$biotype)
biotypes

In [181]:

# Write one xlsx workbook per biotype: an 'all' sheet with the df_all rows of
# that biotype, followed by one sheet per gene list in list_df that has at
# least one row of that biotype.

# library() fails immediately with a clear error if openxlsx is missing;
# it is loaded once here instead of on every loop iteration.
library(openxlsx)

# Loop-invariant values, computed once.
# Sheet labels: mrna -> 11, ribo -> 21, mrna2 -> 12, ribo2 -> 22.
sheet_names <- gsub('sym_','',names(list_df))
sheet_names <- gsub('mrna_','11_', sheet_names)
sheet_names <- gsub('ribo_','21_', sheet_names)
sheet_names <- gsub('mrna2_','12_', sheet_names)
sheet_names <- gsub('ribo2_','22_', sheet_names)
# Column layout shared by every sheet (first column holds the row names).
biotype_col_idx <- seq_len(ncol(df_all) + 1)
biotype_col_widths <- c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20)

for (biotype in biotypes) {
  verb('%s\n', biotype)

  # Skip before building anything when no row of df_all has this biotype
  # (e.g. an NA biotype never compares equal).
  idx <- which(df_all$biotype == biotype)
  #verb('\t%all: %d\n', length(idx))
  if (length(idx) == 0) next

  filename_xlsx <- sprintf('%s/170224%s_161021%s_%s_%s.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir, biotype)
  wb <- createWorkbook()

  #hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
  #options("openxlsx.borderColour" = "#4F80BD")
  #options("openxlsx.borderStyle" = "thin")
  #modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

  addWorksheet(wb, sheetName='all', gridLines=TRUE)
  # Columns whose name starts with 'P' are not exported.
  writeDataTable(wb, sheet=1, x=df_all[idx,!grepl('^P',colnames(df_all))],
        colNames=TRUE, rowNames=TRUE)
  setColWidths(wb, sheet=1, cols=biotype_col_idx, widths=biotype_col_widths)

  # sheet_num tracks the index of the last sheet added, because gene lists
  # with no row of this biotype are skipped.
  sheet_num <- 1
  # seq_along() keeps the loop from running when list_df is empty.
  for (i in seq_along(list_df)) {
     df1 <- list_df[[i]]
     idx <- which(df1$biotype == biotype)
     #verb('\t%s: %d\n', sheet_names[i], length(idx))
     if (length(idx) == 0) next

     sheet_num <- sheet_num + 1
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     writeDataTable(wb, sheet=sheet_num,
        x=df1[idx,!grepl('^P',colnames(df1)),drop=FALSE],
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=sheet_num, cols=biotype_col_idx,
        widths=biotype_col_widths)
  }
  # save xlsx
  saveWorkbook(wb, filename_xlsx, overwrite=TRUE)
}



protein_coding
lncRNA
TEC
transcribed_unprocessed_pseudogene
transcribed_processed_pseudogene
transcribed_unitary_pseudogene
unprocessed_pseudogene
polymorphic_pseudogene
processed_pseudogene
unitary_pseudogene
snoRNA
rRNA
pseudogene
scaRNA
snRNA
misc_RNA
miRNA
IG_C_gene
Mt_rRNA
Mt_tRNA
rDNA
ribozyme
TR_C_gene
TR_V_gene


# write xlsx for validation

## filter_rows_with_log2fc_fdr

In [182]:
# Flag the rows of `df` whose DE call in column `col` is clear-cut.
#
# `col` names a DE-call column of the form "<condA>.<condB>.DE<platform>"
# (e.g. "unt48.tgfb48.DEtranscription"). The matching statistics columns are
# derived from it as "log2FC<condA>VS<condB>.<platform>" and
# "FDR<condA>VS<condB>.<platform>", with "tgfb48" shortened to "tgfb" in
# <condA>.
#
# A row is flagged when
#   * its call is "notSig" and |log2FC| < th_notsig_log2fc and
#     FDR > th_notsig_fdr, or
#   * its call is anything else and |log2FC| > th_sig_log2fc and
#     FDR < th_sig_fdr.
# Rows with an NA call, log2FC or FDR are never flagged.
#
# Args:
#   df: data frame holding `col` and the two derived statistics columns.
#   col: name of the DE-call column (single string).
#   th_notsig_log2fc, th_notsig_fdr: thresholds for "notSig" rows.
#   th_sig_log2fc, th_sig_fdr: thresholds for all other rows.
# Returns:
#   Logical vector of length nrow(df) without NA.
filter_rows_with_log2fc_fdr <- function(df, col,
                                        th_notsig_log2fc = 0.1,
                                        th_notsig_fdr = 0.25,
                                        th_sig_log2fc = log2(1.5),
                                        th_sig_fdr = 0.05) {

    items <- strsplit(col, "\\.")[[1]]
    if (length(items) < 3) {
        stop(sprintf("'%s' is not of the form <condA>.<condB>.DE<platform>", col),
             call. = FALSE)
    }
    items[1] <- gsub("tgfb48", "tgfb", items[1])
    items[3] <- gsub("DE", '', items[3])
    col_log2fc <- sprintf("log2FC%sVS%s.%s", items[1], items[2], items[3])
    col_fdr <- sprintf("FDR%sVS%s.%s", items[1], items[2], items[3])

    # Fail with a readable message instead of "undefined columns selected".
    cols_missing <- setdiff(c(col, col_log2fc, col_fdr), colnames(df))
    if (length(cols_missing) > 0) {
        stop(sprintf("missing column(s): %s", paste(cols_missing, collapse = ", ")),
             call. = FALSE)
    }

    f_notsig <- df[[col]] == "notSig"
    abs_log2fc <- abs(df[[col_log2fc]])
    fdr <- df[[col_fdr]]

    # not sig: confidently unchanged
    f_keep_notsig <- f_notsig & (abs_log2fc < th_notsig_log2fc) & (fdr > th_notsig_fdr)
    # up/dn: confidently changed
    f_keep_sig <- !f_notsig & (abs_log2fc > th_sig_log2fc) & (fdr < th_sig_fdr)

    f_out <- f_keep_notsig | f_keep_sig
    # Any NA in the call or the statistics means "not flagged".
    f_out[is.na(f_out)] <- FALSE
    f_out
}



## filter_rows_for_validation

In [183]:
# Select the rows of `df` that qualify as validation candidates.
#
# For each platform ("transcription", then "translation") a row must
#   * reach `th_log2cpm` in unt48 or tgfb48, and in tgfb48 or
#     tgfbCX5461100nm (columns "<condition>.<platform>"), and
#   * pass filter_rows_with_log2fc_fdr() for both the
#     "unt48.tgfb48.DE<platform>" and "tgfb48.tgfbCX5461100nm.DE<platform>"
#     calls.
#
# Args:
#   sheet_name: label of the sheet being filtered; accepted for the callers'
#     convenience but not used by the filter.
#   df: data frame with the expression, log2FC/FDR and DE-call columns.
#   th_log2cpm: minimum expression level (same unit as the expression columns).
# Returns:
#   The qualifying rows of `df` without the columns whose name starts with
#   'P'. Rows for which the criteria evaluate to NA are excluded.
filter_rows_for_validation <- function(sheet_name, df, th_log2cpm=2) {

    platforms <- c("transcription", "translation")
    conditions <- c("unt48", "tgfb48", "tgfbCX5461100nm")

    # A missing expression column would otherwise yield NULL and silently
    # produce an empty result.
    cols_needed <- as.vector(outer(conditions, platforms, paste, sep = "."))
    cols_missing <- setdiff(cols_needed, colnames(df))
    if (length(cols_missing) > 0) {
        stop(sprintf("missing column(s): %s", paste(cols_missing, collapse = ", ")),
             call. = FALSE)
    }

    f <- rep(TRUE, nrow(df))
    for (platform in platforms) {
        expr_unt <- df[[paste0("unt48.", platform)]]
        expr_tgfb <- df[[paste0("tgfb48.", platform)]]
        expr_cx <- df[[paste0("tgfbCX5461100nm.", platform)]]

        # large cpm for unt48 vs. tgfb48
        f1 <- (expr_unt >= th_log2cpm) | (expr_tgfb >= th_log2cpm)
        # large cpm for tgfb48 vs. CX
        f2 <- (expr_tgfb >= th_log2cpm) | (expr_cx >= th_log2cpm)
        f <- f & (f1 & f2)

        f <- f & filter_rows_with_log2fc_fdr(
            df, sprintf("unt48.tgfb48.DE%s", platform))
        f <- f & filter_rows_with_log2fc_fdr(
            df, sprintf("tgfb48.tgfbCX5461100nm.DE%s", platform))
    }

    # which() drops NA entries; indexing with a logical vector that contains
    # NA would add all-NA rows to the result.
    df[which(f), !grepl('^P',colnames(df)), drop=FALSE]
}

In [184]:
# Export the validation candidates: the same workbook layout as the main
# export, but every sheet is first reduced by filter_rows_for_validation().

# Columns to export: drop the homolog-annotation columns and every column
# whose name starts with 'P'.
list_df <- list()
cols <- colnames(df_all)
cols <- cols[!(cols %in% c("HomoloGene.ID","mouse.sym","mouse.eid","human.sym","human.eid"))]
cols <- cols[!grepl('^P',cols)]

# One data frame per gene list; rows are selected from df_all by row name.
sheet_names <- names(list_genes)
for (sname in sheet_names) {
    #verb('%s\n', sname)
    list_df[[sname]] <- df_all[list_genes[[sname]], cols]
}
# The next four assignments only feed the commented-out write_xlsx() call.
titles <- sheet_names
subtitles <- rep(NULL,length(sheet_names))  # note: rep(NULL, n) is just NULL
nv_column_width <- rep(11,ncol(df_all))
names(nv_column_width) <- colnames(df_all)
#write_xlsx(list_df, 'xlsx/170224_161021.xlsx', titles, subtitles, nv_column_width)

filename_xlsx <- sprintf('%s/170224%s_161021%s_%s_for_validation.xlsx', dir_xlsx,
                         rundate_appendix, rundate_appendix, strdir)
verb("%s\n", filename_xlsx)

# library() fails immediately with a clear error if openxlsx is missing;
# require() would only return FALSE and let createWorkbook() fail later.
library(openxlsx)
wb <- createWorkbook()

#hs <- createStyle(fontColour = "#ffffff", fgFill = "#4F80BD", halign = "center", valign = "center", textDecoration = "Bold", border = "TopBottomLeftRight", textRotation = 45)
#options("openxlsx.borderColour" = "#4F80BD")
#options("openxlsx.borderStyle" = "thin")
#modifyBaseFont(wb, fontSize = 10, fontName = "Arial Narrow")

# Column layout shared by every sheet (first column holds the row names).
validation_col_idx <- seq_len(ncol(df_all) + 1)
validation_col_widths <- c(25, rep(15,7), rep(12,3), rep(15,7), rep(12,3), 20)

addWorksheet(wb, sheetName='all', gridLines=TRUE)
writeDataTable(wb, sheet=1, x=filter_rows_for_validation("all", df_all[, cols]),
        colNames=TRUE, rowNames=TRUE)
setColWidths(wb, sheet=1, cols=validation_col_idx, widths=validation_col_widths)

# Shorten the list names into sheet labels:
# mrna -> 11, ribo -> 21, mrna2 -> 12, ribo2 -> 22.
sheet_names <- gsub('sym_','',names(list_df))
sheet_names <- gsub('mrna_','11_', sheet_names)
sheet_names <- gsub('ribo_','21_', sheet_names)
sheet_names <- gsub('mrna2_','12_', sheet_names)
sheet_names <- gsub('ribo2_','22_', sheet_names)
# seq_along() keeps the loop from running when list_df is empty
# (1:length(list_df) would iterate over c(1, 0)).
for (i in seq_along(list_df)) {
     verb('%s\n', sheet_names[i])
     addWorksheet(wb, sheetName=sheet_names[i], gridLines=TRUE)
     # Sheet 1 is 'all', so gene list i lands on sheet 1+i.
     writeDataTable(wb, sheet=1+i, x=filter_rows_for_validation(sheet_names[i], list_df[[i]]),
        colNames=TRUE, rowNames=TRUE)
     setColWidths(wb, sheet=1+i, cols=validation_col_idx, widths=validation_col_widths)
}
# save xlsx
saveWorkbook(wb, filename_xlsx, overwrite=TRUE)

xlsx/170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna/170224.rdna_rn18s_161021.rdna_rn18s_limma-voom.mrna_for_validation.xlsx
11_up
11_dn
11_dn3x
21_up
21_dn
21_dn3x
11_up_21_up
11_dn_21_dn
11_up_21_dn
11_dn_21_up
11_up_21_no
11_dn_21_no
11_no_21_up
11_no_21_dn
12_up
12_dn
22_up
22_dn
12_up_22_no
12_dn_22_no
12_no_22_up
12_no_22_dn
12_up_22_up
12_dn_22_dn
12_up_22_dn
12_dn_22_up
11_up_21_up_22_no
11_dn_21_dn_22_no
11_up_21_dn_22_no
11_dn_21_up_22_no
11_up_21_no_22_no
11_dn_21_no_22_no
11_no_21_up_22_no
11_no_21_dn_22_no
11_no_12_up_22_up
11_no_12_dn_22_dn
11_no_12_up_22_dn
11_no_12_dn_22_up
11_no_12_up_22_no
11_no_12_dn_22_no
11_no_12_no_22_up
11_no_12_no_22_dn
11_up_21_up_12_up_22_up
11_up_21_up_12_dn_22_dn
11_up_21_up_12_up_22_no
11_up_21_up_12_dn_22_no
11_up_21_up_12_no_22_up
11_up_21_up_12_no_22_dn
11_up_21_up_12_up_22_dn
11_up_21_up_12_dn_22_up
11_dn_21_dn_12_up_22_up
11_dn_21_dn_12_dn_22_dn
11_dn_21_dn_12_up_22_no
11_dn_21_dn_12_dn_22_no
11_dn_21_dn_12_no_22_up
11_dn_21_dn_12_

## 11_up_21_no

In [185]:
# 11_up_21_no: up for unt48 vs. tgfb48 only in rna-seq
# Locate the sheet by its label, show the size of its unfiltered table, then
# show the rows that pass filter_rows_for_validation().
# The "DE3"/"DE4" comments are values recorded by the notebook author
# (presumably from two analysis versions -- TODO confirm).
name1 <- "11_up_21_no"
idx <- which(sheet_names==name1)
# DE3: 1340
# DE4: 1272
dim(list_df[[idx]])

# DE3: Ap2a1, Atp11a, Bcl9l, Ctxn1, Neat1, Ptpn13, Stx6
# DE4: Bcl9l, Neat1
filter_rows_for_validation(name1, list_df[[idx]])

Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Bcl9l,9,44482825,44511896,29072,+,ensembl_havana,4.749208,5.541473,5.627332,0.7922647,⋯,2.121726,2.169154,-0.07994202,0.8767992,0.04742836,0.9373426,notSig,notSig,notSig,protein_coding
Neat1,19,5824708,5845478,20771,-,havana,6.143513,7.270401,9.028366,1.1268884,⋯,3.119925,5.060732,-0.02655551,0.9402735,1.94080705,0.0,notSig,up,notSig,lncRNA


## 11_no_21_up

In [186]:
# 11_no_21_up
# 11_no: no change for unt48 vs. tgfb48 only in RNA-seq
# 21_up: up for unt48 vs. tgfb48 only in RPF-seq
# Locate the sheet by its label, show the size of its unfiltered table, then
# display the validation candidates transposed (one column per gene).
name1 <- "11_no_21_up"
idx <- which(sheet_names==name1)
# DE3: 180
# DE4: 161
dim(list_df[[idx]])

# DE3: Abcf3, Cd14, Elovl1, Gm25360, Hsph1, Nrg1, Pot1b, Pxylp1, Rnf169, Uba6, Usp5
# DE4: Abcf3, Cd14, Elovl1, Hexim1, Hsph1, Nrg1, Rnf169, Usp5
df_out <- filter_rows_for_validation(name1, list_df[[idx]])
t(df_out)

Unnamed: 0,Elovl1,Hexim1,Hsph1
seqnames,4,11,5
start,118428093,103116231,149614287
end,118432953,103119725,149636376
width,4861,3495,22090
strand,+,+,-
source,ensembl_havana,ensembl_havana,ensembl_havana
unt48.transcription,3.110992,3.493564,4.332281
tgfb48.transcription,3.186450,3.547152,4.311113
tgfbCX5461100nm.transcription,3.254529,3.552939,4.215187
log2FCunt48VStgfb48.transcription,0.07545855,0.05358750,-0.02116818


## 11_up_21_up_12_no_22_dn

In [187]:
# 11_up_21_up_12_no_22_dn: up for unt48 vs. tgfb48 in both platform, down for tgfb48 vs. CX only in ribo-seq (e.g. Zeb1)

# Locate the sheet by its label, show the size of its unfiltered table, then
# show the rows that pass filter_rows_for_validation().
name1 <- "11_up_21_up_12_no_22_dn"
idx <- which(sheet_names==name1)

# DE3: 182
# DE4: 182
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Abcg2,6,58584523,58695676,111154,+,ensembl_havana,1.3589696,2.020313,1.970978,0.6613431,⋯,2.26315,1.4345838,1.0220188,0.004737738,-0.828566,0.04826441,up,down,upDown,protein_coding
Acot7,4,152178134,152271855,93722,+,ensembl_havana,2.9509776,3.596815,3.612917,0.6458373,⋯,3.391681,2.5581129,0.6990855,0.002350277,-0.8335679,0.002078716,up,down,upDown,protein_coding
Adprh,16,38444030,38452703,8674,-,ensembl_havana,2.170033,2.775568,2.689175,0.6055351,⋯,2.742099,1.5416834,0.9993159,0.001351817,-1.2004153,0.00139351,up,down,upDown,protein_coding
Arhgef40,14,51984719,52006251,21533,+,ensembl_havana,4.111528,4.821662,4.846823,0.7101337,⋯,2.836252,2.0234826,0.6954301,0.0241101,-0.8127695,0.02346172,up,down,upDown,protein_coding
Atrn,2,130906495,131030333,123839,+,ensembl_havana,3.9109944,4.818325,4.830312,0.9073308,⋯,2.787924,1.805621,1.2556162,7.892852e-05,-0.9823034,0.006517768,up,down,upDown,protein_coding
Cyb561a3,19,10577172,10595961,18790,+,ensembl_havana,2.5542696,3.744545,3.679208,1.1902759,⋯,2.246475,0.9382046,1.3478947,0.0007729983,-1.3082703,0.004824773,up,down,upDown,protein_coding
Ehbp1l1,19,5707376,5726317,18942,-,ensembl_havana,4.6017792,6.047077,6.142634,1.4452976,⋯,3.342922,2.6849353,1.3460147,4.1799e-07,-0.6579864,0.01624628,up,down,upDown,protein_coding
Eno3,11,70657202,70662513,5312,+,ensembl_havana,2.1554254,3.144029,3.207621,0.9886033,⋯,3.490632,2.7721539,1.875346,3.1e-10,-0.718478,0.006165869,up,down,upDown,protein_coding
Ets1,9,32636221,32757820,121600,+,ensembl_havana,4.3801374,5.812666,5.820724,1.4325287,⋯,3.971812,3.2768627,1.424309,7.8e-10,-0.694949,0.001722423,up,down,upDown,protein_coding
Fbln2,6,91212455,91272540,60086,+,ensembl_havana,6.0974625,8.997633,9.031067,2.90017,⋯,8.56708,7.888299,2.7827172,0.0,-0.6787812,0.0,up,down,upDown,protein_coding


## 11_dn_21_dn_12_no_22_up

In [188]:
# down for unt48 vs. tgfb48 in both platform, up for tgfb48 vs. CX only in Ribo-seq (e.g. Eif3k)

# Locate the sheet by its label, show the size of its unfiltered table, then
# show the rows that pass filter_rows_for_validation().
name1 <- "11_dn_21_dn_12_no_22_up"
idx <- which(sheet_names==name1)

# DE3: 83
# DE4: 83
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Acer3,7,98206389,98321208,114820,-,ensembl_havana,4.207895,3.337391,3.317584,-0.8705042,⋯,2.310893,3.321396,-1.1601477,1.479142e-05,1.0105039,0.0006777515,down,up,downUp,protein_coding
Cdh17,4,11758147,11817895,59749,+,ensembl_havana,6.130163,4.494413,4.40565,-1.6357492,⋯,3.500507,4.398577,-1.9267684,0.0,0.8980703,7.40243e-06,down,up,downUp,protein_coding
Coa3,11,101277968,101279114,1147,-,ensembl_havana,3.357174,2.462092,2.37465,-0.8950821,⋯,2.510048,3.20472,-0.9651971,0.0001698186,0.6946725,0.02202701,down,up,downUp,protein_coding
Gm6472,X,152909505,152910155,651,-,havana,2.652867,2.036453,1.96234,-0.6164135,⋯,2.183319,2.902427,-0.9814157,0.0007621754,0.7191082,0.03542426,down,up,downUp,transcribed_processed_pseudogene
Lin7c,2,109890853,109901003,10151,+,ensembl_havana,5.554064,4.875048,4.884611,-0.6790167,⋯,2.386171,3.450558,-0.9985575,0.0001390998,1.0643861,0.0002060667,down,up,downUp,protein_coding
Mrps18a,17,46110986,46128910,17925,+,ensembl_havana,3.552013,2.927555,2.891004,-0.6244585,⋯,2.045456,3.207055,-1.0215265,0.0007778624,1.161599,0.0003676827,down,up,downUp,protein_coding
Stom,2,35313986,35336976,22991,-,ensembl_havana,5.383036,4.238216,4.318125,-1.144819,⋯,2.626953,3.317428,-1.512062,9.6e-10,0.6904754,0.01565598,down,up,downUp,protein_coding


## 11_no_21_up_12_no_22_dn

In [189]:
# 11_no_21_up_12_no_22_dn: up for unt48 vs. tgfb48 only in ribo-seq, down for tgfb48 vs. CX only in ribo-seq (e.g. Fanca)

# Locate the sheet by its label, show the size of its unfiltered table, then
# show the rows that pass filter_rows_for_validation().
name1 <- "11_no_21_up_12_no_22_dn"
idx <- which(sheet_names==name1)

# DE3: 57
# DE4: 53
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Hexim1,11,103116231,103119725,3495,+,ensembl_havana,3.493564,3.547152,3.552939,0.0535875,⋯,2.272081,1.358701,1.0368382,0.006688445,-0.9133794,0.04077204,up,down,upDown,protein_coding
Hsph1,5,149614287,149636376,22090,-,ensembl_havana,4.332281,4.311113,4.215187,-0.02116818,⋯,3.957991,2.882762,0.8104418,4.232752e-05,-1.075229,6.30765e-06,up,down,upDown,protein_coding


## 11_no_21_dn_12_no_22_up

In [190]:
# 11_no_21_dn_12_no_22_up: down for unt48 vs. tgfb48 only in ribo-seq, up for tgfb48 vs. CX only in ribo-seq (e.g. Eif5a, Gemin7)
# Locate the sheet by its label, show the size of its unfiltered table, then
# show the rows that pass filter_rows_for_validation().
    
name1 <- "11_no_21_dn_12_no_22_up"
idx <- which(sheet_names==name1)

# DE3: 36
# DE4: 39
dim(list_df[[idx]])

filter_rows_for_validation(name1, list_df[[idx]])


Unnamed: 0_level_0,seqnames,start,end,width,strand,source,unt48.transcription,tgfb48.transcription,tgfbCX5461100nm.transcription,log2FCunt48VStgfb48.transcription,⋯,tgfb48.translation,tgfbCX5461100nm.translation,log2FCunt48VStgfb48.translation,FDRunt48VStgfb48.translation,log2FCtgfbVStgfbCX5461100nm.translation,FDRtgfbVStgfbCX5461100nm.translation,unt48.tgfb48.DEtranslation,tgfb48.tgfbCX5461100nm.DEtranslation,reversible.translation,biotype
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
Mtdh,15,34082694,34145624,62931,+,ensembl_havana,5.467764,5.481582,5.469784,0.01381855,⋯,1.967451,2.800038,-1.070979,0.0006171566,0.8325874,0.02285516,down,up,downUp,protein_coding
