In [2]:
suppressMessages(library(DESeq2))

readcounts.dir <- '/staging/as/skchoudh/SRP010679_tx_counts/'
design.file <-  '/staging/as/skchoudh/SRP010679_tx_counts/metadata_ribo.tsv'

## Suffix of htseq-count output
counts.suffix <- '.tsv'

t2g <- read.table('/home/cmb-panasas2/skchoudh/genomes/hg38/annotation/tx_to_gene_type_name.tsv', stringsAsFactors = F, header = T)
colnames(t2g) <- c('target_id', 'ens_gene', 'transcript_type', 'gene_type',  'ext_gene')
t2g <- t2g[, c('target_id', 'ens_gene', 'ext_gene')]
rownames(t2g) <- t2g$target_id

write_results <- function(df, prefix){
  df<- as.data.frame(df)
  df <- df[order(df$padj),]
  df$gene_id <- t2g[rownames(df),]$ens_gene
  df$gene_name <- t2g[rownames(df),]$ext_gene
  df.sig <- subset(df, padj<0.05)
  df.sig.up <- subset(df.sig, log2FoldChange>0)
  df.sig.down <- subset(df.sig, log2FoldChange<0)
  write.table(df, file = file.path(paste(prefix, 'tsv', sep='.')), sep = '\t')

  write.table(df.sig, file = file.path(paste(prefix, 'sig', 'tsv', sep='.')), sep = '\t')
  write.table(df.sig.up,  file = file.path(paste(prefix, 'sig', 'up', 'tsv', sep='.')), sep = '\t')
  write.table(df.sig.down,  file = file.path(paste(prefix, 'sig', 'down', 'tsv', sep='.')), sep = '\t')
  return (df.sig)
}

In [2]:
design.info <- read.table(design.file, header=T, stringsAsFactors=FALSE, sep='\t')
design.info <- design.info[design.info$treatment %in% c("vehicle", "rapamycin") ,]
design.info$treatment <- factor(design.info$treatment, levels=c("vehicle", "rapamycin"))
files <- paste(design.info$experiment_accession, counts.suffix, sep='')
sampleName <- design.info$experiment_accession



sampleTable <- data.frame(sampleName = sampleName, 
                          fileName = files, 
                          treatment=design.info$treatment)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                       directory = readcounts.dir,
                                       design = ~ treatment)
rownames(ddsHTSeq) <- gsub('\\.[0-9]+', '', rownames(ddsHTSeq))
ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
dds <- DESeq(ddsHTSeq)

estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [3]:
rapamycin_vs_vehicle <- results(dds)

write_results(rapamycin_vs_vehicle, '/staging/as/skchoudh/SRP010679_tx_differential_analysis/ribo_rapamycin_vs_vehicle')


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_id,gene_name
ENST00000242810,165.2205,2.007134,0.4138739,4.849627,1.236939e-06,0.009628727,ENSG00000114796.15,KLHL24
ENST00000245185,968.6928,1.619759,0.3312439,4.88993,1.008718e-06,0.009628727,ENSG00000125148.6,MT2A
ENST00000309268,113.0751,-2.389704,0.4897919,-4.879018,1.066154e-06,0.009628727,ENSG00000156508.17,EEF1A1
ENST00000316292,113.0751,-2.389704,0.4897919,-4.879018,1.066154e-06,0.009628727,ENSG00000156508.17,EEF1A1
ENST00000331523,113.0751,-2.389704,0.4897919,-4.879018,1.066154e-06,0.009628727,ENSG00000156508.17,EEF1A1
ENST00000356303,110.3998,-2.540367,0.5282843,-4.808713,1.519053e-06,0.009628727,ENSG00000156508.17,EEF1A1
ENST00000454652,165.2205,2.007134,0.4138739,4.849627,1.236939e-06,0.009628727,ENSG00000114796.15,KLHL24
ENST00000455918,110.3998,-2.540367,0.5282843,-4.808713,1.519053e-06,0.009628727,ENSG00000156508.17,EEF1A1
ENST00000476808,146.4641,2.092052,0.4318124,4.844816,1.26729e-06,0.009628727,ENSG00000114796.15,KLHL24
ENST00000610520,113.0751,-2.389704,0.4897919,-4.879018,1.066154e-06,0.009628727,ENSG00000156508.17,EEF1A1


In [5]:
design.info <- read.table(design.file, header=T, stringsAsFactors=FALSE, sep='\t')
design.info <- design.info[design.info$treatment %in% c("vehicle", "pp242") ,]
design.info$treatment <- factor(design.info$treatment, levels=c("vehicle", "pp242"))
files <- paste(design.info$experiment_accession, counts.suffix, sep='')
sampleName <- design.info$experiment_accession



sampleTable <- data.frame(sampleName = sampleName, 
                          fileName = files, 
                          treatment=design.info$treatment)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                       directory = readcounts.dir,
                                       design = ~ treatment)
rownames(ddsHTSeq) <- gsub('\\.[0-9]+', '', rownames(ddsHTSeq))
ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
dds <- DESeq(ddsHTSeq)

pp242_vs_vehicle <- results(dds)

write_results(pp242_vs_vehicle, '/staging/as/skchoudh/SRP010679_tx_differential_analysis/ribo_pp242_vs_vehicle')


estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_id,gene_name
ENST00000456586,1119.8576,-2.630413,0.2773067,-9.485573,2.410555e-21,8.097296e-17,ENSG00000197756.9,RPL37A
ENST00000309311,3184.5381,-2.852667,0.3055571,-9.335951,1.000886e-20,1.681038e-16,ENSG00000167658.15,EEF2
ENST00000227378,3535.6029,-2.736981,0.3086245,-8.868321,7.425721e-19,3.563392e-15,ENSG00000109971.13,HSPA8
ENST00000526110,3535.6029,-2.736981,0.3086245,-8.868321,7.425721e-19,3.563392e-15,ENSG00000109971.13,HSPA8
ENST00000532636,3535.6029,-2.736981,0.3086245,-8.868321,7.425721e-19,3.563392e-15,ENSG00000109971.13,HSPA8
ENST00000533540,3051.6804,-2.698730,0.3016028,-8.947959,3.621161e-19,3.563392e-15,ENSG00000109971.13,HSPA8
ENST00000534624,3535.6029,-2.736981,0.3086245,-8.868321,7.425721e-19,3.563392e-15,ENSG00000109971.13,HSPA8
ENST00000526686,1357.9862,-2.706329,0.3077127,-8.794987,1.430647e-18,6.007107e-15,ENSG00000109971.13,HSPA8
ENST00000534319,2586.5368,-2.717969,0.3106462,-8.749405,2.144804e-18,8.005123e-15,ENSG00000109971.13,HSPA8
ENST00000427280,817.9089,-2.562162,0.2944238,-8.702291,3.252491e-18,9.104534e-15,ENSG00000197756.9,RPL37A


# uORF DF

In [13]:
design.info <- read.table(design.file, header=T, stringsAsFactors=FALSE, sep='\t')
design.info <- design.info[design.info$treatment %in% c("vehicle", "pp242") ,]
design.info$treatment <- factor(design.info$treatment, levels=c("vehicle", "pp242"))
files <- paste(design.info$experiment_accession, counts.suffix, sep='')
sampleName <- design.info$experiment_accession



sampleTable <- data.frame(sampleName = sampleName, 
                          fileName = files, 
                          treatment=design.info$treatment)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                       directory = '/staging/as/skchoudh/SRP010679_uORF_differential_analysis/',
                                       design = ~ treatment)
ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
dds <- DESeq(ddsHTSeq)

pp242_vs_vehicle <- results(dds)

write_results(pp242_vs_vehicle, '/staging/as/skchoudh/SRP010679_uORF_differential_analysis/ribo_pp242_vs_vehicle')

ERROR: Error in ncol(countData) == nrow(colData): argument "countData" is missing, with no default


In [13]:
design.info <- read.table(design.file, header=T, stringsAsFactors=FALSE, sep='\t')
design.info <- design.info[design.info$treatment %in% c("vehicle", "pp242") ,]
design.info$treatment <- factor(design.info$treatment, levels=c("vehicle", "pp242"))
files <- paste(design.info$experiment_accession, counts.suffix, sep='')
sampleName <- design.info$experiment_accession



sampleTable <- data.frame(sampleName = sampleName, 
                          fileName = files, 
                          treatment=design.info$treatment)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                       directory = '/staging/as/skchoudh/SRP010679_uORF_translating_only_differential_analysis/',
                                       design = ~ treatment)
ddsHTSeq <- ddsHTSeq[ rowSums(counts(ddsHTSeq)) > 1, ]
dds <- DESeq(ddsHTSeq)

pp242_vs_vehicle <- results(dds)

write_results(pp242_vs_vehicle, '/staging/as/skchoudh/SRP010679_uORF_translating_only_differential_analysis/ribo_pp242_vs_vehicle')

estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_id,gene_name
ENST00000523172.5_98042765_98045398_63,877.6139,-1.924436,0.4217647,-4.56282,5.047111e-06,0.003695593,,
ENST00000528847.1_75400728_75400814_87,237.7931,-2.008471,0.4429807,-4.533992,5.78793e-06,0.003695593,,
ENST00000244745.3_21593862_21593960_99,120.6445,2.214624,0.501337,4.417436,9.987845e-06,0.004251493,,


# Apply the size factors obtained from the CDS data to the uORF data

In [3]:
get_dds_obj_corrected <- function(ddsNew, ddsOld){  
  ddsOld <- estimateSizeFactors(ddsOld)
  sizeFactors(ddsNew) <- sizeFactors(ddsOld)
  #dispersionFunction(ddsNew) <- dispersionFunction(ddsOld)
  return(ddsNew)
}

design.info <- read.table(design.file, header=T, stringsAsFactors=FALSE, sep='\t')
design.info <- design.info[design.info$treatment %in% c("vehicle", "pp242") ,]
design.info$treatment <- factor(design.info$treatment, levels=c("vehicle", "pp242"))
files <- paste(design.info$experiment_accession, counts.suffix, sep='')
sampleName <- design.info$experiment_accession

sampleTable <- data.frame(sampleName = sampleName, 
                          fileName = files, 
                          treatment=design.info$treatment)
ddsHTSeq.cds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                           directory = '/staging/as/skchoudh/SRP010679_tx_counts/',
                                           design = ~ treatment)

ddsHTSeq.uorf <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 
                                           directory = '/staging/as/skchoudh/SRP010679_uORF_translating_only_differential_analysis/',
                                           design = ~ treatment)

ddsHTSeq.cds <- DESeq(ddsHTSeq.cds)




estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


In [20]:
dim(counts(ddsHTSeq.cds))

In [21]:
dim(counts(ddsHTSeq.cds[ rowSums(counts(ddsHTSeq.cds)) > 0, ]))


In [6]:
get_dds_obj_corrected <- function(ddsNew, ddsOld){  
  ddsOld <- estimateSizeFactors(ddsOld)
  sizeFactors(ddsNew) <- sizeFactors(ddsOld)
  #dispersionFunction(ddsNew) <- dispersionFunction(ddsOld)
  return(ddsNew)
}

In [8]:
ddsHTSeq.uorf <- get_dds_obj_corrected(ddsHTSeq.uorf, ddsHTSeq.cds)
ddsHTSeq.uorf <- estimateDispersions(ddsHTSeq.uorf)

gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates


In [9]:
sizeFactors(ddsHTSeq.cds)

In [18]:
ddsHTSeq.uorf

class: DESeqDataSet 
dim: 1277 4 
metadata(1): version
assays(1): counts
rownames(1277): ENST00000540437.5_1323130_1324674_252
  ENST00000540437.5_1323180_1324676_204 ...
  ENST00000424325.6_154398270_154398350_81
  ENST00000618723.4_154398270_154398350_81
rowData names(0):
colnames(4): SRX118286 SRX118290 SRX118292 SRX118296
colData names(1): treatment

In [12]:
ddsHTSeq.uorf <- DESeq(ddsHTSeq.uorf)
pp242_vs_vehicle <- results(ddsHTSeq.uorf)

write_results(pp242_vs_vehicle, '/staging/as/skchoudh/SRP010679_uORF_translating_only_differential_analysis/ribo_same_size_factor_pp242_vs_vehicle')

using pre-existing size factors
estimating dispersions
found already estimated dispersions, replacing these
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,gene_id,gene_name
ENST00000244745.3_21593862_21593960_99,122.9619,2.298195,0.523536,4.389755,1.134785e-05,0.01449121,,
ENST00000528847.1_75400728_75400814_87,234.5215,-1.929083,0.4726027,-4.081827,4.468303e-05,0.02853011,,
