Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
87 lines (59 sloc) 2.59 KB
title author date output
RNA-seq data preparation
Russ Poldrack
November 1, 2014
html_document

RNA-seq data preparation

Code available at: https://github.com/poldrack/myconnectome/blob/master/myconnectome/rnaseq/RNAseq_data_preparation.Rmd

This code loads the gene-level read count data and performs a variance-stabilizing transform using DESeq, and saves the resulting data for further analysis.

library(DESeq)
library(vsn)
library("RColorBrewer")
library("gplots")

basedir=Sys.getenv('MYCONNECTOME_DIR')
if (basedir=='') {
  basedir='/Users/poldrack/data_unsynced/myconnectome'
}
dataurl=sprintf('%s/rna-seq/',basedir)

####Load the data from the cloud and estimate size factors for correction
### use data that have been filtered for abundance (4>mean<10000) and removed snoRNAs

cdsFull=newCountDataSetFromHTSeqCount(read.table(sprintf('%s/htcount_files.txt',dataurl)),directory=sprintf('%s/htcount_files_filtered',dataurl))

cdsFull = estimateSizeFactors( cdsFull )

####Compute mean expression for each gene across sesssions
rs = rowMeans ( counts ( cdsFull ))
allgenes=rownames(counts(cdsFull))


####Remove genes with excessively high or low expression levels

use = (rs>4 & rs<10000)
cds=cdsFull[use,]
usedgenes=rownames(counts(cds))


####Generate variance-stabilized count data and save to file

cdsBlind = estimateDispersions( cds, method="blind" ,fitType='local')
vsd = varianceStabilizingTransformation( cdsBlind )
vsdata=getVarianceStabilizedData(cdsBlind)
write.table(vsdata,sprintf('%s/rna-seq/varstab_data_prefiltered.txt',basedir))

Dispersion estimates

plotDispEsts( cdsBlind )

SD vs. count before and after correction

par(mfrow=c(1,2))
notAllZero = (rowSums(counts(cds))>0)
meanSdPlot(log2(counts(cds)[notAllZero, ] + 1), ylim = c(0,2.5))
meanSdPlot(vsd[notAllZero, ], ylim = c(0,2.5))

Clusters of genes/sessions

select = order(rowMeans(counts(cdsBlind)), decreasing=TRUE)[1:30]
hmcol = colorRampPalette(brewer.pal(9, "GnBu"))(100)
heatmap.2(exprs(vsd)[select,], col = hmcol, trace="none", margin=c(10, 6))

Clusters of sessions - to look for outliers

dists = dist( t( exprs(vsd) ) )
mat = as.matrix( dists )
heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(13, 13))