Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
129 lines (91 sloc) 3.98 KB
title author date output
Snyderome data preparation
Russ Poldrack
May 18, 2015
html_document

Comparison of MyConnectome to Snyderome RNA-seq data

Code available at: https://github.com/poldrack/myconnectome/blob/master/myconnectome/rnaseq/Snyderome_data_preparation.Rmd

The Snyderome RNA-seq data were obtained from GEO.

basedir=Sys.getenv('MYCONNECTOME_DIR')
if (basedir==''){basedir='/Users/poldrack/data_unsynced/myconnectome'}

library(DESeq)

filenames=read.table(sprintf('%s/rna-seq/snyderome/snyderome_files.txt',basedir))

filenames=as.character(filenames$V1)
htcount_files=c()
for (i in 1:length(filenames)){
  htcount_files=rbind(htcount_files,c(filenames[i],filenames[i],i))
}
cdsFull=newCountDataSetFromHTSeqCount(htcount_files,
          directory=sprintf('%s/rna-seq/snyderome',basedir))
cdsFull = estimateSizeFactors( cdsFull )

# get mean expression for each gene across sesssions
rs = rowMeans ( counts ( cdsFull ))
allgenes=rownames(counts(cdsFull))

# remove genes with 
use = (rs>4 & rs<10000)
cds=cdsFull[use,]
usedgenes=rownames(counts(cds))

# generate variance-stabilized count data and save to file

cdsBlind = estimateDispersions( cds, method="blind" ,fitType='local')
vsd = varianceStabilizingTransformation( cdsBlind )
vsdata=getVarianceStabilizedData(cdsBlind)
dir.create(file.path(sprintf('%s/rna-seq',basedir), 'snyderome'), showWarnings = FALSE)
write.table(vsdata,sprintf('%s/rna-seq/snyderome/varstab_data.txt',basedir))

Diagnostic figures

Dispersion estimates

# plot some diagnostic figures

plotDispEsts( cdsBlind )


SD vs. count before and after correction

#plot SD vs. count before and after correction
library(vsn)
par(mfrow=c(1,2))
notAllZero = (rowSums(counts(cds))>0)
meanSdPlot(log2(counts(cds)[notAllZero, ] + 1), ylim = c(0,2.5))
meanSdPlot(vsd[notAllZero, ], ylim = c(0,2.5))

Clusters of genes/sessions

# plot clusters of genes/subjects
library("RColorBrewer")
library("gplots")
select = order(rowMeans(counts(cdsBlind)), decreasing=TRUE)[1:30]
hmcol = colorRampPalette(brewer.pal(9, "GnBu"))(100)
heatmap.2(exprs(vsd)[select,], col = hmcol, trace="none", margin=c(10, 6))

Plot clusters of sessions - to look for outliers

# plot clusters of subjects - to look for outliers
dists = dist( t( exprs(vsd) ) )
mat = as.matrix( dists )
heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(13, 13))


Compare expression to MyConnectome


mycdata=read.table(sprintf('%s/rna-seq/varstab_data_prefiltered.txt',basedir))
mycmean=apply(mycdata,1,mean)
snymean=apply(vsdata,1,mean)

cat(sprintf('Snyderome: %d genes\n',length(snymean)))
cat(sprintf('MyConnectome: %d genes\n',length(mycmean)))
mycgenes=names(mycmean)
snygenes=names(snymean)
mycmatch=mycgenes %in% snygenes
snymatch=snygenes %in% mycgenes

mycmean_match=mycmean[mycmatch]
snymean_match=snymean[snymatch]
cat(sprintf('Overlap: %d genes\n',length(snymean_match)))
if (sum(names(mycmean_match)==names(snymean_match)) != length(snymean_match)) {
  cat('there is a problem with gene name mismatch')
}

cat(sprintf('Correlation between datasets = %f\n',cor(mycmean_match,snymean_match)))


plot(mycmean_match,snymean_match,main='MyConnectome vs. Snyderome',xlab='MyConnectome expression',ylab='Snyderome expression')