### Limma voom analysis of AD/PD dataset, NO SVA

In [1]:
rm(list=ls())
#load necessary libraries 
library(ggplot2)
library(DESeq2)
library("BiocParallel")
parallelFlag=TRUE
register(MulticoreParam(50))
library("IHW")
library("pheatmap")
library(sva)
library(limma)

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
    colnames, colSums, dirname, do.call, duplicated, eval, evalq,
    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
    rowSums, sapply, setdiff, sort, table, tapply, union, unique,
    unsplit, which, which.max, which

## Load data and design

In [2]:
##load ATAC-seq raw read counts
#data=read.table('../adpd.atac.idr.counts.txt.gz',header=TRUE,sep='\t')

##concatenate chrom/start/end columns values to server as rownames for the dataframe of the form chrom_start_end 
#rownames(data)=paste(data$chrom,data$start,data$end,sep="_")
#data$chrom=NULL
#data$start=NULL
#data$end=NULL

#data=data[rowSums(data)>0,]


In [2]:
#load ATAC-seq data normalized counts. These were generated below w/ voom function 
E=read.table("idr.voom.qnorm.txt",header=TRUE,sep='\t',row.names=1)

In [3]:
head(data)

                                                                     
1 function (..., list = character(), package = NULL, lib.loc = NULL, 
2     verbose = getOption("verbose"), envir = .GlobalEnv)            
3 {                                                                  
4     fileExt <- function(x) {                                       
5         db <- grepl("\\\\.[^.]+\\\\.(gz|bz2|xz)$", x)              
6         ans <- sub(".*\\\\.", "", x)                               

In [4]:
#load the metadata
batches=read.table("../batches.filtered.csv",header=TRUE,sep='\t')

In [5]:
Grouping <- factor(paste0(batches$Cohort,".",batches$RegionMod, ".", batches$TypeMod))

In [6]:
batches$Grouping=Grouping

In [7]:
colSums(is.na(batches))

In [8]:
#SVA can't handle NA values, so we have no choice but to interpolate to the mode for missing entries in PMI & ApoE 
batches$ApoE[is.na(batches$ApoE)]='3_3'
batches$PMI[is.na(batches$PMI)]=mean(na.omit(batches$PMI))

In [9]:
#Now, we include all covariates and surrogate variables 
mod=model.matrix(~Grouping+Gender+expired_age+PMI+TissueCenter+ApoE+Batch,data=batches)

### fit model with limma 

In [10]:
fit <- lmFit(E,mod)

Coefficients not estimable: TissueCenterWUSTL BatchPD_X031 


“Partial NA coefficients for 385725 probe(s)”

In [13]:
colnames(mod)


In [14]:
colnames(fit$coefficients)

###  PD, CAUD

In [16]:
#create contrasts of interest 
pd.caud.cont.matrix=makeContrasts(
                          pd_caud_adpd_vs_lopd="GroupingPD.CAUD.ADPD - GroupingPD.CAUD.LOPD",
                          pd_caud_adpd_vs_ctrl="GroupingPD.CAUD.ADPD - GroupingPD.CAUD.CTRL",
                          pd_caud_lopd_vs_ctrl="GroupingPD.CAUD.LOPD - GroupingPD.CAUD.CTRL",
                          levels=mod)


“Renaming (Intercept) to Intercept”

In [17]:
fit2=contrasts.fit(fit,pd.caud.cont.matrix)
e=eBayes(fit2)
comparisons=colnames(pd.caud.cont.matrix)

“row names of contrasts don't match col names of coefficients”

In [18]:
comparisons

In [25]:
for(i in seq(1,length(comparisons)))
{
  tab<-topTable(e, number=nrow(e),coef=i,p.value = 0.05)
  up=sum(tab$logFC>0)
  down=sum(tab$logFC<0)
  sig=nrow(tab)
  curtitle=paste(comparisons[i],'\n','sig:',sig,'\n','up:',up,'\n','down:',down,'\n')
  print(curtitle)
  vals=topTable(e,number=nrow(e),coef=i)
  vals$pscaled=-10*log10(vals$adj.P.Val)
  vals$sig=vals$adj.P.Val<0.05
  png(paste("volcano_diff",comparisons[i],".png",sep=""))
  print(ggplot(data=vals,
               aes(y=vals$pscaled,x=vals$logFC,color=vals$sig))+
   geom_point()+
   xlab("log2(FC)")+
   ylab("-10*log10(pval)")+
   ggtitle(curtitle))
  dev.off() 
  write.table(tab,file=paste("diff_",comparisons[i],".tsv",sep=""),quote=FALSE,sep='\t',row.names = TRUE,col.names = TRUE)
}
    

[1] "pd_caud_adpd_vs_lopd \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_caud_adpd_vs_ctrl \n sig: 659 \n up: 414 \n down: 245 \n"
[1] "pd_caud_lopd_vs_ctrl \n sig: 3 \n up: 0 \n down: 3 \n"


In [None]:
### PD, 