### Limma voom analysis of AD/PD dataset, NO SVA

In [1]:
# Remove all (non-hidden) objects from the current environment before starting.
rm(list=ls())
# Load necessary libraries.
# NOTE(review): DESeq2, IHW, pheatmap and sva are attached here but are not
# called in the visible cells of this notebook -- confirm before removing.
library(ggplot2)
library(DESeq2)
library("BiocParallel")
# NOTE(review): parallelFlag is not read by any visible cell -- confirm it is
# still needed.
parallelFlag=TRUE
# Register a multicore BiocParallel backend with 50 workers.
# NOTE(review): the worker count is hard-coded; verify the host has that many
# cores available.
register(MulticoreParam(50))
library("IHW")
library("pheatmap")
library(sva)
library(limma)

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
    colnames, colSums, dirname, do.call, duplicated, eval, evalq,
    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
    rowSums, sapply, setdiff, sort, table, tapply, union, unique,
    unsplit, which, which.max, which

## Load data and design

In [2]:
##load ATAC-seq raw read counts
#data=read.table('../adpd.atac.idr.counts.txt.gz',header=TRUE,sep='\t')

##concatenate the chrom/start/end column values to serve as rownames for the dataframe, in the form chrom_start_end
#rownames(data)=paste(data$chrom,data$start,data$end,sep="_")
#data$chrom=NULL
#data$start=NULL
#data$end=NULL

#data=data[rowSums(data)>0,]


In [3]:
# Read the precomputed normalized ATAC-seq matrix (voom / quantile-normalized,
# per the file name). The first column supplies the row names.
E <- read.table(
  file = "idr.voom.qnorm.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1
)

In [4]:
# Read the sample metadata table (tab-separated despite the .csv extension).
batches <- read.table(
  file = "../batches.filtered.csv",
  header = TRUE,
  sep = "\t"
)

## Grouping on Cohort, RegionMod, and TypeMod

In [5]:
# One combined label per sample, formatted as <Cohort>.<RegionMod>.<TypeMod>.
Grouping <- factor(
  x = paste0(
    batches$Cohort, ".",
    batches$RegionMod, ".",
    batches$TypeMod
  )
)

In [6]:
# Store the combined label as a column of the metadata table.
batches[["Grouping"]] <- Grouping

In [7]:
# Number of missing values in each metadata column.
vapply(batches, function(column) sum(is.na(column)), numeric(1))

In [8]:
# Impute missing covariates before the design matrix is built; model.matrix()
# with the default na.action would otherwise drop samples that have NA values.
#   ApoE: missing genotypes are set to "3_3" (presumably the most common
#         genotype in this cohort -- confirm with table(batches$ApoE)).
#   PMI:  missing values are replaced by the mean of the observed values.
batches$ApoE[is.na(batches$ApoE)] <- "3_3"
batches$PMI[is.na(batches$PMI)] <- mean(batches$PMI, na.rm = TRUE)

In [9]:
# Design matrix: the combined group label plus the sex, age at death, PMI and
# ApoE covariates. All variables are taken from the metadata table.
mod <- model.matrix(
  ~ Grouping + Gender + expired_age + PMI + ApoE,
  data = batches
)

### fit model with limma 

In [10]:
# Fit one linear model per row of E against the design matrix.
fit <- lmFit(object = E, design = mod)

In [11]:
# Show the coefficient names defined by the design matrix.
dimnames(mod)[[2L]]


In [12]:
# Show the coefficient names carried by the fitted model.
dimnames(fit$coefficients)[[2L]]

###  Contrasts 

In [13]:
# Create the contrasts of interest: within each PD brain region, compare each
# pair of TypeMod groups. Columns are named pd_<region>_<a>_vs_<b> and are
# ordered region by region, with the three pairs in the order listed below.
cont.matrix <- local({
  regions <- c("CAUD", "HIPP", "MDFG", "PTMN", "SMTG", "SUNI")
  numerators <- c("ADPD", "ADPD", "LOPD")
  denominators <- c("LOPD", "CTRL", "CTRL")

  region <- rep(regions, each = length(numerators))
  num <- rep(numerators, times = length(regions))
  den <- rep(denominators, times = length(regions))

  expressions <- paste0(
    "GroupingPD.", region, ".", num, " - GroupingPD.", region, ".", den
  )
  contrast_names <- tolower(paste0("pd_", region, "_", num, "_vs_", den))

  # makeContrasts() needs syntactically valid level names, so it renames
  # "(Intercept)" to "Intercept" (with a warning) when given the design
  # matrix directly. Do the rename explicitly here, then restore the design's
  # own column names below so the row names match the fit's coefficients and
  # contrasts.fit() does not warn about a mismatch.
  design_cols <- colnames(mod)
  cm <- makeContrasts(
    contrasts = expressions,
    levels = sub("^\\(Intercept\\)$", "Intercept", design_cols)
  )
  colnames(cm) <- contrast_names
  rownames(cm) <- design_cols
  cm
})


“Renaming (Intercept) to Intercept”

In [14]:
# Re-express the fit in terms of the contrasts, then moderate the statistics
# with empirical Bayes. The contrast names drive the reporting loop.
fit2 <- contrasts.fit(fit, contrasts = cont.matrix)
e <- eBayes(fit2)
comparisons <- colnames(cont.matrix)

“row names of contrasts don't match col names of coefficients”

In [15]:
# Significance cut-offs: adjusted p-value and absolute log2 fold change.
pval_thresh <- 0.05
lfc_thresh <- 1

In [17]:
# For every contrast: count the significant rows, draw a volcano plot to
# volcano_diff<contrast>.png, and write the significant rows to
# diff_<contrast>.tsv.
for (i in seq_along(comparisons)) {
  # Rows passing both the adjusted p-value and |log2 FC| cut-offs.
  tab <- topTable(
    e,
    number = nrow(e),
    coef = i,
    lfc = lfc_thresh,
    p.value = pval_thresh
  )
  up <- sum(tab$logFC > 0)
  down <- sum(tab$logFC < 0)
  sig <- nrow(tab)
  curtitle <- paste(comparisons[i], '\n', 'sig:', sig, '\n', 'up:', up, '\n', 'down:', down, '\n')
  print(curtitle)

  # Unfiltered table for plotting; flag the rows that pass both cut-offs.
  vals <- topTable(e, number = nrow(e), coef = i)
  vals$pscaled <- -log10(vals$adj.P.Val)
  vals$sig <- vals$adj.P.Val < pval_thresh & abs(vals$logFC) > lfc_thresh

  # Colours are keyed by name so TRUE stays red even when every point (or no
  # point) is significant. The y axis shows adjusted p-values.
  volcano <- ggplot(data = vals, aes(x = logFC, y = pscaled, color = sig)) +
    geom_point(alpha = 0.1) +
    xlab("log2(FC)") +
    ylab("-log10(adjusted p-value)") +
    ggtitle(curtitle) +
    theme_bw() +
    scale_color_manual(values = c("FALSE" = "#000000", "TRUE" = "#FF0000"))

  png(paste0("volcano_diff", comparisons[i], ".png"))
  # Close the graphics device even if rendering fails.
  tryCatch(print(volcano), finally = dev.off())

  write.table(
    tab,
    file = paste0("diff_", comparisons[i], ".tsv"),
    quote = FALSE,
    sep = "\t",
    row.names = TRUE,
    col.names = TRUE
  )
}
    

[1] "pd_caud_adpd_vs_lopd \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_caud_adpd_vs_ctrl \n sig: 483 \n up: 148 \n down: 335 \n"
[1] "pd_caud_lopd_vs_ctrl \n sig: 113 \n up: 34 \n down: 79 \n"
[1] "pd_hipp_adpd_vs_lopd \n sig: 37 \n up: 27 \n down: 10 \n"
[1] "pd_hipp_adpd_vs_ctrl \n sig: 5 \n up: 5 \n down: 0 \n"
[1] "pd_hipp_lopd_vs_ctrl \n sig: 17 \n up: 12 \n down: 5 \n"
[1] "pd_mdfg_adpd_vs_lopd \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_mdfg_adpd_vs_ctrl \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_mdfg_lopd_vs_ctrl \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_ptmn_adpd_vs_lopd \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_ptmn_adpd_vs_ctrl \n sig: 3 \n up: 2 \n down: 1 \n"
[1] "pd_ptmn_lopd_vs_ctrl \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_smtg_adpd_vs_lopd \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_smtg_adpd_vs_ctrl \n sig: 3 \n up: 2 \n down: 1 \n"
[1] "pd_smtg_lopd_vs_ctrl \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_suni_adpd_vs_lopd \n sig: 18 \n up: 8 \n down: 10 \n"
[1] "pd_suni_adpd_vs_ct

## Looking at Type and Region (as opposed to TypeMod and RegionMod)

In [18]:
# Rebuild the group label from the Region and Type columns, refresh the design
# matrix with the same covariates, and refit the per-row linear models.
Grouping <- factor(
  x = paste0(
    batches$Cohort, ".",
    batches$Region, ".",
    batches$Type
  )
)
batches$Grouping <- Grouping
mod <- model.matrix(
  ~ Grouping + Gender + expired_age + PMI + ApoE,
  data = batches
)
fit <- lmFit(object = E, design = mod)
# Show the coefficient names available for contrasts.
dimnames(fit$coefficients)[[2L]]

In [19]:
# Create the contrasts of interest: within each PD brain region, compare each
# pair of Type groups. Columns are named pd_<region>_<a>_vs_<b> and are
# ordered region by region, with the six pairs in the order listed below.
cont.matrix <- local({
  regions <- c("CAUD", "HIPP", "MDFG", "MDTG", "PTMN", "SUNI")
  numerators <- c("GBA1", "GBA1", "GBA1", "LRRK", "LRRK", "SPOR")
  denominators <- c("LRRK", "SPOR", "CTRL", "SPOR", "CTRL", "CTRL")

  region <- rep(regions, each = length(numerators))
  num <- rep(numerators, times = length(regions))
  den <- rep(denominators, times = length(regions))

  expressions <- paste0(
    "GroupingPD.", region, ".", num, " - GroupingPD.", region, ".", den
  )
  contrast_names <- tolower(paste0("pd_", region, "_", num, "_vs_", den))

  # makeContrasts() needs syntactically valid level names, so it renames
  # "(Intercept)" to "Intercept" (with a warning) when given the design
  # matrix directly. Do the rename explicitly here, then restore the design's
  # own column names below so the row names match the fit's coefficients and
  # contrasts.fit() does not warn about a mismatch.
  design_cols <- colnames(mod)
  cm <- makeContrasts(
    contrasts = expressions,
    levels = sub("^\\(Intercept\\)$", "Intercept", design_cols)
  )
  colnames(cm) <- contrast_names
  rownames(cm) <- design_cols
  cm
})


“Renaming (Intercept) to Intercept”

In [20]:
# Re-express the fit in terms of the contrasts, then moderate the statistics
# with empirical Bayes. The contrast names drive the reporting loop.
fit2 <- contrasts.fit(fit, contrasts = cont.matrix)
e <- eBayes(fit2)
comparisons <- colnames(cont.matrix)

“row names of contrasts don't match col names of coefficients”

In [21]:
# For every contrast: count the significant rows, draw a volcano plot to
# expanded_volcano_diff<contrast>.png, and write the significant rows to
# expanded_diff_<contrast>.tsv.
for (i in seq_along(comparisons)) {
  # Rows passing both the adjusted p-value and |log2 FC| cut-offs.
  tab <- topTable(
    e,
    number = nrow(e),
    coef = i,
    lfc = lfc_thresh,
    p.value = pval_thresh
  )
  up <- sum(tab$logFC > 0)
  down <- sum(tab$logFC < 0)
  sig <- nrow(tab)
  curtitle <- paste(comparisons[i], '\n', 'sig:', sig, '\n', 'up:', up, '\n', 'down:', down, '\n')
  print(curtitle)

  # Unfiltered table for plotting; flag the rows that pass both cut-offs.
  vals <- topTable(e, number = nrow(e), coef = i)
  vals$pscaled <- -log10(vals$adj.P.Val)
  vals$sig <- vals$adj.P.Val < pval_thresh & abs(vals$logFC) > lfc_thresh

  # Colours are keyed by name so TRUE stays red even when every point (or no
  # point) is significant. The y axis shows adjusted p-values.
  volcano <- ggplot(data = vals, aes(x = logFC, y = pscaled, color = sig)) +
    geom_point(alpha = 0.1) +
    xlab("log2(FC)") +
    ylab("-log10(adjusted p-value)") +
    ggtitle(curtitle) +
    theme_bw() +
    scale_color_manual(values = c("FALSE" = "#000000", "TRUE" = "#FF0000"))

  png(paste0("expanded_volcano_diff", comparisons[i], ".png"))
  # Close the graphics device even if rendering fails.
  tryCatch(print(volcano), finally = dev.off())

  write.table(
    tab,
    file = paste0("expanded_diff_", comparisons[i], ".tsv"),
    quote = FALSE,
    sep = "\t",
    row.names = TRUE,
    col.names = TRUE
  )
}
    

[1] "pd_caud_gba1_vs_lrrk \n sig: 15 \n up: 15 \n down: 0 \n"
[1] "pd_caud_gba1_vs_spor \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_caud_gba1_vs_ctrl \n sig: 836 \n up: 240 \n down: 596 \n"
[1] "pd_caud_lrrk_vs_spor \n sig: 6 \n up: 0 \n down: 6 \n"
[1] "pd_caud_lrrk_vs_ctrl \n sig: 22 \n up: 0 \n down: 22 \n"
[1] "pd_caud_spor_vs_ctrl \n sig: 127 \n up: 38 \n down: 89 \n"
[1] "pd_hipp_gba1_vs_lrrk \n sig: 247 \n up: 246 \n down: 1 \n"
[1] "pd_hipp_gba1_vs_spor \n sig: 23 \n up: 11 \n down: 12 \n"
[1] "pd_hipp_gba1_vs_ctrl \n sig: 1 \n up: 1 \n down: 0 \n"
[1] "pd_hipp_lrrk_vs_spor \n sig: 309 \n up: 0 \n down: 309 \n"
[1] "pd_hipp_lrrk_vs_ctrl \n sig: 189 \n up: 0 \n down: 189 \n"
[1] "pd_hipp_spor_vs_ctrl \n sig: 13 \n up: 10 \n down: 3 \n"
[1] "pd_mdfg_gba1_vs_lrrk \n sig: 11 \n up: 11 \n down: 0 \n"
[1] "pd_mdfg_gba1_vs_spor \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_mdfg_gba1_vs_ctrl \n sig: 0 \n up: 0 \n down: 0 \n"
[1] "pd_mdfg_lrrk_vs_spor \n sig: 17 \n up: 0 \n down: 17 \n"
[1] "