In [21]:
library(DESeq2)
library(gplots)
library(RColorBrewer)
library(genefilter)

In [None]:
# Input locations used by the read.table() calls in this notebook: the count
# table, the sample metadata table, and the SAF file that supplies the peak IDs.
countsFile <- "/counts/all_samples.countSimp.no_header.txt"
sampleInfoFile <- "all_samples_extended_info.txt"
peakIDsFile <- "dlpfc_hpc_combined_set_combined_200_name.saf"

In [23]:
# Load the three tab-separated inputs. The count table is read without a
# header row; the sample and peak tables take their row names from their
# first column.
countMatrix <- read.table(file = countsFile, sep = "\t", header = FALSE,
                          skip = 0)
sampleInfo <- read.table(file = sampleInfoFile, sep = "\t", header = TRUE,
                         row.names = 1, skip = 0)
peakInfo <- read.table(file = peakIDsFile, sep = "\t", header = FALSE,
                       row.names = 1, skip = 0)

# Sanitize the sample IDs into syntactically valid R names so that they can
# later be matched against (and used as) column names of the count table.
rownames(sampleInfo) <- make.names(names = rownames(sampleInfo))
# NOTE(review): countMatrix was read with header = FALSE, so its column names
# at this point are read.table's defaults rather than sample IDs -- confirm
# that this comparison is meant as a "before" diagnostic only.
all(rownames(sampleInfo) == colnames(countMatrix))

In [24]:
# Label the count columns with the sample IDs from the metadata table.
# The assignment is purely positional, so first require exactly one metadata
# row per count column; without this guard a shorter ID vector would silently
# leave trailing columns unnamed.
# NOTE(review): this also assumes the metadata rows are in the same order as
# the count columns -- confirm against how the count file was generated.
stopifnot(ncol(countMatrix) == nrow(sampleInfo))
colnames(countMatrix) <- rownames(sampleInfo)
# Printed confirmation that the column names now equal the sample IDs.
all(rownames(sampleInfo) == colnames(countMatrix))

In [25]:
# Label the count rows with the peak IDs (the row names of peakInfo).
# The assignment is positional, so fail early with a clear message if the two
# tables do not have the same number of rows.
stopifnot(nrow(countMatrix) == nrow(peakInfo))
rownames(countMatrix) <- rownames(peakInfo)

In [26]:
# Convert the categorical metadata columns to factors in one pass.
factorColumns <- c("proj_id", "cell_type", "brain_region", "attempt",
                   "replicate", "binary_amyloid", "msex")
sampleInfo[factorColumns] <- lapply(sampleInfo[factorColumns], factor)

# Add one 0/1 indicator factor per cell type of interest; each new column is
# 1 where cell_type equals the given label and 0 otherwise.
cellTypeIndicators <- c(is_microglia = "Microglia",
                        is_glia = "Glia",
                        is_neuron = "Neuron")
for (indicatorName in names(cellTypeIndicators)) {
  matchesLabel <- sampleInfo$cell_type == cellTypeIndicators[[indicatorName]]
  sampleInfo[[indicatorName]] <- factor(as.integer(matchesLabel))
}

# Square-root transform of the continuous amyloid column.
sampleInfo$amyloid_sqrt <- sqrt(sampleInfo$amyloid)

In [27]:
# Quality-control thresholds. Samples are kept only when their NSC, RSC and
# PBC1/PBC2 metadata values are strictly greater than these cutoffs (the same
# PBC cutoff is applied to both PBC1 and PBC2).
nscCutoff <- 1
rscCutoff <- 0.4
pbcCutoff <- 0.5

In [28]:
# Keep only samples whose QC metrics all exceed their cutoffs, then restrict
# the count table to the surviving samples (matched by name).
passesQC <- sampleInfo$NSC > nscCutoff &
  sampleInfo$RSC > rscCutoff &
  sampleInfo$PBC1 > pbcCutoff &
  sampleInfo$PBC2 > pbcCutoff
# A missing QC value makes the comparison NA. Indexing a data frame with NA
# produces an all-NA row named "NA", which would then break the column lookup
# below, so samples with missing QC values are treated as failing.
passesQC[is.na(passesQC)] <- FALSE
sampleInfo <- sampleInfo[passesQC, , drop = FALSE]
# drop = FALSE keeps countMatrix two-dimensional even if one sample survives.
countMatrix <- countMatrix[, rownames(sampleInfo), drop = FALSE]

In [None]:
# Bundle the filtered counts and the sample metadata into a DESeqDataSet whose
# design formula contains the binary amyloid factor only.
dds <- DESeqDataSetFromMatrix(
  countData = countMatrix,
  colData = sampleInfo,
  design = ~ binary_amyloid
)
# Print a summary of the object.
dds

In [30]:
# Collapse columns that share the same project ID, brain region and cell type
# into one column per combination. The grouping label is the three values
# joined with "_" and passed through make.names().
ddsCollapsed <- collapseReplicates(
  dds,
  groupby = make.names(
    paste(dds$proj_id, dds$brain_region, dds$cell_type, sep = "_")
  )
)

In [None]:
# Apply DESeq2's variance-stabilizing transformation to the collapsed data
# set, leaving every optional argument of vst() at its default.
vst_transform <- vst(object = ddsCollapsed)

In [None]:
# Pull the transformed values out of the vst object via assay().
vst_data <- assay(x = vst_transform)

In [15]:
# Save the transformed matrix as unquoted, tab-separated text. Row and column
# names are written because write.table()'s defaults for them are unchanged.
write.table(
  x = vst_data,
  file = "/vst_transform/all_samples_vst_matrix.txt",
  quote = FALSE,
  sep = "\t"
)

In [14]:
# PCA of the variance-stabilized data, colored by cell type, saved as SVG.
svg("pca_colored_by_population.svg")
# Use every row of vst_transform instead of a hard-coded row count (the
# original passed ntop=352012, presumably the number of peaks), so the plot
# keeps using all rows if the peak set changes.
plotPCA(vst_transform, ntop = nrow(vst_transform), intgroup = "cell_type")
dev.off()

In [15]:
# PCA of the variance-stabilized data, colored by brain region, saved as SVG.
svg("pca_colored_by_brain_region.svg")
# Use every row of vst_transform instead of a hard-coded row count (the
# original passed ntop=352012, presumably the number of peaks), so the plot
# keeps using all rows if the peak set changes.
plotPCA(vst_transform, ntop = nrow(vst_transform), intgroup = "brain_region")
dev.off()

In [16]:
# PCA of the variance-stabilized data, colored by the msex factor, saved as SVG.
svg("pca_colored_by_sex.svg")
# Use every row of vst_transform instead of a hard-coded row count (the
# original passed ntop=352012, presumably the number of peaks), so the plot
# keeps using all rows if the peak set changes.
plotPCA(vst_transform, ntop = nrow(vst_transform), intgroup = "msex")
dev.off()

In [17]:
# PCA of the variance-stabilized data, colored by the binary amyloid factor,
# saved as SVG.
svg("pca_colored_by_pathology.svg")
# Use every row of vst_transform instead of a hard-coded row count (the
# original passed ntop=352012, presumably the number of peaks), so the plot
# keeps using all rows if the peak set changes.
plotPCA(vst_transform, ntop = nrow(vst_transform), intgroup = "binary_amyloid")
dev.off()

In [18]:
# PCA of the variance-stabilized data, colored by the square-root amyloid
# value (a numeric column rather than a factor), saved as SVG.
svg("pca_colored_by_pathology_continuous.svg")
# Use every row of vst_transform instead of a hard-coded row count (the
# original passed ntop=352012, presumably the number of peaks), so the plot
# keeps using all rows if the peak set changes.
plotPCA(vst_transform, ntop = nrow(vst_transform), intgroup = "amyloid_sqrt")
dev.off()