In [1]:
library(DESeq2)
library(gplots)
library(RColorBrewer)
library(genefilter)

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,
    colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,
    mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which, which.max, which.min

“multiple methods

In [None]:
# Input files for the analysis:
#   countsFile     - per-peak read counts, one column per sample, no header row
#   sampleInfoFile - per-sample metadata table (has a header row)
#   peakIDsFile    - peak annotation whose first column supplies the peak IDs
peakIDsFile <- "dlpfc_hpc_combined_set_combined_200_name.saf"
sampleInfoFile <- "all_samples_extended_info.txt"
countsFile <- "/counts/all_samples.countSimp.no_header.txt"

In [4]:
# Read the three tab-separated inputs. The count table has no header row;
# the sample and peak tables take their row names from their first column.
countMatrix <- read.table(
  countsFile,
  sep = "\t", header = FALSE, skip = 0
)
sampleInfo <- read.table(
  sampleInfoFile,
  sep = "\t", header = TRUE, row.names = 1, skip = 0
)
peakInfo <- read.table(
  peakIDsFile,
  sep = "\t", header = FALSE, row.names = 1, skip = 0
)

# Sample IDs are later used as column names, so coerce them to syntactically
# valid R names now; row names and column names must match exactly.
rownames(sampleInfo) <- make.names(rownames(sampleInfo))

# Diagnostic only: countMatrix was read without a header, so at this point
# its column names are still read.table's defaults.
all(rownames(sampleInfo) == colnames(countMatrix))

In [5]:
# The count table has no header, so sample IDs are attached purely by
# position. Refuse to continue unless there is exactly one count column per
# metadata row; otherwise columns could end up unnamed or mislabelled.
# NOTE(review): this also assumes the columns of the count file are in the
# same order as the rows of the sample sheet - confirm against the pipeline
# that produced the count file.
stopifnot(ncol(countMatrix) == nrow(sampleInfo))
colnames(countMatrix) <- rownames(sampleInfo)

# Displayed as a confirmation that the labels now agree.
all(rownames(sampleInfo) == colnames(countMatrix))

In [6]:
# Label each count row with the corresponding peak ID (attached by position).
rownames(countMatrix) <- rownames(peakInfo)

In [7]:
# Categorical sample annotations are stored as factors.
categoricalCols <- c(
  "proj_id", "cell_type", "brain_region", "attempt",
  "replicate", "binary_amyloid", "msex"
)
sampleInfo[categoricalCols] <- lapply(sampleInfo[categoricalCols], factor)

# 0/1 indicator factors, one per cell type.
cellType <- sampleInfo[["cell_type"]]
sampleInfo[["is_microglia"]] <- factor(as.integer(cellType == "Microglia"))
sampleInfo[["is_glia"]] <- factor(as.integer(cellType == "Glia"))
sampleInfo[["is_neuron"]] <- factor(as.integer(cellType == "Neuron"))

# Square-root transformed amyloid measurement.
sampleInfo[["amyloid_sqrt"]] <- sqrt(sampleInfo[["amyloid"]])

In [8]:
# Sample-level QC thresholds; a sample is kept only if its metric is strictly
# greater than the cutoff.
# NOTE(review): NSC / RSC / PBC are presumably the usual strand
# cross-correlation and PCR-bottleneck metrics - confirm with the sample sheet.
pbcCutoff <- 0.5 # applied to both the PBC1 and PBC2 columns
rscCutoff <- 0.4 # applied to the RSC column
nscCutoff <- 1 # applied to the NSC column

In [9]:
# Keep only samples that pass every QC threshold and come from glia in DLPFC.
#
# All conditions are combined into one mask and resolved with which(), so a
# sample with a missing QC metric or annotation is dropped. Indexing a data
# frame directly with a logical vector containing NA would instead insert
# all-NA rows, whose bogus row names then break the column lookup below.
passesQc <- sampleInfo$NSC > nscCutoff &
  sampleInfo$RSC > rscCutoff &
  sampleInfo$PBC1 > pbcCutoff &
  sampleInfo$PBC2 > pbcCutoff
isGliaDlpfc <- sampleInfo$cell_type == "Glia" &
  sampleInfo$brain_region == "DLPFC"
keepRows <- which(passesQc & isGliaDlpfc)

sampleInfo <- sampleInfo[keepRows, , drop = FALSE]

# Restrict the counts to the surviving samples, in the same order as the
# metadata; drop = FALSE keeps a data frame even if one sample remains.
countMatrix <- countMatrix[, rownames(sampleInfo), drop = FALSE]

In [None]:
# Assemble the DESeq2 dataset from the filtered counts and sample metadata,
# with binary amyloid status as the only term in the design.
dds <- DESeqDataSetFromMatrix(
  countData = countMatrix,
  colData = sampleInfo,
  design = ~ binary_amyloid
)

# Display a summary of the dataset.
dds

In [11]:
# Columns sharing the same project ID, brain region and cell type are
# collapsed into one column; the grouping key is made a valid R name because
# it becomes the collapsed column name.
replicateGroup <- make.names(
  paste(dds$proj_id, dds$brain_region, dds$cell_type, sep = "_")
)
ddsCollapsed <- collapseReplicates(dds, groupby = replicateGroup)

In [12]:
# Variance-stabilizing transformation of the collapsed counts.
# blind = TRUE is the function's default, spelled out here: the
# transformation ignores the design formula.
vst_transform <- vst(ddsCollapsed, blind = TRUE)

In [13]:
# Extract the transformed values as a plain matrix. Later cells index its
# rows by peak ID and its columns by collapsed sample name.
vst_data <- assay(vst_transform)

In [15]:
# Save the transformed matrix as unquoted, tab-separated text (row and
# column names included).
write.table(
  vst_data,
  file = "/vst_transform/glia_dlpfc_vst_matrix.txt",
  quote = FALSE,
  sep = "\t"
)

In [14]:
# Sample annotations of the collapsed dataset (one row per collapsed column);
# used below for export and for ordering samples by amyloid.
dlpfcSampleInfo <- colData(ddsCollapsed)

In [20]:
# Save the collapsed sample annotations as unquoted, tab-separated text.
write.table(
  dlpfcSampleInfo,
  file = "/vst_transform/glia_dlpfc_sample_info.txt",
  quote = FALSE,
  sep = "\t"
)

In [None]:
# IDs of the peaks called "up" in the binary-amyloid DESeq comparison for
# glia / DLPFC. The file is read as a headerless table and its first column
# is taken as the ID list.
glia_dlpfc_ba_up_peaks_file <- "/deseq_analysis/binary_amyloid_deseq/glia_dlpfc_binary_amyloid_up_peak_ids.txt"

# Read from the *up* file defined above. The previous code referenced
# glia_dlpfc_ba_down_peaks_file, which is not defined in this notebook and
# would either fail or silently load a different peak set from a stale
# session variable.
glia_dlpfc_ba_up_peaks_list <- as.character(
  read.table(glia_dlpfc_ba_up_peaks_file)$V1
)

In [16]:
# Keep only the rows of the transformed matrix whose row names are in the
# up-peak ID list, in list order. Indexing by name fails with an error if an
# ID is not a row name of vst_data.
selected_vst_matrix <- vst_data[glia_dlpfc_ba_up_peaks_list,]

In [17]:
# Arrange samples (columns) by increasing amyloid value.
amyloidOrder <- order(dlpfcSampleInfo$amyloid)
sampleInfoSorted <- dlpfcSampleInfo[amyloidOrder, ]
selected_vst_matrix <- selected_vst_matrix[, rownames(sampleInfoSorted)]

# Arrange peaks (rows) from most to least variable across samples.
peakVariance <- rowVars(selected_vst_matrix)
selected_vst_matrix <- selected_vst_matrix[
  order(peakVariance, decreasing = TRUE),
]

# 100-step colour ramp built from the reversed 9-class "RdBu" palette.
heatmapPalette <- colorRampPalette(rev(brewer.pal(9, "RdBu")))(100)

# Draw the heatmap to a PNG. Rows are scaled, and the row/column order set
# above is kept as-is: no clustering, no dendrograms, no trace lines or
# density plot.
png("glia_dlpfc_ba_up_heatmap.png")
full <- heatmap.2(
  selected_vst_matrix,
  Rowv = FALSE,
  Colv = FALSE,
  dendrogram = "none",
  scale = "row",
  col = heatmapPalette,
  trace = "none",
  density.info = "none"
)
dev.off()

In [18]:
# Render the sorted amyloid values as a colour strip (a single-column matrix
# drawn with image()), using a reversed 8-step heat palette, and save it as
# an SVG.
amyloidStrip <- as.matrix(sampleInfoSorted$amyloid)
stripPalette <- rev(heat.colors(8))

svg("glia_dlpfc_amyloid_values.svg")
image(amyloidStrip, col = stripPalette)
dev.off()