In [None]:
# conda activate dream

library(edgeR)
library(parallelly)
library(data.table)
library(BiocParallel)
library(variancePartition)

setwd("/mnt/lareaulab/reliscu/projects/NSF_GRFP/analyses/pseudobulk_test/tasic_2018/mouse_ALM")

In [None]:
# Restrict every BLAS/OpenMP backend to a single thread (presumably so the
# SnowParam workers below do not oversubscribe the machine -- confirm).
Sys.setenv(
  OPENBLAS_NUM_THREADS="1",
  OMP_NUM_THREADS="1",
  MKL_NUM_THREADS="1"
)
# RhpcBLASctl is optional: only adjust the runtime thread counts if it is installed
if (requireNamespace("RhpcBLASctl", quietly=TRUE)) {
  invisible(lapply(
    list(RhpcBLASctl::blas_set_num_threads, RhpcBLASctl::omp_set_num_threads),
    function(set_threads) set_threads(1)
  ))
}

# Parallel backend passed as BPPARAM to the variancePartition calls
param <- SnowParam(type="SOCK", workers=6, progressbar=TRUE)

Here I run DE analysis on the pseudobulked cell type data

In [4]:
# Load the pseudobulk count table and the matching per-sample metadata
counts <- fread("data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk.csv", data.table=FALSE)
sample_meta <- fread("data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_sampleinfo.csv", data.table=FALSE)

# Add sex and age info. to metadata
# merge() does not keep the input row order, so record each sample's original position first
sample_meta$Row <- seq_len(nrow(sample_meta))
donor_meta <- fread("/mnt/lareaulab/reliscu/projects/NSF_GRFP/data/scRNA-seq/tasic_2018/tasic_2018_tableS10_sampleinfo_donor_level.csv")
sample_meta <- merge(sample_meta, donor_meta[,1:3], by.x="Donor", by.y="Animal ID", all.x=TRUE, sort=FALSE)
# A donor appearing more than once in donor_meta would duplicate samples in the merge
if (anyDuplicated(sample_meta$Row) > 0) {
    stop("Merging donor metadata duplicated samples; check for repeated 'Animal ID' values", call.=FALSE)
}
sample_meta <- sample_meta[order(sample_meta$Row),] # Keep original row order
rownames(sample_meta) <- sample_meta$Sample_ID
# NOTE(review): downstream code assumes the rows of sample_meta are in the same
# order as the sample columns of `counts` -- confirm against the input files.

# Reformat cell type names: replace spaces and slashes with underscores
# (gsub is vectorized, so no sapply is needed and no names attribute is attached)
sample_meta$Cell_type <- gsub("[ /]", "_", sample_meta$Cell_type)

# Center age
sample_meta$Age_c <- as.numeric(scale(sample_meta$Age, center=TRUE, scale=FALSE))

In [5]:
# Prep count data

# Wrap the count table in an edgeR DGEList (the first column of `counts` is
# picked up as gene annotation, per the message printed by this cell)
y <- DGEList(counts) 
# Flag genes with enough counts to be worth testing, using cell type as the grouping
keep <- filterByExpr(y, group=sample_meta$Cell_type)
# Drop filtered genes; keep.lib.sizes=FALSE recomputes library sizes from the retained genes
y <- y[keep,, keep.lib.sizes=FALSE]
# Report genes x samples remaining after filtering
print(dim(y$counts))
# Compute normalization factors (edgeR default method)
y <- calcNormFactors(y)

Setting first column of `counts` as gene annotation.



[1] 25273   609


In [6]:
# Distinct cell types, in order of first appearance in sample_meta
ctypes <- unique(sample_meta$Cell_type)
# The same cell types in factor-level (sorted) order
ctype_levels <- levels(factor(ctypes))

In [7]:
# # Subset data for testing

# set.seed(1)
# sample_idx <- sample(1:nrow(sample_meta), size=200)
# sample_meta <- sample_meta[sample_idx,]
# counts_subset <- counts[sample(1:nrow(counts), size=1000), c(1, sample_idx + 1)]

# y <- DGEList(counts_subset)
# keep <- filterByExpr(y, group=sample_meta$Cell_type)
# y <- y[keep,, keep.lib.sizes=FALSE]
# dim(y$counts)
# y <- calcNormFactors(y)

# ctypes <- unique(sample_meta$Cell_type)
# ctype_levels <- levels(factor(ctypes))

## 1 vs. pooled test

Compare gene expression between the target cell type and all other cell types pooled

In [8]:
# # Note: this takes several days

# form <- ~ Test + Sex + Age_c + (1 | Donor)

# pool_res <- lapply(ctypes, function(target) {
#     print(paste(target, "vs. rest"))
#     sample_meta$Test <- ifelse(
#         sample_meta$Cell_type == target, target, "Rest"
#     )
#     vobj <- voomWithDreamWeights(y, form, sample_meta, BPPARAM=param)
#     fit <- dream(vobj, form, sample_meta, BPPARAM=param)
#     # fit <- eBayes(fit)
#     topTable(fit, number=Inf, p.value=0.05)
# })
# names(pool_res) <- ctypes

# saveRDS(pool_res, file="data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_1_vs_pooled_DE_genes_dream.RDS")

## Pairwise tests / 1 vs. mean(others) tests

Compare model coefficients between:

1. Each target cell type and each other cell type (pairwise)
2. Each target cell type and the mean of all other cell types (1 vs. mean(others))

In [9]:
# Model: one coefficient per cell type (no intercept), fixed effects for sex and
# centered age, and a random intercept per donor
form <- ~ 0 + Cell_type + Sex + Age_c + (1 | Donor)
# Estimate voom precision weights under the mixed model; `vobj` is reused by every dream() fit
vobj <- voomWithDreamWeights(y, form, sample_meta, BPPARAM=param)

In [65]:
# Build the pairwise contrast matrix `L`: one column per pair of cell types,
# with +1 on the first cell type's coefficient and -1 on the second.

# Fixed-effects part of the model, used only to recover the coefficient names
form_fixed <- ~ 0 + Cell_type + Sex + Age_c
coef_names <- colnames(model.matrix(form_fixed, data=sample_meta))

# Pick the cell type coefficients by prefix rather than by position, so the
# selection stays correct regardless of how many columns Sex/Age_c expand to
ctype_coef_names <- coef_names[startsWith(coef_names, "Cell_type")]
if (length(ctype_coef_names) < 2) {
    stop("Need at least two Cell_type coefficients to build pairwise contrasts", call.=FALSE)
}
ctype_pairs <- combn(ctype_coef_names, m=2)

L_list <- lapply(seq_len(ncol(ctype_pairs)), function(idx) {
    pair <- ctype_pairs[,idx]
    # Strip only the leading "Cell_type" prefix to recover the level name
    ctype1 <- sub("^Cell_type", "", pair[1])
    ctype2 <- sub("^Cell_type", "", pair[2])
    L <- matrix(
        0, nrow=length(coef_names), ncol=1,
        dimnames=list(coef_names, paste0(ctype1, "_vs_", ctype2))
    )
    L[pair[1], 1] <-  1
    L[pair[2], 1] <- -1
    L
})
# Rows = model coefficients, columns = "<ctype1>_vs_<ctype2>" contrasts
L <- do.call(cbind, L_list)

In [None]:
# Batching contrasts

# Running on compute4 (screen on compute3)

# Fit the pairwise contrasts in batches of `batch_size` columns of L, keeping
# only the significant genes from each batch before the fit is discarded.
batch_size <- 100
idx_list <- split(seq_len(ncol(L)), ceiling(seq_len(ncol(L)) / batch_size))
res_list <- vector("list", length(idx_list))

for (i in seq_along(idx_list)) {
    print(paste("Starting batch", i))
    idx <- idx_list[[i]]
    Lsub <- L[,idx, drop=FALSE]
    # NOTE(review): recent variancePartition releases document calling
    # eBayes(fit) after dream() and before topTable() -- confirm whether the
    # installed version needs it.
    fit  <- dream(vobj, form, sample_meta, L=Lsub, BPPARAM=param)
    batch_res <- lapply(colnames(Lsub), function(test) {
        tt <- topTable(fit, coef=test, number=Inf, p.value=0.05)
        # A contrast with no gene passing the cutoff yields an empty table;
        # assigning a scalar column to it would error, so skip it instead
        if (is.null(tt) || nrow(tt) == 0) {
            return(NULL)
        }
        tt$Test <- test
        tt
    })
    batch_df <- do.call(rbind, batch_res)
    # Assigning NULL with [[<- would delete the list slot, so only store real results
    if (!is.null(batch_df)) {
        res_list[[i]] <- batch_df
    }
    # Free the (large) fit before the next batch
    rm(fit); gc()
}
# Each element of res_list is already a data frame: bind them directly
# (unlist() would break them apart into individual columns)
all_res <- do.call(rbind, res_list)

In [None]:
# Persist the per-batch list of significant pairwise results (note: this saves
# `res_list`, not the combined `all_res` table)
saveRDS(res_list, file="data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_pairwise_DE_genes_dream.RDS")

In [None]:
# res_list <- vector(mode="list", length=length(ctypes))
# names(res_list) <- ctypes

# for (target in ctypes) {
#     print(paste("Starting", target))

#     # Test multiple contrasts at once:
#     L <- cbind(
#         make_pairwise_contrasts(target),
#         make_mean_contrasts(target)
#     )
#     fit <- dream(vobj, form, sample_meta, L=L, BPPARAM=param)
#     fit <- eBayes(fit)

#     # Extract results
#     ctype_res_list <- lapply(colnames(L), function(test) {
#         tt <- topTable(fit, coef=test, number=Inf, p.value=0.05)
#         tt$Test <- test
#         tt
#     })
#     res_list[[target]] <- do.call(rbind, ctype_res_list)

# }

# saveRDS(mean_res, file="data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_multi_test_DE_genes_dream.RDS")

# 1 vs. mean(others) tests

Compare model coefficients between each target cell type and the average of the other cell types (this approach is invariant to cell type composition, unlike the pooled approach)

In [None]:
# Make a 1 vs. mean(others) contrast matrix

# Fixed-effects part of the model (random effect omitted), used only to
# recover the coefficient names that index the rows of a contrast matrix
form_fixed <- ~ 0 + Cell_type + Sex + Age_c
coef_names <- colnames(model.matrix(form_fixed, data=sample_meta))

# Build a one-column contrast comparing a target cell type against the
# unweighted mean of the other cell types.
#
# Args:
#   target:       single cell type level to test.
#   ctype_levels: all cell type levels in the model.
#   coefs:        fixed-effect coefficient names indexing the contrast rows;
#                 defaults to the global `coef_names`.
#
# Returns:
#   A length(coefs) x 1 matrix named "<target>_vs_meanOthers", with 1 on the
#   target coefficient, -1/K on each of the K other cell type coefficients,
#   and 0 elsewhere.
make_mean_contrast <- function(target, ctype_levels, coefs=coef_names){
  stopifnot(length(target) == 1)
  others <- setdiff(ctype_levels, target)
  K <- length(others)
  if (K == 0) {
    stop("Need at least one other cell type to average over", call.=FALSE)
  }
  # model.matrix() names factor columns by pasting the level verbatim, so the
  # levels must not be passed through make.names() here
  target_coef <- paste0("Cell_type", target)
  other_coefs <- paste0("Cell_type", others)
  missing_coefs <- setdiff(c(target_coef, other_coefs), coefs)
  if (length(missing_coefs) > 0) {
    stop(
      "Coefficient(s) not found in model: ",
      paste(missing_coefs, collapse=", "),
      call.=FALSE
    )
  }
  L <- matrix(0, length(coefs), 1, dimnames=list(coefs, paste0(target, "_vs_meanOthers")))
  L[target_coef, 1] <-  1
  L[other_coefs, 1] <- -1/K
  L
}

In [None]:

# Build one "<target>_vs_meanOthers" contrast per cell type and fit them together.
# (`Lsub` must not be used here: it is the last batch of pairwise contrasts left
# over from the batching loop, not the 1 vs. mean(others) contrasts.)
L_mean <- do.call(cbind, lapply(ctype_levels, make_mean_contrast, ctype_levels=ctype_levels))
fit  <- dream(vobj, form, sample_meta, L=L_mean, BPPARAM=param)