In [None]:
# conda activate dream

library(edgeR)
library(data.table)
library(BiocParallel)
library(variancePartition)

# All relative "data/..." paths used below are resolved against this directory.
project_dir <- "/mnt/lareaulab/reliscu/projects/NSF_GRFP/analyses/pseudobulk_test/tasic_2018/mouse_ALM"
setwd(project_dir)

In [None]:
# Parallel backend handed to every voomWithDreamWeights()/dream() call below:
# 20 socket workers, with a progress bar.
param <- SnowParam(workers = 20, type = "SOCK", progressbar = TRUE)

Here I run DE analysis on the pseudobulked cell type data

In [None]:
# Read the pseudobulk count table and its per-sample annotation as plain
# data.frames (not data.tables).
counts_path <- "data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk.csv"
sampleinfo_path <- "data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_sampleinfo.csv"

counts <- fread(counts_path, data.table = FALSE)
sample_meta <- fread(sampleinfo_path, data.table = FALSE)

In [None]:
# Add sex and age info. to metadata
# NOTE(review): the first three columns of the donor table are assumed to be
# "Animal ID" plus the sex and age columns used in the model formulas below --
# confirm against the CSV header.

# merge() does not promise to keep the row order of `sample_meta`, so the
# original position is recorded first and restored afterwards.
n_samples <- nrow(sample_meta)
sample_meta$Row <- seq_len(n_samples)
donor_meta <- fread("/mnt/lareaulab/reliscu/projects/NSF_GRFP/data/scRNA-seq/tasic_2018/tasic_2018_tableS10_sampleinfo_donor_level.csv")
sample_meta <- merge(sample_meta, donor_meta[, 1:3], by.x = "Donor", by.y = "Animal ID", all.x = TRUE, sort = FALSE)

# A donor listed more than once in the donor table would duplicate samples here
# and break the positional pairing with the count columns.
stopifnot(nrow(sample_meta) == n_samples)

sample_meta <- sample_meta[order(sample_meta$Row), ] # Keep original row order
rownames(sample_meta) <- sample_meta$Sample_ID

# Reformat cell type names: spaces and slashes become underscores.
# gsub() is already vectorised, so no sapply() is needed (sapply() would also
# attach the old names to the column as a names attribute).
sample_meta$Cell_type <- gsub("[ /]", "_", sample_meta$Cell_type)

In [None]:
# Prep count data

y <- DGEList(counts)

# filterByExpr(group = ...) and the models fitted below pair the columns of `y`
# with the rows of `sample_meta` purely by position, so verify the pairing
# before relying on it.
stopifnot(ncol(y) == nrow(sample_meta))
if (!identical(colnames(y), rownames(sample_meta))) {
    warning(
        "Column names of the count matrix do not match rownames(sample_meta); ",
        "check that samples are in the same order.",
        call. = FALSE
    )
}

# Drop genes with too few counts given the cell-type grouping, recompute
# library sizes from the retained genes, then compute normalisation factors.
keep <- filterByExpr(y, group = sample_meta$Cell_type)
y <- y[keep, , keep.lib.sizes = FALSE]
print(dim(y$counts))
y <- calcNormFactors(y)

Setting first column of `counts` as gene annotation.



In [None]:
# Distinct cell types in order of first appearance in the metadata ...
ctypes <- unique(sample_meta[["Cell_type"]])
# ... and the same labels in factor-level (sorted) order.
ctype_levels <- levels(as.factor(ctypes))

In [None]:
# # Test data
# set.seed(1)
# sample_idx <- sample(1:nrow(sample_meta), size=20)
# sample_meta_subset <- sample_meta[sample_idx,]
# counts_subset <- counts[sample(1:nrow(counts), size=100), c(1, sample_idx + 1)]

# y <- DGEList(counts_subset)
# keep <- filterByExpr(y, group=sample_meta_subset$Cell_type)
# y <- y[keep,, keep.lib.sizes=FALSE]
# dim(y$counts)
# y <- calcNormFactors(y)

# ctypes <- unique(sample_meta_subset$Cell_type)
# ctype_levels <- levels(factor(ctypes))

## 1 vs. pooled test

Compare gene expression between target cell type and a pool of all other cell types

In [97]:
# Fixed effect: Test (target cell type vs. "Rest", set inside the loop below).
# Sex, Age and Donor enter as random intercepts.
form <- ~ Test + (1 | Sex) + (1 | Age) + (1 | Donor)

In [None]:
# Note: this takes several days

# For every cell type, fit one dream mixed model comparing that cell type
# against all remaining samples pooled together as "Rest".
# Result: a list named by cell type, each element the full topTable for the
# Test coefficient.
pool_res <- lapply(ctypes, function(target) {
    print(paste(target, "vs. rest"))

    # Work on a local copy of the metadata. "Rest" is pinned as the reference
    # level so the Test coefficient is always (target - Rest); with a plain
    # character column the reference is chosen alphabetically, which flips the
    # sign of logFC for every cell type that sorts before "Rest".
    meta <- sample_meta
    meta$Test <- factor(
        ifelse(meta$Cell_type == target, target, "Rest"),
        levels = c("Rest", target)
    )

    # The weights depend on the formula, so they are re-estimated per target.
    vobj <- voomWithDreamWeights(y, form, meta, BPPARAM = param)
    fit <- dream(vobj, form, meta, BPPARAM = param)
    fit <- eBayes(fit)

    # Name the coefficient explicitly and keep every gene: topTable() returns
    # only the top 10 rows by default.
    topTable(fit, coef = paste0("Test", target), number = Inf)
})
names(pool_res) <- ctypes

In [None]:
# Persist the 1-vs-pooled results.
pool_res_path <- "data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_1_vs_pooled_DE_genes_dream.RDS"
saveRDS(pool_res, file = pool_res_path)

## Pairwise tests

In [None]:
# No-intercept model: one fixed-effect coefficient per cell type, Sex and Age as
# fixed covariates, Donor as a random intercept. This replaces the `form` used
# for the 1 vs. pooled tests above.
form <- ~ 0 + Cell_type + Sex + Age + (1 | Donor)

# Precision weights are estimated once here and reused by every contrast below.
vobj <- voomWithDreamWeights(
    counts = y,
    formula = form,
    data = sample_meta,
    BPPARAM = param
)

In [None]:
ctype_levels <- levels(factor(ctypes))

# For every cell type, test its coefficient against the average of the
# coefficients of all other cell types:
#   Cell_type<target> - (Cell_type<a> + Cell_type<b> + ...)/K
# Result: a list named by cell type; each element is a list holding one
# topTable named "<target>_vs_all".
pairwise_res <- lapply(ctype_levels, function(target) {
    print(paste(target, "vs. rest"))

    others <- setdiff(ctype_levels, target)
    K <- length(others)

    # Build the contrast expression as a single string.
    test <- paste0(
        "Cell_type", target, " - (",
        paste0("Cell_type", others, collapse = " + "),
        ")/", K
    )
    names(test) <- paste0(target, "_vs_all")
    L <- makeContrastsDream(form, sample_meta, contrasts = test)

    # Fit model for current test
    fit <- dream(vobj, form, sample_meta, L, BPPARAM = param)
    fit <- eBayes(fit)

    # Extract results for each contrast in L. Coefficients are selected by
    # contrast name rather than column position, and every gene is kept:
    # topTable() returns only the top 10 rows by default.
    ctype_pairwise_res <- lapply(colnames(L), function(contrast) {
        topTable(fit, coef = contrast, number = Inf)
    })
    names(ctype_pairwise_res) <- colnames(L)
    ctype_pairwise_res
})
# Name the outer list so results can be looked up by cell type.
names(pairwise_res) <- ctype_levels

saveRDS(pairwise_res, file="data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_pairwise_DE_genes_dream.RDS")

In [None]:
# # 3b) extract per-contrast results
# P2   <- do.call(cbind, lapply(res_list, `[[`, "P.Value"))  # two-sided p
# LFC  <- do.call(cbind, lapply(res_list, `[[`, "logFC"))