In [3]:
# conda activate dream

library(edgeR)
library(data.table)
library(BiocParallel)
library(variancePartition)

# Relative paths used later in this notebook (e.g. "data/...") resolve against
# this project directory.
setwd("/mnt/lareaulab/reliscu/projects/NSF_GRFP/analyses/pseudobulk_test/tasic_2018/mouse_ALM")

# Parallel backend handed to voomWithDreamWeights()/dream() via BPPARAM.
# NOTE(review): 20 is presumably the number of worker processes - confirm it
# suits the machine this runs on.
param <- MulticoreParam(20)


Attaching package: ‘variancePartition’


The following objects are masked from ‘package:limma’:

    eBayes, topTable




Here I run differential expression (DE) analysis on the pseudobulked cell type data.

In [64]:
# Pseudobulk count table and its per-sample metadata. data.table=FALSE makes
# fread() return plain data.frames instead of data.tables.
# NOTE(review): the first column of `counts` appears to hold gene annotation
# (DGEList() reports using it as such further down) - confirm.
counts <- fread("data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk.csv", data.table=FALSE)
sample_meta <- fread("data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_sampleinfo.csv", data.table=FALSE)

In [None]:
# Add sex and age info. to metadata
# Remember the incoming row order: merge() does not promise to preserve it,
# so it is restored explicitly below.
sample_meta$Row <- seq_len(nrow(sample_meta))
donor_meta <- fread("/mnt/lareaulab/reliscu/projects/NSF_GRFP/data/scRNA-seq/tasic_2018/tasic_2018_tableS10_sampleinfo_donor_level.csv")
# Only the first three donor-level columns are joined onto the samples.
# NOTE(review): these are assumed to be "Animal ID", Sex and Age, since `Sex`
# and `Age` are used downstream - confirm against the donor table.
sample_meta <- merge(
  sample_meta, donor_meta[, 1:3],
  by.x = "Donor", by.y = "Animal ID", all.x = TRUE, sort = FALSE
)
sample_meta <- sample_meta[order(sample_meta$Row), ] # Keep original row order
rownames(sample_meta) <- sample_meta$Sample_ID

# all.x = TRUE keeps samples without donor-level data and fills them with NA;
# surface that here instead of letting it show up later in the model fit.
if (anyNA(sample_meta$Age)) {
  warning(
    sum(is.na(sample_meta$Age)),
    " sample(s) have missing Age after merging donor metadata",
    call. = FALSE
  )
}

# Reformat cell type names: replace spaces and slashes with underscores.
# gsub() is vectorized, so no sapply() is needed (sapply() would also attach
# the original strings as element names).
sample_meta$Cell_type <- gsub("[ /]", "_", sample_meta$Cell_type)

# Center age (mean-centered only; scale = FALSE leaves the units unchanged)
sample_meta$Age_c <- as.numeric(scale(sample_meta$Age, center = TRUE, scale = FALSE))

In [66]:
# Prep count data

# Build the edgeR container; the first column of `counts` is taken as gene
# annotation (see the message printed by this cell).
y <- DGEList(counts) 
# Keep genes with enough counts to be informative, using cell type as the
# grouping that defines the minimum group size.
keep <- filterByExpr(y, group=sample_meta$Cell_type)
# Drop filtered genes and recompute library sizes from the retained genes.
y <- y[keep,, keep.lib.sizes=FALSE]
print(dim(y$counts))
# Compute normalization factors on the filtered counts.
y <- calcNormFactors(y)

Setting first column of `counts` as gene annotation.



[1] 25273   609


In [None]:
# Cell types in order of first appearance in the metadata; the main loop
# iterates over these.
ctypes <- unique(sample_meta$Cell_type)
# The same cell types in factor-level order; the contrast builders use these
# to enumerate the "other" cell types.
ctype_levels <- levels(factor(ctypes))

In [None]:
# # Subset data for testing

# set.seed(1)
# sample_idx <- sample(1:nrow(sample_meta), size=200)
# sample_meta_subset <- sample_meta[sample_idx,]
# counts_subset <- counts[sample(1:nrow(counts), size=1000), c(1, sample_idx + 1)]

# y <- DGEList(counts_subset)
# keep <- filterByExpr(y, group=sample_meta_subset$Cell_type)
# y <- y[keep,, keep.lib.sizes=FALSE]
# dim(y$counts)
# y <- calcNormFactors(y)

# ctypes <- unique(sample_meta_subset$Cell_type)
# ctype_levels <- levels(factor(ctypes))

Setting first column of `counts` as gene annotation.



## 1 vs. pooled test

Compare gene expression between target cell type and a pool of all other cell types

In [None]:
# # Note: this takes several days

# form <- ~ Test + Sex + Age_c + (1 | Donor)

# pool_res <- lapply(ctypes, function(target) {
#     print(paste(target, "vs. rest"))
#     sample_meta$Test <- ifelse(
#         sample_meta$Cell_type == target, target, "Rest"
#     )
#     vobj <- voomWithDreamWeights(y, form, sample_meta, BPPARAM=param)
#     fit <- dream(vobj, form, sample_meta, BPPARAM=param)
#     # fit <- eBayes(fit)
#     topTable(fit)
# })
# names(pool_res) <- ctypes

# saveRDS(pool_res, file="data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_1_vs_pooled_DE_genes_dream.RDS")

## Pairwise tests / 1 vs. mean(others) tests

In [104]:
# Make a 1 vs. mean(others) contrast matrix

#' Build a "one cell type vs. mean of all others" contrast.
#'
#' Relies on the global `coef_names` (fixed-effect design-matrix column names)
#' being defined before the function is called.
#'
#' @param a Cell-type level to test, spelled exactly as in `Cell_type`.
#' @param levs Character vector of all cell-type levels. Defaults to the
#'   global `ctype_levels`, so existing one-argument calls keep working.
#' @return A `length(coef_names)` x 1 numeric matrix with row names
#'   `coef_names` and column name `"<a>_vs_meanOthers"`: weight 1 on `a`,
#'   `-1/K` on each of the `K` other levels, 0 elsewhere.
make_mean_contrasts <- function(a, levs = ctype_levels) {
  others <- setdiff(levs, a)
  K <- length(others)
  if (K == 0L) {
    # With nothing to average over, -1/K would be -Inf and the contrast
    # would be meaningless.
    stop("No other cell types to compare '", a, "' against", call. = FALSE)
  }
  L <- matrix(
    0, length(coef_names), 1,
    dimnames = list(coef_names, paste0(a, "_vs_meanOthers"))
  )
  # model.matrix() names factor columns "<variable><level>" without
  # sanitizing the level, so the level is pasted as-is (no make.names()),
  # matching make_pairwise_contrasts().
  L[paste0("Cell_type", a), 1] <- 1
  L[paste0("Cell_type", others), 1] <- -1 / K
  L
}

# Make a pairwise contrast matrix

#' Build all "target vs. other" pairwise contrasts for one cell type.
#'
#' Reads the globals `ctype_levels` (all cell-type levels) and `coef_names`
#' (fixed-effect design-matrix column names).
#'
#' @param target Cell-type level to compare against every other level.
#' @return A numeric matrix with one row per entry of `coef_names` and one
#'   column per other level, named `"<target>_vs_<other>"`; each column has
#'   +1 on the target coefficient and -1 on the other one. `NULL` when there
#'   are no other levels.
make_pairwise_contrasts <- function(target) {
  rivals <- setdiff(ctype_levels, target)
  if (length(rivals) == 0L) {
    return(NULL)
  }

  # Allocate every column up front, then fill in the two non-zero entries.
  contrasts <- matrix(
    0,
    nrow = length(coef_names),
    ncol = length(rivals),
    dimnames = list(coef_names, paste0(target, "_vs_", rivals))
  )
  contrasts[paste0("Cell_type", target), ] <- 1
  for (j in seq_along(rivals)) {
    contrasts[paste0("Cell_type", rivals[j]), j] <- -1
  }
  contrasts
}

In [None]:
# Model: no intercept, so each cell type gets its own coefficient; sex and
# centered age as fixed covariates; donor as a random intercept.
form <- ~ 0 + Cell_type + Sex + Age_c + (1 | Donor)
# Precision weights are estimated once here, under the full formula, and the
# resulting object is reused for every dream() fit in the loop below.
vobj <- voomWithDreamWeights(y, form, sample_meta, BPPARAM=param)

In [None]:
# Fixed-effects-only copy of `form`, used only to obtain the design-matrix
# column names that the contrast builders use as row names.
# NOTE(review): assumes the fixed-effect coefficient names in the dream() fit
# match these model.matrix() column names - confirm.
form_fixed <- ~ 0 + Cell_type + Sex + Age_c
coef_names <- colnames(model.matrix(form_fixed, data=sample_meta))

In [None]:
# One result table per target cell type, filled in by the loop below.
res_list <- vector(mode = "list", length = length(ctypes))
names(res_list) <- ctypes

for (target in ctypes) {
    print(paste("Starting", target))

    # Test multiple contrasts at once: every "target vs. other" pair plus
    # "target vs. mean of the others".
    L <- cbind(
        make_pairwise_contrasts(target),
        make_mean_contrasts(target)
    )
    fit <- dream(vobj, form, sample_meta, L = L, BPPARAM = param)
    fit <- eBayes(fit)

    # Extract results: one table per contrast, keeping genes that pass the
    # 0.05 adjusted p-value cutoff, tagged with the contrast name.
    ctype_res_list <- lapply(colnames(L), function(test) {
        tt <- topTable(fit, coef = test, number = Inf, p.value = 0.05)
        # rep() keeps this valid when no gene passes the cutoff; assigning a
        # length-1 value to a column of a zero-row data frame is an error.
        tt$Test <- rep(test, nrow(tt))
        tt
    })
    res_list[[target]] <- do.call(rbind, ctype_res_list)

}

# Save the list built above (the original referenced an undefined `mean_res`).
saveRDS(res_list, file = "data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_multi_test_DE_genes_dream.RDS")

In [None]:
# `mean_res` is never defined in this notebook; the per-cell-type results
# built by the loop above live in `res_list`.
# NOTE(review): if this file was meant to hold only the "vs. mean of others"
# contrasts, subset `res_list` accordingly before saving.
saveRDS(res_list, file = "data/tasic_2018_ALM_STAR_donor_cell_type_pseudobulk_DE_genes_dream.RDS")