In [1]:
suppressMessages(library('DESeq2'))
library('BiocParallel')
register(MulticoreParam(4))

In [2]:
root <- '../../'
config <- yaml::read_yaml(paste0(root, 'config/config.yaml'))
data_path <- config$data_path
tmp_path <- config$tmp_path

# local work
data_path <- paste0(root, 'data/')
deseq2_results_path <- paste0(root, 'results/DESeq2/severity/')

# Hamster

In [3]:
# load data
countData <- read.csv(file = paste0(data_path, "all_hamsters_countData.csv"), row.names=1)
colData <- read.csv(file = paste0(data_path, "all_hamsters_colData.csv"))

In [4]:
# celltypes to test
celltype_dict = list(
    'Macrophages' = c('Treml4+Macrophages', 'InterstitialMacrophages', 'AlveolarMacrophages', 'MonocyticMacrophages'),
    'Neutrophils' = c('Neutrophils'),
    'Endothelial' = c('Artery', 'Vein', 'Bronchial', 'Lymphatic', 'Capillary'),
    'Tcells' = c('CD4+ T cells', 'CD8+ T cells', 'activated T cells'),
    'NKcells' = c('NK cells', 'NKT cells'),
    'TNKcells' = c('TNK cells', 'NK cells', 'NKT cells', 'CD4+ T cells', 'CD8+ T cells', 'activated T cells'),
    'Epithelial' = c('AT1', 'AT2', 'Ciliated'),
    'Allcelltypes' = unique(colData$celltype)
)

In [5]:
for (organism in c('Dwarfhamster_ld', 'Dwarfhamster_hd')){
    # Define what to test
    if (grepl('ld', organism, fixed = TRUE)){
        dosage_str = '_ld'
        not_dosage = 'high dose'
    } else {
        dosage_str = '_hd'
        not_dosage = 'low dose'
    }
    organism = 'Dwarfhamster'
    
    for (celltype_name in names(celltype_dict)){
        selected_celltypes = celltype_dict[[celltype_name]]

        # select cells for testing
        mask = colData$celltype %in% c(selected_celltypes) &
            (colData$dosage != not_dosage | colData$organism!=organism) & 
            colData$ncells >= 20 & colData$time %in% c('D2', 'D3')

        scolData = colData[mask , ]
        scountData = countData[, mask]

        # this is where the magic happens
        design = ~ organism
        reduced = ~ 1

        # Setup data and supply design matrix
        dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                      design = design)

        # collapse selected celltypes
        dds <- collapseReplicates(dds, dds$replicate, dds$celltype)

        # Filter genes below 10 counts in total
        dds <- dds[rowSums(counts(dds)) >= 10,]
        # Setup deseq with single cell recommendations, add reduced design matrix
        dds <- DESeq(dds, test="LRT", minReplicatesForReplace=Inf, reduced= reduced)

        # Run deseq2
        res <- results(dds, contrast=c('organism', organism, 'Goldhamster'))
        resOrdered <- res[order(res$pvalue),]
        write.csv(as.data.frame(resOrdered), 
                  file=paste0(deseq2_results_path, celltype_name, "_", organism, dosage_str, "_severity.csv"))
    }
}

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion 

# Human

In [28]:
# load data
countData <- read.csv(file = paste0(data_path, "LiaoZhang120520_countData.csv"), row.names=1)
colData <- read.csv(file = paste0(data_path, "LiaoZhang120520_colData.csv"))

celltype_dict = list(
    # Not enough cells in control for Neutrophils!
    'TNKcells' = c('TNKcells'),
    'Macrophages' = c('Macrophages'),
    'Epithelial' = c('Epithelial'),
    'Allcelltypes' = unique(colData$celltype)
)

for (celltype_name in names(celltype_dict)){
    selected_celltypes = celltype_dict[[celltype_name]]

    mask = colData$celltype %in% c(selected_celltypes) & colData$ncells >= 20
    scolData = colData[mask, ]
    scountData = countData[, mask]

    # this is where the magic happens
    design = ~ severity
    reduced = ~ 1

    # Setup data and supply design matrix
    dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                  design = design)
    
    # collapse selected celltypes
    dds <- collapseReplicates(dds, dds$sample_id, dds$celltype)

    # Filter genes below 10 counts in total
    dds <- dds[rowSums(counts(dds)) >= 10,]
    # Setup deseq with single cell recommendations, add reduced design matrix
    dds <- DESeq(dds, test="LRT", minReplicatesForReplace=Inf, reduced= reduced)

    # Run deseq2
    res <- results(dds, contrast=c('severity', 'severe', 'mild'))
    resOrdered <- res[order(res$pvalue),]
    write.csv(as.data.frame(resOrdered),
                      file=paste0(deseq2_results_path, celltype_name, "_LiaoZhang_severity.csv"))

}

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion 

In [21]:
# melmsizar only has critical as severity

In [30]:
# load data
countData <- read.csv(file = paste0(data_path, "ChuaEils290620_countData.csv"), row.names=1)
colData <- read.csv(file = paste0(data_path, "ChuaEils290620_colData.csv"))

celltype_dict = list(
    'Macrophages' = c('AlveolarMacrophages', 'MonocyticMacrophages', 'Macrophages'),
    'Neutrophils' = c('Neutrophils'),
    'TNKcells' = c('TNKcells'),
    'Epithelial' = c('Epithelial', 'Ciliated'),
    'Allcelltypes' = unique(colData$celltype)
)

for (celltype_name in names(celltype_dict)){
    selected_celltypes = celltype_dict[[celltype_name]]

    mask = colData$celltype %in% c(selected_celltypes) & colData$ncells >= 20
    scolData = colData[mask, ]
    scountData = countData[, mask]

    # this is where the magic happens
    design = ~ severity
    reduced = ~ 1

    # Setup data and supply design matrix
    dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                  design = design)
    
    # collapse selected celltypes
    dds <- collapseReplicates(dds, dds$sample_id, dds$celltype)

    # Filter genes below 10 counts in total
    dds <- dds[rowSums(counts(dds)) >= 10,]
    # Setup deseq with single cell recommendations, add reduced design matrix
    dds <- DESeq(dds, test="LRT", minReplicatesForReplace=Inf, reduced= reduced)

    # Run deseq2
    res <- results(dds, contrast=c('severity', 'critical', 'moderate'))
    resOrdered <- res[order(res$pvalue),]
    write.csv(as.data.frame(resOrdered),
                      file=paste0(deseq2_results_path, celltype_name, "_ChuaEils_severity.csv"))
}

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion 