In [1]:
suppressMessages(library('DESeq2'))
library('BiocParallel')
register(MulticoreParam(4))

In [2]:
# Read the project configuration; data_path and tmp_path are taken from the YAML file.
config <- yaml::read_yaml('../../config/config.yaml')
data_path <- config$data_path
tmp_path <- config$tmp_path

# local work: replace the configured data location with repository-relative paths
data_path <- '../../data/'
deseq2_results_path <- '../../results/DESeq2/infection/'

# write.csv() does not create missing directories, so make sure the output
# folder exists before any result table is written into it.
dir.create(deseq2_results_path, recursive = TRUE, showWarnings = FALSE)

# Hamster

In [3]:
# load the hamster count table (first CSV column becomes the row names)
# and the matching per-sample metadata table
countData <- read.csv(paste0(data_path, "all_hamsters_countData.csv"), row.names = 1)
colData <- read.csv(paste0(data_path, "all_hamsters_colData.csv"))
# binary disease label: every sample starts out as healthy, then every sample
# whose time point is not D0 is relabelled as COVID19
colData$disease <- 'healthy'
colData$disease[which(colData$time != 'D0')] <- 'COVID19'

In [4]:
# Groups of cell type labels to test. Each entry maps an output name to the
# labels in colData$celltype that are pooled for that test; 'Allcelltypes'
# pools every label present in colData.
celltype_dict <- local({
    # label sets shared by several groups
    t_cells <- c('CD4+ T cells', 'CD8+ T cells', 'activated T cells')
    nk_cells <- c('NK cells', 'NKT cells')

    list(
        Macrophages = c('Treml4+Macrophages', 'InterstitialMacrophages',
                        'AlveolarMacrophages', 'MonocyticMacrophages'),
        Neutrophils = 'Neutrophils',
        Endothelial = c('Artery', 'Vein', 'Bronchial', 'Lymphatic', 'Capillary'),
        Tcells = t_cells,
        NKcells = nk_cells,
        TNKcells = c('TNK cells', nk_cells, t_cells),
        Epithelial = c('AT1', 'AT2', 'Ciliated'),
        Allcelltypes = unique(colData$celltype)
    )
})

In [8]:
# Pseudobulk DESeq2 likelihood-ratio test (COVID19 vs healthy) for every
# hamster species/dosage setting and every cell type group in celltype_dict.
# One CSV per tested combination is written to deseq2_results_path.

# this is where the magic happens: the full model contains the disease term,
# the reduced model is intercept-only, so the LRT tests the disease effect
design <- ~ disease
reduced <- ~ 1

for (organism_key in c('Goldhamster', 'Dwarfhamster_hd', 'Dwarfhamster_ld')) {
    # Define what to test. The loop key encodes species and dosage; it is
    # translated into the value matched against colData$organism, the suffix
    # used in the output file name and the dosage label that is filtered out.
    if (organism_key == 'Goldhamster') {
        organism <- 'Goldhamster'
        dosage_str <- ''
        dosage_to_exclude <- 'None'
    } else if (grepl('ld', organism_key, fixed = TRUE)) {
        organism <- 'Dwarfhamster'
        dosage_str <- '_ld'
        dosage_to_exclude <- 'high dose'
    } else {
        organism <- 'Dwarfhamster'
        dosage_str <- '_hd'
        dosage_to_exclude <- 'low dose'
    }

    for (celltype_name in names(celltype_dict)) {
        selected_celltypes <- celltype_dict[[celltype_name]]

        # keep pseudobulk samples of the selected cell types, species and
        # dosage that were built from at least 20 cells at D0, D2 or D3
        mask <- colData$celltype %in% selected_celltypes &
            colData$dosage != dosage_to_exclude &
            colData$organism == organism &
            colData$ncells >= 20 &
            colData$time %in% c('D0', 'D2', 'D3')

        if (sum(mask) < 5) {
            # not enough samples to test
            message('Skipping ', celltype_name, ' / ', organism, dosage_str,
                    ': fewer than 5 samples')
            next
        }

        scolData <- colData[mask, ]
        scountData <- countData[, mask, drop = FALSE]

        if (!all(c('COVID19', 'healthy') %in% scolData$disease)) {
            # Without samples from both conditions the design cannot be fitted
            # and DESeq2 would abort the whole loop; skip this group instead.
            message('Skipping ', celltype_name, ' / ', organism, dosage_str,
                    ': samples from only one condition')
            next
        }

        # Setup data and supply design matrix
        dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                      design = design)

        # collapse selected celltypes: counts of all columns sharing the same
        # `replicate` value are summed into one column
        # NOTE(review): assumes replicate ids are unique across time points and
        # conditions; otherwise healthy and infected columns would be merged - confirm
        dds <- collapseReplicates(dds, dds$replicate, dds$celltype)

        # Filter genes below 10 counts in total
        dds <- dds[rowSums(counts(dds)) >= 10, ]
        # Setup deseq with single cell recommendations, add reduced design matrix
        # (minReplicatesForReplace = Inf turns off outlier count replacement)
        dds <- DESeq(dds, test = "LRT", minReplicatesForReplace = Inf, reduced = reduced)

        # Extract results: the contrast fixes the log2 fold change direction
        # (COVID19 over healthy); rows are sorted by ascending p-value
        res <- results(dds, contrast = c('disease', 'COVID19', 'healthy'))
        resOrdered <- res[order(res$pvalue), ]
        write.csv(as.data.frame(resOrdered),
                  file = paste0(deseq2_results_path, celltype_name, "_", organism, dosage_str, "_COVID19_vs_healthy.csv"))
    }
}

converting counts to integer mode

"some variables in design formula are characters, converting to factors"
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

"some variables in design formula are characters, converting to factors"
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

"some variables in design formula are characters, converting to factors"
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

"some variables in design formula are characters, converting to factors"
estimating size factors

estimating dispersions

gene-wise dispersion 

# Human

In [16]:
# Pseudobulk DESeq2 likelihood-ratio test (COVID19 vs healthy) on the
# Melms/Izar human dataset, one result CSV per cell type group.

# load data (first CSV column of the count table becomes the row names)
countData <- read.csv(file = paste0(data_path, "MelmsIzar290421_countData.csv"), row.names = 1)
colData <- read.csv(file = paste0(data_path, "MelmsIzar290421_colData.csv"))

# groups of cell type labels that are pooled for each test
celltype_dict <- list(
    'Macrophages' = c('AlveolarMacrophages', 'MonocyticMacrophages', 'Monocytes'),  # TODO should we include monocytes?
    'Endothelial' = c('Endothelial cells', 'Vein', 'Artery', 'Capillary'),
    'TNKcells' = c('TNKcells'),
    'Epithelial' = c('AT1', 'AT2', 'Ciliated', 'Tuft-like', 'Epithelial'),
    'Allcelltypes' = unique(colData$celltype)
)

# this is where the magic happens: the full model contains the disease term,
# the reduced model is intercept-only, so the LRT tests the disease effect
design <- ~ disease
reduced <- ~ 1

for (celltype_name in names(celltype_dict)) {
    selected_celltypes <- celltype_dict[[celltype_name]]

    # keep pseudobulk samples of the selected cell types built from >= 20 cells
    mask <- colData$celltype %in% selected_celltypes & colData$ncells >= 20
    scolData <- colData[mask, ]
    scountData <- countData[, mask, drop = FALSE]

    if (!all(c('COVID19', 'healthy') %in% scolData$disease)) {
        # An empty subset or one with a single condition cannot be fitted and
        # would abort the whole loop inside DESeq2; skip this group instead.
        message('Skipping ', celltype_name, ': samples from both conditions are required')
        next
    }

    # Setup data and supply design matrix
    dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                  design = design)

    # collapse selected celltypes: counts of all columns sharing the same
    # `sample_id` value are summed into one column
    dds <- collapseReplicates(dds, dds$sample_id, dds$celltype)

    # Filter genes below 10 counts in total
    dds <- dds[rowSums(counts(dds)) >= 10, ]
    # Setup deseq with single cell recommendations, add reduced design matrix
    # (minReplicatesForReplace = Inf turns off outlier count replacement)
    dds <- DESeq(dds, test = "LRT", minReplicatesForReplace = Inf, reduced = reduced)

    # Extract results: the contrast fixes the log2 fold change direction
    # (COVID19 over healthy); rows are sorted by ascending p-value
    res <- results(dds, contrast = c('disease', 'COVID19', 'healthy'))
    resOrdered <- res[order(res$pvalue), ]
    write.csv(as.data.frame(resOrdered),
              file = paste0(deseq2_results_path, celltype_name, "_MelmsIzar_COVID19_vs_healthy.csv"))
}

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion 

In [20]:
# Pseudobulk DESeq2 likelihood-ratio test (COVID19 vs healthy) on the
# Chua/Eils human dataset, one result CSV per cell type group.

# load data (first CSV column of the count table becomes the row names)
countData <- read.csv(file = paste0(data_path, "ChuaEils290620_countData.csv"), row.names = 1)
colData <- read.csv(file = paste0(data_path, "ChuaEils290620_colData.csv"))

# groups of cell type labels that are pooled for each test
celltype_dict <- list(
    'TNKcells' = c('TNKcells'),
    'Macrophages' = c('AlveolarMacrophages', 'MonocyticMacrophages', 'Macrophages'),
    'Epithelial' = c('Epithelial', 'Ciliated'),
    'Allcelltypes' = unique(colData$celltype)
)

# this is where the magic happens: the full model contains the disease term,
# the reduced model is intercept-only, so the LRT tests the disease effect
design <- ~ disease
reduced <- ~ 1

for (celltype_name in names(celltype_dict)) {
    selected_celltypes <- celltype_dict[[celltype_name]]

    # keep pseudobulk samples of the selected cell types built from >= 20 cells
    mask <- colData$celltype %in% selected_celltypes & colData$ncells >= 20
    scolData <- colData[mask, ]
    scountData <- countData[, mask, drop = FALSE]

    if (!all(c('COVID19', 'healthy') %in% scolData$disease)) {
        # An empty subset or one with a single condition cannot be fitted and
        # would abort the whole loop inside DESeq2; skip this group instead.
        message('Skipping ', celltype_name, ': samples from both conditions are required')
        next
    }

    # Setup data and supply design matrix
    dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                  design = design)

    # collapse selected celltypes: counts of all columns sharing the same
    # `sample_id` value are summed into one column
    dds <- collapseReplicates(dds, dds$sample_id, dds$celltype)

    # Filter genes below 10 counts in total
    dds <- dds[rowSums(counts(dds)) >= 10, ]
    # Setup deseq with single cell recommendations, add reduced design matrix
    # (minReplicatesForReplace = Inf turns off outlier count replacement)
    dds <- DESeq(dds, test = "LRT", minReplicatesForReplace = Inf, reduced = reduced)

    # Extract results: the contrast fixes the log2 fold change direction
    # (COVID19 over healthy); rows are sorted by ascending p-value
    res <- results(dds, contrast = c('disease', 'COVID19', 'healthy'))
    resOrdered <- res[order(res$pvalue), ]
    write.csv(as.data.frame(resOrdered),
              file = paste0(deseq2_results_path, celltype_name, "_ChuaEils_COVID19_vs_healthy.csv"))
}

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion 

In [12]:
# Pseudobulk DESeq2 likelihood-ratio test (COVID19 vs healthy) on the
# Liao/Zhang human dataset, one result CSV per cell type group.

# load data (first CSV column of the count table becomes the row names)
countData <- read.csv(file = paste0(data_path, "LiaoZhang120520_countData.csv"), row.names = 1)
colData <- read.csv(file = paste0(data_path, "LiaoZhang120520_colData.csv"))

# groups of cell type labels that are pooled for each test
celltype_dict <- list(
    'Macrophages' = c('Macrophages'),  # TODO should we include monocytes?
    'TNKcells' = c('TNKcells'),
    'Epithelial' = c('Epithelial'),
    'Allcelltypes' = unique(colData$celltype)
)

# this is where the magic happens: the full model contains the disease term,
# the reduced model is intercept-only, so the LRT tests the disease effect
design <- ~ disease
reduced <- ~ 1

for (celltype_name in names(celltype_dict)) {
    selected_celltypes <- celltype_dict[[celltype_name]]
    # progress marker in the cell output
    print(celltype_name)

    # keep pseudobulk samples of the selected cell types built from >= 20 cells
    mask <- colData$celltype %in% selected_celltypes & colData$ncells >= 20
    scolData <- colData[mask, ]
    scountData <- countData[, mask, drop = FALSE]

    if (!all(c('COVID19', 'healthy') %in% scolData$disease)) {
        # An empty subset or one with a single condition cannot be fitted and
        # would abort the whole loop inside DESeq2; skip this group instead.
        message('Skipping ', celltype_name, ': samples from both conditions are required')
        next
    }

    # Setup data and supply design matrix
    dds <- DESeqDataSetFromMatrix(countData = scountData, colData = scolData,
                                  design = design)

    # collapse selected celltypes: counts of all columns sharing the same
    # `sample_id` value are summed into one column
    dds <- collapseReplicates(dds, dds$sample_id, dds$celltype)

    # Filter genes below 10 counts in total
    dds <- dds[rowSums(counts(dds)) >= 10, ]
    # Setup deseq with single cell recommendations, add reduced design matrix
    # (minReplicatesForReplace = Inf turns off outlier count replacement)
    dds <- DESeq(dds, test = "LRT", minReplicatesForReplace = Inf, reduced = reduced)

    # Extract results: the contrast fixes the log2 fold change direction
    # (COVID19 over healthy); rows are sorted by ascending p-value
    res <- results(dds, contrast = c('disease', 'COVID19', 'healthy'))
    resOrdered <- res[order(res$pvalue), ]
    write.csv(as.data.frame(resOrdered),
              file = paste0(deseq2_results_path, celltype_name, "_LiaoZhang_COVID19_vs_healthy.csv"))
}

[1] "Macrophages"


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



[1] "TNKcells"


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



[1] "Epithelial"


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



[1] "Allcelltypes"


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

