In [1]:
library(DESeq2)
library(EnhancedVolcano)

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min



Attaching package: ‘S4Vectors’


The following object is masked from ‘package:utils’:

    findMatches


The following objects are masked from ‘package:base’:

    expand.grid, I, unname


Loading required package: IRanges

Loading required package: GenomicRanges

Loading required package: GenomeInfoDb

Loa

In [2]:
# Count table: one row per feature id, one column per sample.
df <- read.table("dea/sum.tsv", header = TRUE, sep = "\t")
# Keep only the first row for every duplicated id, then index rows by id.
df <- df[!duplicated(df$id), ]
rownames(df) <- df$id
df$id <- NULL

# Feature annotation in an 11-column BED-like layout (no header line).
annotation_columns <- c(
    "chr", "start", "end", "name", "score", "strand",
    "type", "gene.id", "gene", "transcripts", "databases"
)
annotation <- read.table(
    "dea/annotation.bed",
    header = FALSE,
    sep = "\t",
    col.names = annotation_columns
)
# Index rows by feature name and keep only the columns used downstream
# (drops chr, start, end, name, score, strand, gene.id and transcripts).
rownames(annotation) <- annotation$name
annotation <- annotation[, c("type", "gene", "databases")]

# Sample sheet: the first CSV column supplies the row names.
phenotype <- read.csv("dea/phenotype.csv", header = TRUE, row.names = 1)
phenotype$transgene <- as.factor(phenotype$transgene)
phenotype$drug <- as.factor(phenotype$drug)

# Center and scale age and induction across all samples in the sheet.
# scale() returns a one-column matrix with "scaled:*" attributes; as.numeric()
# stores the result as a plain numeric column instead of a matrix column.
phenotype$age <- as.numeric(scale(phenotype$age))
phenotype$induction <- as.numeric(scale(phenotype$induction))

In [3]:
# Run one DESeq2 differential expression analysis and export its outputs.
#
# Reads the global count table `df` and the global `annotation` table.
# The function fits `design` on the samples listed in `phenotype`, extracts a
# single result table, attaches the annotation columns by row name, and writes
# into `directory`:
#   volcano.png        volcano plot of log2 fold change against adjusted p-value
#   upregulated.txt    genes with padj < 0.05 and log2FoldChange > 0
#   downregulated.txt  genes with padj < 0.05 and log2FoldChange < 0
# Gene entries are split on "," and de-duplicated, one gene per output line.
#
# Arguments:
#   phenotype  Sample sheet; its row names select the columns of `df`.
#   design     Model formula passed to DESeqDataSetFromMatrix().
#   directory  Output directory; created (with parents) if it does not exist.
#   contrast   Passed to results(contrast = ...). Exclusive with `name`.
#   name       Passed to results(name = ...). Exclusive with `contrast`.
#
# Returns (invisibly) the annotated result table restricted to rows without
# any NA, ordered by adjusted p-value.
#
# Stops with an error unless exactly one of `contrast` and `name` is given.
run_analysis <- function(phenotype, design, directory, contrast = NULL, name = NULL) {
    # Exactly one of contrast / name must be supplied.
    if (is.null(contrast) && is.null(name)) {
        stop("Both contrast and name cannot be NULL")
    }
    if (!is.null(contrast) && !is.null(name)) {
        stop("Both contrast and name cannot be provided")
    }

    alpha <- 0.05
    dds <- DESeqDataSetFromMatrix(
        # drop = FALSE keeps a table even if only one sample is selected.
        countData = df[, rownames(phenotype), drop = FALSE],
        colData = phenotype,
        design = design
    )
    dds <- DESeq(dds)
    res <- if (!is.null(contrast)) {
        results(dds, contrast = contrast, alpha = alpha)
    } else {
        results(dds, name = name, alpha = alpha)
    }
    # Order by adjusted p-value, then attach annotation by row name.
    res <- res[order(res$padj), ]
    res <- cbind(res, annotation[rownames(res), ])

    # Create the requested output directory (and any missing parents).
    dir.create(directory, showWarnings = FALSE, recursive = TRUE)

    volcano <- EnhancedVolcano(
        res,
        x = "log2FoldChange",
        y = "padj",
        lab = res$gene,
        pCutoff = alpha
    )
    # Pass the plot explicitly instead of relying on ggplot2's last_plot().
    ggsave(file.path(directory, "volcano.png"), plot = volcano)

    # Keep only rows without any NA (in statistics or annotation columns).
    res <- res[complete.cases(res), ]

    significant <- res$padj < alpha
    # A gene entry may hold several comma-separated names.
    split_genes <- function(genes) {
        unique(unlist(strsplit(genes, ",")))
    }
    upregulated <- split_genes(res$gene[significant & res$log2FoldChange > 0])
    downregulated <- split_genes(res$gene[significant & res$log2FoldChange < 0])

    # as.character() turns an empty (NULL) result into character(0), so an
    # empty file is written when no gene passes the threshold.
    writeLines(as.character(upregulated), file.path(directory, "upregulated.txt"))
    writeLines(as.character(downregulated), file.path(directory, "downregulated.txt"))

    invisible(res)
}

# Correlation with ESR1 induction

In [4]:
# Samples whose transgene is ESR1, followed by how many there are.
is_esr1 <- phenotype$transgene == "ESR1"
phenotype_esr <- phenotype[is_esr1, ]
nrow(phenotype_esr)

## Without drugs

In [5]:
# ESR1 samples with drug == "no": report the `induction` coefficient
# of a model that also includes age.
run_analysis(
    phenotype = phenotype_esr[phenotype_esr$drug == "no", ],
    design = ~ age + induction,
    directory = "dea/esr1_no",
    name = "induction"
)

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mSaving 6.67 x 6.67 in image


## Tamoxifen

In [6]:
# ESR1 samples excluding drug == "letrozole": contrast tamoxifen against "no"
# in a model that also includes age and induction.
run_analysis(
    phenotype = phenotype_esr[phenotype_esr$drug != "letrozole", ],
    design = ~ age + drug + induction,
    directory = "dea/esr1_tamoxifen",
    contrast = c("drug", "tamoxifen", "no")
)

factor levels were dropped which had no samples

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mSaving 6.67 x 6.67 in image


## Letrozole

In [7]:
# ESR1 samples excluding drug == "tamoxifen": contrast letrozole against "no"
# in a model that also includes age and induction.
run_analysis(
    phenotype = phenotype_esr[phenotype_esr$drug != "tamoxifen", ],
    design = ~ age + drug + induction,
    directory = "dea/esr1_letrozole",
    contrast = c("drug", "letrozole", "no")
)

factor levels were dropped which had no samples

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mSaving 6.67 x 6.67 in image
