In [113]:
library(VennDiagram)
library(ggplot2)
library(EnhancedVolcano)

In [114]:
# Base directory: the per-drug DESeq2 / CIRIquant result tables are read from
# sub-folders of it, and the Venn diagrams and TSV tables are written into it.
directory <- "../chapters/4_results_and_discussion/figures/dea/"
# Significance cutoff applied to the DESeq2 `padj` and CIRIquant `PValue` columns.
# NOTE(review): this numeric shares its name with the alpha() colour helper
# called for the Venn fill colours (presumably ggplot2::alpha). R skips
# non-function objects when resolving a call, so both work, but it is easy to
# misread.
alpha <- 0.05

In [115]:
# Load the headerless, tab-separated annotation table and reduce it to the
# columns used downstream. The result is a data frame indexed by the `name`
# column (as row names) with the columns: type, gene, databases, has_db.
annotation <- read.table(
    "dea/annotation.bed",
    header = FALSE,
    sep = "\t",
    col.names = c("chr", "start", "end", "name", "score", "strand", "type", "gene.id", "gene", "transcripts", "databases")
)

# Drop chr, start, end, score, strand, gene.id and transcripts in one step.
annotation <- annotation[, c("name", "type", "gene", "databases"), drop = FALSE]

# TRUE when the entry lists at least one database.
# NOTE(review): assumes "." is the placeholder for an empty `databases` field
# (the usual BED convention) -- confirm against the file. The previous
# `== "."` comparison set the flag to TRUE exactly for those placeholder rows,
# i.e. the opposite of what the column name says. Missing values count as
# "no database" instead of propagating NA.
annotation$has_db <- !is.na(annotation$databases) & annotation$databases != "."

# Index by identifier so rows can be looked up with annotation[ids, ].
rownames(annotation) <- annotation$name
annotation$name <- NULL

In [116]:
# For each drug, compare the entries called significant in the DESeq2 and
# CIRIquant result tables:
#   * <drug>_venn_circs.png        Venn diagram of the significant row IDs
#   * <drug>_circ_intersection.tsv annotation rows of IDs found by both tools
#   * <drug>_venn_genes.png        Venn diagram of the genes of those rows
#   * <drug>_gene_intersection.tsv annotation rows (from the union of IDs)
#                                  that name at least one gene found by both
# Reads the globals `directory`, `alpha` (numeric cutoff) and `annotation`.

# Fill colours for both diagrams. The function is namespace-qualified because
# the global numeric `alpha` (the cutoff) has the same name.
venn_fill <- ggplot2::alpha(c("#440154ff", "#21908dff"), 0.3)

for (drug in c("tamoxifen", "letrozole")) {
    deseq <- read.table(file.path(directory, "deseq2", drug, "res.tsv"), header = TRUE, sep = "\t", row.names = 1)
    ciriquant <- read.table(file.path(directory, "ciriquant", drug, "results.tsv"), header = TRUE, sep = "\t", row.names = 1)

    # Keep significant rows only. which() discards rows whose statistic is NA;
    # indexing with the raw logical vector would instead emit one all-NA row
    # per NA (row names "NA", "NA.1", ...), and those bogus IDs would be
    # counted as hits in the Venn diagram and looked up in `annotation`.
    # NOTE(review): DESeq2 is filtered on `padj` but CIRIquant on `PValue`;
    # confirm that using different columns for the two tools is intended.
    deseq <- deseq[which(deseq$padj < alpha), , drop = FALSE]
    ciriquant <- ciriquant[which(ciriquant$PValue < alpha), , drop = FALSE]

    deseq_hits <- rownames(deseq)
    ciriquant_hits <- rownames(ciriquant)

    venn.diagram(
        x = list(deseq_hits, ciriquant_hits),
        category.names = c("DESeq2", "Ciriquant"),
        filename = file.path(directory, paste0(drug, "_venn_circs.png")),
        output = TRUE,
        imagetype = "png",
        fill = venn_fill,
        disable.logging = TRUE
    )

    # Annotation of the IDs reported by both tools, sorted by gene.
    # NOTE(review): an ID absent from `annotation` yields an all-NA row here.
    circ_intersection <- intersect(deseq_hits, ciriquant_hits)
    annotation_circ <- annotation[circ_intersection, ]
    annotation_circ <- annotation_circ[order(annotation_circ$gene), ]
    write.table(annotation_circ, file = file.path(directory, paste0(drug, "_circ_intersection.tsv")), sep = "\t", quote = FALSE, row.names = TRUE, col.names = FALSE)
    circ_union <- union(deseq_hits, ciriquant_hits)

    # The `gene` column may hold several comma-separated genes per row;
    # flatten to one gene per element and drop missing values.
    deseq_genes <- unlist(strsplit(as.character(deseq$gene), ","))
    deseq_genes <- deseq_genes[!is.na(deseq_genes)]

    ciriquant_genes <- unlist(strsplit(as.character(ciriquant$gene), ","))
    ciriquant_genes <- ciriquant_genes[!is.na(ciriquant_genes)]

    venn.diagram(
        x = list(deseq_genes, ciriquant_genes),
        category.names = c("DESeq2", "Ciriquant"),
        filename = file.path(directory, paste0(drug, "_venn_genes.png")),
        output = TRUE,
        imagetype = "png",
        fill = venn_fill,
        disable.logging = TRUE
    )

    gene_intersection <- intersect(deseq_genes, ciriquant_genes)
    annotation_gene <- annotation[circ_union, ]
    # Keep annotation entries whose "gene" column contains at least one of the
    # genes in the intersection. The split result is matched as a character
    # vector: testing the list returned by strsplit() with %in% compares the
    # deparsed vector as a single string, so rows naming more than one gene
    # could never match. vapply() keeps the mask logical even for zero rows.
    gene_lists <- strsplit(as.character(annotation_gene$gene), ",")
    has_shared_gene <- vapply(
        gene_lists,
        function(genes) any(genes %in% gene_intersection),
        logical(1)
    )
    annotation_gene <- annotation_gene[has_shared_gene, , drop = FALSE]
    annotation_gene <- annotation_gene[order(annotation_gene$gene), ]
    write.table(annotation_gene, file = file.path(directory, paste0(drug, "_gene_intersection.tsv")), sep = "\t", quote = FALSE, row.names = TRUE, col.names = FALSE)
}

INFO [2024-10-11 16:10:36] $x
INFO [2024-10-11 16:10:36] list(deseq_hits, ciriquant_hits)
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $category.names
INFO [2024-10-11 16:10:36] c("DESeq2", "Ciriquant")
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $filename
INFO [2024-10-11 16:10:36] file.path(directory, paste0(drug, "_venn_circs.png"))
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $output
INFO [2024-10-11 16:10:36] [1] TRUE
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $imagetype
INFO [2024-10-11 16:10:36] [1] "png"
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $fill
INFO [2024-10-11 16:10:36] c(alpha("#440154ff", 0.3), alpha("#21908dff", 0.3))
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $disable.logging
INFO [2024-10-11 16:10:36] [1] TRUE
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $x
INFO [2024-10-11 16:10:36] list(deseq_genes, ciriquant_genes)
INFO [2024-10-11 16:10:36] 
INFO [2024-10-11 16:10:36] $category.names
