In [None]:
suppressWarnings(suppressPackageStartupMessages(library("clusterProfiler"))) # v3.14.3
suppressWarnings(suppressPackageStartupMessages(library("DOSE")))
suppressWarnings(suppressPackageStartupMessages(library("ReactomePA")))

In [None]:
# Background gene list: the row names of the TCGA-BRCA table. They are treated
# as gene symbols below (fromType = "SYMBOL").
bg_genes <- rownames(
    read.table(
        "../../../data/preprocessed_v6/TCGA-BRCA_1079_17Kgenes.Xena_TCGA_PanCan.log2_exprs_z_v6.tsv",
        header = TRUE,
        sep = "\t",
        row.names = 1
    )
)
length(bg_genes)

# Convert the symbols to Entrez IDs and keep only the ID column; the second
# length() shows how many IDs remain after the mapping.
bg_genes <- suppressMessages(
    bitr(
        bg_genes,
        fromType = "SYMBOL",
        toType = "ENTREZID",
        OrgDb = "org.Hs.eg.db"
    )
)[, "ENTREZID"]
length(bg_genes)

# TCGA

In [None]:
# Table S2 (potentially novel entries): first column becomes the row names,
# text columns stay character rather than factor.
df <- read.table(
    "../../../Supplementary_tables_and_data/TableS2/TableS2_potentially_novel.tsv",
    header = TRUE,
    sep = "\t",
    row.names = 1,
    stringsAsFactors = FALSE
)
dim(df)

In [None]:
# Gene-set over-representation analysis for a single gene set.
#
# The gene symbols in `gset` are mapped to Entrez IDs and the mapped set is
# tested against GO (BP, MF, CC), KEGG, Disease Ontology and Reactome, always
# with `bg_genes` as the universe, BH adjustment, p/q-value cutoffs of 0.05 and
# gene-set sizes between 2 and 500.
#
# Arguments:
#   gset      character vector of gene symbols (bitr() fromType = "SYMBOL").
#   bg_genes  background gene IDs, passed as `universe` to every enrichment
#             call.
#
# Returns:
#   A named list with the elements "BP", "MF", "CC", "KEGG", "DO" and
#   "Reactome". Each element is one string: "" when nothing was reported for
#   that resource, otherwise one "\n"-terminated line per enriched term of the
#   form "<Description> (<ID>) <GeneRatio> p.adj=<p.adjust>", ordered by
#   ascending adjusted p-value and then by descending gene count.
#   All six elements are "" when the symbol mapping fails or yields fewer than
#   two Entrez IDs.
gsoa <- function(gset, bg_genes) {

    # Turn one enrichment result into its report string ("" if nothing is left).
    format_terms <- function(enriched) {
        if (is.null(enriched)) {
            return("")
        }
        # Keep terms supported by at least two query genes. The single-index
        # `[` is applied to the enrichment result object itself, exactly as in
        # the original code.
        # NOTE(review): this relies on the result class's `[` method treating
        # the logical index as a row filter - confirm when upgrading DOSE.
        hits <- enriched[enriched$Count >= 2]
        if (is.null(hits) || nrow(hits) == 0) {
            return("")
        }
        hits <- hits[order(hits$p.adjust, -hits$Count), ]
        paste(
            paste0(
                hits$Description, " (", hits$ID, ") ", hits$GeneRatio,
                " p.adj=", formatC(hits$p.adjust, format = "e", digits = 2),
                "\n"
            ),
            collapse = ""
        )
    }

    results <- list(BP = "", MF = "", CC = "", KEGG = "", DO = "", Reactome = "")

    # Map gene names to Entrez IDs. A failed mapping is an expected outcome for
    # some gene sets, so it is reported and treated as "no genes" instead of
    # aborting the whole run.
    gset_entrez <- tryCatch(
        bitr(
            gset,
            fromType = "SYMBOL",
            toType = "ENTREZID",
            OrgDb = "org.Hs.eg.db"
        )[, "ENTREZID"],
        error = function(e) {
            message(
                "gsoa: could not map gene symbols to Entrez IDs: ",
                conditionMessage(e)
            )
            character(0)
        }
    )

    # Enrichment is only attempted when at least two genes were mapped.
    if (length(gset_entrez) <= 1) {
        return(results)
    }

    # GO: one test per ontology.
    for (ontology in c("BP", "MF", "CC")) {
        results[[ontology]] <- format_terms(
            enrichGO(
                gene          = gset_entrez, # query gene list
                universe      = bg_genes,    # background
                OrgDb         = "org.Hs.eg.db",
                ont           = ontology,
                pAdjustMethod = "BH",
                minGSSize     = 2,
                maxGSSize     = 500,
                pvalueCutoff  = 0.05,
                qvalueCutoff  = 0.05,
                readable      = TRUE
            )
        )
    }

    # KEGG
    results[["KEGG"]] <- format_terms(
        enrichKEGG(
            gene          = gset_entrez,
            universe      = bg_genes,
            organism      = "hsa",
            pAdjustMethod = "BH",
            minGSSize     = 2,
            maxGSSize     = 500,
            pvalueCutoff  = 0.05,
            qvalueCutoff  = 0.05
        )
    )

    # Disease Ontology
    results[["DO"]] <- format_terms(
        enrichDO(
            gene          = gset_entrez,
            universe      = bg_genes,
            ont           = "DO",
            pAdjustMethod = "BH",
            minGSSize     = 2,
            maxGSSize     = 500,
            pvalueCutoff  = 0.05,
            qvalueCutoff  = 0.05,
            readable      = TRUE
        )
    )

    # Reactome (gene IDs are left untranslated here; they are not part of the
    # report string anyway).
    results[["Reactome"]] <- format_terms(
        enrichPathway(
            gene          = gset_entrez,
            universe      = bg_genes,
            organism      = "human",
            pvalueCutoff  = 0.05,
            pAdjustMethod = "BH",
            qvalueCutoff  = 0.05,
            minGSSize     = 2,
            maxGSSize     = 500,
            readable      = FALSE
        )
    )

    results
}



In [None]:
bic_ids <- rownames(df)
# Column of `df` holding the space-separated gene list to analyse.
# The original author's note lists "union" as the alternative value.
target_column <- "shared"

# Run gsoa() once per row of `df` and stack the six-element result lists into
# a matrix (one row per id, one column per resource). The matrix is built in a
# single rbind() call instead of being re-allocated on every iteration. The
# list is passed unnamed so that no id can be mistaken for an rbind() argument
# name; row names are attached afterwards.
all_results <- do.call(
    rbind,
    lapply(bic_ids, function(bic) {
        gset <- unlist(strsplit(df[bic, target_column], " "))
        suppressMessages(suppressWarnings(gsoa(gset, bg_genes)))
    })
)
# do.call() returns NULL when `df` has no rows; row names can only be set on
# an actual matrix.
if (!is.null(all_results)) {
    rownames(all_results) <- bic_ids
}
dim(all_results)
all_results

In [None]:
# all_results was assembled by rbind()-ing the lists returned by gsoa(), so
# its cells are list elements; after as.data.frame() every column is flattened
# to a plain character vector so the table can be written as text.
df2 <- as.data.frame(all_results)
df2[] <- lapply(df2, as.character)
# Append the enrichment columns to the input table.
# NOTE(review): the name `table` masks base::table() for the rest of the
# session; it is kept because the export cell refers to it.
table <- cbind(df,df2)

In [None]:
# Output path: the prefix plus the name of the analysed gene-list column.
out_prefix <- "../../../Supplementary_tables_and_data/TableS2/TableS2_potentially_novel.with_enrichment"
fname <- paste0(out_prefix, "_", target_column, ".tsv")

# Character cells are quoted; the enrichment columns can contain embedded
# "\n" characters (one per reported term).
write.table(
    table,
    file = fname,
    sep = "\t",
    quote = TRUE,
    row.names = TRUE,
    col.names = TRUE
)
fname