In [None]:
library("clusterProfiler")
library("org.Hs.eg.db")
library("DOSE")
library("ReactomePA")
library("ggplot2")
library("readxl")
library("writexl")
library("dplyr")
library("tidyr")
library("ComplexUpset")
library("eulerr")
library("tibble")  
library("MeSHDbi")
library("meshes")
library("europepmc")
library("VennDiagram")

In [None]:
# Work from the project folder: every relative file name used below (and in
# the later cells) is resolved against this directory.
setwd("/folder/")

# Gene tables for the up- and down-regulated sets, one spreadsheet each.
data_pos <- read_xlsx(path = "Upregulated_Genes.xlsx")
data_neg <- read_xlsx(path = "Downregulated_Genes.xlsx")

In [None]:
### Keep Genes Scoring Above the 90th Percentile (i.e. the Top 10% of Scores).

# Return the rows of `df` whose `scores` value lies strictly above the `prob`
# quantile of that column.
#
# df   : table with a numeric `scores` column.
# prob : quantile used as the cut-off (0.90 keeps roughly the top 10%).
#
# Rows with a missing score are dropped instead of aborting quantile() or
# leaking all-NA rows into the result. Row names are reset on the result.
keep_top_scoring <- function(df, prob = 0.90) {
  stopifnot("scores" %in% names(df), is.numeric(df$scores))
  score_threshold <- quantile(df$scores, prob, na.rm = TRUE)
  keep <- !is.na(df$scores) & df$scores > score_threshold
  kept <- df[keep, ]
  rownames(kept) <- NULL
  kept
}

data_pos_filtered <- keep_top_scoring(data_pos, prob = 0.90)
head(data_pos_filtered)

# NOTE(review): the same "highest scores" rule is applied to the downregulated
# table. If that table stores negative scores, the strongest hits sit at the
# low end and this keeps the weakest ones -- confirm the sign convention.
data_neg_filtered <- keep_top_scoring(data_neg, prob = 0.90)
head(data_neg_filtered)

In [None]:
### For the Wilcoxon DGEs.

# Upregulated table: copy `gene` / `logfoldchanges` into the column names the
# later cells read (`genes`, `logFoldChange`), then rank rows by score,
# highest first.
data_pos_filtered[["genes"]] <- data_pos_filtered$gene
data_pos_filtered[["logFoldChange"]] <- data_pos_filtered$logfoldchanges
data_pos_filtered <- data_pos_filtered[
  with(data_pos_filtered, order(scores, decreasing = TRUE)),
]
dim(data_pos_filtered)

# Downregulated table: identical treatment.
data_neg_filtered[["genes"]] <- data_neg_filtered$gene
data_neg_filtered[["logFoldChange"]] <- data_neg_filtered$logfoldchanges
data_neg_filtered <- data_neg_filtered[
  with(data_neg_filtered, order(scores, decreasing = TRUE)),
]
dim(data_neg_filtered)

In [None]:
### Create Gene Lists.

data <- data_pos_filtered # or data <- data_neg_filtered

# Translate gene symbols to Entrez IDs. The mapping table can have fewer rows
# than the input (unmapped symbols) or several rows per symbol, so only the
# first Entrez ID per symbol is kept.
ids <- bitr(data$genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
dedup_ids <- ids[!duplicated(ids$SYMBOL), ]

# Keep the rows whose symbol was mapped and attach the Entrez ID by matching
# on the symbol. Assigning the ID column positionally is only correct when no
# symbol is missing, duplicated or reordered.
data_dedub <- data[data$genes %in% dedup_ids$SYMBOL, ]
data_dedub$ENTREZID <- dedup_ids$ENTREZID[match(data_dedub$genes, dedup_ids$SYMBOL)]

# Ranked gene list for the GSEA-style calls (gseKEGG, gsePathway) and for
# viewPathway(foldChange = ...): numeric log fold changes named by Entrez ID,
# sorted in decreasing order.
stopifnot(is.numeric(data_dedub$logFoldChange))
gene_list <- data_dedub$logFoldChange
names(gene_list) <- data_dedub$ENTREZID
gene_list <- gene_list[!is.na(gene_list) & !is.na(names(gene_list))]
gene_list <- sort(gene_list, decreasing = TRUE)
# If two symbols resolved to the same Entrez ID, keep the larger fold change.
gene_list <- gene_list[!duplicated(names(gene_list))]

# Entrez IDs alone, for the over-representation calls.
gene <- names(gene_list)

In [None]:
### Group GO.

# Group the Entrez IDs in `gene` by Biological Process GO terms at level 5.
ggo <- groupGO(
  gene = gene,
  OrgDb = org.Hs.eg.db,
  ont = "BP", # alternatives: "CC", "MF"
  level = 5,
  readable = TRUE
)

head(ggo)

In [None]:
### Enrich GO.

# GO over-representation test for the Entrez IDs in `gene`.
# P-values are Bonferroni-adjusted; both the p-value and q-value cut-offs are
# 0.05, and gene sets of 1 to 10000 genes are considered.
ego <- enrichGO(
  gene = gene,
  OrgDb = org.Hs.eg.db,
  ont = "BP", # alternatives: "CC", "MF"
  pAdjustMethod = "bonferroni",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  minGSSize = 1,
  maxGSSize = 10000,
  readable = TRUE
)
head(ego)

In [None]:
### Export GO Term Results

# Flatten the enrichGO result into a plain table and save it as a spreadsheet.
# NOTE(review): the file name is fixed to the upregulated set; presumably it
# has to be changed by hand when `data` was set to the downregulated genes.
ego_df <- ego %>% as.data.frame()
write_xlsx(x = ego_df, path = "GO_Upregulated_Genes.xlsx")

In [None]:
### Inspection.

# Distinct GO term descriptions present in the enrichment result.
ego$Description %>% unique()

In [None]:
### KEGG Enrichment.

# Over-representation test of the gene IDs (the names of `gene_list`) against
# human ("hsa") KEGG pathways.
# keyType is one of "kegg", "ncbi-geneid", "ncbi-proteinid" or "uniprot".
kk <- enrichKEGG(
  gene = names(gene_list),
  organism = "hsa",
  keyType = "ncbi-geneid",
  pvalueCutoff = 0.05
)
head(kk)

# Gene-set enrichment analysis on the named `gene_list` vector, restricted to
# pathways with 10 to 5000 genes.
kk2 <- gseKEGG(
  geneList = gene_list,
  organism = "hsa",
  minGSSize = 10,
  maxGSSize = 5000,
  pvalueCutoff = 0.05,
  verbose = FALSE
)
head(kk2)

In [None]:
### Reactome Enrichment Analysis.

# Over-representation test of the Entrez IDs in `gene` against Reactome.
x <- enrichPathway(
  gene = gene,
  pvalueCutoff = 0.05,
  readable = TRUE
)
head(x)

# Gene-set enrichment analysis on `gene_list` with BH-adjusted p-values.
y <- gsePathway(
  geneList = gene_list,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  verbose = FALSE
)
head(y)

# Draw one pathway by name, passing `gene_list` as the fold-change values.
viewPathway(
  pathName = "Peptide chain elongation",
  readable = TRUE,
  foldChange = gene_list
)

In [None]:
### Disease Enrichment Analysis.

# Over-representation test of the Entrez IDs in `gene` against the Disease
# Ontology, with Bonferroni-adjusted p-values and gene sets of 5 to 5000 genes.
#
# `universe` is intentionally left at its default. `gene` is itself
# `names(gene_list)`, so passing `names(gene_list)` as the background made the
# query set identical to the background, and no term can be over-represented
# in a set compared against itself.
# NOTE(review): a tighter background would be every gene tested in the
# differential expression step (before the score filter), mapped to Entrez
# IDs -- pass it via `universe` if that list is available.
# NOTE(review): recent DOSE releases may expect ont = "HDO" instead of "DO";
# confirm against the installed version.
x <- enrichDO(
  gene = gene,
  ont = "DO",
  pvalueCutoff = 0.05,
  pAdjustMethod = "bonferroni",
  minGSSize = 5,
  maxGSSize = 5000,
  qvalueCutoff = 0.05,
  readable = FALSE
)
head(x)

In [None]:
### Preparation of Data for Plots.

# Relative file names below resolve against the project folder.
setwd("/folder/")

# Exported GO enrichment tables, one spreadsheet per direction.
ego_pos <- read_xlsx(path = "GO_Upregulated_Genes.xlsx")
ego_neg <- read_xlsx(path = "GO_Downregulated_Genes.xlsx")

In [None]:
# Rebuild an `enrichResult` object from an exported GO enrichment table so
# that simplify() can be applied to it.
#
# go_table : table with at least the columns `ID` and `geneID`, where `geneID`
#            holds the "/"-separated genes of each term.
# ontology : GO ontology of the table ("BP", "MF" or "CC").
#
# The cut-offs and the adjustment method recorded on the object mirror the
# enrichGO() call that produced the tables (Bonferroni, 0.05 / 0.05), so the
# object no longer mislabels its p.adjust column as BH.
# The gene vector is kept local: the global `gene_list` built for the KEGG /
# Reactome cells is no longer overwritten here.
build_enrich_result <- function(go_table, ontology = "BP") {
  go_df <- as.data.frame(go_table)
  stopifnot(all(c("ID", "geneID") %in% names(go_df)))
  genes_per_term <- strsplit(as.character(go_df$geneID), "/")
  new("enrichResult",
    result = go_df,
    pvalueCutoff = 0.05,
    pAdjustMethod = "bonferroni",
    qvalueCutoff = 0.05,
    gene = unique(unlist(genes_per_term)),
    geneSets = setNames(genes_per_term, go_df$ID),
    organism = "UNKNOWN",
    ontology = ontology, # Or MF/CC
    keytype = "UNKNOWN",
    readable = FALSE
  )
}

enrich_obj_pos <- build_enrich_result(ego_pos, ontology = "BP")
enrich_obj_neg <- build_enrich_result(ego_neg, ontology = "BP")

# Collapse redundant GO terms: among terms whose similarity exceeds 0.8, keep
# the one with the smallest adjusted p-value.
simplified_pos <- simplify(enrich_obj_pos, cutoff = 0.8, by = "p.adjust", select_fun = min)
simplified_neg <- simplify(enrich_obj_neg, cutoff = 0.8, by = "p.adjust", select_fun = min)

In [None]:
### Significantly Enriched Terms.

pos_sig <- as.data.frame(simplified_pos)
neg_sig <- as.data.frame(simplified_neg)

# rep() keeps the assignment valid even when a direction has no terms left.
pos_sig$Condition <- rep("Upregulated", nrow(pos_sig))
neg_sig$Condition <- rep("Downregulated", nrow(neg_sig))
combined_long <- bind_rows(
  pos_sig,
  neg_sig
)

# One row per GO term description with a 0/1 flag per condition.
# select() is namespaced because attached Bioconductor annotation packages
# export their own select() that can mask dplyr's. distinct() guards against
# a repeated (Description, Condition) pair, which would otherwise turn the
# flags into list-columns.
binary_matrix <- combined_long %>%
  dplyr::select(Description, Condition) %>%
  distinct() %>%
  mutate(present = 1) %>%
  pivot_wider(names_from = Condition, values_from = present, values_fill = 0)

# Make sure both flag columns exist even if one condition contributed no
# terms; the later cells select them by name.
for (condition_name in c("Upregulated", "Downregulated")) {
  if (!condition_name %in% names(binary_matrix)) {
    binary_matrix[[condition_name]] <- rep(0, nrow(binary_matrix))
  }
}

combined_merged <- combined_long %>%
  left_join(binary_matrix, by = "Description")

In [None]:
### Create Binary Matrix.

# One row per GO term description with integer 0/1 membership flags.
# select() is namespaced because attached Bioconductor annotation packages
# export their own select() that can mask dplyr's; the matrix cell below
# already calls dplyr::select explicitly.
binary_upset <- combined_merged %>%
  dplyr::select(Description, "Upregulated", "Downregulated") %>%
  distinct() %>%
  mutate(across(c("Upregulated", "Downregulated"), ~ as.integer(.)))

# Move the description into the row names, leaving only the flag columns.
binary_upset_matrix <- binary_upset %>%
  column_to_rownames("Description")

str(binary_upset_matrix)

binary_upset_matrix <- as.data.frame(binary_upset_matrix)

In [None]:
### Matrix.

# Keep only the two membership columns and draw a quick Venn diagram of them.
mat <- binary_upset_matrix[c("Upregulated", "Downregulated")]

gplots::venn(mat)

In [None]:
### Upset Plot.

# Expose the row names (the GO term descriptions) as a regular column.
binary_upset_matrix$Description <- rownames(binary_upset_matrix)

# Intersection-size bars: fixed dark fill, no count labels.
intersection_bars <- intersection_size(
  counts = FALSE,
  mapping = aes(fill = "#2B2B2B")
) +
  scale_fill_identity()

# Set-size panel without grid lines.
set_size_panel <- upset_set_size() +
  theme(
    panel.grid = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Dot matrix: both membership states share the same light grey fill, so only
# the query colours below stand out; the legend is hidden.
membership_dots <- intersection_matrix(
  geom = geom_point(
    aes(fill = as.factor(value)),
    shape = 21,
    size = 6,
    color = "transparent"
  ),
  segment = geom_segment(color = "#4C4C4C", size = 2)
) +
  scale_fill_manual(values = c("0" = "grey90", "1" = "grey90")) +
  theme(legend.position = "none")

# Colour each set: blue for upregulated, green for downregulated.
set_highlights <- list(
  upset_query(set = c("Upregulated"), fill = "blue"),
  upset_query(set = c("Downregulated"), fill = "green")
)

plot <- upset(
  binary_upset_matrix,
  intersect = c("Upregulated", "Downregulated"),
  name = "Shared GO Terms",
  base_annotations = list("Intersection size" = intersection_bars),
  set_sizes = set_size_panel,
  matrix = membership_dots,
  queries = set_highlights
) +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

plot

ggsave("Upset_Plot.png", plot, width = 3, height = 5)

In [None]:
## Venn Diagram of GO Terms.

# Counts are disjoint: terms found only in the upregulated set, only in the
# downregulated set, and in both. eulerr reads the names as set combinations
# joined by "&", so the overlap must be named "Upregulated&Downregulated";
# any other label (e.g. "Mixed") is drawn as a third, independent set.
only_up <- binary_upset$`Upregulated` == 1 & binary_upset$`Downregulated` == 0
only_down <- binary_upset$`Upregulated` == 0 & binary_upset$`Downregulated` == 1
in_both <- binary_upset$`Upregulated` == 1 & binary_upset$`Downregulated` == 1

eul <- euler(c(
  "Upregulated" = sum(only_up),
  "Downregulated" = sum(only_down),
  "Upregulated&Downregulated" = sum(in_both)
))

# One fill colour per set.
plot(eul, quantities = TRUE, fills = c("blue", "green"))

In [None]:
### Lollipop Plot.

# Per condition, the 50 terms with the largest fold enrichment (ties kept).
top_terms <- combined_long %>%
  group_by(Condition) %>%
  slice_max(order_by = FoldEnrichment, n = 50) %>%
  ungroup()

# Descriptions that occur under more than one condition among the top terms.
overlaps <- top_terms %>%
  group_by(Description) %>%
  filter(n_distinct(Condition) > 1) %>%
  pull(Description) %>%
  unique()

# Stem styling: shared terms get their own colour and line type.
is_shared <- top_terms$Description %in% overlaps
top_terms <- top_terms %>%
  mutate(
    LineColor = ifelse(is_shared, "Shared", Condition),
    LineType = ifelse(is_shared, "Shared", "NotShared")
  )
rm(is_shared)

# Terms on the (flipped) x axis ordered by significance; stem from 0 to
# -log10(p.adjust), head sized by gene count and coloured by condition.
plot <- ggplot(
  top_terms,
  aes(x = reorder(Description, -log10(p.adjust)), y = -log10(p.adjust))
)
plot <- plot +
  geom_segment(
    aes(
      xend = Description, y = 0, yend = -log10(p.adjust),
      color = LineColor, linetype = LineType
    ),
    size = 1.5
  ) +
  geom_point(aes(size = Count, color = Condition)) +
  coord_flip()

# Titles, colour / line-type scales and legends.
plot <- plot +
  labs(
    title = "Top GO Terms by Condition (Lollipop Plot)",
    y = "-log10(p.adjust)",
    x = "GO Term"
  ) +
  scale_color_manual(
    values = c(
      "Upregulated" = "blue",
      "Downregulated" = "green",
      "Shared" = "red"
    )
  ) +
  scale_linetype_manual(
    values = c("Shared" = "dashed", "NotShared" = "solid")
  ) +
  guides(
    linetype = "none",
    color = guide_legend(title = "", override.aes = list(size = 4)),
    size = guide_legend(title = "Count")
  )

# Overall look.
plot <- plot +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "top",
    legend.box = "vertical",
    legend.box.just = "left",
    axis.text.y = element_text(size = 14, face = "bold"),
    plot.title = element_text(face = "bold")
  )

plot

ggsave("Lollipop_Plot.png", plot, width = 9, height = 7)