# Phase 1: Enrichment DotPlots--Top 10 per Region
Conceptualized and curated by Monica E. Mesecar, with feedback from collaborators and aesthetic support from Perplexity AI.

In [None]:
#Load Libraries 
suppressPackageStartupMessages({
  library(tidyverse)
  library(readr)
  library(ggplot2)
  library(ComplexHeatmap)
  library(viridis)
  library(dplyr)
  library(RColorBrewer)
})

In [None]:
# Load the enrichment table used by every later cell.
# NOTE(review): judging by the file name this is the significant-only,
# background-corrected g:Profiler output -- confirm against the upstream step.
master_df <- read_csv(
  'GSE_P1/P1_Background_Corrected_Enrichments/p1_Gprofiler_bckgrndcorr_cleaned_SIGNIF.csv',
  show_col_types = FALSE
)

In [None]:
# Store the annotation source as a factor so its levels can drive the
# colour legend further down.
master_df$source <- as.factor(master_df$source)

In [None]:
library(dplyr)

# Remove SPN rows that come from the EC, MTG or SVZ regions; rows for any
# other cell type / region combination are left in place.
master_df <- filter(
  master_df,
  !(cell_type == "SPN" & brain_region %in% c("EC", "MTG", "SVZ"))
)

In [None]:
# Collect every annotation source and brain region present in the full
# dataset so that all per-cell-type plots share the same axes and legend.
all_sources <- unique(master_df$source)
all_brain_regions <- unique(master_df$brain_region)

# Build a named colour vector with one entry per source.
# brewer.pal() needs n >= 3 and "Dark2" offers at most 8 colours, so the
# request is clamped to that range; when more than 8 sources exist the
# palette is interpolated instead of silently producing NA colours.
n_sources <- length(all_sources)
dark2_base <- brewer.pal(min(max(n_sources, 3L), 8L), "Dark2")
source_palette <- if (n_sources <= length(dark2_base)) {
  dark2_base[seq_len(n_sources)]
} else {
  grDevices::colorRampPalette(dark2_base)(n_sources)
}

# Names are the source labels, so colours can be looked up by source.
source_colors <- setNames(source_palette, as.character(all_sources))

In [None]:
# Build one dot plot per cell type. Each plot shows, for every brain region,
# the 10 enrichment terms with the smallest p-values: point colour encodes
# -log10(p_value), point size encodes `precision`, and the y-axis labels are
# coloured by the annotation source of the term.
# The result keeps the nested `data` column and adds a list-column `plot`.
enrichment_plots <- master_df %>%
  group_by(cell_type) %>%
  nest() %>%
  mutate(plot = map2(data, cell_type, function(df, ct) {
    # Top 10 enrichments per brain region: arrange() sorts the whole table by
    # p_value, slice_head() then keeps the first 10 rows within each region.
    df_top10 <- df %>%
      group_by(brain_region) %>%
      arrange(p_value) %>%
      slice_head(n = 10) %>%
      ungroup() %>%
      # Reverse the order so the first (smallest p-value) term is drawn on top
      mutate(term_name = factor(term_name, levels = rev(unique(term_name))))

    # One label colour per y-axis term, in axis (factor level) order.
    # If a term name occurs with several sources, the first matching row
    # (the one with the smallest p-value) decides the colour.
    term_levels <- levels(df_top10$term_name)
    term_sources <- as.character(
      df_top10$source[match(term_levels, as.character(df_top10$term_name))]
    )
    term_label_colors <- unname(source_colors[term_sources])

    # Invisible placeholder points covering every brain region and source, so
    # that all regions appear on the x-axis and all sources in the legend
    # even when this cell type has no enrichment for them.
    dummy_df <- expand.grid(
      brain_region = all_brain_regions,
      term_name = unique(df_top10$term_name),
      source = all_sources,
      stringsAsFactors = FALSE
    ) %>%
      mutate(source = factor(source, levels = all_sources))

    ggplot() +
      # Placeholder layer (alpha = 0): only there to train the scales
      geom_point(data = dummy_df,
                 aes(x = brain_region, y = term_name, fill = source),
                 color = "white",
                 alpha = 0,
                 shape = 21,
                 size = 3) +
      # Actual enrichment points
      geom_point(data = df_top10,
                 aes(x = brain_region, y = term_name,
                     color = -log10(p_value),
                     size = precision)) +
      scale_size_continuous(name = "Intersection Size/Query Size", range = c(0.5, 3)) +
      scale_color_viridis(name = "-log10(p_val)", direction = 1) +
      # The legend keys are made visible again via override.aes
      scale_fill_manual(name = "Source",
                        values = source_colors,
                        breaks = all_sources,
                        drop = FALSE,
                        guide = guide_legend(override.aes = list(alpha = 1, size = 3))) +
      theme_bw() +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, color = 'black', size = 8),
        # NOTE: a vector of colours in element_text() is not officially
        # supported by ggplot2 and may emit a warning, but it is what lets
        # each term label take the colour of its source.
        axis.text.y = element_text(angle = 0, hjust = 1,
                                   color = term_label_colors,
                                   size = 6),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.spacing = unit(0.5, "lines"),
        legend.position = "right",
        legend.box = "vertical",
        legend.key.size = unit(0.7, "lines"),
        legend.text = element_text(size = 7),
        plot.title = element_text(size = 10, hjust = 0.5),
        plot.margin = unit(c(0.5, 2, 0.5, 0.5), "lines")
      ) +
      xlab('Brain Region') +
      ylab('Path Name') +
      # paste0() avoids the double space that paste() would insert after ct
      ggtitle(paste0(ct, " Top 10 Enrichments\nby Brain Region"))
  }))

# Write one PDF per cell type into the working directory. The file name is
# the cell type followed by a fixed suffix; the page is 6 x 4 (ggsave's
# default unit is inches).
walk2(
  enrichment_plots$cell_type,
  enrichment_plots$plot,
  function(cell_label, cell_plot) {
    ggsave(
      filename = paste0(cell_label, "_Enrich_T10perRegion_compact4.pdf"),
      plot = cell_plot,
      width = 6,
      height = 4,
      dpi = 300
    )
  }
)


## Optional: Create Top 10 Enrichment Dataframe

In [None]:
# For every cell type, keep the 10 rows with the smallest p-values in each
# brain region and store them in the list-column `top_enrichments`, sorted
# by brain region and then by p-value.
enrichment_data <- master_df %>%
  group_by(cell_type) %>%
  nest() %>%
  mutate(
    top_enrichments = map(data, function(cell_df) {
      # Sort first, then take the leading 10 rows inside each region
      ranked <- arrange(cell_df, p_value)
      top_rows <- slice_head(group_by(ranked, brain_region), n = 10)
      arrange(ungroup(top_rows), brain_region, p_value)
    })
  )

# Inspect a single cell type (here: the first row of the nested table)
print(enrichment_data[["cell_type"]][1])
print(enrichment_data[["top_enrichments"]][[1]])

# Flatten the per-cell-type tables into one long table
all_top_enrichments <- unnest(
  select(enrichment_data, cell_type, top_enrichments),
  cols = top_enrichments
)

# Quick structural check on the first rows
print(head(all_top_enrichments))

# Export the combined table to CSV (no row names column)
write.csv(
  all_top_enrichments,
  file = "top_10_enrichments_by_region_and_cell_type_full2.csv",
  row.names = FALSE
)
