In [None]:
library(Seurat)
library(SeuratDisk)
library(ggplot2)
library(reticulate)
library(SingleCellExperiment)
library(edgeR)
library(scran)
library(ggplot2)
library(ggrepel)
library(clusterProfiler)
library(enrichplot)
# Annotation database package used for gene-ID conversion in the enrichment
# analyses further down.
organism <- "org.Hs.eg.db"
# Install the annotation package only when it is not already available, so
# that re-running the notebook does not trigger a reinstall (and a network
# round-trip) every time.
if (!requireNamespace(organism, quietly = TRUE)) {
  BiocManager::install(organism, character.only = TRUE)
}
library(organism, character.only = TRUE)



In [None]:
# Convert the .h5ad file to h5Seurat format (overwriting any previous
# conversion), then load the converted file as a Seurat object.
h5ad_path <- "./combined_CRX_40um_plaqueDistances_forDE.h5ad"
h5seurat_path <- "./combined_CRX_40um_plaqueDistances_forDE.h5seurat"

Convert(h5ad_path, dest = "h5seurat", overwrite = TRUE)
seurat_obj <- LoadH5Seurat(h5seurat_path)


In [None]:
# Lookup table mapping raw sample IDs to readable sample names
mapping <- c("B1S3" = "LEC_1", "B1S4" = "LALA-PG_1", "B2S3" = "LEC_2", "B2S4" = "LALA-PG_2")

# Index the lookup table by *character* so the match is done by name.
# If `sample` is stored as a factor, indexing with it directly would use the
# integer level codes instead of the labels, which is only correct when the
# level order happens to coincide with the order of `mapping`.
# unname() drops the lookup keys so the new column is a plain character vector.
seurat_obj@meta.data$SampleID <- unname(
  mapping[as.character(seurat_obj@meta.data$sample)]
)



In [None]:
# Minimum total TD count (summed over all columns of the raw RNA count
# matrix) a feature must reach to be kept.
x <- 90

# Feature filter: keep rows whose summed raw counts are at least `x`.
feature_totals <- Matrix::rowSums(seurat_obj@assays$RNA@counts)
seurat_obj_sub <- subset(
  seurat_obj,
  features = rownames(seurat_obj)[feature_totals >= x]
)

# Distance filter: keep only observations with distance_to_plaques below 200.
seurat_obj_sub <- subset(seurat_obj_sub, subset = distance_to_plaques < 200)


In [None]:
# Define function for plotting MA plots of edgeR DE results

#' Draw an MA plot (logCPM vs. logFC) for a differential-expression table
#'
#' Points are coloured and sized by whether `FDR < 0.05`. Genes with
#' `FDR < 0.05` are labelled in red; among the 30 genes with the largest
#' absolute logFC, those with `FDR > 0.05` and `logCPM > 14.8` are labelled in
#' blue.
#'
#' @param resTable Data frame with gene identifiers as row names and the
#'   columns `logCPM`, `logFC` and `FDR` (e.g. the `table` element of an
#'   `edgeR::topTags()` result).
#' @param overlaps Value passed to `max.overlaps` of
#'   `ggrepel::geom_label_repel()` for both label layers.
#' @param force Value passed to `force` of `ggrepel::geom_label_repel()` for
#'   both label layers.
#' @return A ggplot object titled "MA Plot of Differential Expression";
#'   callers can override the title by adding their own `ggtitle()`.
MAplot <- function(resTable, overlaps, force = 2) {

  # Fail early with a readable message instead of a cryptic ggplot error.
  required_cols <- c("logCPM", "logFC", "FDR")
  missing_cols <- setdiff(required_cols, colnames(resTable))
  if (length(missing_cols) > 0) {
    stop(
      "resTable is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  resTable2 <- resTable[order(resTable$logFC), ]
  resTable2$gene <- rownames(resTable2)
  # Always declare both levels. Together with the *named* manual scales below
  # this guarantees that significant genes are red and non-significant genes
  # blue even when only one of the two classes is present in the table
  # (unnamed scale values would be assigned by position to whatever levels
  # exist, e.g. colouring an all-significant table steelblue).
  resTable2$threshold <- factor(resTable2$FDR < 0.05, levels = c(FALSE, TRUE))

  # Aesthetics
  plot_aesthetics <- list(
    xlab("\nLog2 Counts Per Million (logCPM)"),
    ylab("Log2 Fold Change (logFC)\n"),
    theme_minimal(base_size = 15),
    theme(
      axis.title.x = element_text(face = "bold", size = 18, color = "black"),
      axis.text.x = element_text(size = 14, color = "black"),
      axis.title.y = element_text(face = "bold", size = 18, color = "black"),
      axis.text.y = element_text(size = 14, color = "black"),
      legend.title = element_text(face = "bold", size = 15),
      legend.text = element_text(size = 13),
      plot.title = element_text(face = "bold", size = 20, hjust = 0.5, color = "black"),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      plot.background = element_rect(fill = "white", color = NA),
      legend.background = element_rect(fill = "white", color = NA)
    ),
    scale_colour_manual(
      name = "p.adjusted < 0.05",
      values = c("FALSE" = "steelblue", "TRUE" = "red2")
    ),
    scale_size_manual(
      name = "p.adjusted < 0.05",
      values = c("FALSE" = 1.2, "TRUE" = 2)
    )
  )

  # MA plot
  # NOTE: alpha is mapped inside aes(), so 0.9 / 0.6 go through ggplot2's
  # alpha scale rather than being used as literal opacities.
  ma_plot <- ggplot(data = resTable2, aes(x = logCPM, y = logFC, colour = threshold, size = threshold)) +
    geom_point(aes(alpha = ifelse(threshold == "TRUE", 0.9, 0.6)), show.legend = FALSE) +
    geom_hline(aes(yintercept = 0), colour = "blue", size = 0.8, linetype = "dashed") +
    ylim(c(min(resTable2$logFC), max(resTable2$logFC))) +
    plot_aesthetics

  # Add gene labels for non-significant genes: taken from the 30 genes with
  # the largest |logFC|, restricted to FDR > 0.05 and logCPM > 14.8
  ma_plot <- ma_plot +
    geom_label_repel(
      data = subset(head(resTable2[order(-abs(resTable2$logFC)), ], 30), FDR > 0.05 & logCPM > 14.8),
      aes(logCPM, logFC, label = gene),
      size = 4,
      color = "steelblue",
      force = force,
      box.padding = 0.5,
      max.overlaps = overlaps,
      segment.color = "gray50",
      segment.size = 0.5
    )

  # Add gene labels for significant genes
  ma_plot <- ma_plot +
    geom_label_repel(
      data = subset(resTable2, FDR < 0.05),
      aes(logCPM, logFC, label = gene),
      size = 5,
      color = "red2",
      force = force,
      max.overlaps = overlaps,
      box.padding = 0.6,
      segment.color = "gray50",
      segment.size = 0.5
    )

  ma_plot + ggtitle("MA Plot of Differential Expression")
}


In [None]:
# Differential expression with respect to distance_to_plaques, restricted to
# samples B2S3 and B1S3.
seurat_obj_lec <- subset(seurat_obj_sub, subset = sample %in% c("B2S3", "B1S3"))

# Dense raw count matrix and the matching per-observation metadata
counts <- as.matrix(seurat_obj_lec@assays$RNA@counts)
metadata <- seurat_obj_lec@meta.data

sce <- SingleCellExperiment(assays = counts, colData = metadata)

# Convert the first assay to an edgeR object and compute normalization factors
dge <- calcNormFactors(convertTo(sce, type = "edgeR", assay.type = 1))

meta_dge <- dge$samples[, c("lib.size", "norm.factors")]

# Design for the GLM: intercept plus distance_to_plaques
design <- model.matrix(~metadata$distance_to_plaques)

# Dispersion estimation, quasi-likelihood fit and test (glmQLFTest is called
# with its default coefficient/contrast arguments)
dge <- estimateDisp(dge, design = design)
fit <- glmQLFit(dge, design = design)
qlf <- glmQLFTest(fit)

# Result table for all genes (n = Inf)
tt <- topTags(qlf, n = Inf)
res_lec <- tt$table


In [None]:
# Work on a copy so the raw edgeR table (res_lec) stays untouched
restable_lec <- res_lec

options(repr.plot.width = 20, repr.plot.height = 14)
# Invert LFCs so positive indicates closer to plaques, then multiply by 100
# so the value is the LFC per 100 distance units instead of per single unit.
restable_lec$logFC <- restable_lec$logFC * -1
restable_lec$logFC <- restable_lec$logFC * 100

# The lower x limit is taken from restable_lec; the previous code referenced
# `restable`, which is not defined in this notebook.
MAplot(restable_lec, 14, force = 0.2) +
  ggtitle("CRX LEC: 40um hexbins with respect to distance to AB") +
  labs(subtitle = "LFC for 100um closer to pathology") +
  xlim(min(restable_lec$logCPM), 15)

In [None]:
# Differential expression with respect to distance_to_plaques, restricted to
# samples B1S4 and B2S4.
seurat_obj_lala <- subset(seurat_obj_sub, subset = sample %in% c("B1S4", "B2S4"))

# Dense raw count matrix and the matching per-observation metadata
counts <- as.matrix(seurat_obj_lala@assays$RNA@counts)
metadata <- seurat_obj_lala@meta.data

sce <- SingleCellExperiment(assays = counts, colData = metadata)

# Convert the first assay to an edgeR object and compute normalization factors
dge <- calcNormFactors(convertTo(sce, type = "edgeR", assay.type = 1))

meta_dge <- dge$samples[, c("lib.size", "norm.factors")]

# Design for the GLM: intercept plus distance_to_plaques
design <- model.matrix(~metadata$distance_to_plaques)

# Dispersion estimation, quasi-likelihood fit and test (glmQLFTest is called
# with its default coefficient/contrast arguments)
dge <- estimateDisp(dge, design = design)
fit <- glmQLFit(dge, design = design)
qlf <- glmQLFTest(fit)

# Result table for all genes (n = Inf)
tt <- topTags(qlf, n = Inf)
res_lala <- tt$table


In [None]:
# Work on a copy so the raw edgeR table (res_lala) stays untouched
restable_lala <- res_lala

options(repr.plot.width = 20, repr.plot.height = 14)
# Invert LFCs so positive indicates closer to plaques, then multiply by 100
# so the value is the LFC per 100 distance units instead of per single unit.
restable_lala$logFC <- restable_lala$logFC * -1
restable_lala$logFC <- restable_lala$logFC * 100

# The lower x limit is taken from restable_lala; the previous code referenced
# `restable`, which is not defined in this notebook.
MAplot(restable_lala, 15, force = 0.2) +
  ggtitle("CRX LALA: 40um hexbins with respect to distance to AB") +
  labs(subtitle = "LFC for 100um closer to pathology") +
  xlim(min(restable_lala$logCPM), 15.5)

In [None]:
#' Quadrant plot comparing the logFC values of two DE result tables
#'
#' The two tables are joined on their shared column `X`. Each gene is
#' classified by `FDR < 0.05` in either table; significance in `results_1`
#' only is labelled "Significant in LALA-PG only", significance in
#' `results_2` only is labelled "Significant in Lec only".
#'
#' @param results_1 Data frame with columns `X`, `logFC` and `FDR`; drawn on
#'   the x axis.
#' @param results_2 Data frame with columns `X`, `logFC` and `FDR`; drawn on
#'   the y axis.
#' @param title Plot title.
#' @return A ggplot object.
plotQuadrant <- function(results_1, results_2, title) {

  # Join on "X"; columns present in both tables get the suffixes _1 / _2
  merged_df <- merge(results_1, results_2, by = "X", suffixes = c("_1", "_2"))

  # Per-table significance flags and the resulting four-way class
  sig_1 <- merged_df$FDR_1 < 0.05
  sig_2 <- merged_df$FDR_2 < 0.05
  merged_df$Significance <- ifelse(
    sig_1 & sig_2, "Both significant",
    ifelse(
      sig_1, "Significant in LALA-PG only",
      ifelse(sig_2, "Significant in Lec only", "Neither significant")
    )
  )
  merged_df$label_grey <- merged_df$Significance == "Neither significant"

  # Small helper: rows belonging to one significance class
  rows_with <- function(class_label) {
    merged_df[merged_df$Significance == class_label, ]
  }

  class_colours <- c(
    "Both significant" = "darkgreen",
    "Significant in LALA-PG only" = "purple2",
    "Significant in Lec only" = "red2",
    "Neither significant" = "darkgray"
  )
  legend_order <- c(
    "Both significant", "Significant in Lec only",
    "Significant in LALA-PG only", "Neither significant"
  )

  ggplot(merged_df, aes(x = logFC_1, y = logFC_2)) +
    # Reference lines: both axes and the identity diagonal
    geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
    # Points: prominent for any significant class, faint for the rest
    geom_point(
      data = merged_df[merged_df$Significance != "Neither significant", ],
      aes(color = Significance), size = 3, alpha = 0.8
    ) +
    geom_point(
      data = rows_with("Neither significant"),
      aes(color = Significance), size = 0.5, alpha = 0.15
    ) +
    # Labels: one layer per significant class
    geom_label_repel(
      data = rows_with("Significant in LALA-PG only"),
      aes(label = X), size = 8, color = "purple2",
      label.size = 0.5, max.overlaps = 7
    ) +
    geom_label_repel(
      data = rows_with("Significant in Lec only"),
      aes(label = X), size = 9, color = "red2",
      label.size = 0.5, max.overlaps = 18
    ) +
    geom_label_repel(
      data = rows_with("Both significant"),
      aes(label = X), size = 9, color = "darkgreen",
      label.size = 0.5, max.overlaps = 12
    ) +
    scale_color_manual(values = class_colours, limits = legend_order) +
    theme_minimal(base_size = 26) +
    theme(plot.margin = unit(c(1, 1, 1, 1), "lines")) +
    ggtitle(label = title)
}


In [None]:
options(repr.plot.width = 22, repr.plot.height = 15)

# Add the join column "X" expected by plotQuadrant(): the row names with the
# "GRCh38-" prefix removed.
restable_lala$X <- gsub("GRCh38-", "", row.names(restable_lala))
restable_lec$X <- gsub("GRCh38-", "", row.names(restable_lec))

# LALA-PG table on the x axis, Lec table on the y axis
quadrant_title <- "DE of TDs with respect to distance to pathology (cortex)"
plotQuadrant(restable_lala, restable_lec, quadrant_title) +
  xlab("\n Lec LALA-PG (LFC for 100um closer to pathology)") +
  ylab("Lec (LFC for 100um closer to pathology)\n ")

### KEGG GSEA on DEs

In [None]:


#' Run KEGG gene-set enrichment analysis on a DE table and plot the NES
#'
#' @param df Data frame with a `logFC` column and gene symbols as row names
#'   (an optional "GRCh38-" prefix is stripped).
#' @param title Plot title.
#' @param colour_up Fill/outline colour for sets with NES > 0 and
#'   qvalue < 0.05.
#' @param colour_down Fill/outline colour for sets with NES < 0 and
#'   qvalue < 0.05.
#' @param colour_ns Fill/outline colour for all other sets.
#' @return A ggplot bar chart of the first 30 rows of the gseKEGG result.
perform_GSEA_KEGG <- function(df, title, colour_up, colour_down, colour_ns) {

    df$Gene <- sub("GRCh38-", "", rownames(df))

    # Ranked logFC vector named by gene symbol. The names must be attached
    # BEFORE na.omit()/sort() so each value keeps its own gene.
    gene_list <- df$logFC
    names(gene_list) <- df$Gene
    gene_list <- sort(na.omit(gene_list), decreasing = TRUE)

    # Convert gene symbols to ENTREZ IDs (first mapping per symbol is kept)
    ids <- bitr(names(gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
    dedup_ids <- ids[!duplicated(ids$SYMBOL),]

    # Prepare KEGG gene list: look each symbol up explicitly instead of
    # relying on the row order of `df` matching the order returned by bitr().
    entrez <- dedup_ids$ENTREZID[match(names(gene_list), dedup_ids$SYMBOL)]
    has_entrez <- !is.na(entrez)
    kegg_gene_list <- gene_list[has_entrez]
    names(kegg_gene_list) <- entrez[has_entrez]
    # If several symbols share one ENTREZ ID keep the first entry, i.e. the
    # one with the largest logFC (the vector is already sorted decreasingly).
    kegg_gene_list <- kegg_gene_list[!duplicated(names(kegg_gene_list))]

    # Perform GSEA KEGG
    kk_result <- gseKEGG(geneList = kegg_gene_list, organism = "hsa", nPerm = 1000,
                         minGSSize = 70, maxGSSize = 800, pvalueCutoff = 0.05,
                         pAdjustMethod = "BH", keyType = "ncbi-geneid")

    res <- kk_result@result
    if (nrow(res) == 0) {
        # Without rows the axis-break computation below fails with an
        # unhelpful seq() error; report the actual reason instead.
        stop("gseKEGG returned no gene sets at pvalueCutoff = 0.05; nothing to plot.",
             call. = FALSE)
    }

    res$Treatment <- ifelse(res$NES > 0 & res$qvalue < 0.05, "Up",
                            ifelse(res$NES < 0 & res$qvalue < 0.05, "Down", "Not Sig"))

    # Significance stars based on the q-value
    res$Significance <- ifelse(res$qvalue < 0.001, "***",
                                ifelse(res$qvalue < 0.01, "**", ifelse(res$qvalue < 0.05, "*", "")))

    # Create plot: horizontal bars ordered by NES, stars drawn just inside
    # the end of each bar
    ggplot(head(res, 30), aes(x = reorder(Description, -NES), y = NES, fill = Treatment, color = Treatment)) +
        geom_bar(stat = "identity", width = 0.75, alpha = 0.5, size = 1.5) +
        geom_text(aes(label = Significance, y = NES - 0.2 * sign(NES)), size = 8, fontface = "bold") +
        scale_fill_manual(values = c("Up" = colour_up, "Down" = colour_down, "Not Sig" = colour_ns)) +
        scale_color_manual(values = c("Up" = colour_up, "Down" = colour_down, "Not Sig" = colour_ns)) +
        scale_x_discrete(expand = expansion(add = .5)) +
        scale_y_continuous(breaks = seq(floor(min(res$NES)), ceiling(max(res$NES)),
                                       ceiling((ceiling(max(res$NES)) - floor(min(res$NES))) / 6))) +
        coord_flip() +
        theme_bw(base_size = 29) +
        theme(plot.title = element_text(size = 23), legend.position = "none",
              plot.margin = margin(10, 10, 10, 35)) +
        ggtitle(title) +
        xlab("")
}


In [None]:

# KEGG GSEA bar plot for the LEC differential-expression table
perform_GSEA_KEGG(
  df = restable_lec,
  title = "LEC: GSEA KEGG wrt distance to pathology (cortex)",
  colour_up = "red",
  colour_down = "darkgrey",
  colour_ns = "#A9A9A9"
)



In [None]:
# KEGG GSEA bar plot for the LALA differential-expression table
perform_GSEA_KEGG(
  df = restable_lala,
  title = "LALA: GSEA KEGG wrt distance to pathology (cortex)",
  colour_up = "purple2",
  colour_down = "darkgrey",
  colour_ns = "#A9A9A9"
)


## Enrichment of scRNA-seq hdWGCNA modules 

In [None]:
# Module assignment table; its `module` and `gene_name` columns are used below
modules <- read.csv("../../../scRNA/900.wgcna/modules_hdWGCNA_lecMicro.csv")

In [None]:
# Prepare modules in the two-column (set, geneID) format used as TERM2GENE

# One data frame per module, in order of first appearance, named by module
module_names <- as.character(unique(modules$module))
module_list <- lapply(module_names, function(module_name) {
  data.frame(
    geneID = modules[modules$module == module_name, c("gene_name")],
    set = module_name
  )
})
names(module_list) <- module_names

# Stack all modules into one long table with plain integer row names
sets <- do.call(rbind, module_list)
row.names(sets) <- NULL
colnames(sets) <- c("geneID", "set")

# Same content with the set column first
sets2 <- as.data.frame(cbind(set = sets$set, geneID = sets$geneID))


In [None]:
options(repr.plot.width = 10, repr.plot.height = 8)

# Ranked gene list for GSEA: logFC named by gene symbol (row names without
# the "GRCh38-" prefix), NAs dropped, sorted in decreasing order
df <- restable_lec
df$Gene <- sub("GRCh38-", "", rownames(df))
original_gene_list <- df$logFC
names(original_gene_list) <- df$Gene
gene_list <- sort(na.omit(original_gene_list), decreasing = TRUE)

# GSEA against the module gene sets
gsea_mod_lec <- GSEA(
  gene_list,
  TERM2GENE = sets2,
  eps = 0,
  minGSSize = 5,
  maxGSSize = 800,
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05
)

# Order the result table by ascending NES
nes_order <- order(gsea_mod_lec@result$NES)
gsea_mod_lec@result <- gsea_mod_lec@result[nes_order, ]



In [None]:
options(repr.plot.width = 12, repr.plot.height = 10)

# Bar plot of module NES values. The bar fill is taken verbatim from
# res$Description via scale_fill_identity(), so this assumes Description
# contains colour names or hex codes.
res <- gsea_mod_lec@result

res$Treatment <- ifelse(res$NES > 0 & res$p.adjust < 0.05, "Up near plaques",
                   ifelse(res$NES < 0 & res$p.adjust < 0.05, "Down near plaques", "C"))

# Outline colour encodes the sign of the NES
res$OutlineColor <- ifelse(res$NES > 0, "Up near plaques", "Down near plaques")
res$OutlineColor <- factor(res$OutlineColor, levels = c("Up near plaques", "Down near plaques"))

# Define significance levels (stars based on the q-value)
res$Significance <- ifelse(res$qvalue < 0.001, "***",
                     ifelse(res$qvalue < 0.01, "**",
                     ifelse(res$qvalue < 0.05, "*", "")))

ggplot(res, aes(x = reorder(ID, -NES), y = NES, fill = Description, color = OutlineColor)) +
  geom_bar(stat = "identity", width = 0.8, size = 1.4, alpha = 0.75) +
  geom_text(aes(label = Significance, y = NES - 0.3 * sign(NES)),
            size = 6, fontface = "bold") +
  geom_hline(yintercept = 0, size = 1) +
  scale_fill_identity() +  # Use colors from Description column
  # The keys must match the OutlineColor factor levels exactly; the former
  # key "Down" matched no level, so negative-NES bars fell back to the
  # scale's NA colour instead of "#7a7a7a".
  scale_color_manual(values = c("Up near plaques" = "red2",
                                "Down near plaques" = "#7a7a7a")) +
  scale_x_discrete(expand = expansion(add = .5)) +
  scale_y_continuous(
    breaks = seq(
      floor(min(res$NES)), ceiling(max(res$NES)),
      ceiling((ceiling(max(res$NES)) - floor(min(res$NES))) / 6)
    )
  ) +
  coord_flip() +
  theme_bw(base_size = 22) +
  theme(
    legend.position = "right",
    legend.key.size = unit(1.5, "lines"),
    legend.text = element_text(size = 23),
    axis.text = element_text(size = 25),
    axis.title = element_text(size = 25),
    plot.title = element_text(size = 27, face = "bold", hjust = 0.6,
                              margin = margin(b = 10)),
    plot.margin = margin(10, 120, 10, 10),
    legend.title = element_text(size = 24)
  ) +
  xlab("WGCNA Modules\n") +
  labs(title = "GSEA: Lec TDs with respect to\n distance to pathology", y = "NES") +
  # Both legends are suppressed, so no legend titles need to be set
  guides(fill = "none", color = "none")
