In [None]:
suppressMessages(suppressWarnings(library(Seurat)))
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(ggpubr)))
suppressMessages(suppressWarnings(library(ggrepel)))
suppressMessages(suppressWarnings(library(ggrastr)))
suppressMessages(suppressWarnings(library(RColorBrewer)))
suppressMessages(suppressWarnings(library(ggdendro)))
suppressMessages(suppressWarnings(library(cowplot)))
suppressMessages(suppressWarnings(library(reshape2)))
suppressMessages(suppressWarnings(library(gtools)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(stringr)))
suppressMessages(suppressWarnings(library(ggalluvial)))
suppressMessages(suppressWarnings(library(openxlsx)))
suppressMessages(suppressWarnings(library(lme4)))
suppressMessages(suppressWarnings(library(emmeans)))
suppressMessages(suppressWarnings(library(dplyr)))
suppressMessages(suppressWarnings(library(foreach)))
suppressMessages(suppressWarnings(library(doParallel)))
suppressMessages(suppressWarnings(library(pheatmap)))
suppressMessages(suppressWarnings(library(forcats)))

In [None]:
# Default width/height used by the notebook (`repr`) when rendering plots inline.
options(repr.plot.width = 15, repr.plot.height = 15)

# Data

In [None]:
# Main (Fig. 1-4): neuron and glia objects read from the "integrated" Seurat output folder
neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/neurons_finalized_2025.rds")
glia <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/glia_finalized_2025.rds")
# Shorten neuronal labels: "_<digits>_" becomes "<digits>." (a label of the form "Glu_4_2" turns into "Glu4.2").
# The same substitution is applied below to every object that carries cell-type names.
neurons$cell.type <- gsub("_(\\d+)_", "\\1.", neurons$cell.type)
# Glial labels: every underscore becomes a space
glia$cell.type <- gsub("_", " ", glia$cell.type)
# Make "integrated" the default assay of both objects
DefaultAssay(neurons) <- "integrated"
DefaultAssay(glia) <- "integrated"

# Spatial (Fig. 2): preprocessed and labelled spatial transcriptomics object
spatial <- readRDS("/projects/perslab/people/jmg776/projects/DVC/analysis/revision/spatial_transcriptomics_dvc/processed_data/spatial_object_preprocessed+labelled.rds")

# CELLEX (Fig. 2-3): one table per species; the first CSV column becomes the row names,
# the remaining columns are renamed with the cell-type substitution described above
cellex_mouse <- read.table("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/neurons_mouse_2025.esmu.csv", header = TRUE, sep = ",", row.names = 1)
cellex_rat <- read.table("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/neurons_rat_2025.esmu.csv", header = TRUE, sep = ",", row.names = 1)
cellex_macaque <- read.table("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/neurons_macaque_2025.esmu.csv", header = TRUE, sep = ",", row.names = 1)
colnames(cellex_mouse) <- gsub("_(\\d+)_", "\\1.", colnames(cellex_mouse))
colnames(cellex_rat) <- gsub("_(\\d+)_", "\\1.", colnames(cellex_rat))
colnames(cellex_macaque) <- gsub("_(\\d+)_", "\\1.", colnames(cellex_macaque))

# Ludwig 2021 (Fig. 3): re-integrated mouse neuron object, used for the Sankey diagram
ludwig2021_neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/mouse/mouse_neurons_Ludwig_reintegrated_Seurat_obj.rds")

# Bregma meta (Fig. 3): tab-separated table; the columns Run, Well and Bregma are used later
roi_bregma <- read.table("/projects/perslab/people/jmg776/projects/DVC/analysis/revision/spatial_transcriptomics_dvc/ROI_Bregma.tsv", sep = "\t", header = TRUE)

# SCENIC (Fig. 4): AUC tables with the first CSV column as row names.
# They are transposed before being stored as a "regulon" assay further down.
rat.AUC <- read.csv("/projects/perslab/people/jmg776/projects/DVC/output/SCENIC/rat_DVC_neurons_2025/rat_DVC_neurons_2025_auc.csv", header = TRUE, row.names = 1)
mouse.AUC <- read.csv("/projects/perslab/people/jmg776/projects/DVC/output/SCENIC/mouse_DVC_neurons_2025/mouse_DVC_neurons_2025_auc.csv", header = TRUE, row.names = 1)

# DEGs (Fig. 4): single-cell DESeq2 results, one object per species x compartment x regimen
DEGs.sc.mouse.neurons.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_mouse_neurons_acute_AM833_2025.rds")
DEGs.sc.mouse.glia.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_mouse_glia_acute_AM833_2025.rds")
DEGs.sc.mouse.neurons.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_mouse_neurons_chronic_AM833_2025.rds")
DEGs.sc.mouse.glia.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_mouse_glia_chronic_AM833_2025.rds")

DEGs.sc.rat.neurons.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_rat_neurons_acute_AM833_2025.rds")
DEGs.sc.rat.glia.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_rat_glia_acute_AM833_2025.rds")
DEGs.sc.rat.neurons.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_rat_neurons_chronic_AM833_2025.rds")
DEGs.sc.rat.glia.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/single-cell/DESeq2_rat_glia_chronic_AM833_2025.rds")

# Bulk DESeq2 results per species and regimen
DEGs.bulk.mouse.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/bulk/DESeq2_mouse_acute.rds")
DEGs.bulk.rat.acute <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/bulk/DESeq2_rat_acute.rds")
DEGs.bulk.mouse.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/bulk/DESeq2_mouse_chronic.rds")
DEGs.bulk.rat.chronic <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/bulk/DESeq2_rat_chronic.rds")

# Concatenate glia and neuron results (glia entries first) per species and regimen
DEGs.sc.mouse.acute <- c(DEGs.sc.mouse.glia.acute, DEGs.sc.mouse.neurons.acute)
DEGs.sc.mouse.chronic <- c(DEGs.sc.mouse.glia.chronic, DEGs.sc.mouse.neurons.chronic)
DEGs.sc.rat.acute <- c(DEGs.sc.rat.glia.acute, DEGs.sc.rat.neurons.acute)
DEGs.sc.rat.chronic <- c(DEGs.sc.rat.glia.chronic, DEGs.sc.rat.neurons.chronic)

# Rename the combined entries with the same cell-type substitution used for `neurons$cell.type`
names(DEGs.sc.mouse.acute) <- gsub("_(\\d+)_", "\\1.", names(DEGs.sc.mouse.acute))
names(DEGs.sc.rat.acute) <- gsub("_(\\d+)_", "\\1.", names(DEGs.sc.rat.acute))
names(DEGs.sc.mouse.chronic) <- gsub("_(\\d+)_", "\\1.", names(DEGs.sc.mouse.chronic))
names(DEGs.sc.rat.chronic) <- gsub("_(\\d+)_", "\\1.", names(DEGs.sc.rat.chronic))

# Logistic regression (Fig. 4)
glmnet.data <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/DEGs/bulk/glmnet.rds")

# Figure 1

## B. UMAP of neurons colored by cell type

In [None]:
# Per-cell UMAP coordinates of all neurons, annotated with the cell type and a
# numeric cell-type ID. IDs follow a numeric-aware sort of the cell-type names,
# so that 'Glu9.0' is numbered before 'Glu10.0'.
umap_embed_neurons <- neurons@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  mutate(celltype = neurons$cell.type) %>%
  mutate(
    celltype_numeric = as.numeric(
      match(celltype, str_sort(unique(celltype), numeric = TRUE))
    )
  )

# One label position per numeric ID: the median UMAP coordinate of its cells
label <- summarize(
  group_by(umap_embed_neurons, celltype_numeric),
  x = median(umap_1),
  y = median(umap_2)
)

In [None]:
# Figure 1B: neuron UMAP, one colour per numeric cell-type ID, with the ID
# printed at the median position of each population.
p <- ggplot(
  data = umap_embed_neurons,
  mapping = aes(x = umap_1, y = umap_2, colour = factor(celltype_numeric))
) +
  geom_point_rast(size = 0.1, alpha = 0.5) +
  geom_text(
    data = label,
    mapping = aes(x = x, y = y, label = celltype_numeric),
    inherit.aes = FALSE,
    fontface = "bold",
    size = 6 / .pt
  ) +
  # Manual palette; colours are assigned to the numeric IDs in level order
  scale_colour_manual(
    values = c(
      "#6D655E", "#B45C20", "#9ABDA4", "#FEEF94", "#A6B8B3",
      "#e586f7", "#92AEA5", "#76479F", "#8F7C00", "#8AC48E",
      "#9D5F35", "#F0A0FF", "#0075DC", "#856249", "#FEF495",
      "#B8228D", "#95603C", "#565AA7", "#EAF09B", "#6F94A9",
      "#C7D69F", "#2BCE48", "#3A6DAF", "#A4BCA3", "#D2B3BA",
      "#4C7AAD", "#ED0679", "#D80F85", "#756458", "#E3B8A5",
      "#ABB5BB", "#005C31", "#94FFB5", "#5EF1F2", "#FEE390",
      "#84C686", "#DDB7AC", "#AC5D26", "#610075", "#7FC97F",
      "#F9BF89", "#CDB2C1", "#00998F", "#5D87AB", "#6651A3",
      "#A55E2E", "#FEFA97", "#EEBB97", "#F4BD90", "#C1AFCF",
      "#A0BAAC", "#D9E39D", "#4563AC", "#BCAED1", "#E9BA9E",
      "#8FC195", "#666666", "#B1B3C2", "#B5C9A1", "#FDDE8F",
      "#FDD88D", "#873E9A", "#7D6351", "#FEE992", "#81A1A7",
      "#f6c7ff", "#FDCD8A", "#8D6143", "#E90E70", "#95BF9D",
      "#C7B0C8", "#FDD38B", "#C2551D", "#FDC286", "#B6B1CA",
      "#FDC788", "#FCFD99", "#E41667", "#E90680", "#A72B92"
    )
  ) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  theme_pubr() +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(colour = "black", linewidth = 0.5),
    axis.text = element_text(size = 6, face = "bold", family = "sans"),
    axis.title = element_text(size = 6, face = "bold", family = "sans")
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_1b.pdf", height = 4, width = 5)

## C. Neuronal cell population proportion bar plot

In [None]:
# Per-species cell-type proportions for the bar plot. Only macaque, rat and the
# "mouse.2023" dataset are kept. Cell types get a numeric ID (numeric-aware
# sort, so 'Glu9.0' precedes 'Glu10.0') and a '#. Celltype' label that doubles
# as the UMAP legend.
full_df <- neurons@meta.data %>%
  filter(dataset == "mouse.2023" | species %in% c("macaque", "rat")) %>%
  mutate(celltype = cell.type) %>%
  mutate(
    celltype_numeric = as.numeric(
      match(celltype, str_sort(unique(celltype), numeric = TRUE))
    ),
    celltype_meta = paste0(celltype_numeric, ". ", celltype)
  ) %>%
  count(species, celltype_numeric, celltype_meta) %>%
  group_by(species) %>%
  mutate(proportion = prop.table(n)) %>%  # share of each cell type within its species
  ungroup() %>%
  arrange(celltype_numeric) %>%
  mutate(
    # Levels in order of appearance, i.e. by ascending numeric ID after the arrange()
    celltype_meta = fct_inorder(celltype_meta),
    species = factor(species, levels = c("mouse", "rat", "macaque"))
  )

In [None]:
# Figure 1C: bars are dodged by hand. Every cell type occupies one integer slot
# on a continuous x-axis and each species is shifted by a fixed offset.
species_offsets <- c("mouse" = -0.25, "rat" = 0, "macaque" = 0.25)
full_df[["offset"]] <- species_offsets[as.character(full_df[["species"]])]
full_df[["x_adj"]] <- as.numeric(full_df[["celltype_meta"]]) + full_df[["offset"]]

p <- ggplot(
  data = full_df,
  mapping = aes(x = x_adj, y = proportion, fill = species)
) +
  geom_col(width = 0.2) +  # Adjust width as needed
  scale_fill_manual(
    values = c("mouse" = "#75C0AF", "rat" = "#546577", "macaque" = "#DC7040")
  ) +
  # Display y-axis as percentages
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # One tick per cell type, labelled with its '#. Celltype' string
  scale_x_continuous(
    breaks = unique(full_df[["celltype_numeric"]]),
    labels = levels(full_df[["celltype_meta"]])
  ) +
  labs(x = NULL, y = "Proportion", fill = "Species") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 5)
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_1c.pdf", p, width = 15, height = 10)  # Full figure, process further in illustrator

## D. UMAP of species in neurons

In [None]:
# Restrict to the three 2023 datasets (one per species)
neurons_sub <- subset(
  neurons,
  dataset %in% c("mouse.2023", "rat.2023", "macaque.2023")
)

# UMAP coordinates plus species; rows are put in random order so that no
# species is systematically drawn on top of the others
umap_embed_neurons <- neurons_sub@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  mutate(
    species = factor(neurons_sub$species, levels = c("mouse", "rat", "macaque"))
  ) %>%
  sample_frac(size = 1)

In [None]:
# Figure 1D: neuron UMAP coloured by species.
p <- ggplot(umap_embed_neurons, aes(umap_1, umap_2, colour = species)) +
  geom_point_rast(size = 0.1, alpha = 0.5) +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    # `linewidth` replaces the deprecated `size` argument of element_line()
    axis.line = element_line(colour = "black", linewidth = 0.4),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    legend.position = "right",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold")
  ) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  # Named values tie each colour to its species regardless of factor level order
  scale_color_manual(
    values = c("mouse" = "#75C0AF", "rat" = "#546577", "macaque" = "#DC7040")
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_1d.pdf", height = 4, width = 5)

## E. UMAP of glial cells colored by species

In [None]:
# Glia UMAP coordinates with cell type and species, rows in random order
umap_embed_glia <- glia@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  mutate(celltype = glia$cell.type) %>%
  mutate(species = factor(glia$species, levels = c("mouse", "rat", "macaque"))) %>%
  sample_frac(size = 1)

# Label position per glial cell type: median UMAP coordinate of its cells
label <- summarize(
  group_by(umap_embed_glia, celltype),
  x = median(umap_1),
  y = median(umap_2)
)

In [None]:
# Figure 1E: glia UMAP coloured by species, with cell-type names printed at the
# median position of each population.
p <- ggplot(umap_embed_glia, aes(x = umap_1, y = umap_2, colour = species)) +
  geom_point_rast(size = 0.1, alpha = 0.5) +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    # `linewidth` replaces the deprecated `size` argument of element_line()
    axis.line = element_line(colour = "black", linewidth = 0.4),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    legend.position = "none",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold")
  ) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  # Named values tie each colour to its species regardless of factor level order
  scale_color_manual(
    values = c("mouse" = "#75C0AF", "rat" = "#546577", "macaque" = "#DC7040")
  ) +
  geom_text(
    data = label, aes(label = celltype, x = x, y = y),
    size = 6 / .pt,
    fontface = "bold",
    inherit.aes = FALSE
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_1e.pdf", height = 4, width = 5)

# Figure 2

## D. Spatial enrichment plot and Calcr specificity

### Enrichment plot

In [None]:
#' One-sided Fisher enrichment of every cell property in every region
#'
#' For each (region, cell property) pair a 2x2 contingency table is built over
#' all observations: "in this region" vs. everything else against "has this
#' property" vs. everything else. Observations whose region or property is `NA`
#' count as "everything else". The table is tested with
#' `fisher.test(alternative = "greater")`, i.e. for over-representation of the
#' property inside the region.
#'
#' The contingency table is built directly from two logical indicators with
#' fixed TRUE/FALSE levels, so it is always 2x2 — also when a category covers
#' every observation or when a category is literally called "Others".
#'
#' @param regions Vector of region labels, one per observation.
#' @param cell_properties Vector of cell-property labels, same length as
#'   `regions`.
#' @return Numeric matrix of p-values with sorted regions as rows and sorted
#'   cell properties as columns.
calculate_fisher_region_cell_property_enrichment <- function(regions, cell_properties) {
  stopifnot(length(regions) == length(cell_properties))

  region_names <- as.character(sort(unique(na.omit(regions))))
  property_names <- as.character(sort(unique(na.omit(cell_properties))))

  regions_chr <- as.character(regions)
  properties_chr <- as.character(cell_properties)

  enrichment_matrix <- matrix(
    NA_real_,
    nrow = length(region_names),
    ncol = length(property_names),
    dimnames = list(region_names, property_names)
  )

  # Property indicators do not depend on the region: compute them once.
  # `%in%` maps NA to FALSE, so missing labels land in the "other" category.
  in_property <- lapply(property_names, function(property) {
    factor(properties_chr %in% property, levels = c(TRUE, FALSE))
  })

  for (i in seq_along(region_names)) {
    in_region <- factor(regions_chr %in% region_names[i], levels = c(TRUE, FALSE))

    for (j in seq_along(property_names)) {
      # Cell [1, 1] counts observations inside the region that carry the property
      contingency <- table(in_region, in_property[[j]])
      enrichment_matrix[i, j] <- fisher.test(contingency, alternative = "greater")$p.value
    }
  }

  enrichment_matrix
}

# Keep spots whose cell-type prediction score exceeds 0.6
spatial_sub <- subset(spatial, prediction.score.cell.type > 0.6)

# Collapse the annotated regions into NTS / DMV / AP / Other.
# Regions not listed in any branch end up as NA.
spatial_sub$region_annotated <- case_when(
  spatial_sub$region %in% c("Sol", "Sol_R", "Sol_L") ~ "NTS",
  spatial_sub$region %in% c("10N", "10N_R", "10N_L") ~ "DMV",
  spatial_sub$region %in% c("AP") ~ "AP",
  spatial_sub$region %in% c(
    "12N",                    # HYP
    "CC", "4V",               # Ventricles
    "Gig", "Ret_R", "Ret_L",  # RetNuc
    "Pre",                    # PrepositusNucleus
    "Gr", "Gr_R", "Gr_L",     # DorsalColumn
    "CB"                      # Cerebellum
  ) ~ "Other"
)

# Region x predicted-cell-type enrichment p-values; column names are converted
# to the dotted cell-type format ("_<digits>_" -> "<digits>.")
fisher_matrix <- calculate_fisher_region_cell_property_enrichment(
  spatial_sub$region_annotated,
  spatial_sub$predicted.cell.type
)
colnames(fisher_matrix) <- gsub("_(\\d+)_", "\\1.", colnames(fisher_matrix))

In [None]:
# -log(p); infinite values (p == 0) are capped at the largest finite value
inv_p <- -log(fisher_matrix)
inv_p <- replace(
  inv_p,
  !is.finite(inv_p),
  max(inv_p[is.finite(inv_p)], na.rm = TRUE)
)

# Long table of region x neuronal cell type with BH-adjusted significance.
# Note the order of operations: the FDR is computed after removing the
# non-neuronal columns but before restricting to the AP/NTS/DMV rows.
df <- as.data.frame(inv_p) %>%
  rownames_to_column("region") %>%
  pivot_longer(cols = -region, names_to = "cell_type", values_to = "inv_p") %>%
  filter(!(cell_type %in% c(
    "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia",
    "Endothelial_cells", "Ependymal_cells", "Tanycytes",
    "Pericytes", "VLMCs", "Choroid_plexus_cells"
  ))) %>%
  mutate(
    p = exp(-inv_p),
    fdr = p.adjust(p, method = "BH"),
    sig = fdr < 0.05,
    cell_type = fct_inorder(cell_type)
  ) %>%
  filter(region %in% c("DMV", "NTS", "AP")) %>%
  # Keep only cell types that are significant in at least one remaining region
  group_by(cell_type) %>%
  filter(any(sig)) %>%
  ungroup() %>%
  mutate(
    cell_type = droplevels(cell_type),
    region = factor(region, levels = c("AP", "NTS", "DMV"))
  )

In [None]:
# Figure 2D (enrichment): one tile per region x cell type. Significant tiles
# take the region colour, non-significant tiles stay white.
p <- ggplot(df, aes(region, cell_type)) +
  geom_tile(
    aes(fill = factor(
      if_else(sig, as.character(region), NA_character_),
      levels = c("AP", "NTS", "DMV"))),
    # `linewidth` replaces the deprecated `size` for the tile border
    color = "white", linewidth = 0.2) +
  scale_fill_manual(
    values = c("AP" = "#B85283", "NTS" = "#00A0BA", "DMV" = "#F0A672"),
    na.value = "white", drop = FALSE) +
  # Reverse the y-axis so the first cell type is drawn at the top
  scale_y_discrete(limits = rev(levels(df$cell_type))) +
  theme_minimal() +
  theme(
    axis.text.x     = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title      = element_blank(),
    panel.grid      = element_blank(),
    legend.position = "none")

p # Serotonin cluster was on the periphery, not actually in the DVC and will thus be discarded

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_2.1d.pdf", height = 15, width = 3)

### Calcr specificity

In [None]:
# Calcr row of each species' CELLEX table, relabelled with the species name
calcr_mouse <- cellex_mouse["Calcr", ]
calcr_rat <- cellex_rat["Calcr", ]
calcr_macaque <- cellex_macaque["Calcr", ]
rownames(calcr_mouse) <- "Mouse"
rownames(calcr_rat) <- "Rat"
rownames(calcr_macaque) <- "Macaque"

# Stack the three rows, then keep the selected cell types in plotting order
calcr_combined <- bind_rows(calcr_mouse, calcr_rat, calcr_macaque)
calcr_combined <- calcr_combined[, c(
  "Chat0.0", "GABA0.0", "GABA0.1", "GABA0.2",
  "GABA2.0", "GABA2.1", "GABA2.2", "GABA6.1",
  "GABA6.2", "Glu0.2", "Glu2.1", "Glu2.2",
  "Glu2.3", "Glu2.4", "Glu3.0", "Glu3.1",
  "Glu3.2", "Glu3.3", "Glu3.4", "Glu4.0",
  "Glu4.1", "Glu4.2", "Glu4.3", "Glu5.0",
  "Glu5.1", "Glu5.2", "Glu5.3", "Glu6.0",
  "Glu6.1", "Glu6.2", "Glu8.0", "Glu8.1",
  "Glu8.2", "Glu8.3"
)]

In [None]:
# Calcr values as a heatmap: cell types in rows, species in columns, no
# clustering, five colour bins of width 0.2 between 0 and 1.
pheatmap(
  mat = t(calcr_combined),
  color = c("#FFFFFF", "#91BFDB", "#FFFF00", "#FC8D59", "#D73027"),
  breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  display_numbers = FALSE,
  # filename = "/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_2.2d.pdf",
  height = 15,
  width = 3
)

## G. Marker gene plot

In [None]:
# Use the cell type as the active identity so FindMarkers() can address
# populations by name
Idents(neurons) <- neurons$cell.type

# Markers of each population shown in the marker gene plot, each tested
# against all remaining cells (no `ident.2` given)
glu02 <- FindMarkers(neurons, ident.1 = "Glu0.2")
glu41 <- FindMarkers(neurons, ident.1 = "Glu4.1")
glu42 <- FindMarkers(neurons, ident.1 = "Glu4.2")
glu50 <- FindMarkers(neurons, ident.1 = "Glu5.0")
# Glu5.3 is inspected and plotted below but was never computed here
glu53 <- FindMarkers(neurons, ident.1 = "Glu5.3")
glu61 <- FindMarkers(neurons, ident.1 = "Glu6.1")
glu80 <- FindMarkers(neurons, ident.1 = "Glu8.0")
glu82 <- FindMarkers(neurons, ident.1 = "Glu8.2")

In [None]:
# Manually checking and picking genes using pct.1, pct.2 and avglog2FC as guides. Cross referencing with other cell types when plotting...
head(arrange(glu53, desc(avg_log2FC)))

In [None]:
# Populations shown in the marker plot and one marker gene per population
selected_cells <- c("Glu0.2", "Glu4.1", "Glu4.2", "Glu5.0", "Glu5.3", "Glu6.1", "Glu8.0", "Glu8.2")
genes <- c("Shox2", "Prrxl1", "Olfr78", "Etv1", "Grp", "Cbln2", "Dbh", "Prlh")

# Loop-invariant: cells that belong to one of the selected populations
in_selected <- neurons$cell.type %in% selected_cells

for (i in seq_along(genes)) {

  # Create one violin plot for each gene. Will be concatenated later into one plot
  gene <- genes[i]
  gene_data <- data.frame(
    expression = neurons@assays$SCT@data[gene, in_selected],
    celltype = factor(neurons$cell.type[in_selected]),
    species = factor(neurons$species[in_selected], levels = c("mouse", "rat", "macaque"))
  )

  # The outline colour is a constant ("black"), so no colour mapping is needed
  violin_plot <- ggplot(gene_data, aes(x = celltype, y = expression, fill = species)) +
    geom_violin(scale = "width", adjust = 1, show.legend = FALSE, linewidth = 0.2, color = "black", width = 0.8) +
    theme_pubr(base_size = 6, base_family = "sans") +
    theme(
      panel.grid = element_blank(),
      axis.line.x = element_line(color = "black", linewidth = 0.4),
      axis.line.y = element_line(color = "black", linewidth = 0.4),
      axis.text.y = element_text(face = "bold"),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_text(face = "bold.italic", angle = 360, vjust = 0.5),
      plot.margin = unit(c(0, 0, 0, 0), "cm")  # Removed negative bottom margin
    ) +
    xlab("") +
    ylab(gene) +
    scale_fill_manual(
      values = c("#75C0AF", "#546577", "#DC7040"),
      name = "",
      labels = c("Mouse", "Rat", "Macaque")
    ) +
    # Only two ticks: zero and the largest whole number not above the maximum
    scale_y_continuous(
      breaks = c(0, floor(max(gene_data$expression))),
      limits = c(0, NA)
    )

  # Stored as violin_plot1 ... violin_plot8 for the grid assembled afterwards
  assign(paste0("violin_plot", i), violin_plot)
}

In [None]:
# Stack the eight per-gene violin plots in a single column
marker_plot <- plot_grid(
  violin_plot1, violin_plot2, violin_plot3, violin_plot4, violin_plot5,
  violin_plot6, violin_plot7, violin_plot8,
  align = "v",
  ncol = 1,
  rel_heights = rep(0.8, 8)  # one equal relative height per panel
)

# Empty plot that only contributes the shared cell-type labels on the x-axis
labels_plot <- ggplot(
  data.frame(celltype = factor(selected_cells)),
  aes(x = celltype, y = 1)) +
  geom_blank() +
  theme_void(base_size = 6, base_family = "sans") +
  theme(
    axis.text.x = element_text(face = "bold", angle = 45, hjust = 1)
  ) +
  xlab("")

p <- plot_grid(marker_plot, labels_plot, ncol = 1, rel_heights = c(10, 1))

p # Fix x-axis labels in illustrator

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_2e.pdf", height = 15, width = 15)

# Figure 3

## A. Dot plot, key genes

In [None]:
#' Reshape one species' CELLEX table to long format for a set of genes
#'
#' @param data Data frame with genes as row names and cell types as columns.
#' @param species_name Species label stored in the `species` column.
#' @param gene_set Genes to extract. Defaults to the global `genes` vector, as
#'   the function has always used it. Genes that are absent from `data` are
#'   skipped instead of yielding all-NA rows with the gene name "NA".
#' @return Data frame with columns `gene`, `celltype` (factor, levels in the
#'   column order of `data`), `cellex_score` and `species`; rows are ordered
#'   cell type by cell type.
format_species <- function(data, species_name, gene_set = genes) {
  present <- intersect(gene_set, rownames(data))
  scores <- data[present, , drop = FALSE]

  n_genes <- length(present)
  n_celltypes <- ncol(scores)

  data.frame(
    gene = rep(present, times = n_celltypes),
    celltype = factor(
      rep(colnames(scores), each = n_genes),
      levels = colnames(scores)
    ),
    # unlist() walks the table column by column, matching the two rep() calls
    cellex_score = as.numeric(unlist(scores, use.names = FALSE)),
    species = rep(species_name, n_genes * n_celltypes),
    stringsAsFactors = FALSE
  )
}

# Genes shown in the dot plot
genes <- c("Calcr", "Ramp3", "Glp1r")

# Long-format CELLEX scores of the three species, restricted to those genes
mouse_data <- format_species(cellex_mouse, "mouse")
rat_data <- format_species(cellex_rat, "rat")
macaque_data <- format_species(cellex_macaque, "macaque")
species_data <- filter(rbind(mouse_data, rat_data, macaque_data), gene %in% genes)

# Factor levels fix the plotting order; mouse is listed last for the plot
species_data$species <- factor(species_data$species, levels = c("macaque", "rat", "mouse"))
species_data$celltype <- factor(species_data$celltype, levels = sort(unique(neurons$cell.type)))
species_data$gene <- factor(species_data$gene, levels = rev(genes))

# Colour key "<species><score in percent, rounded>", matching the palette names below
species_data$color <- factor(
  paste0(species_data$species, round(species_data$cellex_score * 100, 0)),
  levels = c(
    paste0("mouse", 0:100),
    paste0("rat", 0:100),
    paste0("macaque", 0:100)
  )
)

# One 101-step gradient per species, named "<species>0" ... "<species>100"
palettes <- c(
  setNames(colorRampPalette(c("#C2BEC0", "#8AC1B6", "#46D3B8"))(101), paste0("mouse", 0:100)),
  setNames(colorRampPalette(c("#C2BEC0", "#819bb7", "#546577"))(101), paste0("rat", 0:100)),
  setNames(colorRampPalette(c("#C2BEC0", "#F0A572", "#E8682F"))(101), paste0("macaque", 0:100))
)

# Create one dot-plot row per gene (stored as heatmap_plot1 ... heatmap_plot3),
# restricted to the three cell types of interest
for (i in seq_along(genes)) {
  gene <- genes[i]

  heatmap_plot <- ggplot(
    species_data[species_data$gene == gene & species_data$celltype %in% c("Glu4.2", "Glu8.0", "Glu8.2"), ],
    aes(x = celltype, y = species, color = color)
  ) +
    # Background tiles; `linewidth` replaces the deprecated `size` for the border
    geom_tile(linewidth = 2, color = "white", fill = "grey99") +
    geom_point(size = 2.5, stroke = 0) +
    scale_color_manual(values = palettes) +
    scale_x_discrete(expand = c(0, 0)) +
    xlab(NULL) + ylab(gene) +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_text(size = 6, face = "bold.italic", angle = 360, vjust = 0.5),
      legend.position = "none",
      axis.line = element_line(colour = "black", linewidth = 0.4),
      axis.ticks = element_blank(),
      plot.margin = unit(c(0, 0.1, 0.1, 0.1), "cm"),
      panel.spacing = unit(0.1, "cm"),
      panel.background = element_blank(),
      panel.grid.minor = element_blank(),
      axis.text.x = element_blank()
    )

  assign(paste0("heatmap_plot", i), heatmap_plot)
}

In [None]:
#' Draw the colour-gradient legend strip for one species
#'
#' @param palette Named colour vector whose names have the form
#'   "<species><step>" with steps 0..100.
#' @param species_name Species whose entries are drawn; also used, in title
#'   case, for the strip title.
#' @return A ggplot object with one tile per palette step and axis labels from
#'   0 to 1 in steps of 0.25.
draw_gradient <- function(palette, species_name) {
  species_palette <- palette[grep(species_name, names(palette))]  # Extract only the relevant portion of the palette for the species
  species_title <- paste(str_to_title(species_name), "ESμ")

  gradient_df <- data.frame(
    # Zero-based positions: the tile at x = k is palette step k, so the axis
    # breaks at 0, 25, ..., 100 sit exactly on the labels 0, 0.25, ..., 1
    x = seq_along(species_palette) - 1,
    color = factor(names(species_palette), levels = names(species_palette))
  )

  ggplot(gradient_df, aes(x = x, y = 1, fill = color)) +
    geom_tile(show.legend = FALSE) +
    scale_fill_manual(values = species_palette) +
    theme_void() +
    ggtitle(species_title) +
    scale_x_continuous(breaks = seq(0, 100, by = 25),
                       labels = seq(0, 1, by = 0.25)) +
    theme(axis.text.x = element_text(size = 6, face = "bold"),
          plot.title = element_text(hjust = 0.5, size = 6, face = "bold"))
}

# Empty plot that only contributes the cell-type labels on the x-axis
labels_plot <- ggplot(
  data = data.frame(celltype = factor(c("Glu4.2", "Glu8.0", "Glu8.2"))),
  mapping = aes(x = celltype, y = 1)
) +
  geom_blank() +
  xlab("") +
  theme_void() +
  theme(
    axis.ticks.x = element_line(),
    axis.text.x = element_text(size = 10, face = "bold", angle = 45, hjust = 1)
  )

# One gradient strip per species, stacked vertically
legend_plot <- plot_grid(
  plotlist = list(
    draw_gradient(palettes, "mouse"),
    draw_gradient(palettes, "rat"),
    draw_gradient(palettes, "macaque")
  ),
  ncol = 1
)

# Legend on top, one row per gene, shared x labels at the bottom
p <- plot_grid(
  legend_plot,
  heatmap_plot1,
  heatmap_plot2,
  heatmap_plot3,
  labels_plot,
  ncol = 1,
  align = "v"
)

p # Fix x-axis label in illustrator

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_3a.pdf", height = 8, width = 10)

## C. Bregma levels

In [None]:
# Number of confidently predicted (score > 0.6) neuronal spots per run, section
# and cell type. Sections D1/D2 of run "spatial1" are excluded.
cell_type_by_section <- spatial@meta.data %>%
  filter(prediction.score.cell.type > 0.6) %>%
  dplyr::select(section_index, run_index, predicted.cell.type) %>%
  filter(!predicted.cell.type %in% c("Astrocytes", "Oligodendrocytes", "OPCs", "Microglia", "Endothelial_cells", "Ependymal_cells", "Tanycytes", "Pericytes", "VLMCs", "Choroid_plexus_cells")) %>%
  filter(!(section_index %in% c("D1", "D2") & run_index == "spatial1")) %>%
  group_by(run_index, section_index, predicted.cell.type) %>%
  summarize(counts = n(), .groups = "drop") %>%
  # Fraction of each cell type's spots that fall into a given section
  group_by(predicted.cell.type) %>%
  mutate(fraction = counts / sum(counts)) %>%
  ungroup()

# Convert the numeric run number to the "spatial<k>" run index (run 1 -> "spatial0").
# Guarded so that re-running this cell does not fail on the converted column.
if (is.numeric(roi_bregma$Run)) {
  roi_bregma$Run <- paste0("spatial", roi_bregma$Run - 1)
}
roi_bregma <- roi_bregma %>% dplyr::select(Run, Well, Bregma)
# Manual override of the Bregma level for well C2 of run "spatial2"
roi_bregma[roi_bregma$Run == "spatial2" & roi_bregma$Well == "C2", "Bregma"] <- -13.71
cell_type_by_section <- cell_type_by_section %>% left_join(roi_bregma, by = c("run_index" = "Run", "section_index" = "Well"))
# "<name>_<1-2 digits>_<digit>" -> "<name><digits>.<digit>"
cell_type_by_section$predicted.cell.type <- gsub("^(\\w+)_(\\d{1,2})_(\\d)$", "\\1\\2.\\3", cell_type_by_section$predicted.cell.type)
# Decimal commas -> decimal points (this leaves Bregma as a character column)
cell_type_by_section$Bregma <- gsub(",", ".", cell_type_by_section$Bregma)

In [None]:
# Figure 3C: per-section fraction of each cell type of interest, one facet per
# cell type, with Bregma levels on the y-axis.
# NOTE(review): Bregma appears to be a character column at this point (it is
# produced by gsub()), so the level order below would be lexical rather than
# numeric — confirm that this is the intended axis order.
p <- cell_type_by_section %>%
  filter(predicted.cell.type %in% c("Glu4.2", "Glu8.0", "Glu8.2")) %>%
  ggplot() +
  geom_col(
    mapping = aes(
      x = fraction,
      y = factor(
        Bregma,
        levels = rev(sort(unique(cell_type_by_section$Bregma)))
      )
    )
  ) +
  facet_grid(cols = vars(predicted.cell.type)) +
  scale_x_continuous(limits = c(0, 1), breaks = c(0, 1)) +
  theme(strip.text.x = element_text(angle = 90))

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_3c.pdf", height = 5, width = 4)

## E. Sankey diagram

In [None]:
# Cells of the integrated object that belong to the 2021 data, and their
# barcodes with the trailing "_<number>" suffix removed so that they can be
# compared with the Ludwig 2021 object
cells_2021 <- which(neurons$year == "2021")
barcodes_2021 <- gsub("_[0-9]+$", "", colnames(neurons)[cells_2021])

# Keep only Ludwig 2021 cells that are present among those barcodes
ludwig2021_neurons <- subset(ludwig2021_neurons, cells = which(colnames(ludwig2021_neurons) %in% barcodes_2021))

# Transfer the Ludwig 2021 label to the matching cell. Matching is restricted to
# the 2021 cells, so an identical stripped barcode in another dataset cannot
# receive the label. as.character() guards against a factor being written as
# its integer codes.
neurons$ludwig.celltype <- NA
neurons$ludwig.celltype[cells_2021[match(colnames(ludwig2021_neurons), barcodes_2021)]] <-
  as.character(ludwig2021_neurons$cell.subtype2)

celltype_data <- subset(
  data.frame(
    celltype = neurons$cell.type[neurons$dataset == "mouse.2021"],
    ludwig_celltype = neurons$ludwig.celltype[neurons$dataset == "mouse.2021"]),
  !is.na(ludwig_celltype))

for (ludwig_type in unique(celltype_data$ludwig_celltype)) {
  # Composition of the current Ludwig 2021 cell type in terms of present cell types
  freq_table <- as.data.frame(table(celltype_data$celltype[celltype_data$ludwig_celltype == ludwig_type]))
  # A present cell type that contributes less than 20% of the cells of this
  # Ludwig 2021 type is considered unmapped for this type
  unmappable <- freq_table$Var1[freq_table$Freq < (sum(freq_table$Freq) * 0.2)]

  celltype_data$ludwig_celltype[celltype_data$celltype %in% unmappable & celltype_data$ludwig_celltype == ludwig_type] <- "unmapped"
}

In [None]:
# Number of cells per (present cell type, Ludwig 2021 cell type) pair,
# restricted to the three cell types of interest
counts <- summarise(
  group_by(celltype_data, celltype, ludwig_celltype),
  Freq = n(),
  .groups = 'drop'
)
counts <- filter(counts, celltype %in% c("Glu4.2", "Glu8.0", "Glu8.2"))

# Figure 3E: alluvial (Sankey) diagram from present cell types (left axis) to
# Ludwig et al. 2021 cell types (right axis)
p <- ggplot(
  data = counts,
  mapping = aes(axis1 = celltype, axis2 = ludwig_celltype, y = Freq)
) +
  geom_alluvium(mapping = aes(fill = celltype), width = 1/12) +
  geom_stratum(width = 1/12, fill = "grey", color = "black") +
  # Stratum names: right-aligned left of the first axis, left-aligned right of the second
  geom_text(
    mapping = aes(
      label = after_stat(stratum),
      x = after_stat(x) + 0.1 * ifelse(after_stat(x) == 1, -1, 1),
      hjust = ifelse(after_stat(x) == 1, 1, 0)
    ),
    stat = "stratum",
    size = 3
  ) +
  scale_x_discrete(limits = c("Present", "Ludwig et. al 2021")) +
  theme_minimal() +
  theme(
    plot.title = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.text.x = element_text(size = 12),
    axis.title = element_blank(),
    legend.position = "none"
  )

p # Undecided on colors, will adjust later in illustrator

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_3e.pdf", height = 5, width = 5)

# Figure 4

## A. SCENIC analysis

In [None]:
#' Test regulon activity differences between two treatment groups per cell type
#'
#' For every cell type, the cells of that type belonging to `group1` or
#' `group2` are extracted and, for each tested regulon, the linear mixed model
#' `regulon ~ treatment + (1 | sample)` is fitted, where `sample` is the
#' `hash.ID` metadata column. The reported effect is the estimated marginal
#' mean difference `group1 - group2`.
#'
#' Cell types are processed with `foreach(...) %dopar%`, so a registered
#' parallel backend is used when one is available.
#'
#' @param seurat_obj Seurat object with a `regulon` assay and the metadata
#'   columns `cell.type`, `treatment` and `hash.ID`.
#' @param celltypes Character vector of cell types to analyse.
#' @param group1 Treatment level of interest (minuend of the contrast).
#' @param group2 Reference treatment level (subtrahend of the contrast).
#' @param regulons Row names of the `regulon` assay to test. Defaults to
#'   `"Fos..."`, the only regulon this function has tested so far. Pass `NULL`
#'   to test every regulon with non-zero activity in at least 20% of the cells
#'   of the subset.
#' @return Named list with one data frame per cell type, holding the columns
#'   `regulon`, `beta`, `SE`, `p_value` (unadjusted) and `celltype`.
identify_da_regulons <- function(seurat_obj, celltypes, group1, group2, regulons = "Fos...") {
  lmer_results <- foreach(
    celltype = celltypes,
    # Make sure worker processes of socket-based backends have the packages attached
    .packages = c("Seurat", "lme4", "emmeans")
  ) %dopar% {

    seurat_obj_sub <- subset(seurat_obj, cell.type == celltype & treatment %in% c(group2, group1))

    tested <- regulons
    if (is.null(tested)) {
      # Regulons with non-zero activity in at least 20% of the cells
      tested <- rownames(seurat_obj_sub@assays$regulon)[
        which(apply(seurat_obj_sub@assays$regulon@data, 1, function(x) {
          sum(x != 0) >= (ncol(seurat_obj_sub) * 0.20)
        }))
      ]
    }

    # A single treatment factor with group1 as the first level: the only
    # pairwise contrast is then group1 - group2, independent of how the
    # contrast happens to be labelled.
    model_data <- data.frame(
      treatment = factor(seurat_obj_sub$treatment, levels = c(group1, group2)),
      sample = as.factor(seurat_obj_sub$hash.ID)
    )

    results <- data.frame(
      regulon = tested,
      beta = rep(NA_real_, length(tested)),
      SE = rep(NA_real_, length(tested)),
      p_value = rep(NA_real_, length(tested)),
      stringsAsFactors = FALSE
    )

    for (i in seq_along(tested)) {
      model_data$regulon <- seurat_obj_sub@assays$regulon@data[tested[i], ]

      # Linear mixed effects model with a random intercept per sample
      model <- lmer(
        regulon ~ treatment + (1 | sample),
        data = model_data,
        REML = TRUE
      )

      emm <- emmeans(model, pairwise ~ treatment, adjust = "none")
      contrast_summary <- summary(emm$contrasts)

      # Two treatment levels give exactly one row: group1 - group2
      results$beta[i] <- contrast_summary$estimate[1]
      results$SE[i] <- contrast_summary$SE[1]
      results$p_value[i] <- contrast_summary$p.value[1]
    }

    results$celltype <- rep(celltype, nrow(results))
    results
  }

  names(lmer_results) <- celltypes
  lmer_results
}

#' Run the acute and chronic regulon comparisons for one species
#'
#' Uses the global objects `mouse_neurons` / `rat_neurons` and calls
#' `identify_da_regulons()` for the cell types Glu4.2, Glu8.0 and Glu8.2:
#' "A8-A" vs "V-A" (acute) and "A8-C" vs "WM-C" (chronic). P-values are
#' FDR-adjusted separately within each of the two comparisons.
#'
#' @param species Either "mouse" or "rat".
#' @return Data frame with the combined acute and chronic results plus the
#'   columns `p_adj`, `comparison`, `p_symbol`, `species` and `time_point`.
process_SCENIC_data <- function(species) {
  if (!is.character(species) || length(species) != 1L || !species %in% c("mouse", "rat")) {
    stop("`species` must be either \"mouse\" or \"rat\".", call. = FALSE)
  }

  # Set parameters based on species (switch() only evaluates the matching arm)
  seurat_obj <- switch(species, mouse = mouse_neurons, rat = rat_neurons)
  comparison_acute <- switch(
    species,
    mouse = "Cagrilintide vs. vehicle (4 hours)",
    rat = "Cagrilintide vs. vehicle control (4 hours)"
  )
  comparison_chronic <- "Cagrilintide vs. weight-matched control (8 days)"
  celltypes <- c("Glu4.2", "Glu8.0", "Glu8.2")

  # Runs one treatment comparison and returns a single FDR-adjusted, labelled table
  run_comparison <- function(group1, group2, comparison_label) {
    res <- identify_da_regulons(
      seurat_obj = seurat_obj,
      celltypes = celltypes,
      group1 = group1,
      group2 = group2
    )
    res <- dplyr::bind_rows(res, .id = "celltype")
    res$p_adj <- p.adjust(res$p_value, method = "fdr")
    res$comparison <- comparison_label
    res
  }

  # Combine acute and chronic data
  lmer_combined <- rbind(
    run_comparison("A8-A", "V-A", comparison_acute),
    run_comparison("A8-C", "WM-C", comparison_chronic)
  )
  lmer_combined$celltype <- factor(lmer_combined$celltype, levels = celltypes)

  # Add significance symbols; later assignments overwrite weaker ones
  lmer_combined$p_symbol <- ""
  lmer_combined$p_symbol[lmer_combined$p_adj < 0.05] <- "*"
  lmer_combined$p_symbol[lmer_combined$p_adj < 0.01] <- "**"
  lmer_combined$p_symbol[lmer_combined$p_adj < 0.001] <- "***"

  # Add species column
  lmer_combined$species <- species

  # Extract time point from comparison
  lmer_combined$time_point <- ifelse(grepl("4 hours", lmer_combined$comparison), "4 hours", "8 days")

  lmer_combined
}

# Species-specific neuron objects, each with the transposed AUC matrix
# attached as a "regulon" assay
mouse_neurons <- subset(neurons, dataset == "mouse.2023")
mouse_neurons[["regulon"]] <- CreateAssayObject(
  data = t(as.matrix(mouse.AUC))
)

rat_neurons <- subset(neurons, species == "rat")
rat_neurons[["regulon"]] <- CreateAssayObject(
  data = t(as.matrix(rat.AUC))
)

# Differential regulon activity per species, stacked into one table
lmer_mouse <- process_SCENIC_data("mouse")
lmer_rat <- process_SCENIC_data("rat")
lmer_combined <- rbind(lmer_mouse, lmer_rat)

# Numeric x position per cell type, shifted left for mouse and right for rat
# (small offset for subsequent plotting)
lmer_combined <- mutate(
  lmer_combined,
  x = as.numeric(celltype) + ifelse(species == "mouse", -0.2, 0.2)
)

In [None]:
# Label positions: just above the upper end of each error bar
label_data <- lmer_combined %>%
  mutate(y = beta + SE + 0.001)

# Significant rows only (kept for inspection; not used for plotting, see below)
signif_data <- label_data %>%
  filter(p_symbol != "")

# Effect size (beta) +/- SE per cell type, dodged by species and split by
# time point
p <- ggplot(lmer_combined, aes(x = celltype, y = beta, color = species, group = species)) +
  geom_point(size = 1, position = position_dodge(width = 0.6)) +
  geom_errorbar(
    aes(ymin = beta - SE, ymax = beta + SE),
    width = 0.1,
    position = position_dodge(width = 0.6)
  ) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.1) +
  # The star layer uses ALL rows (non-significant ones carry an empty label).
  # position_dodge() is computed per layer from the groups present at each x,
  # so a layer holding only the significant rows would centre a star on the
  # cell type whenever just one species is significant, instead of aligning it
  # with that species' point and error bar.
  geom_text(
    data = label_data,
    aes(label = p_symbol, y = y),
    size = 2,
    fontface = "bold",
    color = "black",
    position = position_dodge(width = 0.6)
  ) +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top",  # Place legend above the plot
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    legend.title = element_blank()
  ) +
  ylab(expression(bold(beta))) +
  xlab("") +
  scale_color_manual(values = c("#75C0AF", "#546577"), labels = c("Mouse", "Rat")) +
  facet_wrap(~time_point)

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4a.pdf", height = 4, width = 5)

## C. DEGs treatment vs. control after 4 hours and 8 days (bulk)

In [None]:
# Count of significant genes (padj < 0.05, NA ignored) for each bulk
# comparison, in the order mouse acute, mouse chronic, rat acute, rat chronic
DEGs <- data.frame(
  treatment = factor(rep(c("Acute", "Subchronic"), times = 2)),
  species = factor(rep(c("mouse", "rat"), each = 2), levels = c("mouse", "rat")),
  sign_genes = vapply(
    list(
      DEGs.bulk.mouse.acute,
      DEGs.bulk.mouse.chronic,
      DEGs.bulk.rat.acute,
      DEGs.bulk.rat.chronic
    ),
    function(res) sum(res$padj < 0.05, na.rm = TRUE),
    integer(1)
  )
)

In [None]:
# Dodged bars: number of significant bulk DEGs per treatment duration,
# coloured by species
p <- ggplot(DEGs, aes(x = treatment, y = sign_genes, fill = species)) +
  geom_col(width = 0.6, position = "dodge", size = 0.3, color = "black")

# Axis label and scales (x tick labels are relabelled to the time points)
p <- p +
  labs(y = "Differentially expressed genes") +
  scale_x_discrete(labels = c("4 hours", "8 days")) +
  scale_fill_manual(values = c("#75C0AF", "#546577"), labels = c("Mouse", "Rat"))

# Theme
p <- p +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(color = "black", size = 0.3),
    axis.ticks = element_line(color = "black", size = 0.3),
    legend.position = "top",
    legend.title = element_blank(),
    legend.key.height = unit(1, "mm"),
    legend.key.width = unit(2, "mm")
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4c.pdf", height = 4, width = 5)

## D. Accuracy of regularized logistic regression classifiers

In [None]:
# Accuracy as a percentage; species and study are derived from the row names
# by stripping the study suffix and the species prefix, respectively
glmnet_data <- mutate(
  glmnet.data,
  accuracy = accuracy * 100,
  species = gsub(".acute|.chronic", "", rownames(glmnet.data)),
  study = gsub("mouse.|rat.", "", rownames(glmnet.data))
)

In [None]:
# Dodged bars: classifier accuracy (%) per study, coloured by species
p <- ggplot(glmnet_data, aes(x = study, y = accuracy, fill = species)) +
  geom_col(width = 0.5, position = "dodge", size = 0.3, color = "black")

# Axis label and scales (y axis fixed to 0-100 with ticks every 25)
p <- p +
  labs(y = "Accuracy (%)") +
  scale_x_discrete(labels = c("4 hours", "8 days")) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 25)) +
  scale_fill_manual(values = c("#75C0AF", "#546577"), labels = c("Mouse", "Rat"))

# Theme
p <- p +
  theme_classic(base_size = 6, base_family = "sans") +
  theme(
    text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(color = "black", size = 0.3),
    axis.ticks = element_line(color = "black", size = 0.3),
    legend.position = "top",
    legend.title = element_blank(),
    legend.key.height = unit(1, "mm"),
    legend.key.width = unit(2, "mm")
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4d.pdf", height = 4, width = 5)

## E. DEGs treatment vs. control after 4 hours and 8 days (single-cell)

In [None]:
#' Count significant DEGs per cell type for rat and mouse
#'
#' @param celltypes Character vector of cell-type names to look up.
#' @param DEGs_sc_rat Named list of rat DE result tables (one per cell type),
#'   each with a `padj` column.
#' @param DEGs_sc_mouse Named list of mouse DE result tables, same layout.
#' @param comparison_label Label stored in the `comparison` column.
#' @return A data frame with columns `celltype`, `mouse`, `rat` and
#'   `comparison`. Counts are the number of rows with `padj < 0.05` (NA
#'   ignored); a cell type missing from a list yields `NA`.
get_DEGs_data <- function(celltypes, DEGs_sc_rat, DEGs_sc_mouse, comparison_label) {
  # Number of significant genes for one cell type, or NA when the list has no
  # entry for it
  count_significant <- function(deg_list, celltype) {
    deg_table <- deg_list[[celltype]]
    if (is.null(deg_table)) {
      return(NA)
    }
    sum(deg_table$padj < 0.05, na.rm = TRUE)
  }

  DEGs_data <- data.frame(celltype = celltypes, mouse = NA, rat = NA, stringsAsFactors = FALSE)

  DEGs_data$rat <- unlist(
    lapply(celltypes, function(ct) count_significant(DEGs_sc_rat, ct))
  )
  DEGs_data$mouse <- unlist(
    lapply(celltypes, function(ct) count_significant(DEGs_sc_mouse, ct))
  )

  DEGs_data$comparison <- comparison_label
  return(DEGs_data)
}

celltypes <- c("Glu4.2", "Glu8.0", "Glu8.2")

# Significant-DEG counts per cell type: acute comparison
DEGs_sc_acute <- get_DEGs_data(
  celltypes = celltypes,
  DEGs_sc_rat = DEGs.sc.rat.acute,
  DEGs_sc_mouse = DEGs.sc.mouse.acute,
  comparison_label = "Cagrilintide vs. vehicle control (4 hours)"
)

# Significant-DEG counts per cell type: chronic comparison
DEGs_sc_chronic <- get_DEGs_data(
  celltypes = celltypes,
  DEGs_sc_rat = DEGs.sc.rat.chronic,
  DEGs_sc_mouse = DEGs.sc.mouse.chronic,
  comparison_label = "Cagrilintide vs. weight-matched control (8 days)"
)

# Stack both comparisons and reshape to long format (one row per
# cell type x comparison x species)
DEGs <- rbind(DEGs_sc_acute, DEGs_sc_chronic)
DEGs_melt <- melt(DEGs, id.vars = c("celltype", "comparison"))
names(DEGs_melt) <- c("celltype", "comparison", "species", "genes")

# One instance has no DEG count (NA); plot it as zero
DEGs_melt$genes <- replace(DEGs_melt$genes, is.na(DEGs_melt$genes), 0)

In [None]:
# Dodged bars: single-cell DEG counts per cell type, one facet row per
# comparison
p <- ggplot(DEGs_melt, aes(x = celltype, y = genes, fill = species)) +
  geom_col(position = position_dodge(), color = "black", width = 0.8) +
  facet_wrap(~comparison, ncol = 1)

# Labels and scales; the y axis always reaches at least 15
p <- p +
  xlab(NULL) +
  ylab("Differentially Expressed Genes") +
  scale_fill_manual(values = c("#75C0AF", "#546577")) +
  scale_y_continuous(limits = c(0, max(15, max(DEGs_melt$genes, na.rm = TRUE))))

# Theme
p <- p +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    axis.text.y = element_text(face = "bold"),
    strip.text = element_text(face = "bold"),
    legend.title = element_blank()
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4e.pdf", width = 5, height = 4)

## F. Volcano (bulk)

In [None]:
# Keep only genes that have an adjusted p-value
tested_genes <- filter(DEGs.bulk.rat.chronic, !is.na(padj))

volcano <- tested_genes %>%
  mutate(
    # Label only the gene(s) with the smallest adjusted p-value
    label = if_else(padj == min(padj), rownames(tested_genes), ""),
    significant = padj <= 0.05,
    large_effect = abs(log2FoldChange) >= 0.5,
    # Numeric colour class: 1 = significant + large effect, 2 = significant
    # only, 3 = large effect only, 4 = everything else
    col = case_when(
      significant & large_effect   ~ 1,
      significant & !large_effect  ~ 2,
      !significant & large_effect  ~ 3,
      TRUE                         ~ 4
    )
  ) %>%
  select(log2FoldChange, padj, col, label)

In [None]:
# Volcano plot: -log10 adjusted p-value against log2 fold-change, filled by
# colour class, with dashed guides at padj = 0.05 and |log2FC| = 0.5
p <- ggplot(
  volcano,
  aes(
    y = -log10(padj),
    x = log2FoldChange,
    fill = factor(col),
    label = label
  )
) +
  geom_point(shape = 21, size = 3, alpha = 1, stroke = 0.2) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed") +
  geom_text_repel(fontface = "bold.italic", size = 2, max.overlaps = 20)

# Axis titles, fill palette and x range
p <- p +
  xlab(expression(bold(Log[2] * " fold-change"))) +
  ylab(expression(bold(-log[10] * "(" * italic(P) * ")"))) +
  scale_fill_manual(
    values = c(
      "1" = "#95567D",
      "2" = "#F9D4EC",
      "3" = "grey70",
      "4" = "grey90"
    )
  ) +
  xlim(c(-2.5, 2.5))  # Cuts off 9 points in total, but these points are not interesting

# Theme
p <- p +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    legend.position = "none",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    axis.line = element_line(size = 0.4),
    plot.margin = margin(0, 0, 0, 0, "cm")
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4f.pdf", width = 5, height = 4)

## G. Prlh feature plot (single-cell)

In [None]:
# Cells of the rat.2023 dataset passing the Prlh > 5 filter (RNA assay)
rat_neurons <- subset(neurons, dataset == "rat.2023")
DefaultAssay(rat_neurons) <- "RNA"

rat_prlh_cells <- WhichCells(rat_neurons, expression = Prlh > 5)

# UMAP coordinates of all neurons, annotated with a colour group that marks
# the Prlh-positive rat cells
umap_embed_rat_neurons <- as.data.frame(neurons@reductions$umap@cell.embeddings) %>%
  mutate(
    species = neurons$species,
    celltype = neurons$cell.type,
    Prlh_expressed = rownames(.) %in% rat_prlh_cells,
    color_group = factor(
      ifelse(Prlh_expressed, as.character(species), "Not expressed"),
      levels = c("rat", "Not expressed")
    )
  ) %>%
  # Shuffle rows, then move the background ("Not expressed") rows to the top
  # of the table so they are drawn first and the highlighted cells end up on
  # top of them
  sample_frac(1) %>%
  arrange(desc(color_group == "Not expressed"))

# Median UMAP position of each cell type
label <- summarize(
  group_by(umap_embed_rat_neurons, celltype),
  x = median(umap_1),
  y = median(umap_2)
)

In [None]:
# Rasterised UMAP scatter; Prlh-positive rat cells are coloured, all other
# cells are grey
p <- ggplot(
  umap_embed_rat_neurons,
  aes(x = umap_1, y = umap_2, color = color_group)
) +
  geom_point_rast(size = 0.1, alpha = 0.5) +
  scale_color_manual(values = c("rat" = "#546577", "Not expressed" = "grey"), name = "Expression")

# Theme: axis lines only, no grid, border or panel background
p <- p +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    legend.position = "right",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    axis.line = element_line(colour = "black", size = 0.4),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank()
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4g_prlh.pdf", width = 5, height = 4)

In [None]:
# Cells of the rat.2023 dataset passing the Calcr > 5 filter (RNA assay)
rat_neurons <- subset(neurons, dataset == "rat.2023")
DefaultAssay(rat_neurons) <- "RNA"

rat_calcr_cells <- WhichCells(rat_neurons, expression = Calcr > 5)

# UMAP coordinates of all neurons, annotated with a colour group that marks
# the Calcr-positive rat cells
umap_embed_rat_neurons <- as.data.frame(neurons@reductions$umap@cell.embeddings) %>%
  mutate(
    species = neurons$species,
    celltype = neurons$cell.type,
    Calcr_expressed = rownames(.) %in% rat_calcr_cells,
    color_group = factor(
      ifelse(Calcr_expressed, as.character(species), "Not expressed"),
      levels = c("rat", "Not expressed")
    )
  ) %>%
  # Shuffle rows, then move the background ("Not expressed") rows to the top
  # of the table so they are drawn first and the highlighted cells end up on
  # top of them
  sample_frac(1) %>%
  arrange(desc(color_group == "Not expressed"))

# Median UMAP position of each cell type
label <- summarize(
  group_by(umap_embed_rat_neurons, celltype),
  x = median(umap_1),
  y = median(umap_2)
)

In [None]:
# Rasterised UMAP scatter; Calcr-positive rat cells are coloured, all other
# cells are grey
p <- ggplot(
  umap_embed_rat_neurons,
  aes(x = umap_1, y = umap_2, color = color_group)
) +
  geom_point_rast(size = 0.1, alpha = 0.5) +
  scale_color_manual(values = c("rat" = "#546577", "Not expressed" = "grey"), name = "Expression")

# Theme: axis lines only, no grid, border or panel background
p <- p +
  theme_pubr(base_size = 6, base_family = "sans") +
  theme(
    legend.position = "right",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    axis.line = element_line(colour = "black", size = 0.4),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank()
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4g_calcr.pdf", width = 5, height = 4)

## H. Prlh expression in rat (single-cell pseudobulk)

In [None]:
# Rat Glu8.2 neurons from the four treatment arms
neurons_sub <- subset(
  neurons,
  species == "rat" &
    cell.type == "Glu8.2" &
    treatment %in% c("A8-A", "A8-C", "V-A", "WM-C")
)
# Remove samples with fewer than 5 cells
neurons_sub <- subset(
  neurons_sub,
  hash.ID %in% names(which(table(hash.ID) >= 5))
)
DefaultAssay(neurons_sub) <- "RNA"

# Genes with a non-zero count in at least 10% of the cells
genes <- rownames(neurons_sub)[rowMeans(neurons_sub@assays$RNA@counts != 0) >= 0.1]
samples <- unique(neurons_sub$hash.ID)

# Empty gene x sample table for the pseudobulk values
pseudo_data <- data.frame(matrix(NA, nrow = length(genes), ncol = length(samples)))
rownames(pseudo_data) <- genes
colnames(pseudo_data) <- samples

# One metadata row per sample
pseudo_meta <- data.frame(
  sample = samples,
  treatment = NA,
  stringsAsFactors = FALSE
)

# Per sample: mean count of every retained gene across that sample's cells,
# plus the sample's treatment
for (sample_idx in seq_along(samples)) {
  cells_in_sample <- which(neurons_sub$hash.ID == samples[sample_idx])
  pseudo_data[, sample_idx] <- rowMeans(
    neurons_sub@assays$RNA@counts[genes, cells_in_sample, drop = FALSE]
  )
  pseudo_meta$treatment[sample_idx] <- unique(neurons_sub$treatment[cells_in_sample])
}

# Long format: one row per sample x gene, with the sample's treatment
expr_data <- cbind.data.frame(t(pseudo_data), treatment = factor(pseudo_meta$treatment))
expr_data <- melt(expr_data, id.vars = "treatment")
names(expr_data) <- c("treatment", "gene", "expr")

# Treatment codes containing "-C" are the 8-day arms; codes containing "A8-"
# received cagrilintide
expr_data$time <- "4 hours"
expr_data$time[grepl("-C", expr_data$treatment)] <- "8 days"
expr_data$drug <- "Vehicle"
expr_data$drug[grepl("A8-", expr_data$treatment)] <- "Cagrilintide"

# Prepare DGE results
prlh_expr <- subset(expr_data, gene == "Prlh")
prlh_expr$drug <- factor(prlh_expr$drug, levels = c("Vehicle", "Cagrilintide"))

# Unadjusted Prlh p-values for Glu8.2 (acute first, then chronic)
p_values <- c(
  DEGs.sc.rat.acute[["Glu8.2"]]["Prlh", "pvalue"],
  DEGs.sc.rat.chronic[["Glu8.2"]]["Prlh", "pvalue"]
)

# Annotation text. The 0.05 threshold is applied to the UNROUNDED p-values:
# rounding first would label e.g. p = 0.0504 as significant ("0.05").
# P-values that would round to 0 are shown as "<0.001" instead of "0".
p_symbol <- ifelse(
  p_values > 0.05,
  "NS",
  ifelse(p_values < 0.001, "<0.001", round(p_values, 3))
)
p_values <- round(p_values, 3)

In [None]:
# Box plots of Prlh pseudobulk expression per time point, split by drug, with
# one significance bracket per time point
p <- ggplot(prlh_expr, aes(x = time, y = expr, fill = drug)) +
  geom_boxplot(
    outlier.shape = NA,
    position = position_dodge(0.6),
    width = 0.5
  ) +
  geom_signif(
    y_position = c(9, 14),
    xmin = c(0.85, 1.85),
    xmax = c(1.15, 2.15),
    annotation = p_symbol,
    tip_length = 0.01,
    textsize = 2,
    size = 0.2
  )

# Labels, fill palette and visible y range
p <- p +
  labs(
    y = "Pseudobulk expression",
    x = "",
    title = "Prlh"
  ) +
  scale_fill_manual(values = c("#AFC3E4", "#95567D")) + # Unsure about colors, may change later in illustrator
  coord_cartesian(ylim = c(0, 15))

# Theme
p <- p +
  theme_pubr(legend = "top") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 8, face = "bold.italic"),
    axis.title.y = element_text(size = 6, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 6, face = "bold"),
    axis.text.y = element_text(size = 6, face = "bold"),
    legend.text = element_text(size = 6, face = "bold"),
    legend.title = element_blank()
  )

p

# ggsave("/projects/perslab/people/jmg776/projects/DVC/figures/2025/figure_4h.pdf", width = 5, height = 4)