In [None]:
suppressMessages(suppressWarnings(library(Seurat)))
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(BiocParallel))) # Parallelize doublet detection
suppressMessages(suppressWarnings(library(Matrix)))
suppressMessages(suppressWarnings(library(reticulate))) # CELLEX
suppressMessages(suppressWarnings(library(ggdendro)))
suppressMessages(suppressWarnings(library(ggalluvial)))
suppressMessages(suppressWarnings(library(RColorBrewer)))
suppressMessages(suppressWarnings(library(mclust)))
suppressMessages(suppressWarnings(library(pheatmap)))

options(repr.matrix.max.cols = 100)
options(repr.plot.height = 10, repr.plot.width = 10)

# Glia

In [None]:
# Load the per-species glia objects and attach harmonized metadata:
#   species       - species label
#   orig.clusters - copy of the clustering stored in the loaded object
#   year          - dataset year tag
#   dataset       - "<species>.<year>", used later to tell the datasets apart

# Mouse
mouse.glia <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/mouse/mouse_glia_Seurat_obj.rds")
mouse.glia$species <- "mouse"
mouse.glia$orig.clusters <- mouse.glia$SCT_snn_res.0.1
mouse.glia$year <- "2023"
mouse.glia$dataset <- paste(mouse.glia$species, mouse.glia$year, sep = ".")

# Rat
rat.glia <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/rat/rat_glia_Seurat_obj.rds")
rat.glia$species <- "rat"
rat.glia$orig.clusters <- rat.glia$SCT_snn_res.0.1
rat.glia$year <- "2023"
rat.glia$dataset <- paste(rat.glia$species, rat.glia$year, sep = ".")

# Macaque (original clusters come from the integrated assay of the loaded object)
monkey.glia <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/monkey/monkey_glia_2025.rds")
monkey.glia$species <- "macaque"
monkey.glia$orig.clusters <- monkey.glia$integrated_snn_res.0.1
monkey.glia$year <- "2023"
monkey.glia$dataset <- paste(monkey.glia$species, monkey.glia$year, sep = ".")

# Mouse, Ludwig dataset
mouse.glia.Ludwig <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/mouse/mouse_glia_Ludwig_Seurat_obj.rds")
mouse.glia.Ludwig$species <- "mouse"
mouse.glia.Ludwig$orig.clusters <- mouse.glia.Ludwig$SCT_snn_res.0.1
mouse.glia.Ludwig$year <- "2021"
mouse.glia.Ludwig$dataset <- paste(mouse.glia.Ludwig$species, mouse.glia.Ludwig$year, sep = ".")

## 1. Integration (sbatch)

### 1.1. Find common genes

In [None]:
# Add an assay whose feature names are the `species` orthologue names.
#
# The RNA counts rows are selected by the mapping column "Gene.name" and
# renamed to the names in column "<Species>.gene.name" (species in title case).
# Each target name is kept once (the first occurrence after ordering by target
# name length), so every row of the new assay carries the counts of exactly one
# source gene.
#
# Args:
#   x:               Seurat object with an "RNA" assay that has a counts layer.
#   ensembl.mapping: table with columns "Gene.name" and "<Species>.gene.name".
#   species:         target species as a single string, e.g. "mouse".
#
# Returns:
#   `x` with a new assay "<species>_RNA", which is also made the default assay.
addSpeciesAssay <- function(x, ensembl.mapping = NULL, species = NULL) {
    stopifnot(is.character(species), length(species) == 1)

    from <- "Gene.name"
    to <- paste0(stringr::str_to_title(species), ".gene.name")

    missing.cols <- setdiff(c(from, to), colnames(ensembl.mapping))
    if (length(missing.cols) > 0) {
        stop("ensembl.mapping lacks column(s): ",
             paste(missing.cols, collapse = ", "), call. = FALSE)
    }

    # Fetch the counts once; `layer` is used consistently (not the older `slot`)
    rna_counts <- GetAssayData(x, assay = "RNA", layer = "counts")

    # Keep only rows where both names are present (non-missing and non-empty)
    src <- as.character(ensembl.mapping[, from])
    dst <- as.character(ensembl.mapping[, to])
    complete <- !is.na(src) & src != "" & !is.na(dst) & dst != ""
    mapping <- ensembl.mapping[complete, c(from, to)]

    # Restrict to source genes that exist in the counts matrix, then keep one
    # row per target name
    newgenes <- mapping[mapping[, 1] %in% rownames(rna_counts), ]
    newgenes <- newgenes[order(nchar(newgenes[, 2])), ]
    newgenes <- newgenes[!duplicated(newgenes[, 2]), ]

    # fac2sparse() builds a (target name x mapping row) indicator matrix;
    # multiplying it with the selected count rows relabels them by target name.
    # drop = FALSE keeps a matrix even when a single gene remains.
    new_mat <- fac2sparse(newgenes[, 2]) %*% rna_counts[newgenes[, 1], , drop = FALSE]

    new_mat <- CreateAssayObject(counts = new_mat)
    x[[paste0(species, '_RNA')]] <- new_mat
    DefaultAssay(x) <- paste0(species, '_RNA')
    return(x)
}
                                     
# https://www.ensembl.info/2009/01/21/how-to-get-all-the-orthologous-genes-between-two-species/
# Orthologue tables (macaque -> mouse and rat -> mouse gene names)
macaque.to.mouse <- read.csv("/projects/perslab/people/jmg776/projects/DVC/data/gene_info/macaque_to_mouse.txt")
rat.to.mouse <- read.csv("/projects/perslab/people/jmg776/projects/DVC/data/gene_info/rat_to_mouse.txt")
# Append two manual rows (Chat, Ramp2) with an empty first field.
# NOTE(review): this assumes the table has exactly three columns with the gene
# name and the mouse gene name in positions 2 and 3 - confirm against the file.
rat.to.mouse <- rbind(rat.to.mouse, c("", "Chat", "Chat"), c("", "Ramp2", "Ramp2")) 
                                     
# Map macaque and rat genes to mouse orthologues
# (adds a "mouse_RNA" assay to each object and makes it the default assay)
monkey.glia <- addSpeciesAssay(monkey.glia, ensembl.mapping = macaque.to.mouse, species = "mouse")
rat.glia <- addSpeciesAssay(rat.glia, ensembl.mapping = rat.to.mouse, species = "mouse")

# Genes shared by every dataset that enters the integration. The Ludwig mouse
# object is included as well: its RNA counts are subset by `common.genes`
# further down, which fails if any of these genes is absent from that object.
common.genes <- Reduce(intersect, list(rownames(mouse.glia@assays$RNA),
                                       rownames(monkey.glia@assays$mouse_RNA),
                                       rownames(rat.glia@assays$mouse_RNA),
                                       rownames(mouse.glia.Ludwig@assays$RNA)))

# For every object: build a new RNA assay restricted to `common.genes`, keep the
# original RNA assay under a species-specific name, and install the restricted
# assay as "RNA" (made the default assay). The old "RNA" is removed before the
# new one is assigned, so the order of the statements matters.

# Macaque: counts come from the mouse-orthologue assay, which is dropped afterwards.
# NOTE(review): the original assay is stored as 'monkey_RNA' here, whereas the
# neuron section of this file uses 'macaque_RNA' - confirm which name is intended.
monkey.RNA <- CreateAssayObject(counts = monkey.glia@assays$mouse_RNA@counts[common.genes, ])
monkey.glia[['monkey_RNA']] <- monkey.glia[['RNA']]
monkey.glia[['RNA']] <- NULL
monkey.glia[["RNA"]] <- monkey.RNA
DefaultAssay(monkey.glia) <- "RNA"
monkey.glia[["mouse_RNA"]] <- NULL

# Rat: same procedure as for macaque
rat.RNA <- CreateAssayObject(counts = rat.glia@assays$mouse_RNA@counts[common.genes, ])
rat.glia[['rat_RNA']] <- rat.glia[['RNA']]
rat.glia[['RNA']] <- NULL
rat.glia[["RNA"]] <- rat.RNA
DefaultAssay(rat.glia) <- "RNA"
rat.glia[["mouse_RNA"]] <- NULL
                                 
# Mouse: counts come straight from RNA; the full original assay is kept as 'mouse_RNA'
mouse.RNA <- CreateAssayObject(counts = mouse.glia@assays$RNA@counts[common.genes, ])
mouse.glia[['mouse_RNA']] <- mouse.glia[['RNA']]
mouse.glia[['RNA']] <- NULL
mouse.glia[["RNA"]] <- mouse.RNA
DefaultAssay(mouse.glia) <- "RNA"

# Mouse (Ludwig): same procedure as for the other mouse object
mouse.RNA.Ludwig <- CreateAssayObject(counts = mouse.glia.Ludwig@assays$RNA@counts[common.genes, ])
mouse.glia.Ludwig[['mouse_RNA']] <- mouse.glia.Ludwig[['RNA']]
mouse.glia.Ludwig[['RNA']] <- NULL
mouse.glia.Ludwig[["RNA"]] <- mouse.RNA.Ludwig
DefaultAssay(mouse.glia.Ludwig) <- "RNA"

In [None]:
# Finding common genes reduces number of UMIs - most in mouse, likely due to higher number of annotated genes, and by extension, pseudogenes
hist(mouse.glia$nCount_RNA, breaks = 100)
min(mouse.glia$nCount_RNA)

hist(rat.glia$nCount_RNA, breaks = 100)
min(rat.glia$nCount_RNA)

hist(monkey.glia$nCount_RNA, breaks = 100)
min(monkey.glia$nCount_RNA)

hist(mouse.glia.Ludwig$nCount_RNA, breaks = 100)
min(mouse.glia.Ludwig$nCount_RNA)

### 1.2. Run integration

In [None]:
# SCTransform every input separately, then integrate with SCT/CCA anchors.

# The macaque object is split by its "run" metadata column; each run is
# normalized on its own and enters the integration as a separate object.
monkey.list <- SplitObject(monkey.glia, split.by = "run")
for (i in seq_along(monkey.list)) {  # seq_along() is safe for an empty list
  print(i)  # progress indicator
  # Remove the assay named "integrated" from this run before re-normalizing
  monkey.list[[i]][["integrated"]] <- NULL
  monkey.list[[i]] <- SCTransform(monkey.list[[i]], verbose = FALSE, method = "qpoisson")
}
mouse.glia <- SCTransform(mouse.glia, verbose = FALSE, method = "qpoisson")
mouse.glia.Ludwig <- SCTransform(mouse.glia.Ludwig, verbose = FALSE, method = "qpoisson")
rat.glia <- SCTransform(rat.glia, verbose = FALSE, method = "qpoisson")

# Number of dimensions used for anchor finding, integration and (later) UMAP/neighbors
dims <- 30
glia.list <- c(mouse.glia, rat.glia, monkey.list, mouse.glia.Ludwig)

# Raise the size limit for globals exported by the future framework
options(future.globals.maxSize = 80000000000)

# Integrate data
features <- SelectIntegrationFeatures(object.list = glia.list, nfeatures = 3000)

glia.list <- PrepSCTIntegration(object.list = glia.list, anchor.features = features,
    verbose = FALSE)

glia.anchors <- FindIntegrationAnchors(object.list = glia.list, normalization.method = "SCT",
    anchor.features = features, reduction = "cca", verbose = FALSE, dims = 1:dims)
glia <- IntegrateData(anchorset = glia.anchors, normalization.method = "SCT",
    verbose = FALSE, dims = 1:dims)

In [None]:
# PCA -> UMAP -> neighbor graph -> clustering at two resolutions (0.1 and 1)
glia <- glia %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
saveRDS(glia, file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/glia_integrated_2025.rds")

## 2. Labelling

### 2.1. Quality control

In [None]:
glia <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/glia_integrated_2025.rds")

In [None]:
hist(glia$nCount_RNA, breaks = 200)
hist(subset(glia, nCount_RNA < 2000)$nCount_RNA, breaks = 40)  # Cut at 500

In [None]:
# Examine where low quality cells originated from
glia_500 <- subset(glia, nCount_RNA < 500)
table(glia_500$species)
table(glia_500$dataset)

In [None]:
# Keep cells with more than 500 RNA counts, then recompute the embedding and
# the clusterings on the remaining cells
glia <- subset(glia, nCount_RNA > 500)
dims <- 30
glia <- glia %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50, verbose = FALSE) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
DimPlot(glia, group.by = "integrated_snn_res.0.1", label = TRUE, raster = TRUE)
DimPlot(glia, group.by = "integrated_snn_res.1", label = TRUE, raster = TRUE)
DimPlot(glia, group.by = "dataset", raster = TRUE)
DimPlot(glia, group.by = "treatment", raster = TRUE)

In [None]:
source("/projects/perslab/people/jmg776/projects/DVC/code/flag_clusters.R")

flagged.clusters1 <- flag.clusters(seurat.obj = glia, 
                                   genes = c("Slc17a6", "Slc32a1"),
                                  resolution = "integrated_snn_res.1", quantile = 0.95)

In [None]:
DefaultAssay(glia) <- "RNA"

# Look at neurotransmitter markers
DotPlot(glia, features = c("Slc17a6", "Slc32a1", "Chat"), group.by = "integrated_snn_res.1")
FeaturePlot(glia, features = "Slc17a6", raster = TRUE, max.cutoff = "q75")
FeaturePlot(glia, features = "Slc32a1", raster = TRUE, max.cutoff = "q75")
FeaturePlot(glia, features = "Chat", raster = TRUE, max.cutoff = "q75")

# Look at neuronal markers
FeaturePlot(glia, features = "Rbfox3", raster = TRUE, max.cutoff = "q90") # NeuN
FeaturePlot(glia, features = "Stmn2", raster = TRUE, max.cutoff = "q90")
FeaturePlot(glia, features = "Map2", raster = TRUE, max.cutoff = "q90")

In [None]:
# Some neuronal markers are present, particularly for 16, and less consistently for 11
# Both clusters are strangely somewhat positive for all neurotransmitters, which might indicate bad quality
# Course of action: Remove 11 and 16. Save 16 in case relevant for neuronal dataset

glia_neurons <- subset(glia, integrated_snn_res.1 == 16)
saveRDS(glia_neurons, file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/neurons_from_glia_2025.rds")

glia <- subset(glia, integrated_snn_res.1 %in% c(11, 16), invert = TRUE)

In [None]:
DimPlot(glia, group.by = "integrated_snn_res.1", label = TRUE, raster = TRUE)

In [None]:
# Metadata column holding the clustering that is being quality-checked
resolution <- "integrated_snn_res.1"

# Names of the clusters that contain at least `min.cells` cells of one dataset.
# Both the cluster names and the cell counts are taken from the same dataset
# (the mouse.2023 list previously counted cells of all mouse datasets).
clusters.in.dataset <- function(dataset.name, min.cells = 5) {
  cluster.sizes <- table(glia@meta.data[glia$dataset == dataset.name, resolution])
  names(cluster.sizes)[which(cluster.sizes >= min.cells)]
}

mouse.2023.clusters <- clusters.in.dataset("mouse.2023")
mouse.2021.clusters <- clusters.in.dataset("mouse.2021")
rat.2023.clusters <- clusters.in.dataset("rat.2023")
macaque.2023.clusters <- clusters.in.dataset("macaque.2023")

# Marker genes used to detect clusters that express markers of more than one glial type
genes <- c("Ikzf1", "Slc38a5", "Ntsr2", "Myt1", "Mbp") # In order: Microglia, #, Astrocytes, OPC, Oligodendrocytes

# One list per dataset, with one entry per marker gene (filled in the loop below)
mouse.2023.list <- rat.2023.list <- macaque.2023.list <- mouse.2021.list <- vector(mode = "list",
                                                                                   length(genes))
names(mouse.2023.list) <- names(rat.2023.list) <- names(macaque.2023.list) <- names(mouse.2021.list) <- genes

# For every marker gene and dataset: compute the mean SCT expression per cluster
# (using only the cells of that dataset) and record the clusters whose mean
# exceeds a quarter of the highest cluster mean.
for (gene in genes) {

  cluster.mean <- sapply(mouse.2023.clusters, FUN = function(x) {
    mean(glia@assays$SCT@data[gene, which(glia@meta.data[, resolution] == x & glia$dataset == "mouse.2023")])})
  flag <- names(cluster.mean)[which(cluster.mean > (max(cluster.mean)/4))]
  mouse.2023.list[[gene]] <- flag

  cluster.mean <- sapply(rat.2023.clusters, FUN = function(x) {
    mean(glia@assays$SCT@data[gene, which(glia@meta.data[, resolution] == x & glia$dataset == "rat.2023")])})
  flag <- names(cluster.mean)[which(cluster.mean > (max(cluster.mean)/4))]
  rat.2023.list[[gene]] <- flag

  cluster.mean <- sapply(macaque.2023.clusters, FUN = function(x) {
    mean(glia@assays$SCT@data[gene, which(glia@meta.data[, resolution] == x & glia$dataset == "macaque.2023")])})
  flag <- names(cluster.mean)[which(cluster.mean > (max(cluster.mean)/4))]
  macaque.2023.list[[gene]] <- flag
  
  cluster.mean <- sapply(mouse.2021.clusters, FUN = function(x) {
    mean(glia@assays$SCT@data[gene, which(glia@meta.data[, resolution] == x & glia$dataset == "mouse.2021")])})
  flag <- names(cluster.mean)[which(cluster.mean > (max(cluster.mean)/4))]
  mouse.2021.list[[gene]] <- flag

}

# Per dataset: clusters recorded for more than one marker. Marker 1 is compared
# with markers 2-5, marker 2 with 3-5 and marker 3 with 4-5; markers 4 and 5
# (Myt1 and Mbp) are not compared with each other.
mouse.2023.flag <- c(mouse.2023.list[[1]][mouse.2023.list[[1]] %in% unlist(mouse.2023.list[2:5])],
                     mouse.2023.list[[2]][mouse.2023.list[[2]] %in% unlist(mouse.2023.list[3:5])],
                     mouse.2023.list[[3]][mouse.2023.list[[3]] %in% unlist(mouse.2023.list[4:5])])
        
rat.2023.flag <- c(rat.2023.list[[1]][rat.2023.list[[1]] %in% unlist(rat.2023.list[2:5])],
                   rat.2023.list[[2]][rat.2023.list[[2]] %in% unlist(rat.2023.list[3:5])],
                   rat.2023.list[[3]][rat.2023.list[[3]] %in% unlist(rat.2023.list[4:5])])

macaque.2023.flag <- c(macaque.2023.list[[1]][macaque.2023.list[[1]] %in% unlist(macaque.2023.list[2:5])],
                      macaque.2023.list[[2]][macaque.2023.list[[2]] %in% unlist(macaque.2023.list[3:5])],
                      macaque.2023.list[[3]][macaque.2023.list[[3]] %in% unlist(macaque.2023.list[4:5])])

mouse.2021.flag <- c(mouse.2021.list[[1]][mouse.2021.list[[1]] %in% unlist(mouse.2021.list[2:5])],
                     mouse.2021.list[[2]][mouse.2021.list[[2]] %in% unlist(mouse.2021.list[3:5])],
                     mouse.2021.list[[3]][mouse.2021.list[[3]] %in% unlist(mouse.2021.list[4:5])])

# Number of datasets in which each cluster was flagged
suspicious.clusters.table <- as.data.frame(table(c(unique(mouse.2023.flag),
                                                   unique(rat.2023.flag), 
                                                   unique(macaque.2023.flag),
                                                   unique(mouse.2021.flag))))
colnames(suspicious.clusters.table) <- c("cluster", "freq")

# Number of datasets in which each cluster appears in the per-dataset cluster vectors
clusters.table <- as.data.frame(table(c(mouse.2023.clusters, rat.2023.clusters,
                                      macaque.2023.clusters, mouse.2021.clusters)))
colnames(clusters.table) <- c("cluster", "freq")
      
# After the merge, freq.x = datasets where flagged, freq.y = datasets where present
suspicious.clusters.table <- merge(suspicious.clusters.table, clusters.table, by = "cluster")
    
# Final call: flagged in every dataset where the cluster is present, or in at least 3 datasets
flagged.clusters2 <- as.character(suspicious.clusters.table$cluster[which(suspicious.clusters.table$freq.x == suspicious.clusters.table$freq.y | suspicious.clusters.table$freq.x >= 3)])

flagged.clusters2

In [None]:
# Look at glia markers
DotPlot(glia, features = c("Ikzf1", "Slc38a5", "Ntsr2", "Myt1", "Mbp")) # In order: Microglia, #, Astrocytes, OPC, Oligodendrocytes
FeaturePlot(glia, features = "Ikzf1", max.cutoff = "q90", raster = TRUE)
FeaturePlot(glia, features = "Slc38a5", max.cutoff = "q90", raster = TRUE)
FeaturePlot(glia, features = "Ntsr2", max.cutoff = "q90", raster = TRUE)
FeaturePlot(glia, features = "Myt1", max.cutoff = "q90", raster = TRUE)
FeaturePlot(glia, features = "Mbp", max.cutoff = "q90", raster = TRUE)

In [None]:
# All the flagged clusters heterogeneous for multiple glia markers
# Course of action: Remove

glia <- subset(glia, integrated_snn_res.1 %in% flagged.clusters2, invert = T)

In [None]:
# Switch back to the integrated assay and recompute the embedding and the
# clusterings after the flagged clusters were removed
DefaultAssay(glia) <- "integrated"
glia <- glia %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50, verbose = FALSE) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
DimPlot(glia, group.by = "integrated_snn_res.0.1", label = TRUE, raster = TRUE) + NoLegend()
DimPlot(glia, group.by = "integrated_snn_res.1", label = TRUE, raster = TRUE) + NoLegend()
DimPlot(glia, group.by = "species", shuffle = TRUE, raster = TRUE)
DimPlot(glia, group.by = "treatment", shuffle = TRUE, raster = TRUE)

### 2.2. Add labels

In [None]:
DefaultAssay(glia) <- "RNA"

FeaturePlot(glia, features = "Clic6", max.cutoff = "q90", raster = TRUE) # Choroid Plexus
FeaturePlot(glia, features = "Gfap", max.cutoff = "q90", raster = TRUE) # Astrocytes
FeaturePlot(glia, features = "Mbp", max.cutoff = "q90", raster = TRUE) # Oligo
FeaturePlot(glia, features = "Myt1", max.cutoff = "q90", raster = TRUE) # OPC
FeaturePlot(glia, features = "Ikzf1", max.cutoff = "q90", raster = TRUE) # Microglia
FeaturePlot(glia, features = "Lef1", max.cutoff = "q90", raster = TRUE) # Endothelial
FeaturePlot(glia, features = "Tmem212", max.cutoff = "q90", raster = TRUE) # Ependymal
FeaturePlot(glia, features = "Abcc9", max.cutoff = "q90", raster = TRUE) # Pericyte
FeaturePlot(glia, features = "Dcn", max.cutoff = "q90", raster = TRUE) # VLMC

In [None]:
# Having a hard time separating endothelial and pericytes. Checking marker genes
# ident.1 refers to the active identities of `glia`.
# NOTE(review): presumably these are the resolution-1 clusters from the last
# FindClusters call - confirm before relying on the cluster numbers.
# NOTE(review): the default assay is "RNA" here, and the RNA assays were rebuilt
# from raw counts with CreateAssayObject; no NormalizeData call is visible in
# this file - confirm the RNA data layer is normalized before interpreting
# avg_log2FC.
marker_16endo <- FindMarkers(glia, ident.1 = 16)
marker_21peri <- FindMarkers(glia, ident.1 = 21)
marker_26mixed <- FindMarkers(glia, ident.1 = 26)

# Top 10 markers per cluster: detected in >50% of the cluster's cells and in
# <30% of the remaining cells, ranked by log2 fold change
marker_16endo %>% filter(pct.1 > 0.5 & pct.2 < 0.3) %>% arrange(desc(avg_log2FC)) %>% head(10)
marker_21peri %>% filter(pct.1 > 0.5 & pct.2 < 0.3) %>% arrange(desc(avg_log2FC)) %>% head(10) # Pericyte 1
marker_26mixed %>% filter(pct.1 > 0.5 & pct.2 < 0.3) %>% arrange(desc(avg_log2FC)) %>% head(10) # Pericyte 2

In [None]:
glia@meta.data$UMAP_1 <- Embeddings(glia, "umap")[,1]
glia <- subset(glia, subset = (integrated_snn_res.1 == 24 & UMAP_1 > 0), invert = TRUE) # Removing these because they are low counts (not shown, but can be reproduced) and cluster strangely

In [None]:
# Assign a cell-type label to every cell based on its cluster membership.
# Most labels use the coarse clustering (resolution 0.1); Ependymal cells,
# Pericytes and VLMCs are picked from the fine clustering (resolution 1).
# The statements are applied in order, so a cell matched by several lines keeps
# the label of the last matching line; cells matched by none stay NA.
glia$cell.type <- NA
glia$cell.type[which(glia$integrated_snn_res.0.1 %in% c(0, 1))] <- "Oligodendrocytes"
glia$cell.type[which(glia$integrated_snn_res.0.1 == 2)] <- "Astrocytes"
glia$cell.type[which(glia$integrated_snn_res.0.1 == 3)] <- "Microglia"
glia$cell.type[which(glia$integrated_snn_res.0.1 %in% c(4, 9))] <- "OPCs"
glia$cell.type[which(glia$integrated_snn_res.0.1 == 5)] <- "Endothelial_cells"
glia$cell.type[which(glia$integrated_snn_res.0.1 == 6)] <- "Choroid_plexus_cells"
glia$cell.type[which(glia$integrated_snn_res.1 == 24)] <- "Ependymal_cells"
glia$cell.type[which(glia$integrated_snn_res.0.1 == 8)] <- "Tanycytes"
# Closing parenthesis of which() restored (the line did not parse before)
glia$cell.type[which(glia$integrated_snn_res.1 %in% c(21, 26))] <- "Pericytes" # Mette suggested that these were pericytes 1/2 subtypes
glia$cell.type[which(glia$integrated_snn_res.1 == 22)] <- "VLMCs"

In [None]:
DimPlot(glia, group.by = "cell.type", label = TRUE, raster = TRUE) + NoLegend()

In [None]:
glia <- subset(glia, cells = colnames(glia)[ is.na(glia@meta.data$cell.type) ], invert = TRUE) # 11 straggler NA's, probably from UMAP-subsetting of Choroid Plexus cells

In [None]:
saveRDS(glia, file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/glia_finalized_2025.rds")

### 2.3. CELLEX

In [None]:
use_condaenv("/home/jmg776/.conda/envs/CELLEX")
cellex <- import("cellex", convert = FALSE)
pd <- import("pandas", convert = FALSE)

In [None]:
# Grab raw counts (layer = "counts" of the RNA assay) per species; normalization
# is requested from CELLEX itself (normalize = TRUE when the ESObject is built).
# species == "mouse" selects the cells of both mouse datasets.
counts_mouse <- as.data.frame(GetAssayData(subset(glia, species == "mouse"), assay = "RNA", layer = "counts"))
counts_rat <- as.data.frame(GetAssayData(subset(glia, species == "rat"), assay = "RNA", layer = "counts"))
counts_macaque <- as.data.frame(GetAssayData(subset(glia, species == "macaque"), assay = "RNA", layer = "counts"))

# Grab meta data (one-column data frame with the cell.type label of each cell)
meta_mouse <- subset(glia, species == "mouse")$cell.type %>% as.data.frame()
meta_rat <- subset(glia, species == "rat")$cell.type %>% as.data.frame()
meta_macaque <- subset(glia, species == "macaque")$cell.type %>% as.data.frame()

In [None]:
# Run CELLEX
eso_mouse <- cellex$ESObject(data = counts_mouse, annotation = meta_mouse, normalize = TRUE, verbose = TRUE)
eso_mouse$compute(verbose = TRUE)
eso_mouse$results$esmu$to_csv("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/glia_mouse_2025.esmu.csv")

eso_rat <- cellex$ESObject(data = counts_rat, annotation = meta_rat, normalize = TRUE, verbose = TRUE)
eso_rat$compute(verbose = TRUE)
eso_rat$results$esmu$to_csv("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/glia_rat_2025.esmu.csv")

eso_macaque <- cellex$ESObject(data = counts_macaque, annotation = meta_macaque, normalize = TRUE, verbose = TRUE)
eso_macaque$compute(verbose = TRUE)
eso_macaque$results$esmu$to_csv("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/glia_macaque_2025.esmu.csv")

# Neurons

In [None]:
# Load the per-species neuron objects and attach harmonized metadata
# (species, original clusters, year, "<species>.<year>" dataset tag, and a
# from_glia_neurons marker set to "no" for all cells loaded here).

# Mouse 2023
mouse.neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/mouse/mouse_neurons_Seurat_obj.rds")
mouse.neurons$species <- "mouse"
mouse.neurons$orig.clusters <- mouse.neurons$SCT_snn_res.0.1
mouse.neurons$year <- "2023"
mouse.neurons$dataset <- paste(mouse.neurons$species, mouse.neurons$year, sep = ".")
mouse.neurons$from_glia_neurons <- "no"

# Rat 2023
rat.neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/rat/rat_neurons_Seurat_obj.rds")
rat.neurons$species <- "rat"
rat.neurons$orig.clusters <- rat.neurons$SCT_snn_res.0.1
rat.neurons$year <- "2023"
rat.neurons$dataset <- paste(rat.neurons$species, rat.neurons$year, sep = ".")
rat.neurons$from_glia_neurons <- "no"

# Macaque (Monkey) 2023
monkey.neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/monkey/monkey_neurons_2025.rds")
monkey.neurons$species <- "macaque"
monkey.neurons$orig.clusters <- monkey.neurons$integrated_snn_res.0.1
monkey.neurons$year <- "2023"
monkey.neurons$dataset <- paste(monkey.neurons$species, monkey.neurons$year, sep = ".")
monkey.neurons$from_glia_neurons <- "no"

# Mouse 2021 (Ludwig)
mouse.neurons.Ludwig <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/mouse/mouse_neurons_Ludwig_Seurat_obj.rds")
mouse.neurons.Ludwig$species <- "mouse"
mouse.neurons.Ludwig$orig.clusters <- mouse.neurons.Ludwig$SCT_snn_res.0.1
mouse.neurons.Ludwig$year <- "2021"
mouse.neurons.Ludwig$dataset <- paste(mouse.neurons.Ludwig$species, mouse.neurons.Ludwig$year, sep = ".")
mouse.neurons.Ludwig$from_glia_neurons <- "no"

## 2. Integration (sbatch)

### 2.1. Find common genes

In [None]:
# Add an assay whose feature names are the `species` orthologue names.
#
# The RNA counts rows are selected by the mapping column "Gene.name" and
# renamed to the names in column "<Species>.gene.name" (species in title case).
# Each target name is kept once (the first occurrence after ordering by target
# name length), so every row of the new assay carries the counts of exactly one
# source gene.
#
# Args:
#   x:               Seurat object with an "RNA" assay that has a counts layer.
#   ensembl.mapping: table with columns "Gene.name" and "<Species>.gene.name".
#   species:         target species as a single string, e.g. "mouse".
#
# Returns:
#   `x` with a new assay "<species>_RNA", which is also made the default assay.
addSpeciesAssay <- function(x, ensembl.mapping = NULL, species = NULL) {
    stopifnot(is.character(species), length(species) == 1)

    from <- "Gene.name"
    to <- paste0(stringr::str_to_title(species), ".gene.name")

    missing.cols <- setdiff(c(from, to), colnames(ensembl.mapping))
    if (length(missing.cols) > 0) {
        stop("ensembl.mapping lacks column(s): ",
             paste(missing.cols, collapse = ", "), call. = FALSE)
    }

    # Fetch the counts once instead of once per lookup
    rna_counts <- GetAssayData(x, assay = "RNA", layer = "counts")

    # Keep only rows where both names are present (non-missing and non-empty)
    src <- as.character(ensembl.mapping[, from])
    dst <- as.character(ensembl.mapping[, to])
    complete <- !is.na(src) & src != "" & !is.na(dst) & dst != ""
    mapping <- ensembl.mapping[complete, c(from, to)]

    # Restrict to source genes that exist in the counts matrix, then keep one
    # row per target name
    newgenes <- mapping[mapping[, 1] %in% rownames(rna_counts), ]
    newgenes <- newgenes[order(nchar(newgenes[, 2])), ]
    newgenes <- newgenes[!duplicated(newgenes[, 2]), ]

    # fac2sparse() builds a (target name x mapping row) indicator matrix;
    # multiplying it with the selected count rows relabels them by target name.
    # drop = FALSE keeps a matrix even when a single gene remains.
    new_mat <- fac2sparse(newgenes[, 2]) %*% rna_counts[newgenes[, 1], , drop = FALSE]

    new_mat <- CreateAssayObject(counts = new_mat)
    x[[paste0(species, '_RNA')]] <- new_mat
    DefaultAssay(x) <- paste0(species, '_RNA')
    return(x)
}

# https://www.ensembl.info/2009/01/21/how-to-get-all-the-orthologous-genes-between-two-species/
macaque.to.mouse <- read.csv("/projects/perslab/people/jmg776/projects/DVC/data/gene_info/macaque_to_mouse.txt")
rat.to.mouse <- read.csv("/projects/perslab/people/jmg776/projects/DVC/data/gene_info/rat_to_mouse.txt")
rat.to.mouse <- rbind(rat.to.mouse, c("", "Chat", "Chat"), c("", "Ramp2", "Ramp2"))
                                     
monkey.neurons <- addSpeciesAssay(monkey.neurons, ensembl.mapping = macaque.to.mouse, species = "mouse")
rat.neurons <- addSpeciesAssay(rat.neurons, ensembl.mapping = rat.to.mouse, species = "mouse")

In [None]:
# Genes shared by all four neuron datasets (mouse-orthologue names for
# macaque and rat, native names for the two mouse objects)
gene.sets <- list(rownames(mouse.neurons@assays$RNA),
                  rownames(monkey.neurons@assays$mouse_RNA),
                  rownames(rat.neurons@assays$mouse_RNA),
                  rownames(mouse.neurons.Ludwig@assays$RNA))
common.genes <- Reduce(intersect, gene.sets)

# For every object: build a new RNA assay restricted to `common.genes`, keep the
# original RNA assay under a species-specific name, and install the restricted
# assay as "RNA" (made the default assay). The old "RNA" is removed before the
# new one is assigned, so the order of the statements matters.

# Macaque: counts come from the mouse-orthologue assay, which is dropped afterwards
monkey.RNA <- CreateAssayObject(counts = monkey.neurons@assays$mouse_RNA@counts[common.genes, ])
monkey.neurons[['macaque_RNA']] <- monkey.neurons[['RNA']]
monkey.neurons[['RNA']] <- NULL
monkey.neurons[["RNA"]] <- monkey.RNA
DefaultAssay(monkey.neurons) <- "RNA"
monkey.neurons[["mouse_RNA"]] <- NULL


# Rat: same procedure as for macaque
rat.RNA <- CreateAssayObject(counts = rat.neurons@assays$mouse_RNA@counts[common.genes, ])
rat.neurons[['rat_RNA']] <- rat.neurons[['RNA']]
rat.neurons[['RNA']] <- NULL
rat.neurons[["RNA"]] <- rat.RNA
DefaultAssay(rat.neurons) <- "RNA"
rat.neurons[["mouse_RNA"]] <- NULL


# Mouse: counts come straight from RNA; the full original assay is kept as 'mouse_RNA'
mouse.RNA <- CreateAssayObject(counts = mouse.neurons@assays$RNA@counts[common.genes, ])
mouse.neurons[['mouse_RNA']] <- mouse.neurons[['RNA']]
mouse.neurons[['RNA']] <- NULL
mouse.neurons[["RNA"]] <- mouse.RNA
DefaultAssay(mouse.neurons) <- "RNA"


# Mouse (Ludwig): same procedure as for the other mouse object
mouse.RNA.Ludwig <- CreateAssayObject(counts = mouse.neurons.Ludwig@assays$RNA@counts[common.genes, ])
mouse.neurons.Ludwig[['mouse_RNA']] <- mouse.neurons.Ludwig[['RNA']]
mouse.neurons.Ludwig[['RNA']] <- NULL
mouse.neurons.Ludwig[["RNA"]] <- mouse.RNA.Ludwig
DefaultAssay(mouse.neurons.Ludwig) <- "RNA"

In [None]:
# Finding common genes + species mapping reduces number of UMIs - most in mouse, likely due to higher number of annotated genes, and by extension, pseudogenes
hist(mouse.neurons$nCount_RNA, breaks = 100)
min(mouse.neurons$nCount_RNA)

hist(rat.neurons$nCount_RNA, breaks = 100)
min(rat.neurons$nCount_RNA)

hist(monkey.neurons$nCount_RNA, breaks = 100)
min(monkey.neurons$nCount_RNA)

hist(mouse.neurons.Ludwig$nCount_RNA, breaks = 100)
min(mouse.neurons.Ludwig$nCount_RNA)

### 2.2. Run integration

In [None]:
# SCTransform every input separately, then integrate with SCT/CCA anchors.

# The macaque object is split by its "run" metadata column; each run is
# normalized on its own and enters the integration as a separate object.
monkey.list <- SplitObject(monkey.neurons, split.by = "run")
for (i in seq_along(monkey.list)) {  # seq_along() is safe for an empty list
  print(i)  # progress indicator
  # Remove the assay named "integrated" from this run before re-normalizing
  monkey.list[[i]][["integrated"]] <- NULL
  monkey.list[[i]] <- SCTransform(monkey.list[[i]], verbose = FALSE, method = "qpoisson")
}
mouse.neurons <- SCTransform(mouse.neurons, verbose = FALSE, method = "qpoisson")
mouse.neurons.Ludwig <- SCTransform(mouse.neurons.Ludwig, verbose = FALSE, method = "qpoisson")
rat.neurons <- SCTransform(rat.neurons, verbose = FALSE, method = "qpoisson")

# Number of dimensions used for anchor finding, integration and (later) UMAP/neighbors
dims <- 30
neurons.list <- c(mouse.neurons, rat.neurons, monkey.list, mouse.neurons.Ludwig)

# Raise the size limit for globals exported by the future framework
options(future.globals.maxSize = 80000000000)

# Integrate data
features <- SelectIntegrationFeatures(object.list = neurons.list, nfeatures = 3000)

neurons.list <- PrepSCTIntegration(object.list = neurons.list, anchor.features = features,
    verbose = FALSE)

neurons.anchors <- FindIntegrationAnchors(object.list = neurons.list, normalization.method = "SCT",
    anchor.features = features, reduction = "cca", verbose = FALSE, dims = 1:dims)
neurons <- IntegrateData(anchorset = neurons.anchors, normalization.method = "SCT",
    verbose = FALSE, dims = 1:dims)

In [None]:
# PCA -> UMAP -> neighbor graph -> clustering at two resolutions (0.1 and 1)
neurons <- neurons %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
saveRDS(neurons, file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/neurons_integrated_2025.rds")

## 3. Labelling

### 3.1. Quality control

In [None]:
neurons <- readRDS("/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/neurons_integrated_2025.rds")

In [None]:
# Distribution of RNA counts per cell: full range, then zoomed in below 3000
# to choose the filtering threshold
options(repr.plot.width = 10, repr.plot.height = 10)
hist(neurons$nCount_RNA, breaks = 200)  # Strong bimodal distribution due to common genes filtering and SCTransform native filtering (genes with low expression or little to no variance)
hist(subset(neurons, nCount_RNA < 3000)$nCount_RNA, breaks = 30)  # Cut at 2000

In [None]:
# Examine where low quality cells originated from
neurons_2000 <- subset(neurons, nCount_RNA < 2000)
table(neurons_2000$species)
table(neurons_2000$dataset)

In [None]:
# Keep cells with more than 2000 RNA counts, then recompute the embedding and
# the clusterings on the remaining cells
neurons <- subset(neurons, nCount_RNA > 2000)
dims <- 30
neurons <- neurons %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50, verbose = FALSE) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
DimPlot(neurons, group.by = "integrated_snn_res.1", raster = TRUE, label = TRUE) + NoLegend()
DimPlot(neurons, group.by = "dataset", raster = TRUE, shuffle = TRUE)
DimPlot(neurons, group.by = "treatment", raster = TRUE, shuffle = TRUE)

### 3.2. Neurotransmitter annotation

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)
DefaultAssay(neurons) <- "RNA"
VlnPlot(neurons, features = "Slc32a1", group.by = "integrated_snn_res.1", pt.size = 0, raster = TRUE) + NoLegend() # GABA
VlnPlot(neurons, features = "Gad1", group.by = "integrated_snn_res.1", pt.size = 0, raster = TRUE) + NoLegend() # GABA
VlnPlot(neurons, features = "Gad2", group.by = "integrated_snn_res.1", pt.size = 0, raster = TRUE) + NoLegend() # GABA
VlnPlot(neurons, features = "Slc17a6", group.by = "integrated_snn_res.1", pt.size = 0, raster = TRUE) + NoLegend() # Glutamate
VlnPlot(neurons, features = "Chat", group.by = "integrated_snn_res.1", pt.size = 0, raster = TRUE) + NoLegend() # Acetylcholine

In [None]:
# Resolution-1 clusters assigned to each neurotransmitter class.
# NOTE(review): cluster 3 is listed under both glutamate and GABA. Because the
# GABA label is assigned after the glutamate label below, cluster 3 ends up as
# "GABA" - confirm which class is intended.
glutamate.clusters <- c(0, 10, 12, 13, 16, 18, 19, 22, 23, 24, 25, 26,
                        27, 28, 29, 3, 30, 31, 32, 33, 36, 37, 38, 39,
                        40, 41, 43, 44, 45, 46, 49, 51, 54)
GABA.clusters <- c(1, 11, 14, 15, 17, 2, 20, 3, 34, 4, 42, 48, 5, 52, 53, 6, 7, 8, 9)
acetylcholine.clusters <- c(21, 35)

# Label cells by cluster membership; cells in unlisted clusters stay NA and a
# later assignment overwrites an earlier one for the same cells
neurons$neurotransmitter <- NA
neurons$neurotransmitter[which(neurons$integrated_snn_res.1 %in% glutamate.clusters)] <- "Glutamate"
neurons$neurotransmitter[which(neurons$integrated_snn_res.1 %in% GABA.clusters)] <- "GABA"
neurons$neurotransmitter[which(neurons$integrated_snn_res.1 %in% acetylcholine.clusters)] <- "Acetylcholine"

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)
DimPlot(neurons, group.by = "neurotransmitter", raster = TRUE) 

#### 3.2.1. Neurotransmitter QC

In [None]:
source("/projects/perslab/people/jmg776/projects/DVC/code/flag_clusters.R")

neurons$dataset <- paste0(neurons$species, ".", neurons$year)
flagged.clusters <- flag.clusters(seurat.obj = neurons, genes = c("percent.mt", "Clic6", "Mbp"),
                                  resolution = "integrated_snn_res.1", quantile = 0.95)


In [None]:
# Update these plots, this is the new umap space 
FeaturePlot(neurons, features = "percent.mt", max.cutoff = "q90")
FeaturePlot(neurons, features = "Mbp", max.cutoff = "q90")
FeaturePlot(neurons, features = "Clic6", max.cutoff = "q90")

In [None]:
# Checking cluster 47 and 50 since they had no neurotransmitter marker
cluster_47_markers <- FindMarkers(neurons, ident.1 = 47)
cluster_47_markers %>% filter(pct.2 < 0.5 & pct.1 > 0.7) %>% arrange(desc(avg_log2FC))

In [None]:
serotonin.clusters <- 47 # Strong serotonin cluster
neurons$neurotransmitter[which(neurons$integrated_snn_res.1 %in% serotonin.clusters)] <- "Serotonin"

In [None]:
# Remove the flagged clusters, switch back to the integrated assay and
# recompute the embedding and the clusterings
neurons <- subset(neurons, integrated_snn_res.1 %in% flagged.clusters, invert = TRUE)
DefaultAssay(neurons) <- "integrated"
dims <- 30
neurons <- neurons %>%
  RunPCA(verbose = FALSE, npcs = 100) %>%
  RunUMAP(dims = 1:dims, n.neighbors = 50, verbose = FALSE) %>%
  FindNeighbors(dims = 1:dims, k.param = 50, verbose = FALSE) %>%
  FindClusters(resolution = 0.1, verbose = FALSE) %>%
  FindClusters(resolution = 1, verbose = FALSE)

In [None]:
DimPlot(neurons, group.by = "integrated_snn_res.1", label = TRUE, raster = TRUE) + NoLegend()
DimPlot(neurons, group.by = "dataset", raster = TRUE, shuffle = TRUE)
DimPlot(neurons, group.by = "treatment", raster = TRUE, shuffle = TRUE)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)
DefaultAssay(neurons) <- "RNA"

# Marker genes used to assign a neurotransmitter class to each resolution-1 cluster.
neurotransmitter.markers <- c(
  "Slc32a1", # GABA
  "Gad1",    # GABA
  "Gad2",    # GABA
  "Slc17a6", # Glutamate
  "Chat",    # Acetylcholine
  "Slc6a4"   # Serotonin
)

# One violin plot per marker, split by resolution-1 cluster, without points or legend.
for (marker in neurotransmitter.markers) {
  print(
    VlnPlot(neurons, features = marker, group.by = "integrated_snn_res.1",
            pt.size = 0, raster = TRUE) + NoLegend()
  )
}

In [None]:
# Manual assignment of resolution-1 clusters to neurotransmitter classes,
# based on the marker violin plots.
glutamate.clusters <- c(0, 7, 11, 12, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                        32, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 48, 50, 52)
GABA.clusters <- c(1, 2, 3, 4, 5, 6, 8, 9, 10, 13, 14, 15, 31, 33, 38, 40, 49, 51, 53)
acetylcholine.clusters <- c(19, 30)
serotonin.clusters <- 47

# Classes are applied in this order, so a cluster listed more than once ends
# up with the LAST class that mentions it.
neurotransmitter.clusters <- list(
  Glutamate = glutamate.clusters,
  GABA = GABA.clusters,
  Acetylcholine = acetylcholine.clusters,
  Serotonin = serotonin.clusters
)

# NOTE(review): cluster 38 is listed under both Glutamate and GABA; with the
# order above it is labelled GABA. Decide which class is intended and remove
# it from the other vector. Until then the overlap is reported, not hidden.
assigned.ids <- unlist(neurotransmitter.clusters, use.names = FALSE)
ambiguous.ids <- unique(assigned.ids[duplicated(assigned.ids)])
if (length(ambiguous.ids) > 0) {
  warning("Clusters assigned to more than one neurotransmitter (last assignment wins): ",
          paste(ambiguous.ids, collapse = ", "), call. = FALSE)
}

neurons$neurotransmitter <- NA
for (nt in names(neurotransmitter.clusters)) {
  nt.cells <- which(neurons$integrated_snn_res.1 %in% neurotransmitter.clusters[[nt]])
  neurons$neurotransmitter[nt.cells] <- nt
}

# Clusters that received no class keep NA and silently drop out of the
# downstream neurotransmitter subsets, so report them as well.
unassigned.ids <- setdiff(unique(as.character(neurons$integrated_snn_res.1)),
                          as.character(assigned.ids))
if (length(unassigned.ids) > 0) {
  warning("Clusters without a neurotransmitter assignment: ",
          paste(unassigned.ids, collapse = ", "), call. = FALSE)
}

In [None]:
# Reset the notebook plot size to 10 x 10 and show the embedding coloured by
# the assigned neurotransmitter class.
options(repr.plot.width = 10, repr.plot.height = 10)
DimPlot(neurons, group.by = "neurotransmitter", raster = TRUE)

#### 3.2.2. Subcluster by neurotransmitter

In [None]:
# Subcluster glutamatergic neurons
glutamate.neurons <- subset(neurons, neurotransmitter == "Glutamate")
DefaultAssay(glutamate.neurons) <- "integrated"

# PCA, neighbour graph and coarse (resolution 0.1) clustering on the subset;
# the coarse clusters are the "major" clusters that get split further below.
dims <- 30
glutamate.neurons <- RunPCA(glutamate.neurons, npcs = 100, verbose = FALSE)
glutamate.neurons <- FindNeighbors(glutamate.neurons, dims = seq_len(dims),
                                   k.param = 50, verbose = FALSE)
glutamate.neurons <- FindClusters(glutamate.neurons, resolution = 0.1, verbose = FALSE)
major.clusters <- unique(glutamate.neurons$integrated_snn_res.0.1)
glutamate.neurons$sub.cluster <- ""

for (i in major.clusters) {

  # Split major cluster i on the SNN graph; the result is written to a
  # temporary meta data column.
  glutamate.neurons <- FindSubCluster(glutamate.neurons, cluster = i,
                                      graph.name = "integrated_snn",
                                      subcluster.name = "sub.cluster.i",
                                      resolution = 0.1)

  # Copy the new labels for the cells of cluster i only.
  cells.in.cluster <- which(glutamate.neurons$integrated_snn_res.0.1 == i)
  glutamate.neurons$sub.cluster[cells.in.cluster] <-
    glutamate.neurons$sub.cluster.i[cells.in.cluster]

  # Remove tmp column
  kept.columns <- setdiff(colnames(glutamate.neurons@meta.data), "sub.cluster.i")
  glutamate.neurons@meta.data <- glutamate.neurons@meta.data[, kept.columns]

}

glutamate.neurons <- RunUMAP(glutamate.neurons, dims = seq_len(dims), n.neighbors = 50)

In [None]:
# Subcluster GABAergic neurons
GABA.neurons <- subset(neurons, neurotransmitter == "GABA")
DefaultAssay(GABA.neurons) <- "integrated"

# PCA, neighbour graph and coarse (resolution 0.1) clustering on the subset;
# the coarse clusters are the "major" clusters that get split further below.
dims <- 30
GABA.neurons <- RunPCA(GABA.neurons, npcs = 100, verbose = FALSE)
GABA.neurons <- FindNeighbors(GABA.neurons, dims = seq_len(dims),
                              k.param = 50, verbose = FALSE)
GABA.neurons <- FindClusters(GABA.neurons, resolution = 0.1, verbose = FALSE)
major.clusters <- unique(GABA.neurons$integrated_snn_res.0.1)
GABA.neurons$sub.cluster <- ""

for (i in major.clusters) {

  # Split major cluster i on the SNN graph; the result is written to a
  # temporary meta data column.
  GABA.neurons <- FindSubCluster(GABA.neurons, cluster = i,
                                 graph.name = "integrated_snn",
                                 subcluster.name = "sub.cluster.i",
                                 resolution = 0.1)

  # Copy the new labels for the cells of cluster i only.
  cells.in.cluster <- which(GABA.neurons$integrated_snn_res.0.1 == i)
  GABA.neurons$sub.cluster[cells.in.cluster] <-
    GABA.neurons$sub.cluster.i[cells.in.cluster]

  # Remove tmp column
  kept.columns <- setdiff(colnames(GABA.neurons@meta.data), "sub.cluster.i")
  GABA.neurons@meta.data <- GABA.neurons@meta.data[, kept.columns]

}

GABA.neurons <- RunUMAP(GABA.neurons, dims = seq_len(dims), n.neighbors = 50)

In [None]:
# Save temporary subclusters
# Copy the per-class sub-cluster labels back onto the full object, prefixed
# with the class they belong to.
neurons$sub.cluster <- NA

GABA.positions <- match(colnames(GABA.neurons), colnames(neurons))
neurons$sub.cluster[GABA.positions] <- paste0("GABA_", GABA.neurons$sub.cluster)

glutamate.positions <- match(colnames(glutamate.neurons), colnames(neurons))
neurons$sub.cluster[glutamate.positions] <- paste0("Glu_", glutamate.neurons$sub.cluster)

# Acetylcholine and serotonin neurons are not sub-clustered: they keep their
# resolution-1 cluster id behind a class prefix.
acetylcholine.positions <- which(neurons$neurotransmitter == "Acetylcholine")
neurons$sub.cluster[acetylcholine.positions] <-
  paste0("Chat_", neurons$integrated_snn_res.1[acetylcholine.positions])

serotonin.positions <- which(neurons$neurotransmitter == "Serotonin")
neurons$sub.cluster[serotonin.positions] <-
  paste0("Sero_", neurons$integrated_snn_res.1[serotonin.positions])

#### 3.2.3. CELLEX

In [None]:
# Point reticulate at the conda environment that has CELLEX installed; this
# has to happen before the first Python import.
use_condaenv("/home/jmg776/.conda/envs/CELLEX")

# convert = FALSE keeps the returned values as Python objects instead of
# converting them to R automatically.
pd <- import("pandas", convert = FALSE)
cellex <- import("cellex", convert = FALSE)

In [None]:
# Subset once per neurotransmitter class and reuse the subset for both the
# counts and the annotation.
glutamate.subset <- subset(neurons, neurotransmitter == "Glutamate")
GABA.subset <- subset(neurons, neurotransmitter == "GABA")

# Grab counts
counts_glu <- as.data.frame(GetAssayData(glutamate.subset, assay = "RNA", layer = "counts"))
counts_GABA <- as.data.frame(GetAssayData(GABA.subset, assay = "RNA", layer = "counts"))

# Grab meta data
meta_glu <- glutamate.subset$sub.cluster %>% as.data.frame()
meta_GABA <- GABA.subset$sub.cluster %>% as.data.frame()

# The subsets were only needed to build the inputs above.
rm(glutamate.subset, GABA.subset)

In [None]:
# Run CELLEX
# For each neurotransmitter class: build an ESObject from the raw counts and
# the sub-cluster annotation, compute it, and write the `esmu` result table to disk.
cellex.output.dir <- "/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output"

eso_glu <- cellex$ESObject(data = counts_glu, annotation = meta_glu,
                           normalize = TRUE, verbose = TRUE)
eso_glu$compute(verbose = TRUE)
eso_glu$results$esmu$to_csv(file.path(cellex.output.dir, "neurons_glu_2025.esmu.csv"))

eso_GABA <- cellex$ESObject(data = counts_GABA, annotation = meta_GABA,
                            normalize = TRUE, verbose = TRUE)
eso_GABA$compute(verbose = TRUE)
eso_GABA$results$esmu$to_csv(file.path(cellex.output.dir, "neurons_GABA_2025.esmu.csv"))

In [None]:
# Load in CELLEX results
# Read one esmu CSV, use its `gene` column as row names and drop the first
# column, leaving a genes x cell-types table.
load.esmu <- function(path) {
  esmu <- read.csv(path)
  rownames(esmu) <- esmu$gene
  esmu[, -1]
}

cellex.glutamate <- load.esmu("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/neurons_glu_2025.esmu.csv")
cellex.GABA <- load.esmu("/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output/neurons_GABA_2025.esmu.csv")

In [None]:
# For each neurotransmitter subset, show the coarse (resolution 0.1) clusters
# and then the finer sub-clusters on the subset's own UMAP.
DimPlot(glutamate.neurons, group.by = "integrated_snn_res.0.1", label = TRUE, raster = TRUE)
DimPlot(glutamate.neurons, group.by = "sub.cluster", label = TRUE, raster = TRUE) + NoLegend()
DimPlot(GABA.neurons, group.by = "integrated_snn_res.0.1", label = TRUE, raster = TRUE)
DimPlot(GABA.neurons, group.by = "sub.cluster", label = TRUE, raster = TRUE) + NoLegend()

In [None]:
# Hierarchically cluster the cell types on their CELLEX profiles (Euclidean
# distance between the columns of the esmu table) and draw the dendrogram.
glutamate.dendro <- cellex.glutamate %>%
  t() %>%
  dist(method = "euclidean") %>%
  hclust() %>%
  as.dendrogram() %>%
  dendro_data()
ggdendrogram(glutamate.dendro, rotate = FALSE, size = 2)

GABA.dendro <- cellex.GABA %>%
  t() %>%
  dist(method = "euclidean") %>%
  hclust() %>%
  as.dendrogram() %>%
  dendro_data()
ggdendrogram(GABA.dendro, rotate = FALSE, size = 2)

#### 3.2.4. Merge clusters

In [None]:
# Glutamate
# For every pair of glutamatergic sub-clusters that belong to the same major
# cluster, count how many of each one's top CELLEX markers have a value of 0
# in the other one. Pairs with fewer than `n` such markers are printed as
# candidates for merging.
n.top.markers <- 50    # top-ranked markers kept per sub-cluster
min.detection <- 1 / 5 # a marker must have non-zero counts in >= 20% of the sub-cluster's cells
n <- 10                # pairs with fewer unique markers than this are reported

glu.types <- colnames(cellex.glutamate)

# Major cluster of each sub-cluster ("Glu_1_0" -> "Glu_1"). Neighbours are
# found by comparing these for equality: a regex/prefix search for "Glu_1"
# would also pick up "Glu_10_*", "Glu_11_*", ...
glu.major <- sub("_[0-9]+$", "", glu.types)

# The CELLEX columns carry the "Glu_" prefix, whereas
# glutamate.neurons$sub.cluster stores bare "<major>_<sub>" labels. Bring the
# cell labels to the prefixed form (idempotent if the prefix is already
# there) so that cells can be looked up by CELLEX column name.
glu.cell.labels <- paste0("Glu_", sub("^Glu_", "", glutamate.neurons$sub.cluster))
glu.counts <- GetAssayData(glutamate.neurons, assay = "RNA", layer = "counts")

# Top markers per sub-cluster, computed once: genes ranked by decreasing
# CELLEX value, restricted to genes detected in enough of the sub-cluster's
# cells. NULL marks a sub-cluster without any matching cell.
glu.markers <- lapply(setNames(glu.types, glu.types), function(cell.type) {
  idx <- which(glu.cell.labels == cell.type)
  if (length(idx) == 0) {
    warning("No cells labelled ", cell.type, " in glutamate.neurons; skipping it",
            call. = FALSE)
    return(NULL)
  }
  ranked <- rownames(cellex.glutamate)[order(cellex.glutamate[, cell.type], decreasing = TRUE)]
  # Sparse row sums avoid densifying the count matrix; drop = FALSE keeps a
  # matrix even for a single-cell sub-cluster.
  n.detected <- Matrix::rowSums(glu.counts[ranked, idx, drop = FALSE] != 0)
  head(ranked[n.detected >= length(idx) * min.detection], n.top.markers)
})

for (k in seq_along(glu.types)) {

  cell.type <- glu.types[k]
  neighbors <- setdiff(glu.types[glu.major == glu.major[k]], cell.type)

  for (neighbor in neighbors) {

    if (is.null(glu.markers[[cell.type]]) || is.null(glu.markers[[neighbor]])) {
      next
    }

    # Markers of one sub-cluster that are completely unspecific (0) in the other
    unique.markers <-
      sum(cellex.glutamate[glu.markers[[cell.type]], neighbor] == 0, na.rm = TRUE) +
      sum(cellex.glutamate[glu.markers[[neighbor]], cell.type] == 0, na.rm = TRUE)

    if (unique.markers < n) {
      print(paste0(cell.type, " and ", neighbor, " lack unique markers"))
    }
  }
}

In [None]:
# GABA
# For every pair of GABAergic sub-clusters that belong to the same major
# cluster, count how many of each one's top CELLEX markers have a value of 0
# in the other one. Pairs with fewer than `n` such markers are printed as
# candidates for merging.
n.top.markers <- 50    # top-ranked markers kept per sub-cluster
min.detection <- 1 / 5 # a marker must have non-zero counts in >= 20% of the sub-cluster's cells
n <- 10                # pairs with fewer unique markers than this are reported

GABA.types <- colnames(cellex.GABA)

# Major cluster of each sub-cluster ("GABA_1_0" -> "GABA_1"). Neighbours are
# found by comparing these for equality: a regex/prefix search for "GABA_1"
# would also pick up "GABA_10_*", "GABA_11_*", ...
GABA.major <- sub("_[0-9]+$", "", GABA.types)

# The CELLEX columns carry the "GABA_" prefix, whereas
# GABA.neurons$sub.cluster stores bare "<major>_<sub>" labels. Bring the cell
# labels to the prefixed form (idempotent if the prefix is already there) so
# that cells can be looked up by CELLEX column name.
GABA.cell.labels <- paste0("GABA_", sub("^GABA_", "", GABA.neurons$sub.cluster))
GABA.counts <- GetAssayData(GABA.neurons, assay = "RNA", layer = "counts")

# Top markers per sub-cluster, computed once: genes ranked by decreasing
# CELLEX value, restricted to genes detected in enough of the sub-cluster's
# cells. NULL marks a sub-cluster without any matching cell.
GABA.markers <- lapply(setNames(GABA.types, GABA.types), function(cell.type) {
  idx <- which(GABA.cell.labels == cell.type)
  if (length(idx) == 0) {
    warning("No cells labelled ", cell.type, " in GABA.neurons; skipping it",
            call. = FALSE)
    return(NULL)
  }
  ranked <- rownames(cellex.GABA)[order(cellex.GABA[, cell.type], decreasing = TRUE)]
  # Sparse row sums avoid densifying the count matrix; drop = FALSE keeps a
  # matrix even for a single-cell sub-cluster.
  n.detected <- Matrix::rowSums(GABA.counts[ranked, idx, drop = FALSE] != 0)
  head(ranked[n.detected >= length(idx) * min.detection], n.top.markers)
})

for (k in seq_along(GABA.types)) {

  cell.type <- GABA.types[k]
  neighbors <- setdiff(GABA.types[GABA.major == GABA.major[k]], cell.type)

  for (neighbor in neighbors) {

    if (is.null(GABA.markers[[cell.type]]) || is.null(GABA.markers[[neighbor]])) {
      next
    }

    # Markers of one sub-cluster that are completely unspecific (0) in the other
    unique.markers <-
      sum(cellex.GABA[GABA.markers[[cell.type]], neighbor] == 0, na.rm = TRUE) +
      sum(cellex.GABA[GABA.markers[[neighbor]], cell.type] == 0, na.rm = TRUE)

    if (unique.markers < n) {
      print(paste0(cell.type, " and ", neighbor, " lack unique markers"))
    }
  }
}

In [None]:
# List the distinct sub-cluster labels in sorted order (NA is dropped by sort()).
neurons$sub.cluster %>%
  unique() %>%
  sort()

In [None]:
# Final cell-type labels: start from the sub-cluster labels, merge the
# sub-clusters listed below and renumber so the ids stay contiguous.
# All merges are applied in one pass on the ORIGINAL labels (old -> new).
cell.type.recode <- c(
  # Glutamate
  "Glu_1_1" = "Glu_1_0",   # merged into Glu_1_0
  "Glu_1_2" = "Glu_1_1",   # renumbered after the merge
  "Glu_9_1" = "Glu_9_0",   # merged into Glu_9_0
  # GABA
  "GABA_7_1" = "GABA_7_0", # merged into GABA_7_0
  "GABA_1_2" = "GABA_1_0", # merged into GABA_1_0
  "GABA_1_3" = "GABA_1_2"  # renumbered after the merge
)

final.labels <- neurons$sub.cluster
recode.hit <- match(final.labels, names(cell.type.recode))
to.recode <- !is.na(recode.hit)
final.labels[to.recode] <- cell.type.recode[recode.hit[to.recode]]

# Acetylcholine
final.labels <- gsub("Chat_19", "Chat_0_0", final.labels)
final.labels <- gsub("Chat_30", "Chat_1_0", final.labels)

# Serotonin
final.labels <- gsub("Sero_47", "Sero_0_0", final.labels)

neurons$cell.type <- final.labels

In [None]:
# Major cell type: the coarse (resolution 0.1) cluster within each
# neurotransmitter class. Cells that match none of the cases below keep "".
neurons$major.cell.type <- ""

glutamate.positions <- match(colnames(glutamate.neurons), colnames(neurons))
neurons$major.cell.type[glutamate.positions] <-
  paste0("Glu_", glutamate.neurons$integrated_snn_res.0.1)

GABA.positions <- match(colnames(GABA.neurons), colnames(neurons))
neurons$major.cell.type[GABA.positions] <-
  paste0("GABA_", GABA.neurons$integrated_snn_res.0.1)

# Acetylcholine / serotonin: strip the trailing "_0" from the cell-type label
# (e.g. "Chat_1_0" -> "Chat_1").
acetylcholine.positions <- grep("Chat", neurons$cell.type)
neurons$major.cell.type[acetylcholine.positions] <-
  gsub("_0$", "", neurons$cell.type[acetylcholine.positions])

serotonin.positions <- grep("Sero", neurons$cell.type)
neurons$major.cell.type[serotonin.positions] <-
  gsub("_0$", "", neurons$cell.type[serotonin.positions])

In [None]:
options(repr.plot.width = 15, repr.plot.height = 15)

# Don't mind the colors: a randomly shuffled rainbow palette is used only to
# get hard contrasts between clusters when checking the cluster partitions.
n.cell.types <- length(unique(neurons$cell.type))
contrast.palette <- sample(rainbow(n.cell.types))
DimPlot(neurons, group.by = "cell.type", cols = contrast.palette,
        label = TRUE, raster = TRUE) + NoLegend()

In [None]:
# Persist the annotated neuron object (cell.type and major.cell.type included).
saveRDS(
  object = neurons,
  file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/integrated/neurons_finalized_2025.rds"
)

### 3.3. CELLEX by species (marker genes)

In [None]:
# Subset once per species and reuse the subset for both the counts and the
# annotation.
mouse.subset <- subset(neurons, species == "mouse")
rat.subset <- subset(neurons, species == "rat")
macaque.subset <- subset(neurons, species == "macaque")

# Grab counts
counts_mouse <- as.data.frame(GetAssayData(mouse.subset, assay = "RNA", layer = "counts"))
counts_rat <- as.data.frame(GetAssayData(rat.subset, assay = "RNA", layer = "counts"))
counts_macaque <- as.data.frame(GetAssayData(macaque.subset, assay = "RNA", layer = "counts"))

# Grab meta data
meta_mouse <- mouse.subset$cell.type %>% as.data.frame()
meta_rat <- rat.subset$cell.type %>% as.data.frame()
meta_macaque <- macaque.subset$cell.type %>% as.data.frame()

# The subsets were only needed to build the inputs above.
rm(mouse.subset, rat.subset, macaque.subset)

In [None]:
# Run CELLEX
# For each species: build an ESObject from the raw counts and the cell-type
# annotation, compute it, and write the `esmu` result table to disk.
cellex.output.dir <- "/projects/perslab/people/jmg776/projects/DVC/output/CELLEX/output"

eso_mouse <- cellex$ESObject(data = counts_mouse, annotation = meta_mouse,
                             normalize = TRUE, verbose = TRUE)
eso_mouse$compute(verbose = TRUE)
eso_mouse$results$esmu$to_csv(file.path(cellex.output.dir, "neurons_mouse_2025.esmu.csv"))

eso_rat <- cellex$ESObject(data = counts_rat, annotation = meta_rat,
                           normalize = TRUE, verbose = TRUE)
eso_rat$compute(verbose = TRUE)
eso_rat$results$esmu$to_csv(file.path(cellex.output.dir, "neurons_rat_2025.esmu.csv"))

eso_macaque <- cellex$ESObject(data = counts_macaque, annotation = meta_macaque,
                               normalize = TRUE, verbose = TRUE)
eso_macaque$compute(verbose = TRUE)
eso_macaque$results$esmu$to_csv(file.path(cellex.output.dir, "neurons_macaque_2025.esmu.csv"))