In [None]:
library(Seurat)
library(tidyverse)
options(repr.matrix.max.cols = 100)

# 1. Pre-processing

## 1.1. Filter poor-quality cells (low counts, likely doublets, mitochondrial contamination) and pre-process

In [None]:
# Load the three macaque snRNA-seq batches, attach per-cell annotations
# (hash.ID = animal, run = batch, area = tissue region, pool = reaction) and
# drop cells that fail the QC thresholds.

# Mitochondrial gene symbols: rows of the gene-info sheet whose
# mm10_Chromosome column equals "chrM".
# NOTE(review): the selection is keyed on an mm10 column; confirm that the
# resulting symbols match the macaque feature names they are intersected with.
mito.genes <- openxlsx::read.xlsx("/projects/perslab/people/jmg776/projects/DVC/data/gene_info/mito_genes.xlsx")
mito.genes <- mito.genes$Symbol[which(mito.genes$mm10_Chromosome == "chrM")]

# macaque batch 1: every cell is assigned to the same animal and area.
macaque1 <- readRDS("/projects/perslab/people/jmg776/projects/DVC/data/macaque_snRNA/220318_DVC_rhesus_seurat_obj.rds")
macaque1$hash.ID <- "macaque_25865"
macaque1$run <- "macaque1"
macaque1$area <- "DVC"
macaque1$pool <- macaque1$orig.ident
# Candidate mitochondrial features are restricted to those present in the SCT
# assay of the loaded object.
macaque1[["percent.mt"]] <- PercentageFeatureSet(macaque1, features = intersect(rownames(macaque1@assays$SCT), mito.genes))

# invert = TRUE: cells matching ANY of the three conditions are removed.
macaque1 <- subset(macaque1, nCount_RNA > 50000 | nCount_RNA < 500 | percent.mt > 0.5, invert = TRUE)

# macaque batch 2: hash.mcl.ID 1-8 encodes animal and area. Consecutive ID
# pairs belong to one animal; even IDs are NTS, all other cells are AP.
# Cells whose hash.mcl.ID is not in 1-8 keep hash.ID "" and area "AP".
macaque2 <- readRDS("/projects/perslab/people/jmg776/projects/DVC/data/macaque_snRNA/220408_DVC_rhesus_seurat_obj.rds")
hash_to_animal <- c(
  "macaque_30790", "macaque_30790",
  "macaque_20275", "macaque_20275",
  "macaque_24693", "macaque_24693",
  "macaque_24938", "macaque_24938"
)
macaque2$hash.ID <- ""
for (mcl_id in seq_along(hash_to_animal)) {
  macaque2$hash.ID[which(macaque2$hash.mcl.ID == mcl_id)] <- hash_to_animal[[mcl_id]]
}
macaque2$run <- "macaque2"
macaque2$area <- "AP"
macaque2$area[which(macaque2$hash.mcl.ID %in% c(2, 4, 6, 8))] <- "NTS"
macaque2$pool <- macaque2$orig.ident
macaque2[["percent.mt"]] <- PercentageFeatureSet(macaque2, features = intersect(rownames(macaque2@assays$SCT), mito.genes))

macaque2 <- subset(macaque2, nCount_RNA > 50000 | nCount_RNA < 500 | percent.mt > 0.5, invert = TRUE)

# macaque batch 3: sample_id is split at the first "_" into animal (before)
# and area (after).
macaque3 <- readRDS("/projects/perslab/people/jmg776/projects/DVC/data/macaque_snRNA/241003_DVC_rhesus_seurat_obj.rds")
DefaultAssay(macaque3) <- "RNA"
macaque3 <- JoinLayers(macaque3)
sample_parts <- str_split_fixed(macaque3$sample_id, "_", 2)
macaque3$hash.ID <- paste0("macaque_", sample_parts[, 1])
macaque3$run <- "macaque3"
macaque3$area <- sample_parts[, 2]
macaque3$pool <- macaque3$reaction_id
# NOTE(review): features are looked up in the SCT assay although the default
# assay was just set to RNA, and the RNA feature names of this batch are only
# remapped via the gene/Ensembl table in a later cell. Confirm that the
# intersection with mito.genes is non-empty at this point.
macaque3[["percent.mt"]] <- PercentageFeatureSet(macaque3, features = intersect(rownames(macaque3@assays$SCT), mito.genes))

macaque3 <- subset(macaque3, nCount_RNA > 50000 | nCount_RNA < 500 | percent.mt > 0.5, invert = TRUE)

In [None]:
# Verify that subsetting was successful: plot the distribution of nCount_RNA
# for batch 1 after filtering and show its minimum.
hist(macaque1$nCount_RNA, breaks = 100)
min(macaque1$nCount_RNA)

In [None]:
# Prepare newest sample: replace the RNA assay row names of macaque3 using the
# Gene.stable.ID -> Gene.name table, then rebuild the RNA assay and re-run
# SCTransform on the renamed counts.
mapping <- read.table("/projects/perslab/people/jmg776/projects/DVC/analysis/revision/macaque_gene-ens_mapping.txt",
                      sep = "\t", header = TRUE)

ens_to_gene <- setNames(mapping$Gene.name, mapping$Gene.stable.ID)

old_names <- rownames(macaque3)

# Look each feature up once; unname() keeps the lookup's names attribute out
# of the new row names.
mapped_names <- unname(ens_to_gene[old_names])

# Features without a mapping, or mapped to an empty gene name, keep their
# original identifier.
has_symbol <- !is.na(mapped_names) & mapped_names != ""
new_names <- ifelse(has_symbol, mapped_names, old_names)

# Several identifiers can map to the same gene name; make the row names unique
# explicitly instead of relying on the assay constructor to do it implicitly.
new_names <- make.unique(new_names)

counts <- GetAssayData(macaque3, assay = "RNA", layer = "counts")
rownames(counts) <- new_names

macaque3[["RNA"]] <- CreateAssayObject(counts = counts)

macaque3 <- SCTransform(macaque3)

## 1.2. Run integration

In [None]:
# Integrate all cells. Batch 1 enters as a single object; batches 2 and 3 are
# split into one object per hash.ID.

# Raise the size limit for globals exported by the future framework before the
# heavy steps run.
options(future.globals.maxSize = 20000000000)

macaque.list <- c(macaque1, SplitObject(macaque2, split.by = "hash.ID"), SplitObject(macaque3, split.by = "hash.ID"))

# Normalise every object independently with SCTransform.
macaque.list <- lapply(macaque.list, SCTransform, verbose = FALSE, method = "qpoisson")

features <- SelectIntegrationFeatures(object.list = macaque.list, nfeatures = 3000)
macaque.list <- PrepSCTIntegration(object.list = macaque.list, anchor.features = features,
                                   verbose = FALSE)

macaque.anchors <- FindIntegrationAnchors(object.list = macaque.list, normalization.method = "SCT",
                                          reduction = "cca", anchor.features = features, verbose = FALSE)
macaque <- IntegrateData(anchorset = macaque.anchors, normalization.method = "SCT",
                         verbose = FALSE, dims = 1:30)

In [None]:
# Dimensionality reduction and clustering on the integrated assay.
# 100 PCs are computed; UMAP and the neighbour graph use the first 30.
DefaultAssay(macaque) <- "integrated"
macaque <- RunPCA(macaque, verbose = FALSE, npcs = 100)
macaque <- RunUMAP(macaque, dims = 1:30, n.neighbors = 30, verbose = FALSE)
macaque <- FindNeighbors(macaque, dims = 1:30, verbose = FALSE)
# Two clustering resolutions are stored; the later call (resolution = 1)
# runs last.
macaque <- FindClusters(macaque, resolution = 0.1, verbose = FALSE)
macaque <- FindClusters(macaque, resolution = 1, verbose = FALSE)

In [None]:
# Keep only the metadata columns needed downstream, then write the combined
# object to disk.
combined_meta_cols <- c(
  "nCount_RNA", "nFeature_RNA", "nCount_SCT", "nFeature_SCT",
  "percent.mt", "pool", "hash.ID", "area", "run",
  "integrated_snn_res.0.1", "integrated_snn_res.1"
)
macaque@meta.data <- macaque@meta.data[, combined_meta_cols]

saveRDS(
  object = macaque,
  file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/macaque/macaque_combined_2025.rds"
)

## 1.3. Inspect clusters and split cell types (glia, neurons)

### 1.3.1. Verify integration

In [None]:
# Embedding coloured by the coarse clustering (resolution 0.1), labelled.
DimPlot(
  object = macaque,
  group.by = "integrated_snn_res.0.1",
  label = TRUE
)
# Embedding coloured by the fine clustering (resolution 1), labelled.
DimPlot(
  object = macaque,
  group.by = "integrated_snn_res.1",
  label = TRUE
)
# Embedding coloured by sequencing run, to inspect batch mixing.
DimPlot(object = macaque, group.by = "run")

### 1.3.2. Define neurons

In [None]:
# Plot raw counts of three marker genes (STMN2, MAP2, RBFOX3) per run, in the
# order macaque3, macaque2, macaque1.
DefaultAssay(macaque) <- "RNA"

neuron_markers <- c("STMN2", "MAP2", "RBFOX3")

for (run_id in c("macaque3", "macaque2", "macaque1")) {
  # Subset each run once instead of once per marker.
  run_cells <- colnames(macaque)[which(macaque$run == run_id)]
  run_obj <- subset(macaque, cells = run_cells)

  for (marker in neuron_markers) {
    # Plots created inside a loop are not auto-displayed; print explicitly.
    print(FeaturePlot(run_obj, features = marker, max.cutoff = "q90", slot = "counts"))
  }
}

# Drop the temporary per-run object so it does not linger in the session.
rm(run_obj, run_cells)

In [None]:
# Resolution-1 clusters taken forward as neurons. Cluster 6 is part of this
# selection but is removed from the neuron object again in the next cell.
neuron_clusters <- c(6, 12, 14, 15, 16, 17, 18, 19, 20, 21, 28, 32, 33, 34, 36)
neuron_cells <- colnames(macaque)[macaque$integrated_snn_res.1 %in% neuron_clusters]
macaque_neurons <- subset(x = macaque, cells = neuron_cells)

# Inspect the selection by cluster, by run, and by per-cell RNA count.
DimPlot(object = macaque_neurons, group.by = "integrated_snn_res.1", label = TRUE)
DimPlot(object = macaque_neurons, group.by = "run")
FeaturePlot(object = macaque_neurons, features = "nCount_RNA")

In [None]:
# Remove resolution-1 cluster 6 from the neuron object (invert = TRUE keeps
# every cell that does not match the condition).
macaque_neurons <- subset(
  x = macaque_neurons,
  subset = integrated_snn_res.1 == 6,
  invert = TRUE
)
DimPlot(object = macaque_neurons)
# Use the resolution-1 cluster labels as the active identities from here on.
Idents(macaque_neurons) <- macaque_neurons$integrated_snn_res.1

In [None]:
# Re-integrate the neurons on their own: split by hash.ID, discard the old
# integrated assay, re-normalise each split from RNA and integrate again.
macaque.list <- SplitObject(macaque_neurons, split.by = "hash.ID")

macaque.list <- lapply(macaque.list, function(animal_obj) {
  DefaultAssay(animal_obj) <- "RNA"
  # The integrated assay from the whole-dataset integration is not reused.
  animal_obj[["integrated"]] <- NULL
  SCTransform(animal_obj, verbose = FALSE, method = "qpoisson")
})

# Raise the size limit for globals exported by the future framework.
options(future.globals.maxSize = 40000000000)

features <- SelectIntegrationFeatures(object.list = macaque.list, nfeatures = 3000)

macaque.list <- PrepSCTIntegration(object.list = macaque.list, anchor.features = features,
                                   verbose = FALSE)

macaque.anchors <- FindIntegrationAnchors(object.list = macaque.list, normalization.method = "SCT",
                                          anchor.features = features, reduction = "cca", verbose = FALSE)
macaque_neurons <- IntegrateData(anchorset = macaque.anchors, normalization.method = "SCT",
                                 verbose = FALSE, dims = 1:30)

In [None]:
# Dimensionality reduction and clustering of the re-integrated neurons.
# The integrated assay is selected explicitly, matching the whole-dataset
# cell; the "integrated_snn_res.0.1" column used below depends on it.
DefaultAssay(macaque_neurons) <- "integrated"
macaque_neurons <- RunPCA(macaque_neurons, verbose = FALSE, npcs = 100)
macaque_neurons <- RunUMAP(macaque_neurons, dims = 1:30, n.neighbors = 30, verbose = FALSE)
macaque_neurons <- FindNeighbors(macaque_neurons, dims = 1:30, k.param = 30, verbose = FALSE)
macaque_neurons <- FindClusters(macaque_neurons, resolution = 0.1, verbose = FALSE)

In [None]:
# Neuron embedding coloured by the resolution-0.1 clusters, labelled.
DimPlot(
  object = macaque_neurons,
  group.by = "integrated_snn_res.0.1",
  label = TRUE
)

In [None]:
# Keep only the metadata columns needed downstream, then write the neuron
# object to disk.
neuron_meta_cols <- c(
  "nCount_RNA", "nFeature_RNA", "nCount_SCT", "nFeature_SCT",
  "percent.mt", "pool", "hash.ID", "area", "run",
  "integrated_snn_res.0.1"
)
macaque_neurons@meta.data <- macaque_neurons@meta.data[, neuron_meta_cols]

saveRDS(
  object = macaque_neurons,
  file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/macaque/macaque_neurons_2025.rds"
)

### 1.3.3. Define glia

In [None]:
# Glia are every cell outside the cluster list used for the neuron selection.
# Cluster 6 is in that list, so it is excluded here as well.
non_glia_clusters <- c(6, 12, 14, 15, 16, 17, 18, 19, 20, 21, 28, 32, 33, 34, 36)
glia_cells <- colnames(macaque)[!(macaque$integrated_snn_res.1 %in% non_glia_clusters)]
macaque_glia <- subset(x = macaque, cells = glia_cells)

# Inspect the selection by cluster, by run, and by per-cell RNA count.
DimPlot(object = macaque_glia, group.by = "integrated_snn_res.1", label = TRUE)
DimPlot(object = macaque_glia, group.by = "run")
FeaturePlot(object = macaque_glia, features = "nCount_RNA")

In [None]:
# Re-integrate the glia on their own: split by hash.ID, discard the old
# integrated assay, re-normalise each split from RNA and integrate again.
macaque.list <- SplitObject(macaque_glia, split.by = "hash.ID")

macaque.list <- lapply(macaque.list, function(animal_obj) {
  DefaultAssay(animal_obj) <- "RNA"
  # The integrated assay from the whole-dataset integration is not reused.
  animal_obj[["integrated"]] <- NULL
  SCTransform(animal_obj, verbose = FALSE, method = "qpoisson")
})

# Raise the size limit for globals exported by the future framework.
options(future.globals.maxSize = 40000000000)

features <- SelectIntegrationFeatures(object.list = macaque.list, nfeatures = 3000)

macaque.list <- PrepSCTIntegration(object.list = macaque.list, anchor.features = features,
                                   verbose = FALSE)

macaque.anchors <- FindIntegrationAnchors(object.list = macaque.list, normalization.method = "SCT",
                                          anchor.features = features, reduction = "cca", verbose = FALSE)
macaque_glia <- IntegrateData(anchorset = macaque.anchors, normalization.method = "SCT",
                              verbose = FALSE, dims = 1:30)

In [None]:
# Dimensionality reduction and clustering of the re-integrated glia.
# The integrated assay is selected explicitly, matching the whole-dataset
# cell; the "integrated_snn_res.0.1" column used below depends on it.
DefaultAssay(macaque_glia) <- "integrated"
macaque_glia <- RunPCA(macaque_glia, verbose = FALSE, npcs = 100)
macaque_glia <- RunUMAP(macaque_glia, dims = 1:30, n.neighbors = 30, verbose = FALSE)
macaque_glia <- FindNeighbors(macaque_glia, dims = 1:30, k.param = 30, verbose = FALSE)
macaque_glia <- FindClusters(macaque_glia, resolution = 0.1, verbose = FALSE)
DimPlot(macaque_glia, group.by = "integrated_snn_res.0.1", label = TRUE)

In [None]:
# Keep only the metadata columns needed downstream, then write the glia
# object to disk.
glia_meta_cols <- c(
  "nCount_RNA", "nFeature_RNA", "nCount_SCT", "nFeature_SCT",
  "percent.mt", "pool", "hash.ID", "area", "run",
  "integrated_snn_res.0.1"
)
macaque_glia@meta.data <- macaque_glia@meta.data[, glia_meta_cols]

saveRDS(
  object = macaque_glia,
  file = "/projects/perslab/people/jmg776/projects/DVC/output/Seurat_objs/macaque/macaque_glia_2025.rds"
)