In [1]:
library(scRNAseq)

Loading required package: SingleCellExperiment

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    

In [2]:
# Brain resources

In [3]:
## 1) Search: only "brain", human datasets
# brain <- searchDatasets(defineTextQuery("brain", partial = TRUE))

## 2) Tidy summar
# unlist_first <- function(x) paste0(unlist(x), collapse = ", ")
# summ <- data.frame(
#  name    = brain$name,
#  version = brain$version,
#  title   = brain$title,
#  taxonomy_id = sapply(brain$taxonomy_id, unlist_first),  # 9606 = human; 10090 = mouse
#  genome      = sapply(brain$genome,      unlist_first),  # GRCh* human; GRCm* mouse
#  n_genes = brain$rows,
#  n_cells = brain$columns,
#  assays  = sapply(brain$assays,  unlist_first),
#  coldata = sapply(brain$column_annotations, unlist_first),
#  stringsAsFactors = FALSE
# )
# summ <- summ[order(summ$name, summ$version), ]
# summ
# If you want only human brain:
# subset(summ, grepl("9606", taxonomy_id) | grepl("^GRCh", genome))

## 3) Pretty “details per study”
# for (i in seq_len(nrow(brain))) {
#  src <- tryCatch(as.data.frame(brain$sources[[i]]), error = function(e) NULL)
#  src_str <- if (!is.null(src) && nrow(src)) {
#    paste(sprintf("%s:%s", src$provider, src$id), collapse = " | ")
#  } else NA_character_

#  cat(sprintf("\n[%d] %s (version %s)\n", i, brain$name[i], brain$version[i]))
#  cat(sprintf("Title:   %s\n", brain$title[i]))
#  cat(sprintf("Genome:  %s   |   Taxonomy: %s\n",
#              paste(unlist(brain$genome[[i]]), collapse=", "),
#              paste(unlist(brain$taxonomy_id[[i]]), collapse=", ")))
#  cat(sprintf("Cells:   %s   |   Genes: %s\n", brain$columns[i], brain$rows[i]))
#  cat(sprintf("Assays:  %s\n",
#              paste(unlist(brain$assays[[i]]), collapse=", ")))
#  cat(sprintf("colData: %s\n",
#              paste(unlist(brain$column_annotations[[i]]), collapse=", ")))
#  cat(sprintf("Sources: %s\n", src_str))
# }

## 4) (Optional)Save the table
# write.csv(summ, "scRNAseq_brain_datasets.csv", row.names = FALSE)

# library(scRNAseq)
# name <- "zhong-prefrontal-2018"
# ver  <- "2023-12-22"

# File-backed by default; set realize.assays=TRUE to load into RAM (dgCMatrix)
# sce <- fetchDataset(name, ver, realize.assays = TRUE)
# sce

# assayNames(sce)              # counts
# dim(sce)                     # 24153 genes x 2394 cells (per your summary)
# names(colData(sce))          # developmental_stage, gender, sample, cell_types, week, ...

# Peek at labels
# head(unique(colData(sce)$cell_types))
# table(colData(sce)$developmental_stage, useNA = "ifany")

# lab <- colData(sce)[["cell_types"]]
# unique(lab)
# [1] "Neurons"           "GABAergic neurons" "Microglia"        
# [4] "Stem cells"        NA                  "Astrocytes"       
# [7] "OPC"       

# SeuratData humancortexref.SeuratData	

# library(SeuratData)
# AvailableData() 
# humancortexref.SeuratData	humancortexref	1.0.0	Azimuth Reference: humancortex	human	motor cortex	76533	cells

# CellXGene 

# Seattle AZ Alzheimer Disease Atlas (SEA-AD) : 
# https://cellxgene.cziscience.com/collections/1ca90a2d-2943-483d-b678-b809bf464c30
# https://cellxgene.cziscience.com/collections/283d65eb-dd53-496d-adb7-7570c7caa443

# Human Brain Atlas 
# https://cellxgene.cziscience.com/collections/283d65eb-dd53-496d-adb7-7570c7caa443

In [4]:
library("SingleR")
library(SingleCellExperiment)

# sce <- as.SingleCellExperiment(obj)
# ref <- celldex::HumanPrimaryCellAtlasData()

# pred_clust <- SingleR(test = sce, 
#                        ref = ref, 
#                        labels = ref$label.main,
#                        clusters = sce$seurat_clusters)

base_dir <- "/mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_scRNAseq"

# Human Motor Cortex : 
# https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x
# https://celltypes.brain-map.org/rnaseq/human_m1_10x?selectedVisualization=Heatmap&colorByFeature=Cell+Type&colorByFeatureValue=GAD1
# https://portal.brain-map.org/atlases-and-data/rnaseq/human-mtg-10x_sea-ad
# https://cellxgene.cziscience.com/collections/1ca90a2d-2943-483d-b678-b809bf464c30

# Mouse annotations
zeisel   <- readRDS(file.path(base_dir, "ZeiselBrainData.rds"))
tasic    <- readRDS(file.path(base_dir, "TasicBrainData.rds"))
romanov  <- readRDS(file.path(base_dir, "RomanovBrainData.rds"))

# sanity checks
stopifnot(inherits(zeisel, "SingleCellExperiment"))
stopifnot(inherits(tasic, "SingleCellExperiment"))
stopifnot(inherits(romanov, "SingleCellExperiment"))

show_sce <- function(sce, name) {
  cat("\n================ ", name, " ================\n", sep = "")
  cat("assays: ", paste(assayNames(sce), collapse = ", "), "\n", sep = "")

## 1) logcounts
  if ("logcounts" %in% assayNames(sce)) {
    logc <- assay(sce, "logcounts")
    cat("logcounts  dim: ", paste(dim(logc), collapse = " x "),
        "  class: ", class(logc), "\n", sep = "")
    print(as.matrix(logc[1:min(5, nrow(logc)), 1:min(5, ncol(logc))]))
  } else {
    cat("No 'logcounts' assay found.\n")
  }

## 2) raw counts (if available)
  raw_name <- if ("counts" %in% assayNames(sce)) "counts" else NA
  if (is.na(raw_name)) {
    cat("No raw 'counts' assay in this reference.\n")
  } else {
    cnt <- assay(sce, raw_name)
    cat(raw_name, " dim: ", paste(dim(cnt), collapse = " x "),
        "  class: ", class(cnt), "\n", sep = "")
    print(as.matrix(cnt[1:min(5, nrow(cnt)), 1:min(5, ncol(cnt))]))
  }

## 3) colData
  cd <- colData(sce)
  cat("colData columns: ", paste(colnames(cd), collapse = ", "), "\n", sep = "")
  print(head(as.data.frame(cd), 10))
 }

In [5]:
## run one by one
show_sce(zeisel,   "ZeiselBrainData")
show_sce(tasic,    "TasicBrainData")
show_sce(romanov,  "RomanovBrainData")


assays: counts
No 'logcounts' assay found.
counts dim: 20006 x 3005  class: dgCMatrix
         1772071015_C02 1772071017_G12 1772071017_A05 1772071014_B06
Tspan12               0              0              0              3
Tshz1                 3              1              0              2
Fnbp1l                3              1              6              4
Adamts15              0              0              0              0
Cldn12                1              1              1              0
         1772067065_H06
Tspan12               0
Tshz1                 2
Fnbp1l                1
Adamts15              0
Cldn12                0
colData columns: tissue, group #, total mRNA mol, well, sex, age, diameter, level1class, level2class
                       tissue group.. total.mRNA.mol well sex age diameter
1772071015_C02       sscortex       1          21580   11   1  21     0.00
1772071017_G12       sscortex       1          21748   95  -1  20     9.56
1772071017_A05       sscortex

In [6]:
## run one by one
colnames(colData(zeisel))
show_sce(zeisel,   "ZeiselBrainData")
unique(colData(zeisel)[, 8])


assays: counts
No 'logcounts' assay found.
counts dim: 20006 x 3005  class: dgCMatrix
         1772071015_C02 1772071017_G12 1772071017_A05 1772071014_B06
Tspan12               0              0              0              3
Tshz1                 3              1              0              2
Fnbp1l                3              1              6              4
Adamts15              0              0              0              0
Cldn12                1              1              1              0
         1772067065_H06
Tspan12               0
Tshz1                 2
Fnbp1l                1
Adamts15              0
Cldn12                0
colData columns: tissue, group #, total mRNA mol, well, sex, age, diameter, level1class, level2class
                       tissue group.. total.mRNA.mol well sex age diameter
1772071015_C02       sscortex       1          21580   11   1  21     0.00
1772071017_G12       sscortex       1          21748   95  -1  20     9.56
1772071017_A05       sscortex

In [7]:
## run one by one
colnames(colData(tasic))
show_sce(tasic,   "TasicBrainData")
unique(colData(tasic)[, 1])


assays: counts
No 'logcounts' assay found.
counts dim: 24058 x 1809  class: matrixarray
              Calb2_tdTpositive_cell_1 Calb2_tdTpositive_cell_2
0610005C13Rik                     0.00                     0.00
0610007C21Rik                   992.00                  2287.02
0610007L01Rik                     2.57                   177.00
0610007N19Rik                     0.00                     0.00
0610007P08Rik                     0.00                     0.00
              Calb2_tdTpositive_cell_3 Calb2_tdTpositive_cell_4
0610005C13Rik                     0.00                        0
0610007C21Rik                   491.78                     1932
0610007L01Rik                     0.00                        1
0610007N19Rik                     0.00                        0
0610007P08Rik                     0.00                        0
              Calb2_tdTpositive_cell_5
0610005C13Rik                        0
0610007C21Rik                     1425
0610007L01Rik             

In [8]:
## run one by one
colnames(colData(romanov))
show_sce(romanov,   "RomanovBrainData")
unique(colData(tasic)[, 1])


assays: counts
No 'logcounts' assay found.
counts dim: 24341 x 2881  class: dgCMatrix
       1772058147_F02 1772096158_E08 1772096144_A05 1772092004_A05
Xkr4                0              0              0              0
Rp1                 0              0              0              0
Sox17               0              0              0              0
Mrpl15              0              0              0              0
Lypla1              0              0              0              0
       1772092004_B06
Xkr4                0
Rp1                 0
Sox17               0
Mrpl15              3
Lypla1              0
colData columns: level1 class, level2 class (neurons only), level2 cluster number (neurons only), age (days postnatal), sex, cell diameter, acute stress, total molecules
               level1.class level2.class..neurons.only.
1772058147_F02       oligos                        <NA>
1772096158_E08       oligos                        <NA>
1772096144_A05       oligos              

In [9]:
# Show unique values for categorical columns in colData
df <- as.data.frame(SummarizedExperiment::colData(romanov))
cat_cols <- names(Filter(function(x) is.factor(x) || is.character(x) || is.logical(x), df))

invisible(lapply(cat_cols, function(nm) {
  cat("\n==", nm, "==\n", sep = "")
  print(unique(df[[nm]]))
}))


==level1.class==
[1] "oligos"      "astrocytes"  "ependymal"   "microglia"   "vsm"        
[6] "endothelial" "neurons"    

==level2.class..neurons.only.==
 [1] NA                                 "Adcyap1 1 (Tac1)"                
 [3] "Adcyap1 2"                        "Avp 1, high"                     
 [5] "Avp 2, high"                      "Avp 3, medium"                   
 [7] "Dopamine 1"                       "Dopamine 2 (low VMAT2)"          
 [9] "Dopamine 3"                       "Dopamine 4"                      
[11] "GABA 10"                          "GABA 11 (Nts 1)"                 
[13] "GABA 12 (Nts 2)"                  "GABA 13 (Galanin)"               
[15] "GABA 14 (Npy,Agrp)"               "GABA 15 (Npy-medium)"            
[17] "GABA 2 (Gucy1a3)"                 "GABA 3 (Crh+/-, Lhx6)"           
[19] "GABA 4 (Crh+/-,Pgr15l)"           "GABA 5 (Calcr, Lhx1)"            
[21] "GABA 6 (Otof, Lhx1)"              "GABA 7 (Pomc+/-)"                
[23] "GABA 8"     

In [10]:
# Show unique values for categorical columns in colData
df <- as.data.frame(SummarizedExperiment::colData(tasic))
cat_cols <- names(Filter(function(x) is.factor(x) || is.character(x) || is.logical(x), df))

invisible(lapply(cat_cols, function(nm) {
  cat("\n==", nm, "==\n", sep = "")
  print(unique(df[[nm]]))
}))


==mouse_line==
 [1] "Calb2"           "Chat"            "Chrna2"          "Ctgf"           
 [5] "Cux2"            "Gad2"            "Htr3a"           "Ndnf"           
 [9] "Nkx2-1"          "Nos1"            "Nr5a1"           "Ntsr1"          
[13] "Pvalb"           "PvalbD-Slc32a1"  "PvalbF-Gad2"     "Rbp4"           
[17] "Rorb"            "Scnn1a-Tg2"      "Scnn1a-Tg3"      "Slc17a6"        
[21] "Sst"             "Tac2"            "Vip"             "CAV_LP_Ipsi"    
[25] "CAV_VISp_Contra" "Trib2"          

==cre_driver_1==
 [1] "Calb2-IRES-Cre"   "Chat-IRES-Cre"    "Chrna2-Cre_OE25"  "Ctgf-2A-dgCre"   
 [5] "Cux2-CreERT2"     "Gad2-IRES-Cre"    "Htr3a-Cre_NO152"  "Ndnf-IRES2-dgCre"
 [9] "Nkx2-1-CreERT2"   "Nos1-CreERT2"     "Nr5a1-Cre"        "Ntsr1-Cre_GN220" 
[13] "Pvalb-IRES-Cre"   "Pvalb-2A-DreO"    "Pvalb-2A-FlpO"    "Rbp4-Cre_KL100"  
[17] "Rorb-IRES2-Cre"   "Scnn1a-Tg2-Cre"   "Scnn1a-Tg3-Cre"   "Slc17a6-IRES-Cre"
[21] "Sst-IRES-Cre"     "Tac2-IRES2-Cre"   "Vip-IRES-Cre" 

In [11]:
# Show unique values for categorical columns in colData
df <- as.data.frame(SummarizedExperiment::colData(zeisel))
cat_cols <- names(Filter(function(x) is.factor(x) || is.character(x) || is.logical(x), df))

invisible(lapply(cat_cols, function(nm) {
  cat("\n==", nm, "==\n", sep = "")
  print(unique(df[[nm]]))
}))


==tissue==
[1] "sscortex"       "ca1hippocampus"

==level1class==
[1] "interneurons"         "pyramidal SS"         "pyramidal CA1"       
[4] "oligodendrocytes"     "microglia"            "endothelial-mural"   
[7] "astrocytes_ependymal"

==level2class==
 [1] "Int10"     "Int6"      "Int9"      "Int2"      "Int4"      "Int1"     
 [7] "Int3"      "Int13"     "Int16"     "Int14"     "Int11"     "Int5"     
[13] "Int7"      "Int8"      "Int12"     "Int15"     "(none)"    "S1PyrL4"  
[19] "ClauPyr"   "S1PyrL5"   "S1PyrL23"  "S1PyrDL"   "S1PyrL5a"  "SubPyr"   
[25] "CA1Pyr1"   "S1PyrL6b"  "S1PyrL6"   "CA1Pyr2"   "CA1PyrInt" "CA2Pyr2"  
[31] "Oligo1"    "Oligo3"    "Oligo4"    "Oligo2"    "Oligo6"    "Oligo5"   
[37] "Mgl1"      "Mgl2"      "Pvm1"      "Pvm2"      "Vsmc"      "Vend2"    
[43] "Peric"     "Vend1"     "Astro2"    "Astro1"    "Choroid"   "Epend"    


In [12]:
# make rds objects from MTG_SEA resources

In [13]:
# /mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_MTG_SE

In [14]:
# working with a file from : /mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_MTG_SE

# setwd("/mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_MTG_SEA")
# dir="./"

# suppressPackageStartupMessages({
#  library(data.table)
#  library(Matrix)
#  library(SingleCellExperiment)
#  library(SummarizedExperiment)
# })

# mfile <- file.path(dir, "matrix.csv")
# meta  <- file.path(dir, "metadata.csv")

# dt <- fread(mfile)   
# head(dt[1:10,1:10])

# md <- fread(meta)
# colnames(md)
# head(md,2)

# unique(md$cluster_label)
# unique(md$class_label)
# unique(md$subclass_label)

# cell_ids <- as.character(dt[["sample_name"]])
# gene_ids <- as.character(names(dt)[-1])

# fast path (dense -> transpose -> sparse)
# counts <- try({
#  M <- as.matrix(dt[, -1, with = FALSE])   # cells x genes
#  mode(M) <- "numeric"
#  M <- t(M)                                # genes x cells
#  colnames(M) <- cell_ids
#  rownames(M) <- gene_ids
#  Matrix(M, sparse = TRUE)
# }, silent = TRUE)

# o <- match(colnames(counts), md$sample_name)

# dim(counts)
# head(o)

# o <- match(colnames(counts), md$sample_name)
# if (anyNA(o))
#  stop("Some cells in matrix.csv are missing from metadata.csv via 'sample_name'.")
# md <- as.data.frame(md[o, , drop = FALSE])

#  Build SCE and add logcounts
# sce <- SingleCellExperiment(
#  assays  = list(counts = counts),
#  colData = S4Vectors::DataFrame(md)
# )

# sce <- logNormCounts(sce)

# sce                             # class, dims, assays, reduced dims, altExps
# class(sce)
# dim(sce)                        # genes x cells
# assayNames(sce)                 # e.g., "counts", "logcounts"
# head(rownames(sce), 5)          # first genes
# head(colnames(sce), 5)          # first cells (if present)

# names(rowData(sce))             # feature metadata fields
# names(colData(sce))             # cell metadata fields
# head(as.data.frame(colData(sce))[ , 1:min(10, ncol(colData(sce))), drop=FALSE])

# reducedDimNames(sce)            # embeddings stored (e.g., PCA/TSNE/UMAP)
# altExpNames(sce)                # alternative experiments (e.g., ERCC, repeats)
# metadata(sce)                   # list of extra metadata (taxonomy, dendrogram, etc.)

# --- Basic counts/logcounts checks ---
# has_counts   <- "counts"    %in% assayNames(sce)
# has_logcounts<- "logcounts" %in% assayNames(sce)
# cat("Has counts:", has_counts, " | Has logcounts:", has_logcounts, "\n")

# saveRDS(sce, file.path(dir, "brain_refs_MTG_SEA.from.matrix.metadata.sce.rds"))

In [15]:
mtgsea <- "/mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_MTG_SEA/brain_refs_MTG_SEA.from.matrix.metadata.sce.rds"
mtg_sea <- readRDS(mtgsea)
# Inspect its structure
class(mtg_sea)
# str(mtg_sea)

# .$ cluster_label              : chr [1:76533] "Inh L1-2 SST CCNJL" "Exc L5-6 FEZF2 IFNG-AS1" "Exc L3-5 RORB LINC01202" "Exc L2 LINC00507 GLRA3" ...
# .$ class_label                : chr [1:76533] "GABAergic" "Glutamatergic" "Glutamatergic" "Glutamatergic" ...
# .$ subclass_label             : chr [1:76533] "Sst" "L5/6 NP" "L5 IT" "L2/3 IT" ...

# Check available columns in colData
colnames(colData(mtg_sea))

# Get unique values for each label
unique_clusters    <- unique(mtg_sea$cluster_label)
unique_classes     <- unique(mtg_sea$class_label)
unique_subclasses  <- unique(mtg_sea$subclass_label)

# Print counts + first few
cat("cluster_label:", length(unique_clusters), "unique\n")
print(unique_clusters)

cat("class_label:", length(unique_classes), "unique\n")
print(unique_classes)

cat("subclass_label:", length(unique_subclasses), "unique\n")
print(unique_subclasses)

cluster_label: 127 unique
  [1] "Inh L1-2 SST CCNJL"          "Exc L5-6 FEZF2 IFNG-AS1"    
  [3] "Exc L3-5 RORB LINC01202"     "Exc L2 LINC00507 GLRA3"     
  [5] "Oligo L2-6 OPALIN FTH1P3"    "Exc L5-6 FEZF2 C9orf135-AS1"
  [7] "Exc L3-5 FEZF2 ASGR2"        "Exc L3-5 RORB LNX2"         
  [9] "Exc L5 THEMIS RGPD6"         "Exc L3 LAMP5 CARM1P1"       
 [11] "Exc L2-3 RORB CCDC68"        "Inh L5 SST RPL35AP11"       
 [13] "Exc L3 RORB OTOGL"           "Exc L3-5 RORB RPRM"         
 [15] "Exc L6 FEZF2 KLK7"           "Inh L1-6 LAMP5 NES"         
 [17] "Exc L6 THEMIS LINC00343"     "Inh L5-6 PVALB MEPE"        
 [19] "Inh L1-3 VIP CHRNA2"         "Inh L3-5 SST GGTLC3"        
 [21] "Exc L5 THEMIS VILL"          "Exc L3 THEMIS ENPEP"        
 [23] "Oligo L3-6 OPALIN ENPP6"     "Inh L2-5 PVALB RPH3AL"      
 [25] "Inh L1-3 VIP HSPB6"          "Exc L5 FEZF2 CSN1S1"        
 [27] "Inh L5-6 PVALB FAM150B"      "Exc L5 THEMIS SLC22A18"     
 [29] "Inh L3-5 VIP TAC3"           "Exc L5 FEZF2 

In [16]:
# Inspect the object type
class(mtg_sea)

# Extract rownames (gene IDs)
ids <- sub("\\.\\d+$", "", rownames(mtg_sea))   # strip version suffixes like .12

# Flag Ensembl IDs
is_ensg <- grepl("^ENSG", ids)

# Count totals
n_total  <- length(ids)
n_ensg   <- sum(is_ensg)
n_symbol <- n_total - n_ensg

cat("Object class:", class(mtg_sea), "\n")
cat("Total genes:", n_total, "\n")
cat("Ensembl (ENSG) IDs:", n_ensg, "\n")
cat("Other (likely gene symbols):", n_symbol, "\n")

# Peek at examples
cat("\nExample Ensembl IDs:\n")
print(head(ids[is_ensg]))

cat("\nExample non-Ensembl IDs:\n")
print(head(ids[!is_ensg]))

Object class: SingleCellExperiment 
Total genes: 50281 
Ensembl (ENSG) IDs: 0 
Other (likely gene symbols): 50281 

Example Ensembl IDs:
character(0)

Example non-Ensembl IDs:
[1] "DDX11L1"      "WASH7P"       "MIR6859-1"    "MIR1302-2"    "FAM138A"     
[6] "LOC105379212"


In [17]:
mtgseaad <- "/mnt/nfs/CX000008_DS1/projects/btanasa/brain_refs_MTG_SEA_AD/Reference_MTG_RNAseq_all-nuclei.2022-06-07.sce.rds"
mtg_sea_ad <- readRDS(mtgseaad)

# Inspect its structure
class(mtg_sea_ad)

#.$ cluster_label            : chr [1:166868] "Pax6_1" NA "L5/6 NP_1" "L5 IT_7" ...
#.$ subclass_label           : chr [1:166868] "Pax6" NA "L5/6 NP" "L5 IT" ...
#.$ class_label              : chr [1:166868] "Neuronal: GABAergic" "NA" "Neuronal: Glutamatergic" "Neuronal: Glutamatergic" ...
#.$ GA_cluster_label         : chr [1:166868] "Pax6_1" NA "L5/6 NP_1" "L5 IT_7" ...
#.$ GA_subclass_label        : chr [1:166868] "Pax6" NA "L5/6 NP" "L5 IT" ...
#.$ GA_neighborhood_label    : chr [1:166868] "lamp5_sncg_vip" NA "l5et_l56np_l6ct_l6b" "it_types" ...
#.$ CA_cluster_label         : chr [1:166868] "Pax6_1" NA "L5/6 NP_3" "L5 IT_1" ...
#.$ CA_subclass_label        : chr [1:166868] "Pax6" NA "L5/6 NP" "L5 IT" ...
#.$ CA_neighborhood_label    : chr [1:166868] "CGE Inh" NA "Deep Exc" "IT types" ..

# Check available columns in colData
colnames(colData(mtg_sea_ad))

# Get unique values for each label
unique_clusters    <- unique(mtg_sea_ad$cluster_label)
unique_subclasses  <- unique(mtg_sea_ad$subclass_label)
unique_classes     <- unique(mtg_sea_ad$class_label)

# Print counts + first few
cat("cluster_label:", length(unique_clusters), "unique\n")
print(unique_clusters)

cat("class_label:", length(unique_classes), "unique\n")
print(unique_classes)

cat("subclass_label:", length(unique_subclasses), "unique\n")
print(unique_subclasses)

cluster_label: 128 unique
  [1] "Pax6_1"       NA             "L5/6 NP_1"    "L5 IT_7"      "L6 CT_2"     
  [6] "L4 IT_2"      "Astro_1"      "L5 IT_2"      "L2/3 IT_5"    "Vip_9"       
 [11] "L6 CT_1"      "L2/3 IT_1"    "L5 IT_1"      "Sst Chodl_1"  "L6 IT Car3_1"
 [16] "L6 IT_2"      "Astro_2"      "Sncg_4"       "Pvalb_13"     "L4 IT_3"     
 [21] "Vip_1"        "L2/3 IT_13"   "Oligo_4"      "L2/3 IT_6"    "L2/3 IT_10"  
 [26] "Pvalb_15"     "Sst_19"       "Vip_19"       "Sst_22"       "L2/3 IT_8"   
 [31] "Sst_9"        "Vip_15"       "Sncg_1"       "VLMC_1"       "L2/3 IT_12"  
 [36] "L4 IT_1"      "Vip_18"       "L5 IT_5"      "L2/3 IT_3"    "Oligo_1"     
 [41] "L6 IT Car3_2" "Vip_4"        "Lamp5_5"      "Micro-PVM_2"  "OPC_2"       
 [46] "L4 IT_4"      "Lamp5_3"      "Pvalb_6"      "Vip_11"       "Oligo_2"     
 [51] "Lamp5_2"      "L6b_6"        "Sst_11"       "Lamp5_Lhx6_1" "Endo_1"      
 [56] "Lamp5_6"      "L6 IT Car3_3" "L6b_3"        "L5 IT_3"      "L5/6 NP_2"   
 [

In [18]:
# Inspect class
class(mtg_sea_ad)

# Extract feature IDs, strip version suffixes
ids <- sub("\\.\\d+$", "", rownames(mtg_sea_ad))

# Flag Ensembl IDs
is_ensg <- grepl("^ENSG", ids)

# Counts
n_total  <- length(ids)
n_ensg   <- sum(is_ensg)
n_symbol <- n_total - n_ensg

cat("Object class:", class(mtg_sea_ad), "\n")
cat("Total features:", n_total, "\n")
cat("Ensembl (ENSG) IDs:", n_ensg, "\n")
cat("Other (likely gene symbols):", n_symbol, "\n")

# Peek at examples
cat("\nExample Ensembl IDs:\n")
print(head(ids[is_ensg]))

cat("\nExample non-Ensembl IDs:\n")
print(head(ids[!is_ensg]))

Object class: SingleCellExperiment 
Total features: 36601 
Ensembl (ENSG) IDs: 0 
Other (likely gene symbols): 36601 

Example Ensembl IDs:
character(0)

Example non-Ensembl IDs:
[1] "MIR1302-2HG" "FAM138A"     "OR4F5"       "AL627309"    "AL627309"   
[6] "AL627309"   


In [19]:
# To work with : 

# cluster_label
# subclass_label