In [7]:
# Load required libraries
library(Seurat)
library(dplyr)
library(ggplot2)
library(cowplot)   # For combining plots
library(ggrepel)

# Set working directory (adjust as needed). Every relative path used in this
# script (the input RDS and any files written afterwards) resolves against it.
setwd("/Users/katherineridley/Projects/CosMx/APP/Cortex Results")

# Load the main Seurat object (adjust filename if needed). Fail early with a
# readable message when the file is not where we expect it.
input_rds <- "combined_seurat_C_plaquedistance.RDS"
if (!file.exists(input_rds)) {
  stop("Input file not found in ", getwd(), ": ", input_rds, call. = FALSE)
}
seurat_obj <- readRDS(input_rds)

# The per-cell-type processing is driven entirely by the `Celltypes` metadata
# column. Without this check a missing column would yield NULL, an empty
# table, and a loop that silently does nothing.
if (!"Celltypes" %in% colnames(seurat_obj@meta.data)) {
  stop(
    "Metadata column 'Celltypes' not found in ", input_rds,
    call. = FALSE
  )
}

# Check the distribution of cell types (NA = cells without an annotation).
# `[[ ]]` is used instead of `$` so the column name is matched exactly.
print("Celltypes distribution in the main Seurat object:")
print(table(seurat_obj@meta.data[["Celltypes"]], useNA = "ifany"))

# Get unique cell types (excluding NA)
cell_types_unique <- unique(seurat_obj@meta.data[["Celltypes"]])
cell_types_unique <- cell_types_unique[!is.na(cell_types_unique)]
print("Unique cell types to process:")
print(cell_types_unique)

# Subcluster every annotated cell type independently.
#
# For each label in `cell_types_unique` the loop subsets `seurat_obj`, runs
# PCA -> neighbour graph -> clustering -> UMAP on the subset, and writes up to
# three files into the current working directory:
#   UMAP_Subcluster_<type>.png
#   markers_Subclusters_<type>.csv
#   subclustered_<type>.rds
#
# RunPCA() is called with its default number of components (50 in current
# Seurat releases - confirm for the installed version), which needs more
# cells than components. Smaller subsets are skipped with a message instead
# of aborting the whole run part-way through.
min_cells_per_type <- 51L
pca_dims <- 1:10

for (ct in cell_types_unique) {
  message("Processing cell type: ", ct)

  # Subset the Seurat object by cell type using WhichCells() for safety
  cells_to_use <- WhichCells(seurat_obj, expression = Celltypes == ct)
  n_cells <- length(cells_to_use)

  if (n_cells == 0) {
    message("  Skipping ", ct, " - no cells found.")
    next
  }
  if (n_cells < min_cells_per_type) {
    message(
      "  Skipping ", ct, " - only ", n_cells, " cells (need at least ",
      min_cells_per_type, " for subclustering)."
    )
    next
  }

  sub_obj <- subset(seurat_obj, cells = cells_to_use)

  # The workflow reuses the variable features already stored on the object.
  # NOTE(review): these were presumably selected (and scaled) on the full
  # dataset, so the leading PCs of a subset may still reflect between-type
  # signal; consider re-running FindVariableFeatures()/ScaleData() per
  # subset - confirm with the analysis owner.
  var_features <- VariableFeatures(sub_obj)
  if (length(var_features) == 0) {
    stop(
      "No variable features stored for '", ct, "'. Run ",
      "FindVariableFeatures() (and ScaleData()) before subclustering.",
      call. = FALSE
    )
  }

  # Run standard subclustering workflow on the subset
  sub_obj <- RunPCA(sub_obj, features = var_features)
  sub_obj <- FindNeighbors(sub_obj, dims = pca_dims)
  sub_obj <- FindClusters(sub_obj, resolution = 0.5)
  sub_obj <- RunUMAP(sub_obj, dims = pca_dims)

  # Save the cluster identities as a new metadata column ("Subcluster")
  sub_obj$Subcluster <- Idents(sub_obj)

  # Spaces in the label are replaced once; the tag is shared by all outputs.
  file_tag <- gsub(" ", "_", ct)

  # Plot and save UMAP with cluster labels for visual inspection
  umap_plot <- DimPlot(
    sub_obj,
    reduction = "umap",
    label = TRUE,
    repel = TRUE
  ) +
    ggtitle(paste("Subcluster UMAP for", ct))
  umap_file <- paste0("UMAP_Subcluster_", file_tag, ".png")
  ggsave(filename = umap_file, plot = umap_plot, width = 8, height = 6)
  message("  Saved UMAP plot for ", ct, " in ", umap_file)

  # Run FindAllMarkers to find genes that characterize each cluster
  markers <- FindAllMarkers(sub_obj, only.pos = TRUE, min.pct = 0.25)

  if (is.null(markers) || nrow(markers) == 0) {
    # With a single cluster or no gene passing the filters the result has no
    # `cluster` column, so grouping on it would error and end the loop.
    message("  No marker genes found for ", ct, " - skipping marker outputs.")
  } else {
    # Quick summary of the top 3 markers per cluster (ties are kept).
    clusters_summary <- markers %>%
      group_by(cluster) %>%
      slice_max(avg_log2FC, n = 3) %>%
      ungroup()
    message("  Top markers per cluster for ", ct, ":")
    print(clusters_summary)

    # Save the marker genes as a CSV for this cell type
    markers_csv <- paste0("markers_Subclusters_", file_tag, ".csv")
    write.csv(markers, file = markers_csv, row.names = FALSE)
    message("  Saved markers CSV for ", ct, " as ", markers_csv)
  }

  # Save the subclustered object itself for future review
  sub_obj_file <- paste0("subclustered_", file_tag, ".rds")
  saveRDS(sub_obj, file = sub_obj_file)
  message("  Saved subclustered Seurat object for ", ct, " as ", sub_obj_file)
}

message("Subclustering and marker CSV generation complete.")


[1] "Celltypes distribution in the main Seurat object:"

        Astrocytes  Endothelial cells Excitatory Neurons Inhibitory Neurons 
             11829               6599             130652               8192 
       Macrophages          Microglia               OPCs   Oligodendrocytes 
               187                805               1319              26260 
              <NA> 
              6723 
[1] "Unique cell types to process:"
[1] "Excitatory Neurons" "Oligodendrocytes"   "Astrocytes"        
[4] "Microglia"          "Endothelial cells"  "Inhibitory Neurons"
[7] "OPCs"               "Macrophages"       


Processing cell type: Excitatory Neurons

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Snap25, Nrgn, Calm2, Rtn1, Egr1, Dnm1, Pcp4, Slc17a7, Aldoa, Syp 
	   Snca, Ywhag, Sncb, Gap43, Thy1, Mef2c, Ndrg4, Olfm1, Ppp3ca, Ctxn1 
	   Cck, Atp1b1, Zwint, Camk2b, Ywhaz, Stxbp1, Hsp90ab1, App, Syt1, Sh3gl2 
Negative:  Mbp, Gfap, Plp1, Ptgds, Apoe, Apod, Mobp, Cst3, Ttr, Clu 
	   Vim, Glul, Atp1a2, Mt1, Ndrg2, Vtn,

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 130652
Number of edges: 3858222

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8881
Number of communities: 17
Elapsed time: 43 seconds


12:59:05 UMAP embedding parameters a = 0.9922 b = 1.112

12:59:05 Read 130652 rows and found 10 numeric columns

12:59:05 Using Annoy for neighbor search, n_neighbors = 30

12:59:05 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

12:59:15 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b449ad66b

12:59:15 Searching Annoy index using 1 thread, search_k = 3000

12:59:53 Annoy recall = 100%

12:59:54 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

12:59:57 Initializing from normalized Laplacian + noise (using RSpectra)

12:59:59 Commencing optimization for 200 epochs, with 5487048 positive edges

13:00:39 Optimization finished

Rasterizing points since number of points exceeds 

[90m# A tibble: 51 x 7[39m
[90m# Groups:   cluster [17][39m
   p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
   [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m     0      1.01  0.657 0.329         0 0       Pcp4   
[90m 2[39m     0      0.935 0.363 0.186         0 0       Pde1a  
[90m 3[39m     0      0.818 0.326 0.177         0 0       Slc8a1 
[90m 4[39m     0      2.28  0.868 0.313         0 1       Pcp4   
[90m 5[39m     0      1.44  0.702 0.29          0 1       Pak1   
[90m 6[39m     0      1.54  0.502 0.175         0 1       Pde1a  
[90m 7[39m     0      4.94  0.975 0.102         0 2       Gfap   
[90m 8[39m     0      3.28  0.315 0.084         0 2       Vim    
[90m 9[39m     0      1.58  0.361 0.21          0 2       Mt1    
[90m10[39m     0      1.72  0.487 0.15          0 3       Rasgrf2
[90m# 

  Saved markers CSV for Excitatory Neurons as markers_Subclusters_Excitatory_Neurons.csv

  Saved subclustered Seurat object for Excitatory Neurons as subclustered_Excitatory_Neurons.rds

Processing cell type: Oligodendrocytes

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Apod, Ptgds, Mag, Mbp, Mobp, Ndrg1, Cryab, Scd2, Gpr37 
	   Pllp, Mog, Olig1, Fa2h, Aspa, Ugt8a, Bin1, Myrf, Glul, Cd9 
	   Cntn2,

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 26260
Number of edges: 775235

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8574
Number of communities: 11
Elapsed time: 3 seconds


13:11:13 UMAP embedding parameters a = 0.9922 b = 1.112

13:11:13 Read 26260 rows and found 10 numeric columns

13:11:13 Using Annoy for neighbor search, n_neighbors = 30

13:11:13 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:11:14 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b11732dfd

13:11:14 Searching Annoy index using 1 thread, search_k = 3000

13:11:21 Annoy recall = 100%

13:11:21 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:11:22 Initializing from normalized Laplacian + noise (using RSpectra)

13:11:23 Commencing optimization for 200 epochs, with 1114254 positive edges

13:11:31 Optimization finished

  Saved UMAP plot for Oligodendrocytes in UMAP_Subc

[90m# A tibble: 33 x 7[39m
[90m# Groups:   cluster [11][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 0   [90m [39m           1.47 0.865 0.625 0   [90m [39m     0       Snap25
[90m 2[39m 1.61[90me[39m[31m-162[39m       1.33 0.332 0.166 1.53[90me[39m[31m-159[39m 0       Egr1  
[90m 3[39m 1.83[90me[39m[31m-159[39m       1.26 0.374 0.204 1.73[90me[39m[31m-156[39m 0       Pcp4  
[90m 4[39m 0   [90m [39m           1.02 0.875 0.534 0   [90m [39m     1       Gpr37 
[90m 5[39m 0   [90m [39m           1.15 1     0.693 0   [90m [39m     1       Ptgds 
[90m 6[39m 3.36[90me[39m[31m-166[39m       1.10 0.354 0.174 3.19[90me[39m[31m-163[39m 1       Fgfr2 
[90m 7[39m 0   [90m [39m           2.27 0.662 0.259 0   [90m [39m    

  Saved markers CSV for Oligodendrocytes as markers_Subclusters_Oligodendrocytes.csv

  Saved subclustered Seurat object for Oligodendrocytes as subclustered_Oligodendrocytes.rds

Processing cell type: Astrocytes

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Mbp, Snap25, Calm2, Nrgn, Dnm1, Apod, Ttr, App, Ptgds 
	   Rtn1, Sncb, Ndrg4, Hsp90ab1, Syp, Pcp4, Ywhag, Atp1b1, Zwint, Nap1l5 
	   Aldoa, Stmn

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 11829
Number of edges: 347279

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8057
Number of communities: 10
Elapsed time: 1 seconds


13:18:01 UMAP embedding parameters a = 0.9922 b = 1.112

13:18:01 Read 11829 rows and found 10 numeric columns

13:18:01 Using Annoy for neighbor search, n_neighbors = 30

13:18:01 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:18:02 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b20d045fe

13:18:02 Searching Annoy index using 1 thread, search_k = 3000

13:18:05 Annoy recall = 100%

13:18:06 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:18:07 Initializing from normalized Laplacian + noise (using RSpectra)

13:18:07 Commencing optimization for 200 epochs, with 486888 positive edges

13:18:10 Optimization finished

  Saved UMAP plot for Astrocytes in UMAP_Subcluster_

[90m# A tibble: 30 x 7[39m
[90m# Groups:   cluster [10][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m 9.68[90me[39m[31m- 63[39m      0.696 0.506 0.324 9.20[90me[39m[31m- 60[39m 0       Gria2  
[90m 2[39m 6.55[90me[39m[31m- 58[39m      0.695 0.507 0.336 6.22[90me[39m[31m- 55[39m 0       Vegfa  
[90m 3[39m 2.44[90me[39m[31m- 34[39m      0.690 0.307 0.191 2.32[90me[39m[31m- 31[39m 0       Egr1   
[90m 4[39m 0   [90m [39m          1.63  0.991 0.687 0   [90m [39m     1       Mfge8  
[90m 5[39m 1.04[90me[39m[31m-197[39m      0.697 0.982 0.793 9.85[90me[39m[31m-195[39m 1       Plpp3  
[90m 6[39m 2.56[90me[39m[31m- 45[39m      0.740 0.395 0.244 2.43[90me[39m[31m- 42[39m 1       Agt    
[90m 7[39m 0   [90m [39

  Saved markers CSV for Astrocytes as markers_Subclusters_Astrocytes.csv

  Saved subclustered Seurat object for Astrocytes as subclustered_Astrocytes.rds

Processing cell type: Microglia

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Ptgds, Mbp, Apod, Snap25, Gfap, Atp1b1, Dnm1, Clu, Ndrg4 
	   Aldoa, Nap1l5, App, Ttr, Sncb, Rtn1, Nrgn, Meg3, Hsp90ab1, Calm2 
	   Thy1, Prnp, Zwint, Calm1, Mdh1, Syp, 

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 805
Number of edges: 28996

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6783
Number of communities: 4
Elapsed time: 0 seconds


13:24:08 UMAP embedding parameters a = 0.9922 b = 1.112

13:24:08 Read 805 rows and found 10 numeric columns

13:24:08 Using Annoy for neighbor search, n_neighbors = 30

13:24:08 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:24:08 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b49a33786

13:24:08 Searching Annoy index using 1 thread, search_k = 3000

13:24:08 Annoy recall = 100%

13:24:09 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:24:10 Initializing from normalized Laplacian + noise (using RSpectra)

13:24:10 Commencing optimization for 500 epochs, with 33170 positive edges

13:24:11 Optimization finished

  Saved UMAP plot for Microglia in UMAP_Subcluster_Micr

[90m# A tibble: 12 x 7[39m
[90m# Groups:   cluster [4][39m
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
      [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 4.32[90me[39m[31m- 5[39m      0.725 0.355 0.225  4.10[90me[39m[31m- 2[39m 0       Bex1/2
[90m 2[39m 9.18[90me[39m[31m- 5[39m      1.01  0.252 0.147  8.72[90me[39m[31m- 2[39m 0       Atp2b4
[90m 3[39m 2.36[90me[39m[31m- 3[39m      0.645 0.299 0.209  1   [90me[39m+ 0 0       Syt1  
[90m 4[39m 2.10[90me[39m[31m-56[39m      1.16  0.986 0.835  1.99[90me[39m[31m-53[39m 1       Csf1r 
[90m 5[39m 2.19[90me[39m[31m-30[39m      1.02  0.937 0.642  2.08[90me[39m[31m-27[39m 1       P2ry12
[90m 6[39m 7.93[90me[39m[31m-24[39m      0.927 0.891 0.669  7.53[90me[39m[31m-21[39m 1       Cx3cr1
[90m 7[39m 1.92[90me[39m[31m-

  Saved markers CSV for Microglia as markers_Subclusters_Microglia.csv

  Saved subclustered Seurat object for Microglia as subclustered_Microglia.rds

Processing cell type: Endothelial cells

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Mbp, Snap25, Dnm1, Atp1b1, Calm2, Nrgn, Rtn1, Ndrg4, Sncb 
	   Ttr, Pcp4, Nap1l5, Syp, Meg3, Ywhag, Aldoa, Zwint, Mdh1, Thy1 
	   Olfm1, Mobp, Map1b, Syt1, Stmn3, Sl

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6599
Number of edges: 191592

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7885
Number of communities: 9
Elapsed time: 0 seconds


13:29:54 UMAP embedding parameters a = 0.9922 b = 1.112

13:29:54 Read 6599 rows and found 10 numeric columns

13:29:54 Using Annoy for neighbor search, n_neighbors = 30

13:29:54 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:29:54 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b7a92642e

13:29:54 Searching Annoy index using 1 thread, search_k = 3000

13:29:56 Annoy recall = 100%

13:29:56 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:29:57 Initializing from normalized Laplacian + noise (using RSpectra)

13:29:57 Commencing optimization for 500 epochs, with 262126 positive edges

13:30:02 Optimization finished

  Saved UMAP plot for Endothelial cells in UMAP_Subcl

[90m# A tibble: 27 x 7[39m
[90m# Groups:   cluster [9][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 9.87[90me[39m[31m- 39[39m      0.565 0.81  0.682 9.38[90me[39m[31m- 36[39m 0       Snap25
[90m 2[39m 1.38[90me[39m[31m- 29[39m      0.614 0.649 0.507 1.31[90me[39m[31m- 26[39m 0       Dnm1  
[90m 3[39m 1.92[90me[39m[31m- 28[39m      0.596 0.577 0.445 1.83[90me[39m[31m- 25[39m 0       Nrgn  
[90m 4[39m 0   [90m [39m          1.71  1     0.77  0   [90m [39m     1       Cldn5 
[90m 5[39m 4.60[90me[39m[31m-101[39m      0.637 0.988 0.943 4.37[90me[39m[31m- 98[39m 1       Bsg   
[90m 6[39m 2.29[90me[39m[31m- 39[39m      0.560 0.851 0.726 2.17[90me[39m[31m- 36[39m 1       Slc2a1
[90m 7[39m 0   [90m [39m        

  Saved markers CSV for Endothelial cells as markers_Subclusters_Endothelial_cells.csv

  Saved subclustered Seurat object for Endothelial cells as subclustered_Endothelial_cells.rds

Processing cell type: Inhibitory Neurons

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Npy, Sst, Gad1, Gad2, Pvalb, Slc32a1, Nap1l5, Nos1, Slc6a1, Atp1b1 
	   Cnr1, Dner, Zwint, Sv2a, Reln, Scg2, Mdh1, Ndrg4, Stmn3, Sncb 
	  

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8192
Number of edges: 257427

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8995
Number of communities: 13
Elapsed time: 0 seconds


13:35:53 UMAP embedding parameters a = 0.9922 b = 1.112

13:35:53 Read 8192 rows and found 10 numeric columns

13:35:53 Using Annoy for neighbor search, n_neighbors = 30

13:35:53 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:35:53 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b20ef46e0

13:35:53 Searching Annoy index using 1 thread, search_k = 3000

13:35:55 Annoy recall = 100%

13:35:55 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:35:56 Initializing from normalized Laplacian + noise (using RSpectra)

13:35:56 Commencing optimization for 500 epochs, with 339454 positive edges

13:36:02 Optimization finished

  Saved UMAP plot for Inhibitory Neurons in UMAP_Subc

[90m# A tibble: 39 x 7[39m
[90m# Groups:   cluster [13][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m 1.98[90me[39m[31m-214[39m      0.681 0.991 0.465 1.88[90me[39m[31m-211[39m 0       Pvalb  
[90m 2[39m 2.28[90me[39m[31m-114[39m      0.644 0.913 0.692 2.16[90me[39m[31m-111[39m 0       Ldhb   
[90m 3[39m 6.90[90me[39m[31m- 28[39m      0.830 0.295 0.175 6.56[90me[39m[31m- 25[39m 0       Bcan   
[90m 4[39m 0   [90m [39m          2.81  1     0.466 0   [90m [39m     1       Pvalb  
[90m 5[39m 0   [90m [39m          1.50  0.976 0.679 0   [90m [39m     1       Ldhb   
[90m 6[39m 0   [90m [39m          1.31  0.972 0.699 0   [90m [39m     1       Sparcl1
[90m 7[39m 6.55[90me[39m[31m-202[39m      2.03  0.559 0.185 6

  Saved markers CSV for Inhibitory Neurons as markers_Subclusters_Inhibitory_Neurons.csv

  Saved subclustered Seurat object for Inhibitory Neurons as subclustered_Inhibitory_Neurons.rds

Processing cell type: OPCs

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Mbp, Snap25, Ttr, Nrgn, Gfap, Ndrg4, Sncb, Aldoa, Calm2, Dnm1 
	   Plp1, Atp1b1, Syp, Thy1, Slc17a7, Syt1, Map1b, Apod, Mdh1, Egr1 
	   Calm1, Ptgds

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1319
Number of edges: 42153

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7913
Number of communities: 7
Elapsed time: 0 seconds


13:41:59 UMAP embedding parameters a = 0.9922 b = 1.112

13:41:59 Read 1319 rows and found 10 numeric columns

13:41:59 Using Annoy for neighbor search, n_neighbors = 30

13:41:59 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:41:59 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b3d162d04

13:41:59 Searching Annoy index using 1 thread, search_k = 3000

13:41:59 Annoy recall = 100%

13:42:00 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:42:00 Initializing from normalized Laplacian + noise (using RSpectra)

13:42:00 Commencing optimization for 500 epochs, with 52620 positive edges

13:42:02 Optimization finished

  Saved UMAP plot for OPCs in UMAP_Subcluster_OPCs.png

[90m# A tibble: 21 x 7[39m
[90m# Groups:   cluster [7][39m
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
      [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m 5.88[90me[39m[31m-18[39m      1.39  0.571 0.351  5.58[90me[39m[31m-15[39m 0       Mfge8  
[90m 2[39m 4.31[90me[39m[31m-11[39m      1.31  0.293 0.137  4.10[90me[39m[31m- 8[39m 0       Sox9   
[90m 3[39m 9.12[90me[39m[31m-10[39m      1.50  0.253 0.118  8.67[90me[39m[31m- 7[39m 0       Gja1   
[90m 4[39m 2.42[90me[39m[31m-24[39m      0.607 0.989 0.736  2.30[90me[39m[31m-21[39m 1       Snap25 
[90m 5[39m 3.51[90me[39m[31m-15[39m      0.656 0.764 0.506  3.33[90me[39m[31m-12[39m 1       Nrgn   
[90m 6[39m 1.57[90me[39m[31m- 7[39m      0.663 0.435 0.269  1.49[90me[39m[31m- 4[39m 1       Rbfox3 
[90m 7[39m 5.2

  Saved markers CSV for OPCs as markers_Subclusters_OPCs.csv

  Saved subclustered Seurat object for OPCs as subclustered_OPCs.rds

Processing cell type: Macrophages

"Removing 6723 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Mbp, Snap25, Ndrg4, Dnm1, Calm1, Rtn1, Atp1b1, Clu, Calm2 
	   Syp, Aldoa, Nrgn, Sncb, Nap1l5, Olfm1, Pcsk1n, Mobp, Zwint, Ywhag 
	   Thy1, Sparcl1, Map1b, Stmn3, Camk2a, Dynll2, Hsp90ab1, Sn

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 187
Number of edges: 6185

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6370
Number of communities: 2
Elapsed time: 0 seconds


13:47:42 UMAP embedding parameters a = 0.9922 b = 1.112

13:47:42 Read 187 rows and found 10 numeric columns

13:47:42 Using Annoy for neighbor search, n_neighbors = 30

13:47:42 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:47:42 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b7ae580f0

13:47:42 Searching Annoy index using 1 thread, search_k = 3000

13:47:42 Annoy recall = 100%

13:47:43 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:47:44 Initializing from normalized Laplacian + noise (using RSpectra)

13:47:44 Commencing optimization for 500 epochs, with 7024 positive edges

13:47:44 Optimization finished

  Saved UMAP plot for Macrophages in UMAP_Subcluster_Mac

[90m# A tibble: 6 x 7[39m
[90m# Groups:   cluster [2][39m
     p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene 
     [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m
[90m1[39m 1.38[90me[39m[31m- 5[39m       2.54 0.321 0.053  1.31[90me[39m[31m- 2[39m 0       Braf 
[90m2[39m 4.87[90me[39m[31m- 4[39m       2.08 0.268 0.067  4.63[90me[39m[31m- 1[39m 0       Ngf  
[90m3[39m 8.02[90me[39m[31m- 4[39m       2.16 0.375 0.16   7.62[90me[39m[31m- 1[39m 0       Vtn  
[90m4[39m 1.60[90me[39m[31m-20[39m       3.12 0.947 0.571  1.52[90me[39m[31m-17[39m 1       Cd74 
[90m5[39m 3.65[90me[39m[31m-11[39m       2.63 0.64  0.214  3.47[90me[39m[31m- 8[39m 1       H2-Aa
[90m6[39m 6.87[90me[39m[31m- 9[39m       3.47 0.627 0.295  6.53[90me[39m[31m- 6[39m 1       Spp1 


  Saved markers CSV for Macrophages as markers_Subclusters_Macrophages.csv

  Saved subclustered Seurat object for Macrophages as subclustered_Macrophages.rds

Subclustering and marker CSV generation complete.



In [8]:
# Load required libraries
library(Seurat)
library(dplyr)
library(ggplot2)
library(cowplot)   # For combining plots
library(ggrepel)

# Set working directory (adjust as needed). Every relative path used in this
# script (the input RDS and any files written afterwards) resolves against it.
setwd("/Users/katherineridley/Projects/CosMx/APP/Hpc Results")

# Load the main Seurat object (adjust filename if needed). Fail early with a
# readable message when the file is not where we expect it.
input_rds <- "combined_seurat_H_plaquedistance.RDS"
if (!file.exists(input_rds)) {
  stop("Input file not found in ", getwd(), ": ", input_rds, call. = FALSE)
}
seurat_obj <- readRDS(input_rds)

# The per-cell-type processing is driven entirely by the `Celltypes` metadata
# column. Without this check a missing column would yield NULL, an empty
# table, and a loop that silently does nothing.
if (!"Celltypes" %in% colnames(seurat_obj@meta.data)) {
  stop(
    "Metadata column 'Celltypes' not found in ", input_rds,
    call. = FALSE
  )
}

# Check the distribution of cell types (NA = cells without an annotation).
# `[[ ]]` is used instead of `$` so the column name is matched exactly.
print("Celltypes distribution in the main Seurat object:")
print(table(seurat_obj@meta.data[["Celltypes"]], useNA = "ifany"))

# Get unique cell types (excluding NA)
cell_types_unique <- unique(seurat_obj@meta.data[["Celltypes"]])
cell_types_unique <- cell_types_unique[!is.na(cell_types_unique)]
print("Unique cell types to process:")
print(cell_types_unique)

# Subcluster every annotated cell type independently.
#
# For each label in `cell_types_unique` the loop subsets `seurat_obj`, runs
# PCA -> neighbour graph -> clustering -> UMAP on the subset, and writes up to
# three files into the current working directory:
#   UMAP_Subcluster_<type>.png
#   markers_Subclusters_<type>.csv
#   subclustered_<type>.rds
#
# RunPCA() is called with its default number of components (50 in current
# Seurat releases - confirm for the installed version), which needs more
# cells than components. Smaller subsets are skipped with a message instead
# of aborting the whole run part-way through.
min_cells_per_type <- 51L
pca_dims <- 1:10

for (ct in cell_types_unique) {
  message("Processing cell type: ", ct)

  # Subset the Seurat object by cell type using WhichCells() for safety
  cells_to_use <- WhichCells(seurat_obj, expression = Celltypes == ct)
  n_cells <- length(cells_to_use)

  if (n_cells == 0) {
    message("  Skipping ", ct, " - no cells found.")
    next
  }
  if (n_cells < min_cells_per_type) {
    message(
      "  Skipping ", ct, " - only ", n_cells, " cells (need at least ",
      min_cells_per_type, " for subclustering)."
    )
    next
  }

  sub_obj <- subset(seurat_obj, cells = cells_to_use)

  # The workflow reuses the variable features already stored on the object.
  # NOTE(review): these were presumably selected (and scaled) on the full
  # dataset, so the leading PCs of a subset may still reflect between-type
  # signal; consider re-running FindVariableFeatures()/ScaleData() per
  # subset - confirm with the analysis owner.
  var_features <- VariableFeatures(sub_obj)
  if (length(var_features) == 0) {
    stop(
      "No variable features stored for '", ct, "'. Run ",
      "FindVariableFeatures() (and ScaleData()) before subclustering.",
      call. = FALSE
    )
  }

  # Run standard subclustering workflow on the subset
  sub_obj <- RunPCA(sub_obj, features = var_features)
  sub_obj <- FindNeighbors(sub_obj, dims = pca_dims)
  sub_obj <- FindClusters(sub_obj, resolution = 0.5)
  sub_obj <- RunUMAP(sub_obj, dims = pca_dims)

  # Save the cluster identities as a new metadata column ("Subcluster")
  sub_obj$Subcluster <- Idents(sub_obj)

  # Spaces in the label are replaced once; the tag is shared by all outputs.
  file_tag <- gsub(" ", "_", ct)

  # Plot and save UMAP with cluster labels for visual inspection
  umap_plot <- DimPlot(
    sub_obj,
    reduction = "umap",
    label = TRUE,
    repel = TRUE
  ) +
    ggtitle(paste("Subcluster UMAP for", ct))
  umap_file <- paste0("UMAP_Subcluster_", file_tag, ".png")
  ggsave(filename = umap_file, plot = umap_plot, width = 8, height = 6)
  message("  Saved UMAP plot for ", ct, " in ", umap_file)

  # Run FindAllMarkers to find genes that characterize each cluster
  markers <- FindAllMarkers(sub_obj, only.pos = TRUE, min.pct = 0.25)

  if (is.null(markers) || nrow(markers) == 0) {
    # With a single cluster or no gene passing the filters the result has no
    # `cluster` column, so grouping on it would error and end the loop.
    message("  No marker genes found for ", ct, " - skipping marker outputs.")
  } else {
    # Quick summary of the top 3 markers per cluster (ties are kept).
    clusters_summary <- markers %>%
      group_by(cluster) %>%
      slice_max(avg_log2FC, n = 3) %>%
      ungroup()
    message("  Top markers per cluster for ", ct, ":")
    print(clusters_summary)

    # Save the marker genes as a CSV for this cell type
    markers_csv <- paste0("markers_Subclusters_", file_tag, ".csv")
    write.csv(markers, file = markers_csv, row.names = FALSE)
    message("  Saved markers CSV for ", ct, " as ", markers_csv)
  }

  # Save the subclustered object itself for future review
  sub_obj_file <- paste0("subclustered_", file_tag, ".rds")
  saveRDS(sub_obj, file = sub_obj_file)
  message("  Saved subclustered Seurat object for ", ct, " as ", sub_obj_file)
}

message("Subclustering and marker CSV generation complete.")


[1] "Celltypes distribution in the main Seurat object:"

        Astrocytes  Endothelial cells Excitatory Neurons Inhibitory Neurons 
             11311               4742              73903               4028 
       Macrophages          Microglia               OPCs   Oligodendrocytes 
               189                698                759              12743 
              <NA> 
              5139 
[1] "Unique cell types to process:"
[1] "Excitatory Neurons" "Oligodendrocytes"   "Astrocytes"        
[4] "Endothelial cells"  "Inhibitory Neurons" "OPCs"              
[7] "Macrophages"        "Microglia"         


Processing cell type: Excitatory Neurons

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Mbp, Gfap, Ptgds, Apoe, Plp1, Cst3, Apod, Clu, Mobp, Glul 
	   Aldoc, Vim, Atp1a2, Ndrg2, Mt1, Slc1a2, Sparc, Slc1a3, Vtn, Gstm1 
	   Dbi, Plpp3, Gpr37l1, Cryab, Olig1, S100a16, Scd2, Aqp4, Mag, Rgs5 
Negative:  Snap25, Nrgn, Ppp3ca, Calm2, Dnm1, Atp1b1, Camk2b, Slc17a7, Olfm1, Snca 
	   Wasf1, Rtn1, Nell2, Ywhag, Syp, A

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 73903
Number of edges: 2259611

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9059
Number of communities: 15
Elapsed time: 22 seconds


13:56:13 UMAP embedding parameters a = 0.9922 b = 1.112

13:56:13 Read 73903 rows and found 10 numeric columns

13:56:13 Using Annoy for neighbor search, n_neighbors = 30

13:56:13 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:56:18 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b67980637

13:56:18 Searching Annoy index using 1 thread, search_k = 3000

13:56:40 Annoy recall = 100%

13:56:41 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

13:56:42 Initializing from normalized Laplacian + noise (using RSpectra)

13:56:44 Commencing optimization for 200 epochs, with 3155288 positive edges

13:57:06 Optimization finished

  Saved UMAP plot for Excitatory Neurons in UMAP_Su

[90m# A tibble: 45 x 7[39m
[90m# Groups:   cluster [15][39m
   p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
   [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m     0      2.95  0.67  0.099         0 0       Jun   
[90m 2[39m     0      2.72  0.615 0.092         0 0       Tiam1 
[90m 3[39m     0      2.80  0.39  0.052         0 0       Sema5a
[90m 4[39m     0      0.967 0.766 0.407         0 1       Aldoc 
[90m 5[39m     0      0.929 0.992 0.656         0 1       Apoe  
[90m 6[39m     0      0.916 0.923 0.589         0 1       Slc1a2
[90m 7[39m     0      1.93  0.585 0.189         0 2       Gap43 
[90m 8[39m     0      1.56  0.428 0.148         0 2       Sstr4 
[90m 9[39m     0      1.56  0.362 0.123         0 2       Pde1a 
[90m10[39m     0      2.51  0.844 0.2           0 3       Stmn2 
[90m# i 35 more ro

  Saved markers CSV for Excitatory Neurons as markers_Subclusters_Excitatory_Neurons.csv

  Saved subclustered Seurat object for Excitatory Neurons as subclustered_Excitatory_Neurons.rds

Processing cell type: Oligodendrocytes

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Plp1, Ptgds, Apod, Mbp, Mag, Mobp, Cryab, Ndrg1, Scd2, Pllp 
	   Gpr37, Olig1, Mog, Fa2h, Glul, Ugt8a, Aspa, Slc12a2, Cntn2, Bin1 
	   M

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12743
Number of edges: 388281

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8406
Number of communities: 10
Elapsed time: 1 seconds


14:05:09 UMAP embedding parameters a = 0.9922 b = 1.112

14:05:09 Read 12743 rows and found 10 numeric columns

14:05:09 Using Annoy for neighbor search, n_neighbors = 30

14:05:09 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:05:10 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b2dc03a03

14:05:10 Searching Annoy index using 1 thread, search_k = 3000

14:05:12 Annoy recall = 100%

14:05:13 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:05:14 Initializing from normalized Laplacian + noise (using RSpectra)

14:05:14 Commencing optimization for 200 epochs, with 545578 positive edges

14:05:18 Optimization finished

  Saved UMAP plot for Oligodendrocytes in UMAP_Subcl

[90m# A tibble: 30 x 7[39m
[90m# Groups:   cluster [10][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 0   [90m [39m          1.54  1     0.834 0   [90m [39m     0       Plp1  
[90m 2[39m 7.99[90me[39m[31m-270[39m      1.22  0.704 0.351 7.59[90me[39m[31m-267[39m 0       Fa2h  
[90m 3[39m 2.60[90me[39m[31m-264[39m      1.29  0.651 0.302 2.47[90me[39m[31m-261[39m 0       Ugt8a 
[90m 4[39m 1.02[90me[39m[31m- 34[39m      0.873 0.619 0.531 9.73[90me[39m[31m- 32[39m 1       Snap25
[90m 5[39m 1.68[90me[39m[31m- 31[39m      0.843 0.296 0.192 1.60[90me[39m[31m- 28[39m 1       Sncb  
[90m 6[39m 3.04[90me[39m[31m- 27[39m      0.765 0.261 0.165 2.89[90me[39m[31m- 24[39m 1       Syn2  
[90m 7[39m 0   [90m [39m       

  Saved markers CSV for Oligodendrocytes as markers_Subclusters_Oligodendrocytes.csv

  Saved subclustered Seurat object for Oligodendrocytes as subclustered_Oligodendrocytes.rds

Processing cell type: Astrocytes

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Clu, Apoe, Plpp3, Gpr37l1, Atp1a2, Slc1a3, Gja1, Gfap, Aqp4, Mfge8 
	   Aldoc, Cst3, Slc1a2, Ntsr2, Bcan, Slc4a4, Vim, Sox9, Cxcl14, Cspg5 
	   Mt1, D

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 11311
Number of edges: 335583

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8195
Number of communities: 10
Elapsed time: 1 seconds


14:11:23 UMAP embedding parameters a = 0.9922 b = 1.112

14:11:23 Read 11311 rows and found 10 numeric columns

14:11:23 Using Annoy for neighbor search, n_neighbors = 30

14:11:23 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:11:23 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b2720b26c

14:11:23 Searching Annoy index using 1 thread, search_k = 3000

14:11:26 Annoy recall = 100%

14:11:27 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:11:27 Initializing from normalized Laplacian + noise (using RSpectra)

14:11:27 Commencing optimization for 200 epochs, with 469604 positive edges

14:11:31 Optimization finished

  Saved UMAP plot for Astrocytes in UMAP_Subcluster_

[90m# A tibble: 30 x 7[39m
[90m# Groups:   cluster [10][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m 0   [90m [39m          1.61  0.981 0.583 0   [90m [39m     0       Gfap   
[90m 2[39m 5.32[90me[39m[31m-263[39m      1.07  0.701 0.331 5.06[90me[39m[31m-260[39m 0       Vim    
[90m 3[39m 2.76[90me[39m[31m-234[39m      1.26  0.827 0.569 2.63[90me[39m[31m-231[39m 0       Aqp4   
[90m 4[39m 1.14[90me[39m[31m-117[39m      1.11  0.542 0.278 1.08[90me[39m[31m-114[39m 1       Mertk  
[90m 5[39m 1.29[90me[39m[31m-101[39m      1.06  0.495 0.253 1.22[90me[39m[31m- 98[39m 1       Vegfa  
[90m 6[39m 1.02[90me[39m[31m- 61[39m      1.08  0.291 0.137 9.71[90me[39m[31m- 59[39m 1       Gcgr   
[90m 7[39m 0   [90m [39

  Saved markers CSV for Astrocytes as markers_Subclusters_Astrocytes.csv

  Saved subclustered Seurat object for Astrocytes as subclustered_Astrocytes.rds

Processing cell type: Endothelial cells

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Cldn5, Vtn, Bsg, Pltp, Slc2a1, Flt1, Rgs5, Igfbp7, Ptgds, Itm2a 
	   Id1, B2m, Esam, Slc7a5, Vim, Sparc, Pecam1, Crip1, Fn1, Lsr 
	   Apod, Acta2, Ptn, Serinc3, Gnai2,

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 4742
Number of edges: 143024

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7974
Number of communities: 7
Elapsed time: 0 seconds


14:17:29 UMAP embedding parameters a = 0.9922 b = 1.112

14:17:29 Read 4742 rows and found 10 numeric columns

14:17:29 Using Annoy for neighbor search, n_neighbors = 30

14:17:29 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:17:29 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b53a1e485

14:17:29 Searching Annoy index using 1 thread, search_k = 3000

14:17:30 Annoy recall = 100%

14:17:30 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:17:31 Initializing from normalized Laplacian + noise (using RSpectra)

14:17:31 Commencing optimization for 500 epochs, with 191322 positive edges

14:17:35 Optimization finished

  Saved UMAP plot for Endothelial cells in UMAP_Subcl

[90m# A tibble: 21 x 7[39m
[90m# Groups:   cluster [7][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 1.84[90me[39m[31m- 18[39m      0.652 0.583 0.45  1.75[90me[39m[31m- 15[39m 0       Snap25
[90m 2[39m 1.96[90me[39m[31m-  5[39m      0.435 0.419 0.363 1.86[90me[39m[31m-  2[39m 0       Dnm1  
[90m 3[39m 5.69[90me[39m[31m-  4[39m      0.477 0.314 0.276 5.41[90me[39m[31m-  1[39m 0       Rgs12 
[90m 4[39m 0   [90m [39m          1.26  0.997 0.902 0   [90m [39m     1       Bsg   
[90m 5[39m 9.59[90me[39m[31m-224[39m      1.34  0.964 0.74  9.11[90me[39m[31m-221[39m 1       Cldn5 
[90m 6[39m 4.24[90me[39m[31m-136[39m      1.28  0.848 0.552 4.03[90me[39m[31m-133[39m 1       Itm2a 
[90m 7[39m 1.64[90me[39m[31m- 2

  Saved markers CSV for Endothelial cells as markers_Subclusters_Endothelial_cells.csv

  Saved subclustered Seurat object for Endothelial cells as subclustered_Endothelial_cells.rds

Processing cell type: Inhibitory Neurons

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Mbp, Plp1, Ptgds, Apoe, Gfap, Apod, Cst3, Mobp, Atp1a2, Slc1a2 
	   Nrgn, Vim, Camk2a, Slc1a3, Pcp4, Slc17a7, Glul, Gpr37l1, Ndrg2, Plpp3 

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 4028
Number of edges: 125915

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8918
Number of communities: 10
Elapsed time: 0 seconds


14:23:20 UMAP embedding parameters a = 0.9922 b = 1.112

14:23:20 Read 4028 rows and found 10 numeric columns

14:23:20 Using Annoy for neighbor search, n_neighbors = 30

14:23:20 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:23:20 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b29a202a8

14:23:20 Searching Annoy index using 1 thread, search_k = 3000

14:23:21 Annoy recall = 100%

14:23:21 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:23:22 Initializing from normalized Laplacian + noise (using RSpectra)

14:23:22 Commencing optimization for 500 epochs, with 163310 positive edges

14:23:25 Optimization finished

  Saved UMAP plot for Inhibitory Neurons in UMAP_Subc

[90m# A tibble: 30 x 7[39m
[90m# Groups:   cluster [10][39m
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
       [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 3.76[90me[39m[31m-214[39m       2.91 0.673 0.177 3.58[90me[39m[31m-211[39m 0       Id2   
[90m 2[39m 7.13[90me[39m[31m-167[39m       2.39 0.644 0.198 6.77[90me[39m[31m-164[39m 0       Nos1  
[90m 3[39m 6.48[90me[39m[31m- 92[39m       2.39 0.348 0.08  6.15[90me[39m[31m- 89[39m 0       Cryab 
[90m 4[39m 0   [90m [39m           4.37 1     0.205 0   [90m [39m     1       Pvalb 
[90m 5[39m 1.36[90me[39m[31m-196[39m       1.75 0.967 0.597 1.29[90me[39m[31m-193[39m 1       Ldhb  
[90m 6[39m 2.10[90me[39m[31m- 46[39m       1.64 0.332 0.112 1.99[90me[39m[31m- 43[39m 1       Oprd1 
[90m 7[39m 0   [90m [39m       

  Saved markers CSV for Inhibitory Neurons as markers_Subclusters_Inhibitory_Neurons.csv

  Saved subclustered Seurat object for Inhibitory Neurons as subclustered_Inhibitory_Neurons.rds

Processing cell type: OPCs

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Pdgfra, Olig1, Cspg5, Vcan, Ptprz1, Olig2, Pllp, Gpr17, Bcan, Serpine2 
	   Sulf2, S100a16, Pcdh15, Epn2, Gpr37l1, Cd9, Ptn, Gm2a, Tnr, Dbi 
	   Edn

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 759
Number of edges: 24673

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7652
Number of communities: 6
Elapsed time: 0 seconds


14:29:11 UMAP embedding parameters a = 0.9922 b = 1.112

14:29:11 Read 759 rows and found 10 numeric columns

14:29:11 Using Annoy for neighbor search, n_neighbors = 30

14:29:11 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:29:11 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b4b5c78f2

14:29:11 Searching Annoy index using 1 thread, search_k = 3000

14:29:11 Annoy recall = 100%

14:29:11 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:29:12 Initializing from normalized Laplacian + noise (using RSpectra)

14:29:12 Commencing optimization for 500 epochs, with 29614 positive edges

14:29:12 Optimization finished

  Saved UMAP plot for OPCs in UMAP_Subcluster_OPCs.png


[90m# A tibble: 18 x 7[39m
[90m# Groups:   cluster [6][39m
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
      [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m  
[90m 1[39m 1.07[90me[39m[31m-88[39m      1.50  1     0.84   1.02[90me[39m[31m-85[39m 0       Olig1  
[90m 2[39m 7.47[90me[39m[31m-33[39m      1.11  0.928 0.674  7.09[90me[39m[31m-30[39m 0       Ptprz1 
[90m 3[39m 8.59[90me[39m[31m-28[39m      1.08  0.879 0.605  8.16[90me[39m[31m-25[39m 0       Olig2  
[90m 4[39m 1.60[90me[39m[31m- 6[39m      1.18  0.266 0.13   1.52[90me[39m[31m- 3[39m 1       Grid2  
[90m 5[39m 2.68[90me[39m[31m- 5[39m      0.949 0.301 0.172  2.54[90me[39m[31m- 2[39m 1       Rb1    
[90m 6[39m 1.13[90me[39m[31m- 4[39m      0.901 0.286 0.17   1.07[90me[39m[31m- 1[39m 1       Taok3  
[90m 7[39m 1.7

  Saved markers CSV for OPCs as markers_Subclusters_OPCs.csv

  Saved subclustered Seurat object for OPCs as subclustered_OPCs.rds

Processing cell type: Macrophages

"Removing 5139 cells missing data for vars requested"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
PC_ 1 
Positive:  Mbp, Plp1, Snap25, Atp1b1, Dnm1, Ppp3ca, Nrgn, Calm2, Ndrg4, Rtn1 
	   Map1b, Ywhag, Prnp, Camk2b, Hsp90ab1, Olfm1, App, Nap1l5, Syp, Thy1 
	   Stmn3, Snca, Gnao1, Snrpn, Malat1, Ywhaz, Calm3, Calm

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 189
Number of edges: 5747

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6778
Number of communities: 4
Elapsed time: 0 seconds


14:34:52 UMAP embedding parameters a = 0.9922 b = 1.112

14:34:52 Read 189 rows and found 10 numeric columns

14:34:52 Using Annoy for neighbor search, n_neighbors = 30

14:34:52 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:34:52 Writing NN index file to temp file /var/folders/c7/rvxz8xg96pxfhyt34649yfqh0000gn/T//RtmprXYgVm/filef50b24087e85

14:34:52 Searching Annoy index using 1 thread, search_k = 3000

14:34:52 Annoy recall = 100%

14:34:52 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:34:53 Initializing from normalized Laplacian + noise (using RSpectra)

14:34:53 Commencing optimization for 500 epochs, with 6906 positive edges

14:34:53 Optimization finished

  Saved UMAP plot for Macrophages in UMAP_Subcluster_Mac

[90m# A tibble: 12 x 7[39m
[90m# Groups:   cluster [4][39m
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
      [3m[90m<dbl>[39m[23m      [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<fct>[39m[23m   [3m[90m<chr>[39m[23m 
[90m 1[39m 4.33[90me[39m[31m- 4[39m       1.97 0.288 0.095  4.11[90me[39m[31m- 1[39m 0       Tiam1 
[90m 2[39m 2.37[90me[39m[31m- 3[39m       1.74 0.329 0.155  1   [90me[39m+ 0 0       Fos   
[90m 3[39m 6.36[90me[39m[31m- 3[39m       1.76 0.315 0.164  1   [90me[39m+ 0 0       Nrsn1 
[90m 4[39m 1.13[90me[39m[31m-16[39m       2.99 0.906 0.448  1.07[90me[39m[31m-13[39m 1       Cd74  
[90m 5[39m 8.89[90me[39m[31m-14[39m       2.90 0.656 0.152  8.44[90me[39m[31m-11[39m 1       Cst7  
[90m 6[39m 5.85[90me[39m[31m- 5[39m       2.88 0.531 0.296  5.56[90me[39m[31m- 2[39m 1       Spp1  
[90m 7[39m 2.92[90me[39m[31m- 7[39m   

  Saved markers CSV for Macrophages as markers_Subclusters_Macrophages.csv



In [1]:
# Load required libraries
library(Seurat)
library(dplyr)
# Cortex results folder: holds the combined object and the per-cell-type
# `subclustered_*.rds` files. Defined once and reused so the working directory
# and the input path cannot drift apart.
subcluster_dir <- "/Users/katherineridley/Projects/CosMx/APP/Cortex Results"
setwd(subcluster_dir)

# Full path of the combined cortex Seurat object.
full_obj_file <- file.path(
  subcluster_dir, "combined_seurat_C_plaquedistance.RDS"
)

# Load the full Seurat object.
combined_seurat <- readRDS(full_obj_file)

# Subclusters to drop, per cell type. IDs are character values because they
# are compared against the `Subcluster` labels of the saved subclustered
# objects. An empty vector means "remove nothing" for that cell type.
clusters_to_remove <- list(
  "Excitatory Neurons" = c("2", "4", "7", "10", "11", "12"),
  "Oligodendrocytes"   = c("0", "2", "4"),
  "Astrocytes"         = c("4", "7", "9"),
  "Endothelial cells"  = c("0"),
  "OPCs"               = c("1", "2", "5", "6"),
  "Inhibitory Neurons" = character(0) # No removal
)

# --- Step 1: Remove cells from the full object based on unwanted subclusters ---
# For every cell type in `clusters_to_remove`, load the matching
# `subclustered_<type>.rds`, collect the barcodes that sit in a flagged
# subcluster and still carry that cell type in the full object, and finally
# drop all collected barcodes from `combined_seurat` in one subset() call.
# Cell types are mutually exclusive labels, so removing everything at the end
# yields the same cells as removing them type by type, without rebuilding the
# full object once per type.

# Labels and barcodes of the full object, read once and kept aligned by taking
# both from the same metadata table.
full_barcodes <- rownames(combined_seurat@meta.data)
full_celltypes <- combined_seurat@meta.data$Celltypes

# Barcodes to drop, stored per cell type.
cells_rm_by_type <- list()

for (ct in names(clusters_to_remove)) {
  message("Processing removal for cell type: ", ct)

  # Nothing to do for this type: skip before loading its (large) RDS file.
  rm_clusters <- clusters_to_remove[[ct]]
  if (length(rm_clusters) == 0) {
    message("  No clusters defined to remove for ", ct, ".")
    next
  }

  # Construct filename for the subclustered object.
  file_name <- paste0(
    "subclustered_", gsub(" ", "_", ct, fixed = TRUE), ".rds"
  )
  sub_obj_path <- file.path(subcluster_dir, file_name)
  if (!file.exists(sub_obj_path)) {
    message("  Subclustered file for ", ct, " not found: ", sub_obj_path, ". Skipping removal for this type.")
    next
  }

  # Load the subclustered Seurat object for this cell type.
  sub_obj <- readRDS(sub_obj_path)
  sub_meta <- sub_obj@meta.data

  # The removal list refers to the "Subcluster" metadata column.
  if (!"Subcluster" %in% colnames(sub_meta)) {
    message("  'Subcluster' column not found in ", ct, " subclustered object. Skipping removal.")
    next
  }

  # Compare as character so factor and character columns behave the same.
  sub_ids <- as.character(sub_meta$Subcluster)

  # The IDs are hard-coded, so flag any that do not exist in this object
  # (e.g. after re-clustering produced fewer clusters).
  unknown_ids <- setdiff(rm_clusters, unique(sub_ids))
  if (length(unknown_ids) > 0) {
    message(
      "  Note: requested subcluster(s) ",
      paste(unknown_ids, collapse = ", "),
      " not present in the ", ct, " subclustered object."
    )
  }

  cells_to_remove <- rownames(sub_meta)[sub_ids %in% rm_clusters]
  message("  Removing ", length(cells_to_remove), " cells from ", ct, " based on subcluster criteria.")

  # Only remove barcodes that carry this cell type in the full object;
  # which() ignores cells whose label is NA.
  cells_full <- full_barcodes[which(full_celltypes == ct)]
  cells_rm <- intersect(cells_full, cells_to_remove)
  message("  Total cells to remove for ", ct, ": ", length(cells_rm))

  cells_rm_by_type[[ct]] <- cells_rm
}

# Single subset over everything collected above.
cells_rm_all <- unique(unlist(cells_rm_by_type, use.names = FALSE))
if (length(cells_rm_all) > 0) {
  combined_seurat <- subset(
    combined_seurat,
    cells = setdiff(Cells(combined_seurat), cells_rm_all)
  )
}

# --- Step 2: Create refined metadata columns for astrocytes (Celltypes_r) and inhibitory neurons (Celltypes_i) ---
# Celltypes_r starts as a copy of Celltypes so non-target cell types remain
# unchanged. It is stored as character so new labels can be written into it
# even if Celltypes happens to be a factor.
combined_seurat@meta.data$Celltypes_r <-
  as.character(combined_seurat@meta.data$Celltypes)
# Celltypes_i stays NA for every cell that is not an inhibitory neuron.
combined_seurat@meta.data$Celltypes_i <- NA_character_

# For astrocytes:
# Subclusters labelled reactive; every other astrocyte subcluster is labelled
# homeostatic.
reactive_astro_clusters <- c("2", "6", "8")

astro_file <- file.path(subcluster_dir, "subclustered_Astrocytes.rds")
if (!file.exists(astro_file)) {
  message("Astrocyte subclustered file not found. Astrocyte refinement skipped.")
} else {
  astro_obj <- readRDS(astro_file)
  astro_meta <- astro_obj@meta.data
  full_meta <- combined_seurat@meta.data # current metadata from full object

  # Only cells still present in the full object (after removal) that are
  # labelled as astrocytes there.
  astro_barcodes <- intersect(rownames(astro_meta), rownames(full_meta))
  full_rows <- match(astro_barcodes, rownames(full_meta))
  is_astro <- full_meta$Celltypes[full_rows] %in% "Astrocytes"
  astro_barcodes <- astro_barcodes[is_astro]
  full_rows <- full_rows[is_astro]

  if (!"Subcluster" %in% colnames(astro_meta)) {
    message("  'Subcluster' column not found in astrocyte subclustered object. Astrocyte refinement skipped.")
  } else if (length(astro_barcodes) > 0) {
    # Look up each barcode's subcluster explicitly. `full_rows` and
    # `astro_subclusters` are both ordered like `astro_barcodes`, so the
    # assignment below is aligned by barcode and does not depend on the two
    # objects listing their cells in the same order.
    astro_rows <- match(astro_barcodes, rownames(astro_meta))
    astro_subclusters <- as.character(astro_meta$Subcluster[astro_rows])

    refined_astro <- ifelse(
      astro_subclusters %in% reactive_astro_clusters,
      "Reactive Astrocytes",
      "Homeostatic Astrocytes"
    )

    full_meta$Celltypes_r[full_rows] <- refined_astro
    combined_seurat@meta.data <- full_meta
  } else {
    message("  No astrocyte cells in the astro subclustered object are present in the full object.")
  }
}

# For inhibitory neurons:
# Refined labels are written to the Celltypes_i metadata column, using the
# subclusters stored in "subclustered_Inhibitory_Neurons.rds".
#
# Subcluster -> interneuron subtype:
#   0, 1 -> "Parvalbumin Interneurons"
#   3    -> "Somatostatin Interneurons"
#   5    -> "VIP Interneurons"
#   6    -> "CCK Interneurons"
#   any other subcluster -> "Interneurons (Other)"
inhib_label_map <- c(
  "0" = "Parvalbumin Interneurons",
  "1" = "Parvalbumin Interneurons",
  "3" = "Somatostatin Interneurons",
  "5" = "VIP Interneurons",
  "6" = "CCK Interneurons"
)

inhib_file <- file.path(subcluster_dir, "subclustered_Inhibitory_Neurons.rds")
if (!file.exists(inhib_file)) {
  message("Inhibitory neuron subclustered file not found. Inhibitory subtype refinement skipped.")
} else {
  inhib_obj <- readRDS(inhib_file)
  inhib_meta <- inhib_obj@meta.data
  full_meta <- combined_seurat@meta.data

  # Only cells still present in the full object that are labelled as
  # inhibitory neurons there.
  inhib_barcodes <- intersect(rownames(inhib_meta), rownames(full_meta))
  full_rows <- match(inhib_barcodes, rownames(full_meta))
  is_inhib <- full_meta$Celltypes[full_rows] %in% "Inhibitory Neurons"
  inhib_barcodes <- inhib_barcodes[is_inhib]
  full_rows <- full_rows[is_inhib]

  if (!"Subcluster" %in% colnames(inhib_meta)) {
    message("  'Subcluster' column not found in inhibitory neuron subclustered object. Inhibitory subtype refinement skipped.")
  } else if (length(inhib_barcodes) > 0) {
    # `full_rows` and `inhib_subclusters` are both ordered like
    # `inhib_barcodes`, so labels are aligned by barcode rather than by the
    # position of cells in either object.
    inhib_rows <- match(inhib_barcodes, rownames(inhib_meta))
    inhib_subclusters <- as.character(inhib_meta$Subcluster[inhib_rows])

    # Table lookup; IDs without an entry come back as NA and become
    # "Interneurons (Other)". A genuinely missing subcluster stays NA.
    refined_inhib <- unname(inhib_label_map[inhib_subclusters])
    unmapped <- is.na(refined_inhib) & !is.na(inhib_subclusters)
    refined_inhib[unmapped] <- "Interneurons (Other)"

    full_meta$Celltypes_i[full_rows] <- refined_inhib
    combined_seurat@meta.data <- full_meta
  } else {
    message("  No inhibitory neuron cells in the subclustered object are present in the full object.")
  }
}

# For all other cell types: wherever Celltypes_r is still NA, fall back to the
# original Celltypes label. The mask is computed once so both sides of the
# assignment use the same rows; as.character() keeps a factor Celltypes from
# being written as integer codes.
full_meta <- combined_seurat@meta.data
needs_fill <- is.na(full_meta$Celltypes_r)
full_meta$Celltypes_r[needs_fill] <-
  as.character(full_meta$Celltypes[needs_fill])
combined_seurat@meta.data <- full_meta

# Save the updated full Seurat object as a new RDS file (relative to the
# working directory). The file name is held in one variable so the log message
# always reports the file that was actually written.
cleanup_file <- "combined_seurat_C_plaquedistance_cleanup.RDS"
saveRDS(combined_seurat, file = cleanup_file)
message("Updated Seurat object saved as ", cleanup_file)


Loading required package: SeuratObject

Loading required package: sp




Attaching package: 'SeuratObject'


The following objects are masked from 'package:base':

    intersect, t



Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Processing removal for cell type: Excitatory Neurons

  Removing 46497 cells from Excitatory Neurons based on subcluster criteria.

"Removing 6723 cells missing data for vars requested"
  Total cells to remove for Excitatory Neurons: 46497

"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"N

In [2]:
# Load required libraries
library(Seurat)
library(dplyr)
# Directory that holds both the full object and the per-cell-type
# subclustered .rds files. It is also made the working directory because the
# final saveRDS() in this cell writes with a relative path.
subcluster_dir <- "/Users/katherineridley/Projects/CosMx/APP/Hpc Results"
setwd(subcluster_dir)

# Full Seurat object to clean up.
full_obj_file <- file.path(
  subcluster_dir,
  "combined_seurat_H_plaquedistance.RDS"
)
combined_seurat <- readRDS(full_obj_file)

# Snapshot of the metadata as loaded (refreshed again before each later use).
full_meta <- combined_seurat@meta.data

# Subcluster IDs to discard, keyed by cell type. IDs are kept as character
# strings so they compare directly against the Subcluster metadata column.
clusters_to_remove <- list(
  "Excitatory Neurons" = as.character(6:11),
  "Oligodendrocytes" = c("6", "8", "9"),
  "Astrocytes" = c("5", "9"),
  "Endothelial cells" = "0",
  "OPCs" = c("2", "5"),
  "Inhibitory Neurons" = "6"
)

# --- Step 1: Remove cells from the full object based on unwanted subclusters ---
# For every cell type in clusters_to_remove, load its subclustered object, pick
# the barcodes whose Subcluster is flagged for removal, and keep only those
# that carry the same Celltypes label in the full object. The barcodes are
# pooled across cell types and dropped with ONE subset() call after the loop:
# each cell has a single Celltypes label, so the outcome matches per-type
# subsetting while avoiding a rebuild of the full object on every iteration.
full_barcodes <- rownames(combined_seurat@meta.data)
full_celltypes <- combined_seurat@meta.data$Celltypes
cells_rm_by_type <- list()

for (ct in names(clusters_to_remove)) {
  message("Processing removal for cell type: ", ct)

  # Subclustered objects are looked up as subclustered_<Cell_type>.rds.
  file_name <- paste0("subclustered_", gsub(" ", "_", ct), ".rds")
  sub_obj_path <- file.path(subcluster_dir, file_name)
  if (!file.exists(sub_obj_path)) {
    message("  Subclustered file for ", ct, " not found: ", sub_obj_path, ". Skipping removal for this type.")
    next
  }

  # Load the subclustered Seurat object for this cell type.
  sub_obj <- readRDS(sub_obj_path)

  # The removal criteria are expressed in terms of the "Subcluster" column.
  if (!"Subcluster" %in% colnames(sub_obj@meta.data)) {
    message("  'Subcluster' column not found in ", ct, " subclustered object. Skipping removal.")
    next
  }

  rm_clusters <- clusters_to_remove[[ct]]
  if (length(rm_clusters) == 0) {
    message("  No clusters defined to remove for ", ct, ".")
    next
  }

  # Barcodes flagged in the subclustered object.
  flagged <- sub_obj@meta.data$Subcluster %in% rm_clusters
  cells_to_remove <- rownames(sub_obj@meta.data)[flagged]
  message("  Removing ", length(cells_to_remove), " cells from ", ct, " based on subcluster criteria.")

  # Restrict to barcodes that have this cell type in the full object.
  # %in% treats NA labels as non-matching, so cells without a Celltypes value
  # are never selected.
  cells_full <- full_barcodes[full_celltypes %in% ct]
  cells_rm <- intersect(cells_full, cells_to_remove)
  message("  Total cells to remove for ", ct, ": ", length(cells_rm))

  cells_rm_by_type[[ct]] <- cells_rm
}

# Drop everything that was flagged in a single pass.
cells_rm_all <- unique(unlist(cells_rm_by_type, use.names = FALSE))
if (length(cells_rm_all) > 0) {
  combined_seurat <- subset(
    combined_seurat,
    cells = setdiff(Cells(combined_seurat), cells_rm_all)
  )
}

# --- Step 2: Create refined metadata columns for astrocytes (Celltypes_r) and inhibitory neurons (Celltypes_i) ---
# Celltypes_r starts as a copy of Celltypes so that non-target cell types
# remain unchanged. The copy is forced to character: if Celltypes were stored
# as a factor, writing a new label such as "Reactive Astrocytes" into it would
# produce NA (invalid factor level) instead of the label.
combined_seurat@meta.data$Celltypes_r <-
  as.character(combined_seurat@meta.data$Celltypes)
# Celltypes_i defaults to NA for non-inhibitory cells; a character NA keeps
# the column type stable whether or not the inhibitory refinement runs.
combined_seurat@meta.data$Celltypes_i <- NA_character_

# For astrocytes:
# Load the astrocyte subclustered object and write a reactive/homeostatic
# label into Celltypes_r for astrocytes that are still in the full object.
astro_file <- file.path(subcluster_dir, "subclustered_Astrocytes.rds")
if (file.exists(astro_file)) {
  astro_obj <- readRDS(astro_file)

  # We want only the cells that remain in the full object (after removal).
  astro_barcodes <- intersect(Cells(astro_obj), Cells(combined_seurat))
  if (length(astro_barcodes) > 0) {
    # Subcluster annotation for each retained barcode, in astro_barcodes order.
    astro_rows <- match(astro_barcodes, rownames(astro_obj@meta.data))
    astro_subclusters <- astro_obj@meta.data$Subcluster[astro_rows]

    # Refined annotation: reactive if subcluster is "0", "7" or "8",
    # homeostatic otherwise.
    # NOTE(review): cluster IDs are specific to this subclustering run;
    # confirm them against the marker genes for this dataset.
    refined_astro <- ifelse(astro_subclusters %in% c("0", "7", "8"),
                            "Reactive Astrocytes",
                            "Homeostatic Astrocytes")
    # Key the labels by barcode so they can be looked up per cell below.
    names(refined_astro) <- astro_barcodes

    # Place these refined labels in the metadata column Celltypes_r.
    full_meta <- combined_seurat@meta.data  # current metadata from full object
    astro_idx <- which(full_meta$Celltypes == "Astrocytes" &
                         rownames(full_meta) %in% astro_barcodes)
    # Ensure the target column can hold new labels (a factor column would
    # turn unknown labels into NA).
    full_meta$Celltypes_r <- as.character(full_meta$Celltypes_r)
    # Look labels up by barcode: astro_idx follows the row order of the full
    # object, which need not match the order of astro_barcodes, and may be
    # shorter if some shared barcodes are not labelled "Astrocytes".
    full_meta$Celltypes_r[astro_idx] <-
      unname(refined_astro[rownames(full_meta)[astro_idx]])
    combined_seurat@meta.data <- full_meta
  } else {
    message("  No astrocyte cells in the astro subclustered object are present in the full object.")
  }
} else {
  message("Astrocyte subclustered file not found. Astrocyte refinement skipped.")
}

# For inhibitory neurons:
# Load the inhibitory neuron subclustered object (expected filename
# "subclustered_Inhibitory_Neurons.rds") and write an interneuron subtype
# label into Celltypes_i for inhibitory neurons still in the full object.
inhib_file <- file.path(subcluster_dir, "subclustered_Inhibitory_Neurons.rds")
if (file.exists(inhib_file)) {
  inhib_obj <- readRDS(inhib_file)

  # Consider cells from inhib_obj that are still in combined_seurat.
  inhib_barcodes <- intersect(Cells(inhib_obj), Cells(combined_seurat))
  if (length(inhib_barcodes) > 0) {
    # Subcluster annotation for each retained barcode, in inhib_barcodes order.
    inhib_rows <- match(inhib_barcodes, rownames(inhib_obj@meta.data))
    inhib_subclusters <- inhib_obj@meta.data$Subcluster[inhib_rows]

    # Refined inhibitory annotation:
    #   Cluster 1      -> "Parvalbumin Interneurons"
    #   Clusters 4, 8  -> "Somatostatin Interneurons"
    #   Cluster 9      -> "VIP Interneurons"
    #   Cluster 7      -> "CCK Interneurons"
    #   anything else  -> "Interneurons (Other)"
    # A lookup table gives true set membership for clusters 4 and 8; comparing
    # with `== c("4", "8")` would recycle the pair element-wise and only match
    # "4" at odd positions and "8" at even positions.
    # NOTE(review): cluster IDs are specific to this subclustering run;
    # confirm them against the marker genes for this dataset.
    inhib_label_map <- c(
      "1" = "Parvalbumin Interneurons",
      "4" = "Somatostatin Interneurons",
      "8" = "Somatostatin Interneurons",
      "9" = "VIP Interneurons",
      "7" = "CCK Interneurons"
    )
    refined_inhib <- unname(inhib_label_map[as.character(inhib_subclusters)])
    refined_inhib[is.na(refined_inhib)] <- "Interneurons (Other)"
    # Key the labels by barcode so they can be looked up per cell below.
    names(refined_inhib) <- inhib_barcodes

    # Place these labels in the metadata column Celltypes_i.
    full_meta <- combined_seurat@meta.data
    inhib_idx <- which(full_meta$Celltypes == "Inhibitory Neurons" &
                         rownames(full_meta) %in% inhib_barcodes)
    # Look labels up by barcode: inhib_idx follows the row order of the full
    # object, which need not match the order of inhib_barcodes, and may be
    # shorter if some shared barcodes are not labelled "Inhibitory Neurons".
    full_meta$Celltypes_i[inhib_idx] <-
      unname(refined_inhib[rownames(full_meta)[inhib_idx]])
    combined_seurat@meta.data <- full_meta
  } else {
    message("  No inhibitory neuron cells in the subclustered object are present in the full object.")
  }
} else {
  message("Inhibitory neuron subclustered file not found. Inhibitory subtype refinement skipped.")
}

# Fallback for Celltypes_r: any cell that still has no refined label (NA)
# inherits its original Celltypes value.
full_meta <- combined_seurat@meta.data
missing_refined <- is.na(full_meta$Celltypes_r)
# as.character() guards against a factor Celltypes being written into a
# character column as integer codes.
full_meta$Celltypes_r[missing_refined] <-
  as.character(full_meta$Celltypes[missing_refined])
combined_seurat@meta.data <- full_meta

# Save the updated full Seurat object as a new RDS file (relative to the
# current working directory). The status message is built from the same
# variable so it always reports the file that was actually written.
output_file <- "combined_seurat_H_plaquedistance_cleanup.RDS"
saveRDS(combined_seurat, file = output_file)
message("Updated Seurat object saved as ", output_file)


Processing removal for cell type: Excitatory Neurons

  Removing 19703 cells from Excitatory Neurons based on subcluster criteria.

"Removing 5139 cells missing data for vars requested"
  Total cells to remove for Excitatory Neurons: 19703

"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating Centroids objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating FOV objects"
"Not validating Seurat objects"
Processing removal for cell type: Oligodendrocytes

  Removing 1445 cells from Oligodendrocytes based on subcluster criteria.

"Removing 5139 cells missing data for vars requested"
  Total cells 