In [None]:
library(Seurat)
library(tidyverse)
library(dplyr)
library(purrr)

In [None]:
#Load the serialized Seurat object from disk
iN <- readRDS(file = "seurat.rds")


In [3]:
#Display the object summary (the notebook output follows below)
iN

An object of class Seurat 
62710 features across 304912 samples within 1 assay 
Active assay: RNA (62710 features, 5000 variable features)
 3 dimensional reductions calculated: umap, tsne, pca

In [5]:
#Break the full object into a list of per-cluster objects,
#one element for each value of the final_clustering metadata column
ClusterList <- SplitObject(
  object = iN,
  split.by = "final_clustering"
)

In [55]:
#Count the cells in each final cluster
#(the identities are switched to final_clustering first, then tabulated)
Idents(iN) <- "final_clustering"
ncells <- table(Idents(iN))


In [None]:
#Sub-cluster every final cluster at resolution = 6 and stack the resulting
#per-cell metadata tables into one data frame
info_6 <- purrr::map_dfr(ClusterList, function(cluster_obj) {
  #Set the identities to the parent cluster and show the first few labels
  Idents(cluster_obj) <- cluster_obj$final_clustering
  print(head(cluster_obj$final_clustering))
  #Build the neighbor graph on the first 60 dimensions, then cluster on it
  cluster_obj <- FindNeighbors(cluster_obj, dims = 1:60)
  cluster_obj <- FindClusters(cluster_obj, resolution = 6)
  #Only the metadata (which now carries the new cluster column) is returned
  cluster_obj@meta.data
})

In [81]:
#Combine the parent cluster ID and the resolution-6 sub-cluster ID into one
#label per cell, e.g. "<final_clustering>_<RNA_snn_res.6>"
#The column is created by name rather than by position, so the result does not
#depend on how many metadata columns the object happens to carry
library(stringr)
info_6$high_res_clusters <- str_c(info_6$final_clustering, "_", info_6$RNA_snn_res.6)


In [83]:
#Adding metadata
#Pick the columns by name instead of by position (previously 56:57) so the
#selection survives any change in the number of metadata columns
#NOTE(review): position 56 is presumed to be the resolution-6 sub-cluster
#assignment (RNA_snn_res.6) - confirm against colnames(info_6)
add_info <- info_6[, c("RNA_snn_res.6", "high_res_clusters")]
#AddMetaData matches rows to cells through the row names of add_info
iN <- AddMetaData(iN, add_info)

In [84]:
#Build a new Seurat object with one averaged profile per high-resolution cluster
#Little metadata carries over (there is no sample information)
#The aggregation is done with Seurat rather than Pando
Idents(iN) <- "high_res_clusters"
iN.aggr <- AverageExpression(
  iN,
  group.by = "ident",
  return.seurat = TRUE
)


"sparse->dense coercion: allocating vector of size 10.3 GiB"
"sparse->dense coercion: allocating vector of size 2.1 GiB"
Centering and scaling data matrix



In [None]:
#Aggregate/summarize metadata: one row per high-resolution cluster together
#with the final cluster it belongs to
#A plain, ungrouped data.frame is used because row names on a tibble are
#deprecated and are not reliably kept
iN.aggr.info <- iN@meta.data %>%
  distinct(high_res_clusters, final_clustering) %>%
  as.data.frame()
#Row names are the high-res cluster IDs, which is what AddMetaData matches on
#NOTE(review): assumes colnames(iN.aggr) equal the high_res_clusters labels;
#verify, since some Seurat releases may rewrite "_" in group names
rownames(iN.aggr.info) <- as.character(iN.aggr.info$high_res_clusters)
iN.aggr <- AddMetaData(iN.aggr, iN.aggr.info) #Append parent-cluster info onto the high-res clusters seurat object


"Setting row names on a tibble is deprecated."


In [88]:
#Averaging the umap coordinates from each high-resolution cluster
#So one could overlay the original umap with the high-res cluster umap
iN.meta <- iN@meta.data
#Work on a data frame so the coordinates stay numeric; cbind() of a numeric
#matrix with a character vector would turn every coordinate into a string
umap_cord <- as.data.frame(iN[['umap']]@cell.embeddings)
#Look the label up by column name and by cell name rather than by position
umap_cord$high_res_clusters <- as.character(iN.meta[rownames(umap_cord), "high_res_clusters"])
umap_cord <- as_tibble(umap_cord, rownames = NA) #Use rownames = NA to keep rownames. otherwise the default is NULL.


In [90]:
#Mean UMAP position of the cells in each high-resolution cluster
#as.numeric() converts the coordinates back to numbers when they arrive as text
umap_cord.aggr <- summarise(
  group_by(umap_cord, high_res_clusters),
  UMAP_1 = mean(as.numeric(UMAP_1)),
  UMAP_2 = mean(as.numeric(UMAP_2))
)


In [91]:
#Reshape the aggregated UMAP coordinates into a matrix laid out like the
#original embeddings: one row per high-res cluster, columns UMAP_1 / UMAP_2
name_col <- c("UMAP_1", "UMAP_2")
name_row <- umap_cord.aggr$high_res_clusters
#Drop the label column so that only the two coordinate columns remain
umap_cord.aggr$high_res_clusters <- NULL
umap_cord.aggr.mtx <- matrix(
  unlist(umap_cord.aggr),
  ncol = 2,
  dimnames = list(name_row, name_col)
)


In [92]:
#Attach the averaged UMAP coordinates to the aggregated (high-res cluster) object
#The reduction is built with Seurat's CreateDimReducObject
#See https://github.com/satijalab/seurat-wrappers/issues/13
iN.aggr@reductions[["umap"]] <- CreateDimReducObject(
  embeddings = umap_cord.aggr.mtx,
  key = "UMAP_",
  assay = "RNA",
  global = TRUE
)


In [None]:
#Write the aggregated object to disk
saveRDS(object = iN.aggr, file = "iN_aggr.rds")
