## Read in and preprocess datasets


In [None]:
library(dplyr)
library(Seurat)
library(SeuratDisk)
library(DoubletFinder)
library(reticulate)
library(ggplot2)
library(patchwork)
library(devtools) 
library(png)
set.seed(1234)


### Read in combined data

In [None]:
# Cell Ranger filtered feature-barcode matrix directory of each Batch2 library,
# keyed by the name of the global variable the loaded data is bound to.
batch2_dirs <- c(
  H1 = "../000.Process/Batch2/H1/outs/filtered_feature_bc_matrix/",
  H2 = "../000.Process/Batch2/H2/outs/filtered_feature_bc_matrix/",
  GAL002 = "../000.Process/Batch2/GAL002/outs/filtered_feature_bc_matrix/",
  GAL004 = "../000.Process/Batch2/GAL004/outs/filtered_feature_bc_matrix/",
  GAL005 = "../000.Process/Batch2/GAL005/outs/filtered_feature_bc_matrix/"
)

# Read every library and publish the results as H1, H2, GAL002, GAL004, GAL005.
invisible(list2env(
  lapply(batch2_dirs, function(dir) Read10X(data.dir = dir)),
  envir = .GlobalEnv
))



In [None]:
# Names of the libraries loaded above
dataset_names <- c("H1", "H2", "GAL002", "GAL004", "GAL005")

# Build a Seurat object from the "Gene Expression" modality of one Read10X
# result, keeping genes seen in at least 3 cells and cells with at least 100
# detected features.
create_seurat_object <- function(dataset) {
  gex_counts <- dataset[["Gene Expression"]]
  CreateSeuratObject(counts = gex_counts, min.cells = 3, min.features = 100)
}

# One Seurat object per library, named "<library>.se"
seurat_objects <- setNames(
  lapply(mget(dataset_names, inherits = TRUE), create_seurat_object),
  paste0(dataset_names, ".se")
)

# Publish the objects as global variables (H1.se, H2.se, ...)
list2env(seurat_objects, envir = .GlobalEnv)


In [None]:

# Split each library into its two modalities and bind them to
# "<library>.umis" (Gene Expression) and "<library>.htos" (Antibody Capture).
for (name in dataset_names) {
  modalities <- get(name)
  assign(paste0(name, ".umis"), modalities[["Gene Expression"]])
  assign(paste0(name, ".htos"), modalities[["Antibody Capture"]])
}


In [None]:
# Inspect the per-cell metadata of the H1 object. The cell previously held the
# unfinished expression `H1.se$`, which does not parse.
head(H1.se@meta.data)

## Prepare each library 

In [None]:
# Build an RNA-only Seurat object restricted to the barcodes present in both
# the RNA and the HTO count matrices.
#
# rna_counts: count matrix whose column names are cell barcodes.
# hto_counts: hashtag count matrix whose column names are cell barcodes.
#
# Prints the row names of the HTO matrix as a sanity check and returns the
# Seurat object created from the shared-barcode RNA counts (genes seen in at
# least 3 cells). The barcode-filtered HTO matrix is only used for that
# printout; it is neither returned nor attached to the object.
process_dataset <- function(rna_counts, hto_counts) {
  shared_barcodes <- intersect(colnames(rna_counts), colnames(hto_counts))
  rna_shared <- rna_counts[, shared_barcodes]
  hto_shared <- as.matrix(hto_counts[, shared_barcodes])

  # Confirm HTO names
  print(rownames(hto_shared))

  CreateSeuratObject(counts = rna_shared, min.cells = 3)
}


# Run process_dataset() on the RNA / HTO matrices of every library, pairing
# "<library>.umis" with "<library>.htos".
seurat_objects <- Map(
  process_dataset,
  mget(paste0(dataset_names, ".umis"), inherits = TRUE),
  mget(paste0(dataset_names, ".htos"), inherits = TRUE)
)

# Publish the results as global variables named "<library>.hashtag"
names(seurat_objects) <- paste0(dataset_names, ".hashtag")
list2env(seurat_objects, envir = .GlobalEnv)

# Example violin plot of the RNA counts for one library
VlnPlot(GAL002.hashtag, features = "nCount_RNA") + NoLegend()


# H1

In [None]:
# Normalize RNA data with log normalization
H1.hashtag <- NormalizeData(H1.hashtag)

# Pick variable features with the "mean.var.plot" method, then scale only
# those features
H1.hashtag <- FindVariableFeatures(H1.hashtag, selection.method = "mean.var.plot")
H1.hashtag <- ScaleData(H1.hashtag, features = VariableFeatures(H1.hashtag))

In [None]:
# Add the HTO counts as an independent assay and CLR-normalize them
H1.hashtag[["HTO"]] <- CreateAssayObject(counts = H1.htos)
H1.hashtag <- NormalizeData(H1.hashtag, assay = "HTO", normalization.method = "CLR")

# Demultiplex on the HTO assay. The HTO_maxID and HTO_classification.global
# metadata columns read further down are presumably added by this call.
H1.hashtag <- HTODemux(H1.hashtag, assay = "HTO", positive.quantile = 0.99)

In [None]:
# Jupyter (repr) figure size for the plot below
options(repr.plot.width=20, repr.plot.height=10)

# Group cells by HTO_maxID and draw ridge plots for the first 12 HTO features.
# NOTE(review): `[1:12]` yields NA entries if the assay has fewer than 12
# rows -- confirm every library carries at least 12 hashtags.
Idents(H1.hashtag) <- "HTO_maxID"
RidgePlot(H1.hashtag, assay = "HTO", features = rownames(H1.hashtag[["HTO"]])[1:12], ncol = 3)

In [None]:
# Visualize demultiplexing results

# Global classification results
table(H1.hashtag$HTO_classification.global)

In [None]:
# Switch the active identities to the global classification; the
# `idents = "Negative"` / `idents = "Singlet"` subsets below rely on this.
Idents(H1.hashtag) <- "HTO_classification.global"


In [None]:
options(repr.plot.width=12, repr.plot.height=8)

DefaultAssay(H1.hashtag) <- "RNA"
# Drop the cells labelled "Negative"
H1.hashtag.subset <- subset(H1.hashtag, idents = "Negative", invert = TRUE)

# Calculate a distance matrix from the transposed HTO assay data.
# NOTE(review): hto.dist.mtx is not read anywhere else in the visible code
# and is overwritten by every library section.
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = H1.hashtag.subset, assay = "HTO"))))
# The heatmap is drawn from the full object, not from the subset above
HTOHeatmap(H1.hashtag, assay = "HTO", ncells = 5000)


In [None]:
# Extract the singlets (active identities are HTO_classification.global here)
H1.singlet <- subset(H1.hashtag, idents = "Singlet")

In [None]:
options(repr.plot.width=20, repr.plot.height=7)

# RNA count and feature distributions (log scale), split by global HTO class
VlnPlot(H1.hashtag, features = c("nCount_RNA", "nFeature_RNA"), split.by = "HTO_classification.global",
                                           ncol = 5, log=TRUE) + NoLegend()

# H2

In [None]:

# Normalize RNA data with log normalization
H2.hashtag <- NormalizeData(H2.hashtag)
# Pick variable features with the "mean.var.plot" method, then scale only
# those features
H2.hashtag <- FindVariableFeatures(H2.hashtag, selection.method = "mean.var.plot")
H2.hashtag <- ScaleData(H2.hashtag, features = VariableFeatures(H2.hashtag))

In [None]:
# Add HTO data as a new assay independent from RNA
H2.hashtag[["HTO"]] <- CreateAssayObject(counts = H2.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
H2.hashtag <- NormalizeData(H2.hashtag, assay = "HTO", normalization.method = "CLR")

In [None]:
# Demultiplex on the HTO assay. The HTO_maxID and HTO_classification.global
# metadata columns read further down are presumably added by this call.
H2.hashtag <- HTODemux(H2.hashtag, assay = "HTO", positive.quantile = 0.99)

In [None]:
# Jupyter (repr) figure size for the plot below
options(repr.plot.width=20, repr.plot.height=10)

# Group cells by HTO_maxID and draw ridge plots for the first 12 HTO features.
# NOTE(review): `[1:12]` yields NA entries if the assay has fewer than 12
# rows -- confirm every library carries at least 12 hashtags.
Idents(H2.hashtag) <- "HTO_maxID"
RidgePlot(H2.hashtag, assay = "HTO", features = rownames(H2.hashtag[["HTO"]])[1:12], ncol = 3)

In [None]:
# Global classification results
table(H2.hashtag$HTO_classification.global)

In [None]:
# Switch the active identities to the global classification; the
# `idents = "Negative"` / `idents = "Singlet"` subsets below rely on this.
Idents(H2.hashtag) <- "HTO_classification.global"


In [None]:
options(repr.plot.width=12, repr.plot.height=8)

DefaultAssay(H2.hashtag) <- "RNA"
# Drop the cells labelled "Negative"
H2.hashtag.subset <- subset(H2.hashtag, idents = "Negative", invert = TRUE)

# Calculate a distance matrix from the transposed HTO assay data.
# NOTE(review): hto.dist.mtx is not read anywhere else in the visible code
# and is overwritten by every library section.
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = H2.hashtag.subset, assay = "HTO"))))



In [None]:
options(repr.plot.width=11, repr.plot.height=7)


# The heatmap is drawn from the full object, not from H2.hashtag.subset
HTOHeatmap(H2.hashtag, assay = "HTO", ncells = 5000)


In [None]:
# Extract the singlets
Idents(H2.hashtag) <- "HTO_classification.global"
H2.singlet <- subset(H2.hashtag, idents = "Singlet")



In [None]:
options(repr.plot.width=20, repr.plot.height=7)

# RNA count and feature distributions (log scale), split by global HTO class
VlnPlot(H2.hashtag, features = c("nCount_RNA", "nFeature_RNA"), split.by = "HTO_classification.global",
                                           ncol = 5, log=TRUE) + NoLegend()

# GAL002

In [None]:

# Normalize RNA data with log normalization
GAL002.hashtag <- NormalizeData(GAL002.hashtag)
# Pick variable features with the "mean.var.plot" method, then scale only
# those features
GAL002.hashtag <- FindVariableFeatures(GAL002.hashtag, selection.method = "mean.var.plot")
GAL002.hashtag <- ScaleData(GAL002.hashtag, features = VariableFeatures(GAL002.hashtag))

In [None]:

# Add HTO data as a new assay independent from RNA
GAL002.hashtag[["HTO"]] <- CreateAssayObject(counts = GAL002.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
GAL002.hashtag <- NormalizeData(GAL002.hashtag, assay = "HTO", normalization.method = "CLR")

In [None]:

# Demultiplex on the HTO assay. The HTO_maxID and HTO_classification.global
# metadata columns read further down are presumably added by this call.
GAL002.hashtag <- HTODemux(GAL002.hashtag, assay = "HTO", positive.quantile = 0.99)

In [None]:
# Jupyter (repr) figure size for the plot below
options(repr.plot.width=20, repr.plot.height=10)

# Group cells by HTO_maxID and draw ridge plots for the first 12 HTO features.
# NOTE(review): `[1:12]` yields NA entries if the assay has fewer than 12
# rows -- confirm every library carries at least 12 hashtags.
Idents(GAL002.hashtag) <- "HTO_maxID"
RidgePlot(GAL002.hashtag, assay = "HTO", features = rownames(GAL002.hashtag[["HTO"]])[1:12], ncol = 3)

In [None]:

# Global classification results
table(GAL002.hashtag$HTO_classification.global)

In [None]:
options(repr.plot.width=20, repr.plot.height=10)

# Switch the active identities to the global classification; the
# `idents = "Negative"` subset below relies on this.
Idents(GAL002.hashtag) <- "HTO_classification.global"


In [None]:
options(repr.plot.width=12, repr.plot.height=8)

DefaultAssay(GAL002.hashtag) <- "RNA"
# Drop the cells labelled "Negative"
GAL002.hashtag.subset <- subset(GAL002.hashtag, idents = "Negative", invert = TRUE)

# Calculate a distance matrix from the transposed HTO assay data.
# NOTE(review): hto.dist.mtx is not read anywhere else in the visible code
# and is overwritten by every library section.
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = GAL002.hashtag.subset, assay = "HTO"))))



In [None]:

# The heatmap is drawn from the full object, not from GAL002.hashtag.subset
HTOHeatmap(GAL002.hashtag, assay = "HTO", ncells = 5000)


In [None]:
# Extract the singlets
Idents(GAL002.hashtag) <- "HTO_classification.global"

GAL002.singlet <- subset(GAL002.hashtag, idents = "Singlet")



In [None]:
options(repr.plot.width=20, repr.plot.height=7)

# RNA count and feature distributions (log scale), split by global HTO class
VlnPlot(GAL002.hashtag, features = c("nCount_RNA", "nFeature_RNA"), split.by = "HTO_classification.global",
                                           ncol = 5, log=TRUE) + NoLegend()

In [None]:
# Number of singlets per value of the HTO_classification column
table(GAL002.singlet@meta.data$HTO_classification)

# GAL004

In [None]:
# Normalize RNA data with log normalization
GAL004.hashtag <- NormalizeData(GAL004.hashtag)
# Pick variable features with the "mean.var.plot" method, then scale only
# those features
GAL004.hashtag <- FindVariableFeatures(GAL004.hashtag, selection.method = "mean.var.plot")
GAL004.hashtag <- ScaleData(GAL004.hashtag, features = VariableFeatures(GAL004.hashtag))

In [None]:
# Adding HTO data as an independent assay

# Add HTO data as a new assay independent from RNA
GAL004.hashtag[["HTO"]] <- CreateAssayObject(counts = GAL004.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
GAL004.hashtag <- NormalizeData(GAL004.hashtag, assay = "HTO", normalization.method = "CLR")

In [None]:

# Demultiplex on the HTO assay. The HTO_maxID and HTO_classification.global
# metadata columns read further down are presumably added by this call.
GAL004.hashtag <- HTODemux(GAL004.hashtag, assay = "HTO", positive.quantile = 0.99)

In [None]:
# Jupyter (repr) figure size for the plot below
options(repr.plot.width=20, repr.plot.height=10)

# Group cells by HTO_maxID and draw ridge plots for the first 12 HTO features.
# NOTE(review): `[1:12]` yields NA entries if the assay has fewer than 12
# rows -- confirm every library carries at least 12 hashtags.
Idents(GAL004.hashtag) <- "HTO_maxID"
RidgePlot(GAL004.hashtag, assay = "HTO", features = rownames(GAL004.hashtag[["HTO"]])[1:12], ncol = 3)

In [None]:

# Global classification results
table(GAL004.hashtag$HTO_classification.global)

In [None]:
options(repr.plot.width=20, repr.plot.height=10)

# Switch the active identities to the global classification; the
# `idents = "Negative"` subset below relies on this.
Idents(GAL004.hashtag) <- "HTO_classification.global"


In [None]:
options(repr.plot.width=12, repr.plot.height=8)

DefaultAssay(GAL004.hashtag) <- "RNA"
# First, we will remove negative cells from the object
GAL004.hashtag.subset <- subset(GAL004.hashtag, idents = "Negative", invert = TRUE)

# Calculate a distance matrix from the transposed HTO assay data.
# NOTE(review): hto.dist.mtx is not read anywhere else in the visible code
# and is overwritten by every library section.
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = GAL004.hashtag.subset, assay = "HTO"))))


In [None]:
options(repr.plot.width=11, repr.plot.height=7)

# Make RNA the default assay (no cells are removed in this cell)
DefaultAssay(GAL004.hashtag) <- "RNA"

# The heatmap is drawn from the full object, not from GAL004.hashtag.subset
HTOHeatmap(GAL004.hashtag, assay = "HTO", ncells = 5000)


In [None]:
# Extract the singlets
Idents(GAL004.hashtag) <- "HTO_classification.global"

GAL004.singlet <- subset(GAL004.hashtag, idents = "Singlet")



In [None]:
options(repr.plot.width=20, repr.plot.height=7)

# RNA count and feature distributions (log scale), split by global HTO class
VlnPlot(GAL004.hashtag, features = c("nCount_RNA", "nFeature_RNA"), split.by = "HTO_classification.global",
                                           ncol = 5, log=TRUE) + NoLegend()

# GAL005

In [None]:
# Normalize RNA data with log normalization
GAL005.hashtag <- NormalizeData(GAL005.hashtag)
# Pick variable features with the "mean.var.plot" method, then scale only
# those features
GAL005.hashtag <- FindVariableFeatures(GAL005.hashtag, selection.method = "mean.var.plot")
GAL005.hashtag <- ScaleData(GAL005.hashtag, features = VariableFeatures(GAL005.hashtag))

In [None]:
# Add HTO data as a new assay independent from RNA
GAL005.hashtag[["HTO"]] <- CreateAssayObject(counts = GAL005.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
GAL005.hashtag <- NormalizeData(GAL005.hashtag, assay = "HTO", normalization.method = "CLR")

In [None]:

# Demultiplex on the HTO assay. The HTO_maxID and HTO_classification.global
# metadata columns read further down are presumably added by this call.
GAL005.hashtag <- HTODemux(GAL005.hashtag, assay = "HTO", positive.quantile = 0.99)

In [None]:
# Jupyter (repr) figure size for the plot below
options(repr.plot.width=20, repr.plot.height=10)

# Group cells by HTO_maxID and draw ridge plots for the first 12 HTO features.
# NOTE(review): `[1:12]` yields NA entries if the assay has fewer than 12
# rows -- confirm every library carries at least 12 hashtags.
Idents(GAL005.hashtag) <- "HTO_maxID"
RidgePlot(GAL005.hashtag, assay = "HTO", features = rownames(GAL005.hashtag[["HTO"]])[1:12], ncol = 3)

In [None]:
options(repr.plot.width=20, repr.plot.height=10)

# Switch the active identities to the global classification; the
# `idents = "Negative"` subset below relies on this.
Idents(GAL005.hashtag) <- "HTO_classification.global"


In [None]:
options(repr.plot.width=12, repr.plot.height=8)

DefaultAssay(GAL005.hashtag) <- "RNA"
# First, we will remove negative cells from the object
GAL005.hashtag.subset <- subset(GAL005.hashtag, idents = "Negative", invert = TRUE)

# Calculate a distance matrix from the transposed HTO assay data.
# NOTE(review): hto.dist.mtx is not read anywhere else in the visible code
# and is overwritten by every library section.
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = GAL005.hashtag.subset, assay = "HTO"))))


In [None]:
options(repr.plot.width=11, repr.plot.height=7)

# Make RNA the default assay (no cells are removed in this cell)
DefaultAssay(GAL005.hashtag) <- "RNA"

# The heatmap is drawn from the full object, not from GAL005.hashtag.subset
HTOHeatmap(GAL005.hashtag, assay = "HTO", ncells = 5000)


In [None]:
# Extract the singlets
Idents(GAL005.hashtag) <- "HTO_classification.global"

GAL005.singlet <- subset(GAL005.hashtag, idents = "Singlet")



In [None]:
options(repr.plot.width=20, repr.plot.height=7)

# RNA count and feature distributions (log scale), split by global HTO class
VlnPlot(GAL005.hashtag, features = c("nCount_RNA", "nFeature_RNA"), split.by = "HTO_classification.global",
                                           ncol = 5, log=TRUE) + NoLegend()

## Subset singlets

In [None]:
# Re-extract the demultiplexed singlets of every library. Filtering on the
# HTO_classification.global metadata column (rather than `idents = "Singlet"`)
# makes the result independent of whichever identities happen to be active on
# each object when this cell is run.
H2.singlet <- subset(H2.hashtag, subset = HTO_classification.global == "Singlet")
H1.singlet <- subset(H1.hashtag, subset = HTO_classification.global == "Singlet")
GAL002.singlet <- subset(GAL002.hashtag, subset = HTO_classification.global == "Singlet")
GAL004.singlet <- subset(GAL004.hashtag, subset = HTO_classification.global == "Singlet")
GAL005.singlet <- subset(GAL005.hashtag, subset = HTO_classification.global == "Singlet")


In [None]:
# Tag every cell with the library it came from; later cells split and filter
# on this `Library` column.
H2.singlet@meta.data$Library <- "H2"
H1.singlet@meta.data$Library <- "H1"
GAL002.singlet@meta.data$Library <- "GAL002"
GAL004.singlet@meta.data$Library <- "GAL004"
GAL005.singlet@meta.data$Library <- "GAL005"


In [None]:
# Combine the singlets of all five libraries into one object
se.big.singlet <- merge(
  x = H1.singlet,
  y = list(H2.singlet, GAL002.singlet, GAL004.singlet, GAL005.singlet)
)

In [None]:
# Metadata column -> feature-name pattern whose share of counts is stored in
# that column (species totals, then mitochondrial / ribosomal per species).
feature_patterns <- c(
  percent.human = "GRCh38-",
  percent.mouse = "mm10---",
  percent.hu.mt = "GRCh38-MT-",
  percent.hu.ribo = "GRCh38-RP[SL]",
  percent.m.mt = "mm10---mt-",
  percent.m.ribo = "mm10---Rp[sl]"
)

for (meta_col in names(feature_patterns)) {
  se.big.singlet[[meta_col]] <- PercentageFeatureSet(
    se.big.singlet,
    pattern = feature_patterns[[meta_col]]
  )
}



In [None]:
# Hashtags labelled as "FIRE" background; every other hashtag is labelled
# "NLGF".
fire_hashtags <- c(
  "AB56", "AB57", "AB86", "AB79", "AB90", "AB92", "AB93", "AB88",
  "AB12", "AB13", "AB14", "AB16"
)
se.big.singlet@meta.data[["background"]] <- ifelse(
  se.big.singlet@meta.data[["hash.ID"]] %in% fire_hashtags,
  "FIRE",
  "NLGF"
)

In [None]:
# Checkpoint the merged, unfiltered singlets
saveRDS(object = se.big.singlet, file = "./LecMicro_Human_B1B2.singlets.raw.RDS")

# Remove low quality cells 

In [None]:
# Drop hashtag AB09 because that sample failed
se.big.singlet <- subset(se.big.singlet, subset = hash.ID != "AB09")

In [None]:
# Keep human genes only: features whose name contains the "GRCh38-" tag
keep <- grepl("GRCh38-", rownames(se.big.singlet))
se.big.singlet.hu <- subset(x = se.big.singlet, features = which(keep))


In [None]:
# Split the human-only object back into one object per library so that the
# outlier thresholds below are computed within each library.
H1.singlet <- subset(x = se.big.singlet.hu, subset = Library == "H1")
H2.singlet <- subset(x = se.big.singlet.hu, subset = Library == "H2")
GAL002.singlet <- subset(x = se.big.singlet.hu, subset = Library == "GAL002")
GAL004.singlet <- subset(x = se.big.singlet.hu, subset = Library == "GAL004")
GAL005.singlet <- subset(x = se.big.singlet.hu, subset = Library == "GAL005")

In [None]:
# Outlier helpers. Thresholds are derived from the object passed in, so calling
# them once per library gives library-specific cut-offs.

# Flag values of `metric` lying more than `nmads` median absolute deviations
# away from the median, in either direction.
#
# adata:  object for which `adata[[metric]][[1]]` yields the numeric values.
# metric: name of the column to test.
# nmads:  number of MADs tolerated on each side of the median.
#
# Returns a logical vector, TRUE where the value is an outlier.
is_outlier <- function(adata, metric, nmads) {
  values <- adata[[metric]][[1]]
  centre <- median(values)
  tolerance <- nmads * mad(values)
  too_low <- values < centre - tolerance
  too_high <- centre + tolerance < values
  too_low | too_high
}

# Flag values of `metric` lying more than `nmads` median absolute deviations
# ABOVE the median.
#
# NOTE: despite the "_lower" suffix this tests the high tail only; values far
# below the median are never flagged. The name is kept for existing callers.
#
# adata:  object for which `adata[[metric]][[1]]` yields the numeric values.
# metric: name of the column to test.
# nmads:  number of MADs tolerated above the median.
#
# Returns a logical vector, TRUE where the value is an outlier.
is_outlier_lower <- function(adata, metric, nmads) {
  values <- adata[[metric]][[1]]
  upper_bound <- median(values) + nmads * mad(values)
  upper_bound < values
}

# Flag values of `metric` lying more than `nmads` median absolute deviations
# BELOW the median.
#
# NOTE: despite the "_upper" suffix this tests the low tail only; values far
# above the median are never flagged. The name is kept for existing callers.
#
# adata:  object for which `adata[[metric]][[1]]` yields the numeric values.
# metric: name of the column to test.
# nmads:  number of MADs tolerated below the median.
#
# Returns a logical vector, TRUE where the value is an outlier.
is_outlier_upper <- function(adata, metric, nmads) {
  values <- adata[[metric]][[1]]
  lower_bound <- median(values) - nmads * mad(values)
  values < lower_bound
}

In [None]:
# Libraries to process
dataset_names <- c("H1", "H2", "GAL002", "GAL004", "GAL005")

# For every "<library>.singlet" object: add log1p-transformed count / feature
# columns and an `outlier` flag, then write the object back under its name.
for (name in dataset_names) {
  object_name <- paste0(name, ".singlet")
  singlet_obj <- get(object_name)

  # log1p versions of the depth metrics, stored as log1p_<metric>
  for (metric in c("nCount_RNA", "nFeature_RNA")) {
    singlet_obj[[paste0("log1p_", metric)]] <- log1p(singlet_obj[[metric]])
  }

  # Depth metrics are tested on both tails (2.5 MADs). The mitochondrial and
  # mouse percentages go through is_outlier_lower(), which flags values ABOVE
  # the median (2.5 and 5 MADs respectively).
  depth_flag <- is_outlier(singlet_obj, "log1p_nCount_RNA", 2.5)
  feature_flag <- is_outlier(singlet_obj, "log1p_nFeature_RNA", 2.5)
  mito_flag <- is_outlier_lower(singlet_obj, "percent.hu.mt", 2.5)
  mouse_flag <- is_outlier_lower(singlet_obj, "percent.mouse", 5)
  singlet_obj@meta.data$outlier <- depth_flag | feature_flag | mito_flag | mouse_flag

  assign(object_name, singlet_obj)
}


In [None]:
# Draw the four QC metrics for the non-outlier cells of one library. The
# classic theme (with centred titles, no x-axis title and no legend) is applied
# to every panel of the patchwork.
plot_qc_violins <- function(singlet_obj) {
  qc_theme <- theme_classic(base_size = 18) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 14),
      legend.position = "none"
    )
  VlnPlot(
    subset(singlet_obj, subset = outlier == FALSE),
    features = c("nCount_RNA", "nFeature_RNA", "percent.human", "percent.hu.mt"),
    ncol = 4,
    log = FALSE
  ) &
    NoLegend() &
    qc_theme
}

plot_qc_violins(H1.singlet)


In [None]:
plot_qc_violins(H2.singlet)


In [None]:
plot_qc_violins(GAL002.singlet)


In [None]:
plot_qc_violins(GAL004.singlet)


In [None]:
plot_qc_violins(GAL005.singlet)


In [None]:
# Remove the flagged outliers: "<library>.singlet.sub" holds the cells of
# "<library>.singlet" whose `outlier` flag is FALSE.
for (lib in c("H1", "H2", "GAL002", "GAL004", "GAL005")) {
  assign(
    paste0(lib, ".singlet.sub"),
    subset(get(paste0(lib, ".singlet")), subset = outlier == FALSE)
  )
}

In [None]:
# Recombine the outlier-filtered libraries into one object
se.big.singlet.sub <- merge(
  x = H1.singlet.sub,
  y = list(
    H2.singlet.sub,
    GAL002.singlet.sub,
    GAL004.singlet.sub,
    GAL005.singlet.sub
  )
)

In [None]:
# Remove genes detected in 0.5% of cells or fewer.
# (Earlier comments said 1%, but the threshold applied has always been 0.5;
# the threshold is left unchanged so downstream results stay the same.)

counts <- GetAssayData(se.big.singlet.sub, slot = "counts", assay = "RNA")
# Percentage of cells with a non-zero count, per gene
genes.percent.expressed <- rowMeans(counts > 0) * 100

# Select genes detected in more than 0.5% of cells
genes.filter <- names(genes.percent.expressed[genes.percent.expressed > 0.5])
counts.sub <- counts[genes.filter, ]

# Rebuild the object from the filtered counts. The metadata is taken from the
# object the counts came from, so its rows match the retained cells exactly
# (the pre-filter `se.big.singlet` metadata also contains the removed outlier
# cells). The result deliberately replaces `se.big.singlet`, which the
# following cells read.
se.big.singlet <- CreateSeuratObject(
  counts = counts.sub,
  meta.data = se.big.singlet.sub@meta.data
)



# DoubletFinder

In [None]:
# One Seurat object per library
seu_list <- SplitObject(se.big.singlet, split.by = "Library")

In [None]:
# Preprocess every library (normalize, 3000 variable features, scale, PCA,
# UMAP on PCs 1-20) and run the DoubletFinder parameter sweep on it. The
# processed objects replace the entries of seu_list; the sweep summaries are
# collected in sweep.stats.list in the same order.
sweep.stats.list <- vector("list", length(seu_list))
for (i in seq_along(seu_list)) {
  print(unique(seu_list[[i]]@meta.data$Library))
  seu_list[[i]] <- NormalizeData(seu_list[[i]])
  seu_list[[i]] <- FindVariableFeatures(seu_list[[i]], selection.method = "vst", nfeatures = 3000)
  seu_list[[i]] <- ScaleData(seu_list[[i]])
  seu_list[[i]] <- RunPCA(seu_list[[i]])
  seu_list[[i]] <- RunUMAP(seu_list[[i]], dims = 1:20)
  sweep.res.list <- paramSweep_v3(seu_list[[i]], PCs = 1:20, sct = FALSE)
  sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
  sweep.stats.list[[i]] <- sweep.stats
}

In [None]:
# Choose, for every library, the pK with the highest BCmetric and plot the
# BCmetric curve with the chosen value marked. pk.vec[[i]] holds the pK for
# seu_list[[i]].
pk.vec <- vector("list", length(seu_list))

for (i in seq_along(seu_list)) {
  bcmvn <- find.pK(sweep.stats.list[[i]])

  # pK is stored as a factor: go through character to recover the numbers
  pK <- as.numeric(as.character(bcmvn$pK))
  BCmetric <- bcmvn$BCmetric
  # which.max() always returns a single index (first maximum, NAs ignored), so
  # exactly one pK is passed on even when the maximum is tied.
  best <- which.max(BCmetric)
  pK_choose <- pK[best]

  plot(
    x = pK, y = BCmetric, pch = 16, type = "b",
    col = "blue", lty = 1, xlim = c(0, 0.35)
  )
  abline(v = pK_choose, lwd = 2, col = "red", lty = 2)
  title("The BCmvn distributions")
  text(pK_choose, BCmetric[best], as.character(pK_choose), pos = 4, col = "red")

  pk.vec[[i]] <- pK_choose
}

In [None]:
# Run DoubletFinder on every library with its chosen pK, expecting 2% of the
# cells to be doublets.
for (i in seq_along(seu_list)) {
  seu_temp <- seu_list[[i]]
  # NOTE(review): nExp is left un-rounded on purpose. DoubletFinder appears to
  # embed this value in the name of the classification column it adds, so
  # rounding it would rename that column -- confirm before changing.
  nExp_poi <- 0.02 * nrow(seu_temp@meta.data)
  seu_list[[i]] <- doubletFinder_v3(
    seu_temp,
    PCs = 1:20, pN = 0.25, pK = pk.vec[[i]], nExp = nExp_poi,
    reuse.pANN = FALSE, sct = FALSE
  )
}



In [None]:
# For every library, show the UMAP coloured by orig.ident next to the UMAP
# coloured by the DoubletFinder classification.
options(repr.plot.width = 14, repr.plot.height = 6)

for (i in seq_along(seu_list)) {
  seu_temp <- seu_list[[i]]

  # Name of the classification column produced with pN = 0.25
  DF.name <- grep("classifications_0.25", colnames(seu_temp@meta.data), value = TRUE)
  print(DF.name)

  print(cowplot::plot_grid(
    ncol = 2,
    DimPlot(seu_temp, group.by = "orig.ident") + NoAxes(),
    DimPlot(seu_temp, group.by = DF.name) + NoAxes()
  ))
}

In [None]:
seu_list_df <- list()

# Print the DoubletFinder classification column of every library
for (i in seq_along(seu_list)) {
  seu_temp <- seu_list[[i]]
  x <- grep("classifications_0.25", colnames(seu_temp@meta.data), value = TRUE)
  print(x)
}




In [None]:
# Keep only the cells DoubletFinder classified as "Singlet". The
# classification column is looked up by its name prefix instead of being
# hard-coded: the suffix carries the pK and nExp of the run, which change
# whenever the input cells or the chosen pK change.
seu_list_df <- unname(lapply(seu_list, function(seu_temp) {
  df_col <- grep(
    "^DF\\.classifications_0\\.25_",
    colnames(seu_temp@meta.data),
    value = TRUE
  )
  if (length(df_col) != 1L) {
    stop(
      "Expected exactly one DF.classifications_0.25_* column, found ",
      length(df_col),
      call. = FALSE
    )
  }
  is_singlet <- seu_temp@meta.data[[df_col]] == "Singlet"
  subset(seu_temp, cells = rownames(seu_temp@meta.data)[which(is_singlet)])
}))


In [None]:
# Library label of every element of seu_list, as a plain character vector
# (vapply() fails loudly if an element does not carry exactly one label).
samps <- vapply(
  seu_list,
  function(seu) as.character(unique(seu@meta.data$Library)),
  character(1),
  USE.NAMES = FALSE
)

# Merge the doublet-filtered libraries, prefixing every cell name with its
# library label. seu_list_df is in the same order as seu_list.
se.big.singlet.df <- merge(
  seu_list_df[[1]],
  y = seu_list_df[-1],
  add.cell.ids = samps,
  project = "LecMicro"
)

In [None]:
# Save the fully preprocessed object
saveRDS(se.big.singlet.df, './scrna_lecMicro_preprocess_df.RDS')