# Task 1: Dataset Integration

Ensure cell type homogeneity of the current annotations across datasets

- Integrate scRNA-seq datasets using **RPCA**, **CCA**, and **Harmony** to
check the comparability across datasets.

- STACAS (Andreatta et al., 2024) is used in a separate script.

In [0]:
.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat_v2", .libPaths()))

library(Seurat)
library(dplyr)
library(ggplot2)
library(patchwork)
options(future.globals.maxSize = 1e9)


In [0]:
# Load the two pre-processed Seurat objects, one per study.
reynolds <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/REYNOLDS_PROCESSED_TFM.rds")
alkon <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/ALKON_PROCESSED_TFM.rds")

In [0]:
# Record the study of origin in a per-cell `dataset` metadata column.
reynolds$dataset <- "reynolds"
alkon$dataset <- "alkon"

In [0]:
# Copy each study's own cell type annotation into a shared `celltype_AR`
# column so both objects expose the annotation under the same name.
reynolds$celltype_AR <- reynolds$h_celltype
alkon$celltype_AR <- alkon$h_celltype_v4

In [0]:
# Inspect the condition labels present in the Alkon metadata.
unique(alkon$Condition)

In [0]:
# Rename the "AD" condition to "Lesional"; every other label is kept as is.
# The column is coerced to character first: if `Condition` is stored as a
# factor, ifelse() would otherwise return the integer level codes for the
# untouched cells instead of their labels.
alkon_condition <- as.character(alkon$Condition)
alkon$Condition <- ifelse(alkon_condition == "AD", "Lesional", alkon_condition)

In [0]:
# Start by labelling every Reynolds cell as healthy control ("HC").
reynolds$Condition <- "HC"

# Cells with Status "Eczema" are relabelled according to their Site;
# all remaining cells keep the "HC" default.
from_eczema <- reynolds$Status == "Eczema"
reynolds$Condition[from_eczema & reynolds$Site == "non_lesion"] <- "Non_lesional"
reynolds$Condition[from_eczema & reynolds$Site == "lesion"] <- "Lesional"


In [0]:
# Check the resulting set of condition labels.
unique(reynolds$Condition)

In [0]:
# Expose the harmonised condition under the shared `Condition_AR` column.
reynolds$Condition_AR <- reynolds$Condition
alkon$Condition_AR <- alkon$Condition

In [0]:
# Use the donor identifier as the sample identifier for Reynolds.
reynolds$Sample_id <- reynolds$donor_id

## Normalization and scaling for PCA and UMAP

In [0]:
# Combine both studies into a single object; cell names are prefixed with
# the study name passed in `add.cell.ids`.
AR <- merge(x = alkon, y = reynolds, add.cell.ids = c("alkon", "reynolds"))

In [0]:
# Normalise, pick variable features, scale and run PCA on the merged
# (not yet integrated) object.
AR <- NormalizeData(object = AR)
AR <- FindVariableFeatures(object = AR)
AR <- ScaleData(object = AR)
AR <- RunPCA(object = AR)

In [0]:
# Cluster on the first 30 principal components; the cluster labels are
# stored under "unintegrated_clusters".
AR <- FindNeighbors(object = AR, reduction = "pca", dims = 1:30)
AR <- FindClusters(
  object = AR,
  resolution = 2,
  cluster.name = "unintegrated_clusters"
)

In [0]:
# Checkpoint the merged, clustered object (written before the UMAP is added).
saveRDS(object = AR, file = "/dbfs/mnt/sandbox/TFM_PAULA/MERGED_ARdatasets_TFM.rds")

In [0]:
# UMAP on the unintegrated PCA, kept under its own reduction name.
AR <- RunUMAP(
  object = AR,
  reduction = "pca",
  dims = 1:30,
  reduction.name = "umap.unintegrated"
)

In [0]:
# Visualize by batch and by cell type annotation.
options(repr.plot.width = 1600, repr.plot.height = 1200)
DimPlot(
  object = AR,
  reduction = "umap.unintegrated",
  group.by = c("dataset", "celltype_AR")
)

In [0]:
# Same grouping, shown on the PCA embedding.
DimPlot(object = AR, reduction = "pca", group.by = c("dataset", "celltype_AR"))

In [0]:
# Reload the merged object from its checkpoint file.
AR <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/MERGED_ARdatasets_TFM.rds")

## SCTransform for the integration
It is performed on each dataset before merging, because normalizing each dataset separately is the correct way to do it.

Note that this single command replaces NormalizeData(), ScaleData(), and FindVariableFeatures().


In [0]:
# SCTransform the Alkon RNA assay into a new "SCT" assay.
# conserve.memory = TRUE is requested to reduce memory use.
alkon <- SCTransform(
  object = alkon,
  assay = "RNA",
  new.assay.name = "SCT",
  conserve.memory = TRUE
)

In [0]:
# Checkpoint the SCTransformed Alkon object.
saveRDS(object = alkon, file = "/dbfs/mnt/sandbox/TFM_PAULA/alkon_SCT_TFM.rds")

In [0]:
# Same normalisation for the Reynolds object.
reynolds <- SCTransform(
  object = reynolds,
  assay = "RNA",
  new.assay.name = "SCT",
  conserve.memory = TRUE
)

In [0]:
# Checkpoint the SCTransformed Reynolds object.
saveRDS(object = reynolds, file = "/dbfs/mnt/sandbox/TFM_PAULA/reynolds_SCT_TFM.rds")

## Read the SCTransformed objects


In [0]:
# Reload the SCTransformed objects from their checkpoint files.
alkon <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/alkon_SCT_TFM.rds")
reynolds <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/reynolds_SCT_TFM.rds")

In [0]:
# Merge the two studies, prefixing cell names with the study name.
AR <- merge(x = alkon, y = reynolds, add.cell.ids = c("alkon", "reynolds"))

In [0]:
# Print a summary of the merged object.
AR

In [0]:
# Show which assay is currently the default.
DefaultAssay(AR)

In [0]:
# The merged object does not carry variable features over, so they are
# rebuilt here as the features that are variable in both datasets.
a_features <- VariableFeatures(alkon)
r_features <- VariableFeatures(reynolds)
common_features <- intersect(a_features, r_features)

In [0]:
# Register the shared features as the variable features of the merged object.
VariableFeatures(AR) <- common_features

In [0]:
# Print the summary again after setting the variable features.
AR

### PCA and UMAP before integration, for later comparison

In [0]:
# PCA and clustering on the merged object before any integration, to have
# a baseline to compare the integrated results against.
AR <- RunPCA(object = AR)
AR <- FindNeighbors(object = AR, reduction = "pca", dims = 1:30)
AR <- FindClusters(
  object = AR,
  resolution = 2,
  cluster.name = "unintegrated_clusters"
)

In [0]:
# Visualize by batch and cell type annotation on the PCA embedding.
options(repr.plot.width = 1600, repr.plot.height = 1200)
DimPlot(object = AR, reduction = "pca", group.by = c("dataset", "celltype_AR"))

In [0]:
# UMAP on the unintegrated PCA, kept under its own reduction name.
AR <- RunUMAP(
  object = AR,
  reduction = "pca",
  dims = 1:30,
  reduction.name = "umap.unintegrated"
)

In [0]:
# Visualize by batch and cell type annotation on the UMAP, with labels.
options(repr.plot.width = 1600, repr.plot.height = 1200)
DimPlot(
  object = AR,
  reduction = "umap.unintegrated",
  group.by = c("dataset", "celltype_AR"),
  label = TRUE
)

In [0]:
# Print a summary of the merged object.
AR

In [0]:
# Open the documentation for IntegrateLayers.
help(IntegrateLayers)

In [0]:
# List the dimensional reductions stored in the object.
Reductions(AR)

In [0]:
# Make "SCT" the default assay.
DefaultAssay(AR) <- "SCT"

In [0]:
# Check the class of the merged object.
class(AR)

In [0]:
# Print the `data` slot of the SCT assay.
AR[["SCT"]]@data

In [0]:
# Print the variable features currently set on the object.
VariableFeatures(AR)

### Integration with Seurat using anchors
- RPCA
- CCA

In [0]:
# Collect the SCTransformed objects that will be integrated.
object_list <- list(alkon, reynolds)

In [0]:
# Step 1: Select integration features
features <- SelectIntegrationFeatures(object.list = object_list, nfeatures = 2000)

In [0]:
# Step 2: Prepare for SCT integration
# PrepSCTIntegration makes sure the SCT residuals for every integration
# feature are present in each object of the list.
object_list <- PrepSCTIntegration(object.list = object_list, anchor.features = features)

In [0]:
# Per-dataset PCA on the integration features (required by the RPCA anchors).
# It runs after PrepSCTIntegration, following the Seurat SCT + RPCA workflow:
# running it earlier restricts each PCA to the integration features whose
# residuals already happened to be stored in that dataset.
object_list <- lapply(X = object_list, FUN = RunPCA, features = features)

#### CCA

In [0]:
# Step 3: Find integration anchors between the prepared objects using CCA
anchors_cca <- FindIntegrationAnchors(
  object.list = object_list,
  anchor.features = features,
  normalization.method = "SCT",
  reduction = "cca"
)

In [0]:
# Step 4: Integrate the datasets from the CCA anchors
integratedAR_cca <- IntegrateData(anchorset = anchors_cca, normalization.method = "SCT")

In [0]:
# PCA on the integrated object, stored under its own reduction name.
integratedAR_cca <- RunPCA(object = integratedAR_cca, reduction.name = "pca.cca")

In [0]:
# Cluster on the first 30 components of the CCA-integrated PCA.
integratedAR_cca <- FindNeighbors(
  object = integratedAR_cca,
  reduction = "pca.cca",
  dims = 1:30
)
integratedAR_cca <- FindClusters(
  object = integratedAR_cca,
  resolution = 2,
  cluster.name = "clusters.cca"
)

In [0]:
# Create a UMAP reduction for the integrated data
integratedAR_cca <- RunUMAP(
  object = integratedAR_cca,
  reduction = "pca.cca",
  dims = 1:30,
  reduction.name = "umap.cca"
)

In [0]:
# After integration, visualize by batch and cell type annotation
options(repr.plot.width = 1600, repr.plot.height = 1200)

In [0]:
DimPlot(
  object = integratedAR_cca,
  reduction = "umap.cca",
  group.by = c("dataset", "celltype_AR")
)

In [0]:
# Save results
saveRDS(
  object = integratedAR_cca,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_CCA_TFM.rds"
)

# Then read them back
integratedAR_cca <- readRDS(
  file = "/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_CCA_TFM.rds"
)

In [0]:
# Same plot as above, with group labels drawn on the embedding.
DimPlot(
  object = integratedAR_cca,
  reduction = "umap.cca",
  group.by = c("dataset", "celltype_AR"),
  label = TRUE
)

#### RPCA

In [0]:
# Step 3: Find integration anchors between the prepared objects using RPCA
anchors_rpca <- FindIntegrationAnchors(
  object.list = object_list,
  anchor.features = features,
  normalization.method = "SCT",
  reduction = "rpca"
)

In [0]:
# Step 4: Integrate the datasets from the RPCA anchors
integratedAR <- IntegrateData(anchorset = anchors_rpca, normalization.method = "SCT")

In [0]:
# List the reductions present on the integrated object.
Reductions(integratedAR)

In [0]:
# PCA on the integrated object, stored under its own reduction name.
integratedAR <- RunPCA(object = integratedAR, reduction.name = "pca.rpca")

In [0]:
# Cluster on the first 30 components of the RPCA-integrated PCA.
integratedAR <- FindNeighbors(object = integratedAR, reduction = "pca.rpca", dims = 1:30)
integratedAR <- FindClusters(
  object = integratedAR,
  resolution = 2,
  cluster.name = "clusters.rpca"
)

In [0]:
# Create a UMAP reduction for the integrated data
integratedAR <- RunUMAP(
  object = integratedAR,
  reduction = "pca.rpca",
  dims = 1:30,
  reduction.name = "umap.rpca"
)

In [0]:
# After integration, visualize by batch and cell type annotation
options(repr.plot.width = 1600, repr.plot.height = 1200)
DimPlot(
  object = integratedAR,
  reduction = "umap.rpca",
  group.by = c("dataset", "celltype_AR")
)

In [0]:
# Optional checkpoint of the RPCA result (kept disabled, as in the original):
# #Save results
# saveRDS(integratedAR, file="/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_RPCA_TFM.rds")
# #Read results
# integratedAR_RPCA <- readRDS(file="/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_RPCA_TFM.rds")
# integratedAR_RPCA <- NULL

In [0]:
# After integration, visualize by batch and cell type annotation (labelled).
# The RPCA result is held in `integratedAR`. `integratedAR_RPCA` is only
# created by the commented-out reload above, so plotting it failed with an
# "object not found" error.
options(repr.plot.width = 1600, repr.plot.height = 1200)
DimPlot(
  integratedAR,
  reduction = "umap.rpca",
  group.by = c("dataset", "celltype_AR"),
  label = TRUE
)

## Integration with Harmony in Seurat

In [0]:
# Reload the SCTransformed objects so the Harmony workflow starts from the
# same checkpoint files as the anchor-based integrations.
reynolds <- readRDS(file="/dbfs/mnt/sandbox/TFM_PAULA/reynolds_SCT_TFM.rds")
alkon <- readRDS(file="/dbfs/mnt/sandbox/TFM_PAULA/alkon_SCT_TFM.rds")

In [0]:
# Print the per-cell dataset label of the Alkon object.
alkon$dataset

In [0]:
# Merge the two studies, prefixing cell names with the study name.
AR <- merge(alkon, y = reynolds, add.cell.ids = c("alkon", "reynolds"))

In [0]:
AR[["SCT"]] <- split(AR[["SCT"]], f = AR$dataset) # Split the SCT assay by the `dataset` label so that each dataset gets its own layers

In [0]:
# The merged object does not carry variable features over, so rebuild them
# as the features that are variable in both datasets.
r_features <- VariableFeatures(reynolds)
a_features <- VariableFeatures(alkon)
common_features <- intersect(a_features, r_features)

In [0]:
# Register the shared features as the variable features of the merged object.
VariableFeatures(AR) <- common_features

In [0]:
# PCA on the SCT assay of the merged object.
AR <- RunPCA(AR, assay="SCT")

In [0]:
# Print a summary of the merged object.
AR

In [0]:
# Error obtained when running the Harmony integration on the SCT assay:
# Error in IntegrateLayers(object = AR, method = HarmonyIntegration, normalization.method = "SCT") : 
#   None of the features provided are found in this assay
# Error in `IntegrateLayers()`:
# Error in `IntegrateLayers()`:
# ! None of the features provided are found in this assay


In [0]:
# Make "SCT" the default assay.
DefaultAssay(AR) <- "SCT"

In [0]:
# NOTE(review): the result is not assigned, so `AR` itself is left unchanged
# by this call; presumably a debugging check for the error above. Confirm.
FindVariableFeatures(AR)

In [0]:
# Attempted installation of harmony into a temporary library (kept disabled).
# It failed because of Rcpp, as happened with presto:
# Try to install harmony but problems with rcpp as with presto: Error in unloadNamespace(package) : namespace ‘Rcpp’ is imported by ‘SeuratObject’, ‘reticulate’, ‘uwot’, ‘later’, ‘RcppHNSW’, ‘spam’, ‘ggrepel’, ‘RcppAnnoy’, ‘reshape2’, ‘Rtsne’, ‘promises’, ‘httpuv’, ‘RSpectra’, ‘Seurat’, ‘plyr’ so cannot be unloaded
#  temp_lib <- tempfile()
# dir.create(temp_lib)
# install.packages(c("RhpcBLASctl", "Rcpp", "RcppArmadillo", "RcppProgress"), lib = temp_lib)
# if (!requireNamespace("devtools", quietly = TRUE)) {
#   install.packages("devtools", lib = temp_lib)
# }
# devtools::install_github("immunogenomics/harmony", lib = temp_lib)
# library(harmony, lib.loc = temp_lib)

In [0]:
# Harmony integration on the SCT assay (kept disabled: it does not work).
#Using SCT it does not work
# integrated_AR_harmony <- IntegrateLayers(object = AR, method = HarmonyIntegration,
#   orig.reduction = "pca", new.reduction = 'harmony',verbose = TRUE)

As I could not run the Harmony integration on the SCT data, I used the RNA counts and proceeded with the integration in the same way, adding the steps that are not required with SCTransform, such as scaling the data.

In [0]:
# Fall back to the RNA assay: make it the default and split it by the
# `dataset` label so that each dataset gets its own layers.
DefaultAssay(AR) <- "RNA"
AR[["RNA"]] <- split(AR[["RNA"]], f = AR$dataset)

# Variable features, scaling and PCA on the split RNA assay.
AR <- FindVariableFeatures(object = AR)
AR <- ScaleData(object = AR)
AR <- RunPCA(object = AR)

In [0]:
# Put the library that holds harmony first on the search path; the package
# has to be loaded before running the integration.
harmony_lib <- "/dbfs/home/jtrincado@almirall.com/my_r_packages/v13"
.libPaths(c(harmony_lib, .libPaths()))
library(harmony)

In [0]:
# Harmony integration of the per-dataset layers, starting from the "pca"
# reduction and storing the result as "integrated.harmony".
DefaultAssay(AR) <- "RNA"
IntegratedAR_harmony <- IntegrateLayers(
  object = AR,
  method = HarmonyIntegration,
  orig.reduction = "pca",
  new.reduction = "integrated.harmony",
  verbose = TRUE
)

In [0]:
# Join the per-dataset layers again once the integration is done.
IntegratedAR_harmony[["RNA"]] <- JoinLayers(object = IntegratedAR_harmony[["RNA"]])

In [0]:
# Cluster on the first 30 dimensions of the Harmony reduction.
IntegratedAR_harmony <- FindNeighbors(
  object = IntegratedAR_harmony,
  reduction = "integrated.harmony",
  dims = 1:30
)
IntegratedAR_harmony <- FindClusters(
  object = IntegratedAR_harmony,
  resolution = 2,
  cluster.name = "clusters.harmony"
)

In [0]:
# UMAP on the Harmony reduction, kept under its own reduction name.
IntegratedAR_harmony <- RunUMAP(
  object = IntegratedAR_harmony,
  reduction = "integrated.harmony",
  dims = 1:30,
  reduction.name = "umap.harmony"
)


In [0]:
# Save the Harmony result first. Reading the file before writing it made a
# top-to-bottom run replace the freshly computed object with whatever an
# earlier run had left on disk, or fail if the file did not exist yet.
saveRDS(IntegratedAR_harmony, file = "/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_Harmony_TFM.rds")

In [0]:
# Reload the saved object, so the plotting cells below can also be run on
# their own in a fresh session.
IntegratedAR_harmony <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/integrated_objects/integrated_AR_Harmony_TFM.rds")

In [0]:
# After integration, visualize by batch and cell type annotation
options(repr.plot.width = 1600, repr.plot.height = 1200)

In [0]:
DimPlot(
  IntegratedAR_harmony,
  reduction = "umap.harmony",
  group.by = c("dataset", "celltype_AR"),
  label = TRUE
)