In [1]:
# Load packages
suppressPackageStartupMessages({
  library(Seurat)
  library(SeuratData)
  library(ggplot2)
  library(patchwork)
  library(scales)
  library(dplyr)
  library(reshape2)
})

In [26]:
# Load dataset
InstallData(ds = "thp1.eccite")
eccite <- LoadData(ds = "thp1.eccite")
eccite <- UpdateSeuratObject(eccite)

“The following packages are already installed and will not be reinstalled: thp1.eccite”
Validating object structure

Updating object slots

Ensuring keys are in the proper structure

“Assay RNA changing from Assay to Assay”
“Assay ADT changing from Assay to Assay”
“Assay HTO changing from Assay to Assay”
“Assay GDO changing from Assay to Assay”
Ensuring keys are in the proper structure

Ensuring feature names don't have underscores or pipes

Updating slots in RNA

Updating slots in ADT

Updating slots in HTO

Updating slots in GDO

Validating object structure for Assay ‘RNA’

Validating object structure for Assay ‘ADT’

Validating object structure for Assay ‘HTO’

Validating object structure for Assay ‘GDO’

Object representation is consistent with the most current Seurat version



In [27]:
# Upsample
target_n <- 1000

rna <- GetAssayData(eccite, slot = "counts", assay = "RNA")
adt <- GetAssayData(eccite, slot = "counts", assay = "ADT")
orig_meta <- eccite[[]]
rownames(orig_meta) <- colnames(rna)
cell_names <- colnames(rna)
# Sample cell names with replacement
new_cell_names <- sample(cell_names, target_n, replace = TRUE)
new_rna <- rna[, new_cell_names]
new_adt <- adt[, new_cell_names]
upsampled_meta <- orig_meta[new_cell_names, ]
new_names <- paste0(new_cell_names, "_dup", seq_along(new_cell_names))
colnames(new_rna) <- new_names
colnames(new_adt) <- new_names
rownames(upsampled_meta) <- new_names
eccite_upsampled <- CreateSeuratObject(new_rna, meta.data = upsampled_meta)
# add upsampled data
eccite_upsampled[["RNA"]] <- CreateAssay5Object(counts = new_rna)
eccite_upsampled[["ADT"]] <- CreateAssay5Object(counts = new_adt)


In [33]:
# Preprocessing
# Protein
eccite_upsampled <- Seurat::NormalizeData(
  object = eccite_upsampled,
  assay = "ADT",
  normalization.method = "CLR",
  margin = 2)

# RNA
DefaultAssay(object = eccite_upsampled) <- 'RNA'
eccite_upsampled <- NormalizeData(object = eccite_upsampled) %>% FindVariableFeatures() %>% ScaleData()


Normalizing layer: counts

Normalizing across cells

Normalizing layer: counts

Finding variable features for layer counts

Centering and scaling data matrix



In [34]:
# Create a random matrix with one row per cell and 50 principal components
set.seed(123)
cells <- Cells(eccite_upsampled)
rand_mat <- matrix(rnorm(length(cells) * 50, 0, 1), nrow = length(cells), ncol = 50)
rownames(rand_mat) <- cells
eccite_upsampled[["pca"]] <- CreateDimReducObject(
  embeddings = rand_mat,
  key = "PC_",
  assay = DefaultAssay(eccite_upsampled)
)


“No columnames present in cell embeddings, setting to 'PC_1:50'”


In [35]:
# Mitigating confounding effects
eccite_upsampled<- CalcPerturbSig(
  object = eccite_upsampled,
  assay = "RNA",
  slot = "data",
  gd.class ="gene",
  nt.cell.class = "NT",
  reduction = "pca",
  ndims = 40,
  num.neighbors = 20,
  split.by = "replicate",
  new.assay.name = "PRTB")

Processing rep3

Processing rep2

Processing rep1

“Layer counts isn't present in the assay object; returning NULL”


In [36]:
# Prepare PRTB assay for dimensionality reduction:
# Normalize data, find variable features and center data
DefaultAssay(object = eccite_upsampled) <- 'PRTB'

# identify cells with no detectable perturbation
eccite_upsampled <- RunMixscape(
  object = eccite_upsampled,
  assay = "PRTB",
  slot = "scale.data",
  labels = "gene",
  nt.class.name = "NT",
  min.de.genes = 5,
  iter.num = 10,
  de.assay = "RNA",
  verbose = F,
  prtb.type = "KO")

For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
(default method for FindMarkers) please install the presto package
--------------------------------------------
install.packages('devtools')
devtools::install_github('immunogenomics/presto')
--------------------------------------------
After installation of presto, Seurat will automatically use the more 
efficient implementation (no further action necessary).
This message will be shown once per session

“Different cells and/or features from existing assay PRTB”
“Layer counts isn't present in the assay object; returning NULL”


number of iterations= 42 
number of iterations= 26 
number of iterations= 18 
number of iterations= 10 
number of iterations= 15 
number of iterations= 5 
number of iterations= 21 
number of iterations= 10 
number of iterations= 5 
number of iterations= 37 
number of iterations= 18 
number of iterations= 62 
number of iterations= 49 
number of iterations= 29 
number of iterations= 23 
number of iterations= 10 
number of iterations= 10 
