Skip to content

Commit

Permalink
Merge pull request #896 from satijalab/fix/sctransform_fixes
Browse files Browse the repository at this point in the history
Fixes for SCTransform using `vars.to.regress`
  • Loading branch information
dcollins15 committed Feb 7, 2024
2 parents 9ad1627 + 5d3fdc5 commit fc3bbe2
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 10 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Seurat
Version: 5.0.1.9004
Date: 2024-01-22
Version: 5.0.1.9005
Date: 2024-01-30
Title: Tools for Single Cell Genomics
Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
Authors@R: c(
Expand Down Expand Up @@ -113,7 +113,7 @@ Collate:
'sketching.R'
'tree.R'
'utilities.R'
RoxygenNote: 7.3.0
RoxygenNote: 7.3.1
Encoding: UTF-8
Suggests:
ape,
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Changes

- Fixed `SCTransform` to handle `vars.to.regress` ([#8148](https://github.com/satijalab/seurat/issues/8148))
- Fixed `SCTransform` to handle `vars.to.regress` ([#8148](https://github.com/satijalab/seurat/issues/8148)) and ([#8349](https://github.com/satijalab/seurat/issues/8349))
- Fixed `SCTransform` to handle fetching residuals ([#8185](https://github.com/satijalab/seurat/issues/8185))

# Seurat 5.0.1 (2023-11-16)
Expand Down
4 changes: 0 additions & 4 deletions R/objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -1999,10 +1999,6 @@ VariableFeatures.SCTAssay <- function(
if (length(x = var.features) == 0) {
return(NULL)
}
for (i in 1:length(x = layer)) {
vst_out <- SCTModel_to_vst(SCTModel = slot(object = object, name = "SCTModel.list")[[layer[[i]]]])
var.features <- var.features[names(x = var.features) %in% rownames(x = vst_out$gene_attr)]
}
tie.val <- var.features[min(nfeatures, length(x = var.features))]
features <- names(x = var.features[which(x = var.features > tie.val)])
if (length(x = features) > 0) {
Expand Down
13 changes: 12 additions & 1 deletion R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -3136,6 +3136,7 @@ SampleUMI <- function(
#' use this residual variance cutoff; this is only used when \code{variable.features.n}
#' is set to NULL; default is 1.3. Only applied if residual.features is not set.
#' @param vars.to.regress Variables to regress out in a second non-regularized linear
#' @param latent.data Extra data to regress out, should be cells x latent data
#' regression. For example, percent.mito. Default is NULL
#' @param do.scale Whether to scale residuals to have unit variance; default is FALSE
#' @param do.center Whether to center residuals to have mean zero; default is TRUE
Expand Down Expand Up @@ -3180,6 +3181,7 @@ SCTransform.default <- function(
variable.features.n = 3000,
variable.features.rv.th = 1.3,
vars.to.regress = NULL,
latent.data = NULL,
do.scale = FALSE,
do.center = TRUE,
clip.range = c(-sqrt(x = ncol(x = umi) / 30), sqrt(x = ncol(x = umi) / 30)),
Expand Down Expand Up @@ -3418,7 +3420,7 @@ SCTransform.default <- function(
scale.data,
features = NULL,
vars.to.regress = vars.to.regress,
latent.data = cell.attr[, vars.to.regress, drop = FALSE],
latent.data = latent.data,
model.use = 'linear',
use.umi = FALSE,
do.scale = do.scale,
Expand Down Expand Up @@ -3452,6 +3454,7 @@ SCTransform.Assay <- function(
variable.features.n = 3000,
variable.features.rv.th = 1.3,
vars.to.regress = NULL,
latent.data = NULL,
do.scale = FALSE,
do.center = TRUE,
clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)),
Expand Down Expand Up @@ -3480,6 +3483,7 @@ SCTransform.Assay <- function(
variable.features.n = variable.features.n,
variable.features.rv.th = variable.features.rv.th,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
do.scale = do.scale,
do.center = do.center,
clip.range = clip.range,
Expand Down Expand Up @@ -3558,6 +3562,12 @@ SCTransform.Seurat <- function(
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
if (any(vars.to.regress %in% colnames(x = object[[]]))) {
vars.to.regress.subset <- vars.to.regress[vars.to.regress %in% colnames(x = object[[]])]
latent.data <- object[[vars.to.regress.subset]]
} else {
latent.data <- NULL
}
assay <- assay %||% DefaultAssay(object = object)
if (assay == "SCT") {
# if re-running SCTransform, use the RNA assay
Expand All @@ -3578,6 +3588,7 @@ SCTransform.Seurat <- function(
variable.features.n = variable.features.n,
variable.features.rv.th = variable.features.rv.th,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
do.scale = do.scale,
do.center = do.center,
clip.range = clip.range,
Expand Down
19 changes: 19 additions & 0 deletions R/preprocessing5.R
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,7 @@ SCTransform.IterableMatrix <- function(
variable.features.n = 3000,
variable.features.rv.th = 1.3,
vars.to.regress = NULL,
latent.data = NULL,
do.scale = FALSE,
do.center = TRUE,
clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)),
Expand Down Expand Up @@ -1118,6 +1119,7 @@ SCTransform.IterableMatrix <- function(
variable.features.n = variable.features.n,
variable.features.rv.th = variable.features.rv.th,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
do.scale = do.scale,
do.center = do.center,
clip.range = clip.range,
Expand Down Expand Up @@ -1180,6 +1182,7 @@ SCTransform.StdAssay <- function(
variable.features.n = 3000,
variable.features.rv.th = 1.3,
vars.to.regress = NULL,
latent.data = NULL,
do.scale = FALSE,
do.center = TRUE,
clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)),
Expand Down Expand Up @@ -1253,6 +1256,7 @@ SCTransform.StdAssay <- function(
variable.features.n = variable.features.n,
variable.features.rv.th = variable.features.rv.th,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
do.scale = do.scale,
do.center = do.center,
clip.range = clip.range,
Expand Down Expand Up @@ -1353,6 +1357,7 @@ SCTransform.StdAssay <- function(
new.residuals,
features = NULL,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
model.use = 'linear',
use.umi = FALSE,
do.scale = do.scale,
Expand Down Expand Up @@ -1408,6 +1413,20 @@ SCTransform.StdAssay <- function(
verbose = FALSE)
old_residual <- GetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data')
merged_residual <- rbind(old_residual, new_residual)
merged_residual <- ScaleData(
merged_residual,
features = NULL,
vars.to.regress = vars.to.regress,
latent.data = latent.data,
model.use = 'linear',
use.umi = FALSE,
do.scale = do.scale,
do.center = do.center,
scale.max = Inf,
block.size = 750,
min.cells.to.block = 3000,
verbose = verbose
)
sct.assay.list[[layer.name]] <- SetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data', new.data = merged_residual)
VariableFeatures(sct.assay.list[[layer.name]]) <- rownames(x = merged_residual)
}
Expand Down
7 changes: 6 additions & 1 deletion man/SCTransform.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions tests/testthat/test_preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,70 @@ test_that("SCTransform v2 works as expected", {
expect_equal(fa["FCER2", "theta"], Inf)
})

test_that("SCTransform `vars.to.regress` param works as expected", {
# make a copy of the testing data
test.data <- object
# add a fake mitochondrial gene to the counts matrix
counts <- LayerData(test.data, assay = "RNA", layer = "counts")
counts <- rbind(counts, 5)
rownames(counts)[nrow(counts)] <- "MT-TEST"
# use the fake feature to populate a new meta.data column
test.data[[ "percent.mt" ]] <- PercentageFeatureSet(
test.data,
pattern="^MT-"
)

# make sure that `ncells` is smaller than the datset being transformed
# so tha the regression model is trained on a subset of the data - make sure
# the regression is applied to the entire dataset
left <- suppressWarnings(
SCTransform(
test.data,
vars.to.regress = NULL,
ncells = ncol(test.data) / 2,
verbose = FALSE
)
)
right <- suppressWarnings(
SCTransform(
test.data,
vars.to.regress = "percent.mt",
ncells = ncol(test.data) / 2,
verbose = FALSE
)
)
expect_false(identical(left[["SCT"]]$scale.data, right[["SCT"]]$scale.data))

# if the `assay` points to an `Assay5` instance the regression is handled
# using separate logic
test.data[["RNAv5"]] <- CreateAssay5Object(
counts = LayerData(
test.data,
assay = "RNA",
layer = "counts"
)
)
left <- suppressWarnings(
SCTransform(
test.data,
assay = "RNAv5",
vars.to.regress = NULL,
ncells = ncol(test.data) / 2,
verbose = FALSE
)
)
right <- suppressWarnings(
SCTransform(
test.data,
assay = "RNAv5",
vars.to.regress = "percent.mt",
ncells = ncol(test.data) / 2,
verbose = FALSE
)
)
expect_false(identical(left[["SCT"]]$scale.data, right[["SCT"]]$scale.data))
})

test_that("SCTransform is equivalent for BPcells ", {
skip_on_cran()
skip_on_cran()
Expand Down

0 comments on commit fc3bbe2

Please sign in to comment.