diff --git a/CHANGELOG.md b/CHANGELOG.md index c103557f235..3b8c16e61ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ * `scarches`: Loading of legacy models no longer asumes the model to based on SCANVI. An argument (`reference_class`) was added which need to be set in this case (PR #1035). +* `convert/from_h5mu_to_seurat` has been deprecated and will be removed in openpipeline 4.0. Use `convert/from_h5mu_or_h5ad_to_seurat` instead (PR #1046). + ## NEW FUNCTIONALITY * (Experimental) Added `from_h5mu_or_h5ad_to_tiledb` component. Warning: the functionality in this component is experimental @@ -23,6 +25,8 @@ * `liana`: enabled jobs to be run in parallel and added two new arguments: `consensus_opts`, `de_method` (PR #1039) +* `from_h5mu_or_h5ad_to_seurat`: converts an h5ad file or a single modality from an h5mu file to a seurat object (PR #1046). + ## MAJOR CHANGES * `mapping/cellranger_*`: Upgrade CellRanger to v9.0 (PR #992 and #1006). diff --git a/src/convert/from_h5mu_or_h5ad_to_seurat/config.vsh.yaml b/src/convert/from_h5mu_or_h5ad_to_seurat/config.vsh.yaml new file mode 100644 index 00000000000..208395483b9 --- /dev/null +++ b/src/convert/from_h5mu_or_h5ad_to_seurat/config.vsh.yaml @@ -0,0 +1,56 @@ +name: "from_h5mu_or_h5ad_to_seurat" +namespace: "convert" +scope: "public" +description: | + Converts an h5ad file or a single modality of an h5mu file into a Seurat file. +authors: + - __merge__: /src/authors/dorien_roosen.yaml + roles: [ author, maintainer ] +arguments: + - name: "--input" + alternatives: ["-i"] + type: file + description: Input h5ad or h5mu file + direction: input + required: true + example: input.h5ad + - name: "--modality" + type: string + default: "rna" + required: false + description: Modality to be converted if the input file is an h5mu file. + - name: "--assay" + type: string + default: "RNA" + description: Name of the assay to be created. + - name: "--output" + alternatives: ["-o"] + type: file + description: Output Seurat file + direction: output + required: true + example: output.rds +resources: + - type: r_script + path: script.R +test_resources: + - type: r_script + path: test.R + - path: /resources_test/pbmc_1k_protein_v3/ +engines: + - type: docker + image: rocker/r2u:22.04 + setup: + - type: apt + packages: [ libhdf5-dev, libgeos-dev, hdf5-tools ] + - type: r + cran: [ anndata, hdf5r, Seurat, SeuratObject ] + github: scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a + test_setup: + - type: r + cran: [ testthat ] +runners: + - type: executable + - type: nextflow + directives: + label: [lowmem, singlecpu] \ No newline at end of file diff --git a/src/convert/from_h5mu_or_h5ad_to_seurat/script.R b/src/convert/from_h5mu_or_h5ad_to_seurat/script.R new file mode 100644 index 00000000000..86515882e37 --- /dev/null +++ b/src/convert/from_h5mu_or_h5ad_to_seurat/script.R @@ -0,0 +1,81 @@ +library(anndataR) +library(hdf5r) + + +### VIASH START +par <- list( + input = "resources_test/annotation_test_data/TS_Blood_filtered.h5mu", + output = "resources_test/annotation_test_data/TS_Blood_filtered.rds", + assay = "RNA", + modality = "rna" +) +### VIASH END + +is_mudata_file <- function(file_path) { + con <- file(file_path, "rb") + tryCatch({ + # It is possible to create a MuData compatible h5 file without using + # MuData, and those could not have "MuData" as the first bytes. + # But that is really an edge case and this check should hold. + header <- readBin(con, "raw", n = 6) + identical(header, charToRaw("MuData")) + }, finally = { + close(con) + }) +} + +h5mu_to_h5ad <- function(h5mu_path, modality_name) { + tmp_path <- tempfile(fileext = ".h5ad") + mod_location <- paste("mod", modality_name, sep = "/") + h5src <- hdf5r::H5File$new(h5mu_path, "r") + h5dest <- hdf5r::H5File$new(tmp_path, "w") + # Copy over the child objects and the child attributes from root + # Root cannot be copied directly because it always exists and + # copying does not allow overwriting. + children <- hdf5r::list.objects(h5src, + path = mod_location, + full.names = FALSE, recursive = FALSE + ) + for (child in children) { + h5dest$obj_copy_from( + h5src, paste(mod_location, child, sep = "/"), + paste0("/", child) + ) + } + # Also copy the root attributes + root_attrs <- hdf5r::h5attr_names(x = h5src) + for (attr in root_attrs) { + h5a <- h5src$attr_open(attr_name = attr) + robj <- h5a$read() + h5dest$create_attr_by_name( + attr_name = attr, + obj_name = ".", + robj = robj, + space = h5a$get_space(), + dtype = h5a$get_type() + ) + } + h5src$close() + h5dest$close() + + tmp_path +} + +# If the input is a MuData file, use a single modality as input instead +if (is_mudata_file(par$input)) { + if (is.null(par$modality) || par$modality == "") { + stop("'modality' argument must be set if the input is a MuData file.") + } + h5ad_path <- h5mu_to_h5ad(par$input, par$modality) +} else { + h5ad_path <- par$input +} + +seurat_obj <- read_h5ad( + h5ad_path, + mode = "r", + as = "Seurat", + assay_name = par$assay +) + +saveRDS(seurat_obj, file = par$output) diff --git a/src/convert/from_h5mu_or_h5ad_to_seurat/test.R b/src/convert/from_h5mu_or_h5ad_to_seurat/test.R new file mode 100644 index 00000000000..09367171d21 --- /dev/null +++ b/src/convert/from_h5mu_or_h5ad_to_seurat/test.R @@ -0,0 +1,82 @@ +library(testthat, warn.conflicts = FALSE) +library(hdf5r) + + +## VIASH START +meta <- list( + executable = "target/executable/convert/from_h5ad_to_seurat", + resources_dir = "resources_test", + name = "from_h5ad_to_seurat" +) +## VIASH END + +cat("> Checking conversion of h5ad file\n") + +in_h5ad <- paste0( + meta[["resources_dir"]], + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5ad" +) +out_rds <- "output.rds" + +cat("> Running ", meta[["name"]], "\n", sep = "") +out <- processx::run( + meta[["executable"]], + c( + "--input", in_h5ad, + "--output", out_rds + ) +) + +cat("> Checking whether output file exists\n") +expect_equal(out$status, 0) +expect_true(file.exists(out_rds)) + +cat("> Reading output file\n") +obj <- readRDS(file = out_rds) + +cat("> Checking whether Seurat object is in the right format\n") +expect_is(obj, "Seurat") +expect_equal(names(slot(obj, "assays")), "RNA") + +open_file <- H5File$new(in_h5ad, mode = "r+") + +dim_rds <- dim(obj) +dim_ad <- open_file[["X"]]$attr_open("shape")$read() + +expect_equal(dim_rds[1], dim_ad[2]) +expect_equal(dim_rds[2], dim_ad[1]) + +cat("> Checking conversion of h5mu file\n") + +in_h5mu <- paste0( + meta[["resources_dir"]], + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" +) +out_rds <- "output.rds" + +cat("> Running ", meta[["name"]], "\n", sep = "") +out <- processx::run( + meta[["executable"]], + c( + "--input", in_h5mu, + "--output", out_rds + ) +) + +cat("> Checking whether output file exists\n") +expect_equal(out$status, 0) +expect_true(file.exists(out_rds)) + +cat("> Reading output file\n") +obj <- readRDS(file = out_rds) + +cat("> Checking whether Seurat object is in the right format\n") +expect_is(obj, "Seurat") +expect_equal(names(slot(obj, "assays")), "RNA") + +dim_rds <- dim(obj) +mu_in <- H5File$new(in_h5mu, mode = "r") +dim_ad <- mu_in[["/mod/rna/X"]]$attr_open("shape")$read() + +expect_equal(dim_rds[1], dim_ad[2]) +expect_equal(dim_rds[2], dim_ad[1]) diff --git a/src/convert/from_h5mu_to_seurat/config.vsh.yaml b/src/convert/from_h5mu_to_seurat/config.vsh.yaml index bc9ba3a3124..f67791a5363 100644 --- a/src/convert/from_h5mu_to_seurat/config.vsh.yaml +++ b/src/convert/from_h5mu_to_seurat/config.vsh.yaml @@ -1,6 +1,7 @@ name: "from_h5mu_to_seurat" namespace: "convert" scope: "public" +status: "deprecated" description: | Converts an h5mu file into a Seurat file.