diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fc70571af0..75092f07d2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# openpipelines dev + +## BREAKING CHANGES + +* `mapping/cellranger_*`: Upgrade CellRanger to v9.0 (PR #992). + # openpipelines 2.1.0 ## BREAKING CHANGES diff --git a/src/demux/cellranger_mkfastq/config.vsh.yaml b/src/demux/cellranger_mkfastq/config.vsh.yaml index a60ca4b1026..1751bff1c29 100644 --- a/src/demux/cellranger_mkfastq/config.vsh.yaml +++ b/src/demux/cellranger_mkfastq/config.vsh.yaml @@ -48,7 +48,7 @@ test_resources: engines: - type: docker - image: ghcr.io/data-intuitive/cellranger:8.0 + image: ghcr.io/data-intuitive/cellranger:9.0 setup: - type: docker run: | diff --git a/src/mapping/cellranger_count/config.vsh.yaml b/src/mapping/cellranger_count/config.vsh.yaml index e1a472abf30..8e3024296a5 100644 --- a/src/mapping/cellranger_count/config.vsh.yaml +++ b/src/mapping/cellranger_count/config.vsh.yaml @@ -22,6 +22,11 @@ argument_groups: required: true description: The path to Cell Ranger reference tar.gz file. Can also be a directory. example: reference.tar.gz + - type: file + name: "--feature_reference" + required: false + description: | + Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes. - name: Outputs arguments: - type: file @@ -57,6 +62,7 @@ argument_groups: - SC3Pv4: Single Cell 3' v4 - SC3Pv3LT: Single Cell 3' v3 LT - SC3Pv3HT: Single Cell 3' v3 HT + - SC5P-R2-v3: Single Cell 5' R2-only v3 (GEM-X) - SC5P-PE-v3: Single Cell 5' paired-end v3 (GEM-X) - SC5P-PE: Single Cell 5' paired-end - SC5P-R2: Single Cell 5' R2-only @@ -64,7 +70,18 @@ argument_groups: - ARC-v1: for analyzing the Gene Expression portion of Multiome data. NOTE: when the pipeline auto-detects ARC-v1 chemistry, an error is triggered. See https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information. 
- choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3, SC3Pv4, SC3Pv3LT, SC3Pv3HT, SC5P-PE-v3, SC5P-PE, SC5P-R2, SC-FB, ARC-v1 ] + choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3, SC3Pv4, SC3Pv3LT, SC3Pv3HT, SC5P-PE-v3, SC5P-PE, SC5P-R2, SC5P-R2-v3, SC-FB, ARC-v1 ] + + - type: file + name: "--tenx_cloud_token_path" + description: The 10x Cloud Analysis user token used to enable cell annotation. + + - type: string + name: "--cell_annotation_model" + description: | + Cell annotation model to use. If auto, uses the default model for the species. + If not given, does not run cell annotation. + choices: ["auto", "human_pca_v1_beta", "mouse_pca_v1_beta"] - type: boolean name: "--secondary_analysis" @@ -123,7 +140,7 @@ test_resources: - path: /src/utils/setup_logger.py engines: - type: docker - image: ghcr.io/data-intuitive/cellranger:8.0 + image: ghcr.io/data-intuitive/cellranger:9.0 setup: - type: docker run: | diff --git a/src/mapping/cellranger_count/script.sh b/src/mapping/cellranger_count/script.sh index 6bfba0b8035..81da041885f 100644 --- a/src/mapping/cellranger_count/script.sh +++ b/src/mapping/cellranger_count/script.sh @@ -61,6 +61,9 @@ cellranger count \ --fastqs="$fastq_dir" \ --transcriptome="$par_reference" \ --include-introns="$par_include_introns" \ + ${par_feature_reference:+--feature-ref="$par_feature_reference"} \ + ${par_cell_annotation_model:+--cell-annotation-model="$par_cell_annotation_model"} \ + ${par_tenx_cloud_token_path:+--tenx-cloud-token-path="$par_tenx_cloud_token_path"} \ ${meta_cpus:+--localcores=$meta_cpus} \ ${meta_memory_gb:+--localmem=$((meta_memory_gb-2))} \ ${par_expect_cells:+--expect-cells=$par_expect_cells} \ diff --git a/src/mapping/cellranger_multi/cellranger_multi.yaml b/src/mapping/cellranger_multi/cellranger_multi.yaml index a82de5a1d12..242abb07904 100644 --- a/src/mapping/cellranger_multi/cellranger_multi.yaml +++ b/src/mapping/cellranger_multi/cellranger_multi.yaml @@ -156,6 +156,7 @@ 
argument_groups: not need to specify a chemistry. - name: Sample parameters + # Corresponds to the [samples] section arguments: - type: string name: --sample_ids @@ -184,6 +185,7 @@ argument_groups: Force pipeline to use this number of cells, bypassing cell detection. - name: "Feature Barcode library specific arguments" + # Corresponds to the [feature] section arguments: - name: "--feature_reference" type: file @@ -217,6 +219,7 @@ argument_groups: according to specific experimental needs. Applicable only to datasets that include a CRISPR Guide Capture library. - name: Gene expression arguments + # Corresponds to the [gene-expression] section description: Arguments relevant to the analysis of gene expression data. arguments: - name: "--gex_reference" @@ -232,6 +235,15 @@ argument_groups: name: "--gex_generate_bam" default: false description: Whether to generate a BAM file. + - type: file + name: "--tenx_cloud_token_path" + description: The 10x Cloud Analysis user token used to enable cell annotation. + - type: string + name: "--cell_annotation_model" + description: | + Cell annotation model to use. If auto, uses the default model for the species. + If not given, does not run cell annotation. + choices: ["auto", "human_pca_v1_beta", "mouse_pca_v1_beta"] - type: integer name: --gex_expect_cells example: 3000 @@ -271,11 +283,12 @@ argument_groups: to only applicable to Fixed RNA Profiling. 
- auto: Chemistry autodetection (default) - threeprime: Single Cell 3' - - SC3Pv1, SC3Pv2, SC3Pv3, SC3Pv4: Single Cell 3' v1, v2, v3, or v4 - - SC3Pv3HT: Single Cell 3' v3.1 HT + - SC3Pv1, SC3Pv2, SC3Pv3(-polyA), SC3Pv4(-polyA): Single Cell 3' v1, v2, v3, or v4 + - SC3Pv3HT(-polyA): Single Cell 3' v3.1 HT - SC-FB: Single Cell Antibody-only 3' v2 or 5' - fiveprime: Single Cell 5' - SC5P-PE: Paired-end Single Cell 5' + - SC5P-PE-v3: Paired-end Single Cell 5' v3 - SC5P-R2: R2-only Single Cell 5' - SC5P-R2-v3: R2-only Single Cell 5' v3 - SCP5-PE-v3: Single Cell 5' paired-end v3 (GEM-X) @@ -290,11 +303,12 @@ argument_groups: - MFRP-Ab-R1: Fixed RNA Profiling (Multiplex, Antibody, Probe Barcode on R1) - ARC-v1 for analyzing the Gene Expression portion of Multiome data. If Cell Ranger auto-detects ARC-v1 chemistry, an error is triggered. See https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information. - choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3, SC3Pv4, SC3Pv3LT, SC3Pv3HT, - SC5P-PE, SC5P-R2, SC-FB, SC5P-R2-v3, SCP5-PE-v3, SC5PHT, MFRP, MFRP-R1, MFRP-RNA, MFRP-Ab, + choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3, SC3Pv3-polyA, SC3Pv4, SC3Pv4-polyA, SC3Pv3LT, SC3Pv3HT, SC3Pv3HT-polyA, + SC5P-PE, SC5P-PE-v3, SC5P-R2, SC-FB, SC5P-R2-v3, SCP5-PE-v3, SC5PHT, MFRP, MFRP-R1, MFRP-RNA, MFRP-Ab, SFRP, MFRP-Ab-R2pos50, MFRP-RNA-R1, MFRP-Ab-R1, ARC-v1] - name: "VDJ related parameters" + # The [vdj] section arguments: - name: "--vdj_reference" type: file @@ -322,16 +336,18 @@ argument_groups: Limit the length of the input Read 2 sequence of V(D)J libraries to the first N bases, where N is a user-supplied value. 
Trimming occurs before sequencing metrics are computed and therefore, limiting the length of Read 2 may affect Q30 scores - - name: Cell multiplexing parameters + - name: 3' Cell multiplexing parameters (CellPlex Multiplexing) + # cell_multiplex_oligo_ids is added to the [samples] section + # min_assignment_confidence, cmo_set and barcode_sample_assignment are added to [gene-expression] arguments: - type: string name: --cell_multiplex_oligo_ids + alternatives: [--cmo_ids] multiple: true description: | The Cell Multiplexing oligo IDs used to multiplex this sample. If multiple CMOs were used for a sample, separate IDs with a pipe (e.g., CMO301|CMO302). Required for Cell Multiplexing libraries. - # These end up in the [gene-expression] section - type: double name: --min_assignment_confidence description: | @@ -352,8 +368,31 @@ argument_groups: description: | Path to a barcode-sample assignment CSV file that specifies the barcodes that belong to each sample. - - name: Fixed RNA profiling paramaters - # These end up in the [gene-expression] section + - name: Hashtag multiplexing parameters + # Is added to [samples] + arguments: + - name: --hashtag_ids + type: string + multiple: true + description: | + The hashtag IDs used to multiplex this sample. If multiple antibody hashtags were used for the same sample, + you can separate IDs with a pipe. + + - name: On-chip multiplexing parameters + # Is added to [samples] + arguments: + - name: --ocm_barcode_ids + type: string + multiple: true + # Note: choices is not an option here because multiple values can be added using pipe + description: | + The OCM barcode IDs used to multiplex this sample. Must be one of OB1, OB2, OB3, OB4. + If multiple OCM Barcodes were used for the same sample, you can separate IDs + with a pipe (e.g., OB1|OB2). 
+ + - name: Flex multiplexing parameters + # probe_set, filter_probes and emptydrops_minimum_umis end up in [gene-expression] + # probe_barcode_ids ends up in [samples] arguments: - type: file name: "--probe_set" @@ -381,6 +420,7 @@ argument_groups: Not filtering will result in UMI counts from all non-deprecated probes, including those with predicted off-target activity, to be used in the analysis. Probes whose ID is prefixed with DEPRECATED are always excluded from the analysis. + - type: string name: "--probe_barcode_ids" multiple: true @@ -391,8 +431,16 @@ argument_groups: are separated with a "+" (no spaces). Alternatively, you can specify the Probe Barcode ID alone and Cell Ranger's barcode pairing auto-detection algorithm will automatically match to the corresponding Antibody Multiplexing Barcode. + - type: integer + name: --emptydrops_minimum_umis + min: 1 + description: | + For singleplex Flex experiments, use this option to adjust the UMI cutoff during the second step of cell calling. + Cell Ranger will still perform the full cell calling process but will only evaluate barcodes with UMIs above + the threshold you specify. - name: Antigen Capture (BEAM) libary arguments + # These end up in the [antigen-specificity] section description: | These arguments are recommended if an Antigen Capture (BEAM) library is present. It is needed to calculate the antigen specificity score. 
diff --git a/src/mapping/cellranger_multi/config.vsh.yaml b/src/mapping/cellranger_multi/config.vsh.yaml index b6e89d7ae3d..1bebd4e552e 100644 --- a/src/mapping/cellranger_multi/config.vsh.yaml +++ b/src/mapping/cellranger_multi/config.vsh.yaml @@ -47,7 +47,7 @@ test_resources: - path: /resources_test/reference_gencodev41_chr1 engines: - type: docker - image: ghcr.io/data-intuitive/cellranger:8.0 + image: ghcr.io/data-intuitive/cellranger:9.0 setup: - type: docker run: | diff --git a/src/mapping/cellranger_multi/script.py b/src/mapping/cellranger_multi/script.py index 15b6fac7892..1f3648ab1e0 100644 --- a/src/mapping/cellranger_multi/script.py +++ b/src/mapping/cellranger_multi/script.py @@ -105,6 +105,9 @@ "filter_probes": "filter-probes", "gex_r1_length": "r1-length", "gex_r2_length": "r2-length", + "tenx_cloud_token_path": "tenx-cloud-token-path", + "cell_annotation_model": "cell-annotation-model", + "emptydrops_minimum_umis": "emptydrops-minimum-umis", } FEATURE_CONFIG_KEYS = { @@ -151,6 +154,8 @@ "probe_barcode_ids": "probe_barcode_ids", "sample_expect_cells": "expect_cells", "sample_force_cells": "force_cells", + "hashtag_ids": "hashtag_ids", + "ocm_barcode_ids": "ocm_barcode_ids", } diff --git a/src/reference/build_cellranger_reference/config.vsh.yaml b/src/reference/build_cellranger_reference/config.vsh.yaml index 19b223f8e58..b3421000cb3 100644 --- a/src/reference/build_cellranger_reference/config.vsh.yaml +++ b/src/reference/build_cellranger_reference/config.vsh.yaml @@ -38,7 +38,7 @@ test_resources: engines: - type: docker - image: ghcr.io/data-intuitive/cellranger:8.0 + image: ghcr.io/data-intuitive/cellranger:9.0 setup: - type: docker run: | diff --git a/src/reference/cellranger_mkgtf/config.vsh.yaml b/src/reference/cellranger_mkgtf/config.vsh.yaml index d9104c8b65f..496fc4d82a6 100644 --- a/src/reference/cellranger_mkgtf/config.vsh.yaml +++ b/src/reference/cellranger_mkgtf/config.vsh.yaml @@ -32,7 +32,7 @@ test_resources: - path: 
/resources_test/reference_gencodev41_chr1 engines: - type: docker - image: ghcr.io/data-intuitive/cellranger:8.0 + image: ghcr.io/data-intuitive/cellranger:9.0 setup: - type: docker run: |