Skip to content

Commit

Permalink
Merge pull request #403 from lilab-bcb/strato-local
Browse files Browse the repository at this point in the history
Fix issues in Cellranger series workflows
  • Loading branch information
yihming committed May 8, 2024
2 parents 8194a1a + b566350 commit 822c175
Show file tree
Hide file tree
Showing 20 changed files with 71 additions and 43 deletions.
2 changes: 1 addition & 1 deletion docker/cellranger-arc/2.0.2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.7.11.zip" -o "a
RUN pip3 install --upgrade pip && \
pip3 install pandas==1.4.3 && \
pip3 install packaging==21.3 && \
pip3 install stratocumulus==0.1.7
pip3 install stratocumulus==0.2.3

RUN mkdir /software
ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software
Expand Down
2 changes: 1 addition & 1 deletion docker/cellranger-atac/2.1.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade pip && \
python -m pip install pandas==1.4.3 && \
python -m pip install packaging==21.3 && \
python -m pip install stratocumulus==0.1.7
python -m pip install stratocumulus==0.2.3

RUN mkdir /software
ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software
Expand Down
4 changes: 2 additions & 2 deletions docker/cellranger/7.2.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM debian:bullseye-slim
SHELL ["/bin/bash", "-c"]

RUN apt-get update && \
apt-get install --no-install-recommends -y alien unzip rsync build-essential dpkg-dev curl gnupg procps python3 python3-pip
apt-get install --no-install-recommends -y unzip rsync build-essential dpkg-dev curl gnupg procps python3 python3-pip

RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
Expand All @@ -19,7 +19,7 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade pip --no-cache-dir && \
python -m pip install pandas==2.1.1 --no-cache-dir && \
python -m pip install packaging==23.2 --no-cache-dir && \
python -m pip install stratocumulus==0.2.1 --no-cache-dir
python -m pip install stratocumulus==0.2.3 --no-cache-dir

RUN mkdir /software
ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software
Expand Down
4 changes: 2 additions & 2 deletions docker/cellranger/8.0.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM debian:bookworm-slim
SHELL ["/bin/bash", "-c"]

RUN apt-get update && \
apt-get install --no-install-recommends -y alien unzip rsync build-essential dpkg-dev curl gnupg procps python3 python3-pip python3-venv
apt-get install --no-install-recommends -y unzip rsync build-essential dpkg-dev curl gnupg procps python3 python3-pip python3-venv

RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
Expand All @@ -20,7 +20,7 @@ ENV PATH=/software/python/bin:$PATH
RUN python -m pip install --upgrade pip --no-cache-dir && \
python -m pip install pandas==2.2.1 --no-cache-dir && \
python -m pip install packaging==24.0 --no-cache-dir && \
python -m pip install stratocumulus==0.2.1 --no-cache-dir
python -m pip install stratocumulus==0.2.3 --no-cache-dir

ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software
ADD cellranger-8.0.0.tar.gz /software
Expand Down
1 change: 1 addition & 0 deletions docker/config/0.3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ RUN apt-get -qq update && \

RUN mkdir /software
ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/yiming/docker/config/check_uri.py /software
RUN chmod a+rx /software/*
2 changes: 1 addition & 1 deletion docker/cumulus_feature_barcoding/0.11.3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ RUN python -m pip install --upgrade pip --no-cache-dir && \
python -m pip install numpy==1.26.1 --no-cache-dir && \
python -m pip install pandas==2.1.1 --no-cache-dir && \
python -m pip install matplotlib==3.8.0 --no-cache-dir && \
python -m pip install stratocumulus==0.2.1 --no-cache-dir
python -m pip install stratocumulus==0.2.3 --no-cache-dir

RUN tar -xzf /software/0.11.3.tar.gz -C /software && \
cd /software/cumulus_feature_barcoding-0.11.3 && make clean && make all && cd ../.. && \
Expand Down
4 changes: 2 additions & 2 deletions docker/spaceranger/3.0.0/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM debian:bookworm-slim
SHELL ["/bin/bash", "-c"]

RUN apt-get update && \
apt-get install --no-install-recommends -y alien unzip build-essential dpkg-dev curl gnupg procps python3 python3-pip python3-venv
apt-get install --no-install-recommends -y unzip build-essential dpkg-dev curl gnupg procps python3 python3-pip python3-venv

RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
Expand All @@ -19,7 +19,7 @@ ENV PATH=/software/python/bin:$PATH
RUN python -m pip install --upgrade pip --no-cache-dir && \
python -m pip install pandas==2.2.1 --no-cache-dir && \
python -m pip install packaging==24.0 --no-cache-dir && \
python -m pip install stratocumulus==0.2.1 --no-cache-dir
python -m pip install stratocumulus==0.2.3 --no-cache-dir

ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software
ADD spaceranger-3.0.0.tar.gz /software
Expand Down
12 changes: 6 additions & 6 deletions docs/cellranger/build_refs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ We provide a wrapper of ``cellranger mkref`` to build sc/snRNA-seq references. P
- Ensembl v94
-
* - cellranger_version
- cellranger version, could be: 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
- "7.1.0"
- "7.1.0"
- cellranger version, could be: 8.0.0, 7.2.0, 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
- "8.0.0"
- "8.0.0"
* - docker_registry
- Docker registry to use for cellranger_workflow. Options:

Expand Down Expand Up @@ -320,9 +320,9 @@ We provide a wrapper of ``cellranger mkvdjref`` to build single-cell immune prof
- Ensembl v94
-
* - cellranger_version
- cellranger version, could be: 7.2.0, 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
- "7.2.0"
- "7.2.0"
- cellranger version, could be: 8.0.0, 7.2.0, 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
- "8.0.0"
- "8.0.0"
* - docker_registry
- Docker registry to use for cellranger_workflow. Options:

Expand Down
2 changes: 2 additions & 0 deletions docs/cellranger/fixed_rna_profiling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Sample Sheet
- Description
* - **GRCh38-2020-A**
- Human GRCh38 (GENCODE v32/Ensembl 98)
* - **mm10-2020-A**
- Mouse mm10 (GENCODE vM23/Ensembl 98)

#. *DataType* column.

Expand Down
6 changes: 6 additions & 0 deletions docs/cellranger/sc_sn_rnaseq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ Sample sheet

* - Keyword
- Description
* - **GRCh38-2024-A**
- Human GRCh38, comparable to cellranger reference 2024-A (GENCODE v44/Ensembl 110). *Notice: This reference only supports Cell Ranger v6.0.0+.*
* - **GRCm39-2024-A**
- Mouse GRCm39, comparable to cellranger reference 2024-A (GENCODE vM33/Ensembl 110). *Notice: This reference only supports Cell Ranger v6.0.0+.*
* - **GRCh38_and_GRCm39-2024-A**
- Human GRCh38 (GENCODE v44/Ensembl 110) and mouse GRCm39 (GENCODE vM33/Ensembl 110). *Notice: This reference only supports Cell Ranger v6.0.0+.*
* - **GRCh38-2020-A**
- Human GRCh38 (GENCODE v32/Ensembl 98)
* - **mm10-2020-A**
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = '2.6'
# The full version, including alpha/beta/rc tags
release = '2.6.0'
release = '2.6.1'


# -- General configuration ---------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions docs/release_notes/version_2_6.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2.6.1 :small:`May 8, 2024`
^^^^^^^^^^^^^^^^^^^^^^^^^^^

* In Cellranger workflow: Add new genome references for single-cell/nucleus RNA-Seq: **GRCh38-2024-A** for human, **GRCm39-2024-A** for mouse, and **GRCh38_and_GRCm39-2024-A** for human and mouse.
* In Spaceranger workflow: Add a new probe set **mouse_probe_v2** for mouse.
* Some underlying workflow improvements.


2.6.0 :small:`April 22, 2024`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
21 changes: 16 additions & 5 deletions docs/spaceranger.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ Alternatively, users can submit jobs through command line interface (CLI) using
* - **Sample**
- Contains sample names. Each 10x channel should have a unique sample name.
* - **Reference**
-
| Provides the reference genome used by Space Ranger for each 10x channel.
| The elements in the *reference* column can be either Google bucket URLs to reference tarballs or keywords such as *GRCh38-2020-A*.
| A full list of available keywords is included in each of the following data type sections (e.g. sc/snRNA-seq) below.
- Provides the reference genome used by Space Ranger for each 10x channel. The elements in the *reference* column can be either keywords of pre-built references or Google bucket URLs to reference tarballs. Below are available keywords of pre-built references:

- **GRCh38-2020-A**: Human GRCh38 (GENCODE v32/Ensembl 98)

- **mm10-2020-A**: Mouse mm10 (GENCODE vM23/Ensembl 98)
* - **Flowcell**
-
| Indicates the Google bucket URLs of uploaded BCL folders.
Expand All @@ -88,7 +89,17 @@ Alternatively, users can submit jobs through command line interface (CLI) using
* - **Index**
- Sample index (e.g. SI-GA-A12).
* - ProbeSet
- Probe set for FFPE samples. **Choosing** from ``human_probe_v1`` (10x human probe set, CytoAssist-incompatible), ``human_probe_v2`` (10x human probe set, CytoAssist-compatible) and ``mouse_probe_v1`` (10x mouse probe set). Alternatively, a CSV file describing the probe set can be directly used. Setting ProbeSet to ``""`` for a sample implies the sample is not FFPE.
- Probe set for FFPE samples. Can be either a keyword or a cloud URI to a custom probe set. Below are keywords of available probe sets:

- **human_probe_v2**: 10x human probe set version 2, CytAssist-compatible

- **mouse_probe_v2**: 10x mouse probe set version 2, CytAssist-compatible

- **human_probe_v1**: 10x human probe set version 1, CytAssist-incompatible

- **mouse_probe_v1**: 10x mouse probe set version 1, CytAssist-compatible

**Notice:** Setting ProbeSet to ``""`` for a sample implies the sample is not FFPE.
* - Image
- Cloud bucket url for a brightfield tissue H&E image in .jpg or .tiff format. This column is mutually exclusive with DarkImage and ColorizedImage columns.
* - DarkImage
Expand Down
8 changes: 4 additions & 4 deletions workflows/cellranger/cellranger_arc_count.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,16 @@ task run_cellranger_arc_count {
directory = re.sub('/+$', '', directory) # remove trailing slashes
target = samples[i] + '_' + str(i)
try:
call_args = ['strato', 'exists', '--backend', '~{backend}', directory + '/' + samples[i] + '/']
call_args = ['strato', 'exists', directory + '/' + samples[i] + '/']
print(' '.join(call_args))
check_call(call_args, stdout=DEVNULL, stderr=STDOUT)
call_args = ['strato', 'sync', '--backend', '~{backend}', '-m', directory + '/' + samples[i], target]
call_args = ['strato', 'sync', '-m', directory + '/' + samples[i], target]
print(' '.join(call_args))
check_call(call_args)
except CalledProcessError:
if not os.path.exists(target):
os.mkdir(target)
call_args = ['strato', 'cp', '--backend', '~{backend}', '-m', directory + '/' + samples[i] + '_S*_L*_*_001.fastq.gz' , target]
call_args = ['strato', 'cp', '-m', directory + '/' + samples[i] + '_S*_L*_*_001.fastq.gz' , target]
print(' '.join(call_args))
check_call(call_args)
fout.write(os.path.abspath(target) + ',' + samples[i] + ',' + ('Gene Expression' if data_types[i] == 'rna' else 'Chromatin Accessibility') + '\n')
Expand All @@ -168,7 +168,7 @@ task run_cellranger_arc_count {
check_call(call_args)
CODE

strato sync --backend ~{backend} -m results/outs "~{output_directory}"/~{link_id}
strato sync -m results/outs "~{output_directory}"/~{link_id}
}

output {
Expand Down
8 changes: 4 additions & 4 deletions workflows/cellranger/cellranger_arc_mkfastq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ task run_cellranger_arc_mkfastq {
export TMPDIR=/tmp
export BACKEND=~{backend}
monitor_script.sh > monitoring.log &
strato cp --backend ~{backend} -r -m ~{input_bcl_directory} .
strato cp -r -m ~{input_bcl_directory} .

python <<CODE
import os
Expand Down Expand Up @@ -141,16 +141,16 @@ task run_cellranger_arc_mkfastq {

CODE

strato sync --backend ~{backend} -m results/outs "~{output_directory}/~{run_id}_arcfastqs"
strato sync -m results/outs "~{output_directory}/~{run_id}_arcfastqs"

python <<CODE
from subprocess import check_call, check_output, CalledProcessError
if '~{delete_input_bcl_directory}' == 'true':
try:
call_args = ['strato', 'exists', '--backend', '~{backend}', '~{output_directory}/~{run_id}_arcfastqs/input_samplesheet.csv']
call_args = ['strato', 'exists', '~{output_directory}/~{run_id}_arcfastqs/input_samplesheet.csv']
print(' '.join(call_args))
check_output(call_args)
call_args = ['strato', 'rm', '--backend', '~{backend}', '-m' ,'-r', '~{input_bcl_directory}']
call_args = ['strato', 'rm', '-m' ,'-r', '~{input_bcl_directory}']
print(' '.join(call_args))
check_call(call_args)
print('~{input_bcl_directory} is deleted!')
Expand Down
4 changes: 2 additions & 2 deletions workflows/cellranger/cellranger_atac_aggr.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ task run_cellranger_atac_aggr {
raise Exception("Found duplicated library id " + library_id + "!")
libs_seen.add(library_id)

call_args = ['strato', 'cp', '--backend', '~{backend}', '-m', '-r', directory, current_dir]
call_args = ['strato', 'cp', '-m', '-r', directory, current_dir]
print(' '.join(call_args))
check_call(call_args)
counts.append(library_id)
Expand All @@ -145,7 +145,7 @@ task run_cellranger_atac_aggr {
check_call(call_args)
CODE

strato sync --backend ~{backend} -m results/outs "~{output_directory}/~{aggr_id}"
strato sync -m results/outs "~{output_directory}/~{aggr_id}"
}

output {
Expand Down
8 changes: 4 additions & 4 deletions workflows/cellranger/cellranger_atac_count.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,16 @@ task run_cellranger_atac_count {
directory = re.sub('/+$', '', directory) # remove trailing slashes
target = '~{sample_id}_' + str(i)
try:
call_args = ['strato', 'exists', '--backend', '~{backend}', directory + '/~{sample_id}/']
call_args = ['strato', 'exists', directory + '/~{sample_id}/']
print(' '.join(call_args))
check_call(call_args, stdout=DEVNULL, stderr=STDOUT)
call_args = ['strato', 'sync', '--backend', '~{backend}', '-m', directory + '/~{sample_id}', target]
call_args = ['strato', 'sync', '-m', directory + '/~{sample_id}', target]
print(' '.join(call_args))
check_call(call_args)
except CalledProcessError:
if not os.path.exists(target):
os.mkdir(target)
call_args = ['strato', 'cp', '--backend', '~{backend}', '-m', directory + '/~{sample_id}' + '_S*_L*_*_001.fastq.gz' , target]
call_args = ['strato', 'cp', '-m', directory + '/~{sample_id}' + '_S*_L*_*_001.fastq.gz' , target]
print(' '.join(call_args))
check_call(call_args)
fastqs.append(target)
Expand All @@ -151,7 +151,7 @@ task run_cellranger_atac_count {
check_call(call_args)
CODE

strato sync --backend ~{backend} -m results/outs "~{output_directory}/~{sample_id}"
strato sync -m results/outs "~{output_directory}/~{sample_id}"
}

output {
Expand Down
2 changes: 1 addition & 1 deletion workflows/cellranger/cellranger_atac_create_reference.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ task run_cellranger_atac_create_reference {

cellranger-atac mkref --config=ref.config
tar -czf ~{genome}.tar.gz ~{genome}
strato cp --backend ~{backend} -m ~{genome}.tar.gz "~{output_dir}"/
strato cp -m ~{genome}.tar.gz "~{output_dir}"/
}

output {
Expand Down
8 changes: 4 additions & 4 deletions workflows/cellranger/cellranger_atac_mkfastq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ task run_cellranger_atac_mkfastq {
export TMPDIR=/tmp
export BACKEND=~{backend}
monitor_script.sh > monitoring.log &
strato cp --backend '~{backend}' -m -r ~{input_bcl_directory} .
strato cp -m -r ~{input_bcl_directory} .

python <<CODE
import os
Expand Down Expand Up @@ -140,16 +140,16 @@ task run_cellranger_atac_mkfastq {
fout.write('~{output_directory}/~{run_id}_atacfastqs/fastq_path/' + flowcell + '\n')
CODE

strato sync --backend ~{backend} -m results/outs "~{output_directory}/~{run_id}_atacfastqs"
strato sync -m results/outs "~{output_directory}/~{run_id}_atacfastqs"

python <<CODE
from subprocess import check_call, check_output, CalledProcessError
if '~{delete_input_bcl_directory}' == 'true':
try:
call_args = ['strato', 'exists', '--backend', '~{backend}', '~{output_directory}/~{run_id}_atacfastqs/input_samplesheet.csv']
call_args = ['strato', 'exists', '~{output_directory}/~{run_id}_atacfastqs/input_samplesheet.csv']
print(' '.join(call_args))
check_output(call_args)
call_args = ['strato', 'rm', '--backend', '~{backend}', '-m', '-r', '~{output_directory}/~{run_id}_atacfastqs/input_samplesheet.csv']
call_args = ['strato', 'rm', '-m', '-r', '~{output_directory}/~{run_id}_atacfastqs/input_samplesheet.csv']
print(' '.join(call_args))
check_call(call_args)
print('~{input_bcl_directory} is deleted!')
Expand Down
6 changes: 3 additions & 3 deletions workflows/cellranger/cellranger_vdj_create_reference.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ workflow cellranger_vdj_create_reference {

# Which docker registry to use
String docker_registry = "quay.io/cumulus"
# 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
String cellranger_version = "7.1.0"
# 8.0.0, 7.2.0, 7.1.0, 7.0.1, 7.0.0, 6.1.2, 6.1.1
String cellranger_version = "8.0.0"

# Disk space in GB
Int disk_space = 100
Expand Down Expand Up @@ -114,7 +114,7 @@ task run_cellranger_vdj_create_reference {
CODE

tar -czf ~{genome}.tar.gz ~{genome}
strato cp --backend ~{backend} -m ~{genome}.tar.gz "~{output_dir}"/
strato cp -m ~{genome}.tar.gz "~{output_dir}"/
}

output {
Expand Down

0 comments on commit 822c175

Please sign in to comment.