Skip to content

Commit

Permalink
Merge pull request #908 from drpatelh/updates
Browse files Browse the repository at this point in the history
  • Loading branch information
drpatelh committed Dec 19, 2022
2 parents 721d772 + d410de1 commit 59a9648
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 20 deletions.
13 changes: 10 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Bump minimum Nextflow version from `21.10.3` -> `22.10.1`
- Updated pipeline template to [nf-core/tools 2.7.1](https://github.com/nf-core/tools/releases/tag/2.7.1)
- [[#896](https://github.com/nf-core/rnaseq/issues/896)] - Remove `copyTo` call for iGenomes README
- [[#897](https://github.com/nf-core/rnaseq/issues/897)] - Use `--skip_preseq` by default
- [[#900](https://github.com/nf-core/rnaseq/issues/900)] - Add `--recursive` option to `fastq_dir_to_samplesheet.py` script
- [[#902](https://github.com/nf-core/rnaseq/issues/902)] - Fix `check_samplesheet.py` script not outputting optional columns in samplesheet
- [[#907](https://github.com/nf-core/rnaseq/issues/907)] - Add `--extra_star_align_args` and `--extra_salmon_quant_args` parameters

### Parameters

| Old parameter | New parameter |
| ---------------- | ------------- |
| `--enable_conda` | |
| Old parameter | New parameter |
| ---------------- | --------------------------- |
| `--enable_conda` | |
| | `--extra_star_align_args` |
| | `--extra_salmon_quant_args` |

> **NB:** Parameter has been **updated** if both old and new parameter information is present.
> **NB:** Parameter has been **added** if just the new parameter information is present.
Expand Down
9 changes: 6 additions & 3 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def check_samplesheet(file_in, file_out):
line,
)

num_cols = len([x for x in lspl if x])
num_cols = len([x for x in lspl[: len(HEADER)] if x])
if num_cols < MIN_COLS:
print_error(
f"Invalid number of populated columns (minimum = {MIN_COLS})!",
Expand Down Expand Up @@ -124,6 +124,7 @@ def check_samplesheet(file_in, file_out):
print_error("Invalid combination of columns provided!", "Line", line)

## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, strandedness, <any optional extra columns> ]]}
sample_info = sample_info + lspl[len(HEADER) :]
if sample not in sample_mapping_dict:
sample_mapping_dict[sample] = [sample_info]
else:
Expand All @@ -137,7 +138,9 @@ def check_samplesheet(file_in, file_out):
out_dir = os.path.dirname(file_out)
make_dir(out_dir)
with open(file_out, "w") as fout:
fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"]) + "\n")
fout.write(
",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"] + header[len(HEADER) :]) + "\n"
)
for sample in sorted(sample_mapping_dict.keys()):

## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
Expand All @@ -149,7 +152,7 @@ def check_samplesheet(file_in, file_out):
)

## Check that multiple runs of the same sample are of the same strandedness
if not all(x[-1] == sample_mapping_dict[sample][0][-1] for x in sample_mapping_dict[sample]):
if not all(x[3] == sample_mapping_dict[sample][0][3] for x in sample_mapping_dict[sample]):
print_error(
f"Multiple runs of a sample must have the same strandedness!",
"Sample",
Expand Down
20 changes: 16 additions & 4 deletions bin/fastq_dir_to_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ def parse_args(args=None):
default=1,
help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.",
)
parser.add_argument(
"-re",
"--recursive",
dest="RECURSIVE",
action="store_true",
help="Whether or not to search for FastQ files recursively in <FASTQ_DIR>.",
)
return parser.parse_args(args)


Expand All @@ -80,6 +87,7 @@ def fastq_dir_to_samplesheet(
sanitise_name=False,
sanitise_name_delimiter="_",
sanitise_name_index=1,
recursive=False,
):
def sanitize_sample(path, extension):
"""Retrieve sample id from filename"""
Expand All @@ -90,27 +98,30 @@ def sanitize_sample(path, extension):
)
return sample

def get_fastqs(extension):
def get_fastqs(extension, recursive=False):
"""
Needs to be sorted to ensure R1 and R2 are in the same order
when merging technical replicates. Glob is not guaranteed to produce
sorted results.
See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered
"""
return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False))
search_path = f"*{extension}"
if recursive:
search_path = f"**/*{extension}"
return sorted(glob.glob(os.path.join(fastq_dir, search_path), recursive=recursive))

read_dict = {}

## Get read 1 files
for read1_file in get_fastqs(read1_extension):
for read1_file in get_fastqs(read1_extension, recursive):
sample = sanitize_sample(read1_file, read1_extension)
if sample not in read_dict:
read_dict[sample] = {"R1": [], "R2": []}
read_dict[sample]["R1"].append(read1_file)

## Get read 2 files
if not single_end:
for read2_file in get_fastqs(read2_extension):
for read2_file in get_fastqs(read2_extension, recursive):
sample = sanitize_sample(read2_file, read2_extension)
read_dict[sample]["R2"].append(read2_file)

Expand Down Expand Up @@ -157,6 +168,7 @@ def main(args=None):
sanitise_name=args.SANITISE_NAME,
sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER,
sanitise_name_index=args.SANITISE_NAME_INDEX,
recursive=args.RECURSIVE,
)


Expand Down
5 changes: 4 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,8 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
'--outSAMattributes NH HI AS NM MD',
'--quantTranscriptomeBan Singleend',
'--outSAMstrandField intronMotif',
params.save_unaligned ? '--outReadsUnmapped Fastx' : ''
params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
params.extra_star_align_args ?: ''
].join(' ').trim()
publishDir = [
[
Expand All @@ -541,6 +542,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
}

withName: '.*:QUANTIFY_STAR_SALMON:SALMON_QUANT' {
ext.args = params.extra_salmon_quant_args ?: ''
publishDir = [
path: { "${params.outdir}/${params.aligner}" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -1045,6 +1047,7 @@ if (!params.skip_multiqc) {
if (params.pseudo_aligner == 'salmon') {
process {
withName: '.*:QUANTIFY_SALMON:SALMON_QUANT' {
ext.args = params.extra_salmon_quant_args ?: ''
publishDir = [
path: { "${params.outdir}/${params.pseudo_aligner}" },
mode: params.publish_dir_mode,
Expand Down
4 changes: 3 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ params {
hisat2_build_memory = '200.GB' // Amount of memory required to build HISAT2 index with splice sites
stringtie_ignore_gtf = false
min_mapped_reads = 5
extra_star_align_args = null
extra_salmon_quant_args = null
save_merged_fastq = false
save_unaligned = false
save_align_intermeds = false
Expand All @@ -79,7 +81,7 @@ params {
skip_bigwig = false
skip_stringtie = false
skip_fastqc = false
skip_preseq = false
skip_preseq = true
skip_dupradar = false
skip_qualimap = false
skip_rseqc = false
Expand Down
13 changes: 12 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,16 @@
"description": "Perform reference-guided de novo assembly of transcripts using StringTie i.e. don't restrict to those in GTF file.",
"fa_icon": "fas fa-ban"
},
"extra_star_align_args": {
"type": "string",
"description": "Extra arguments to pass to STAR alignment command in addition to defaults defined by the pipeline.",
"fa_icon": "fas fa-plus"
},
"extra_salmon_quant_args": {
"type": "string",
"description": "Extra arguments to pass to Salmon quant command in addition to defaults defined by the pipeline.",
"fa_icon": "fas fa-plus"
},
"save_unaligned": {
"type": "boolean",
"fa_icon": "fas fa-save",
Expand Down Expand Up @@ -485,7 +495,8 @@
"skip_preseq": {
"type": "boolean",
"description": "Skip Preseq.",
"fa_icon": "fas fa-fast-forward"
"fa_icon": "fas fa-fast-forward",
"default": true
},
"skip_dupradar": {
"type": "boolean",
Expand Down
7 changes: 0 additions & 7 deletions workflows/rnaseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,6 @@ if (params.bam_csi_index) {
}
}

// Save AWS IGenomes file containing annotation version
def anno_readme = params.genomes[ params.genome ]?.readme
if (anno_readme && file(anno_readme).exists()) {
file("${params.outdir}/genome/").mkdirs()
file(anno_readme).copyTo("${params.outdir}/genome/")
}

// Stage dummy file to be used as an optional input where required
ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)

Expand Down

0 comments on commit 59a9648

Please sign in to comment.