Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #55 from KevinMenden/dev
DSL2 version with working alevin workflow
- Loading branch information
Showing
46 changed files
with
2,609 additions
and
928 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
#!/usr/bin/env python | ||
|
||
import os | ||
import sys | ||
import errno | ||
import argparse | ||
|
||
|
||
def parse_args(args=None):
    """Build the command-line parser and parse *args* (defaults to sys.argv)."""
    parser = argparse.ArgumentParser(
        description="Reformat nf-core/cageseq samplesheet file and check its contents.",
        epilog="Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>",
    )
    parser.add_argument("FILE_IN", help="Input samplesheet file.")
    parser.add_argument("FILE_OUT", help="Output file.")
    return parser.parse_args(args)
|
||
|
||
def make_dir(path):
    """Create directory *path* (including parents) if it does not exist.

    An empty path is a no-op. Raises OSError for any failure other than
    the directory already existing.
    """
    if len(path) > 0:
        # exist_ok=True replaces the manual errno.EEXIST check. Unlike the
        # old pattern it still raises FileExistsError when *path* exists but
        # is not a directory, instead of silently swallowing the problem.
        os.makedirs(path, exist_ok=True)
|
||
|
||
def print_error(error, context="Line", context_str=""):
    """Print a samplesheet validation error and terminate with exit status 1.

    When both *context* and *context_str* are non-empty, the offending
    context (e.g. the raw line) is appended to the message.
    """
    message = "ERROR: Please check samplesheet -> {}".format(error)
    if context and context_str:
        message = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
            error, context.strip(), context_str.strip()
        )
    print(message)
    sys.exit(1)
|
||
|
||
def check_samplesheet(file_in, file_out):
    """
    Check that the samplesheet at *file_in* is well-formed and write a
    reformatted, validated version to *file_out*.

    This function checks that the samplesheet follows the following structure:
    group,replicate,fastq_1,fastq_2
    WT,1,WT_LIB1_REP1_1.fastq.gz,WT_LIB1_REP1_2.fastq.gz
    WT,1,WT_LIB2_REP1_1.fastq.gz,WT_LIB2_REP1_2.fastq.gz
    WT,2,WT_LIB1_REP2_1.fastq.gz,WT_LIB1_REP2_2.fastq.gz
    KO,1,KO_LIB1_REP1_1.fastq.gz,KO_LIB1_REP1_2.fastq.gz

    The output has header "sample,single_end,fastq_1,fastq_2" and one row per
    run, with sample ids of the form <group>_R<replicate>_T<run_index>.
    Any validation failure prints an error and exits the interpreter
    (via print_error -> sys.exit(1)).
    """

    # Nested mapping: {group: {replicate: [[single_end, fastq_1, fastq_2], ...]}}
    sample_run_dict = {}
    with open(file_in, "r") as fin:

        ## Check header
        # MIN_COLS = 3 allows single-end rows that leave fastq_2 empty.
        MIN_COLS = 3
        HEADER = ["group", "replicate", "fastq_1", "fastq_2"]
        header = [x.strip('"') for x in fin.readline().strip().split(",")]
        # Extra trailing columns beyond HEADER are tolerated; the first four
        # must match exactly.
        if header[: len(HEADER)] != HEADER:
            print(
                "ERROR: Please check samplesheet header -> {} != {}".format(
                    ",".join(header), ",".join(HEADER)
                )
            )
            sys.exit(1)

        ## Check sample entries
        for line in fin:
            lspl = [x.strip().strip('"') for x in line.strip().split(",")]

            ## Check valid number of columns per row
            # Requires len(HEADER) comma-separated fields, even if some are
            # empty; the populated-column check below is the looser MIN_COLS.
            if len(lspl) < len(HEADER):
                print_error(
                    "Invalid number of columns (minimum = {})!".format(len(HEADER)),
                    "Line",
                    line,
                )

            num_cols = len([x for x in lspl if x])
            if num_cols < MIN_COLS:
                print_error(
                    "Invalid number of populated columns (minimum = {})!".format(
                        MIN_COLS
                    ),
                    "Line",
                    line,
                )

            ## Check sample name entries
            sample, replicate, fastq_1, fastq_2 = lspl[: len(HEADER)]
            if sample:
                if sample.find(" ") != -1:
                    print_error("Group entry contains spaces!", "Line", line)
            else:
                print_error("Group entry has not been specified!", "Line", line)

            ## Check replicate entry is integer
            if not replicate.isdigit():
                print_error("Replicate id not an integer!", "Line", line)
            replicate = int(replicate)

            ## Check FastQ file extension
            for fastq in [fastq_1, fastq_2]:
                if fastq:
                    if fastq.find(" ") != -1:
                        print_error("FastQ file contains spaces!", "Line", line)
                    if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"):
                        print_error(
                            "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!",
                            "Line",
                            line,
                        )

            ## Auto-detect paired-end/single-end
            sample_info = []  ## [single_end, fastq_1, fastq_2]
            if sample and fastq_1 and fastq_2:  ## Paired-end short reads
                sample_info = ["0", fastq_1, fastq_2]
            elif sample and fastq_1 and not fastq_2:  ## Single-end short reads
                # fastq_2 is kept as the empty string so every row has the
                # same number of output columns.
                sample_info = ["1", fastq_1, fastq_2]
            else:
                print_error("Invalid combination of columns provided!", "Line", line)

            ## Create sample mapping dictionary = {sample: {replicate : [ single_end, fastq_1, fastq_2]}}
            if sample not in sample_run_dict:
                sample_run_dict[sample] = {}
            if replicate not in sample_run_dict[sample]:
                sample_run_dict[sample][replicate] = [sample_info]
            else:
                # Identical (group, replicate, fastqs) rows are duplicates;
                # distinct rows under the same replicate are extra runs.
                if sample_info in sample_run_dict[sample][replicate]:
                    print_error("Samplesheet contains duplicate rows!", "Line", line)
                else:
                    sample_run_dict[sample][replicate].append(sample_info)

    ## Write validated samplesheet with appropriate columns
    if len(sample_run_dict) > 0:
        out_dir = os.path.dirname(file_out)
        make_dir(out_dir)
        with open(file_out, "w") as fout:

            fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n")
            for sample in sorted(sample_run_dict.keys()):

                ## Check that replicate ids are in format 1..<NUM_REPS>
                # Replicate ids are unique ints, so the set covers 1..N
                # exactly when its size equals its maximum.
                uniq_rep_ids = set(sample_run_dict[sample].keys())
                if len(uniq_rep_ids) != max(uniq_rep_ids):
                    print_error(
                        "Replicate ids must start with 1..<num_replicates>!",
                        "Group",
                        sample,
                    )

                for replicate in sorted(sample_run_dict[sample].keys()):

                    ## Check that multiple runs of the same sample are of the same datatype
                    # x[0] is the single_end flag; all runs of a replicate
                    # must agree with the first one.
                    if not all(
                        x[0] == sample_run_dict[sample][replicate][0][0]
                        for x in sample_run_dict[sample][replicate]
                    ):
                        print_error(
                            "Multiple runs of a sample must be of the same datatype!",
                            "Group",
                            sample,
                        )

                    ## Write to file
                    # Run index T is 1-based within the replicate.
                    for idx, sample_info in enumerate(
                        sample_run_dict[sample][replicate]
                    ):
                        sample_id = "{}_R{}_T{}".format(sample, replicate, idx + 1)
                        fout.write(",".join([sample_id] + sample_info) + "\n")
|
||
|
||
def main(args=None):
    """Entry point: parse CLI arguments, then validate and reformat the samplesheet."""
    parsed = parse_args(args)
    check_samplesheet(parsed.FILE_IN, parsed.FILE_OUT)
|
||
|
||
# Script entry point: propagate main()'s return value to the shell
# (None maps to exit status 0).
if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,40 @@ | ||
/*
 * -------------------------------------------------
 *  nf-core/scrnaseq Nextflow base config file
 * -------------------------------------------------
 * A 'blank slate' config file, appropriate for general
 * use on most high performance compute environments.
 * Assumes that all software is installed and available
 * on the PATH. Runs in `local` mode - all jobs will be
 * run on the logged in environment.
 */

// NOTE(review): the remainder of this span interleaves BOTH sides of a diff
// (duplicated cpus/memory/time directives, unbalanced braces, the old
// low/mid/high_memory labels alongside the new process_low/medium/high
// labels, and a params block). It is NOT a valid standalone config file
// as-is -- reconcile against the actual repository version before use.
process {

    // Per-process defaults; check_max() caps each value at params.max_*.
    cpus = { check_max( 1 * task.attempt, 'cpus' ) }
    memory = { check_max( 7.GB * task.attempt, 'memory' ) }
    time = { check_max( 4.h * task.attempt, 'time' ) }
    // NOTE(review): duplicate defaults -- the other side of the diff.
    cpus = { check_max( 1 * task.attempt, 'cpus' ) }
    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
    time = { check_max( 4.h * task.attempt, 'time' ) }

    // Retry once on common kill/OOM exit codes, otherwise let queued
    // tasks finish before stopping.
    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
    maxRetries = 1
    maxErrors = '-1'

    // Process-specific resource requirements
    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
    // NOTE(review): maxRetries/maxErrors repeated from the other diff side.
    maxRetries = 1
    maxErrors = '-1'

    // Process-specific resource requirements
    // NOTE(review): old-style label (missing its closing brace here).
    withLabel: low_memory {
        cpus = { check_max (4, 'cpus')}
        memory = { check_max( 16.GB * task.attempt, 'memory' ) }
    withLabel:process_low {
        cpus = { check_max( 2 * task.attempt, 'cpus' ) }
        memory = { check_max( 12.GB * task.attempt, 'memory' ) }
        time = { check_max( 4.h * task.attempt, 'time' ) }
    }
    withLabel: mid_memory {
        cpus = { check_max (8, 'cpus')}
        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
        time = { check_max( 8.h * task.attempt, 'time' ) }
    withLabel:process_medium {
        cpus = { check_max( 6 * task.attempt, 'cpus' ) }
        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
        time = { check_max( 8.h * task.attempt, 'time' ) }
    }
    withLabel: high_memory {
        cpus = { check_max (10, 'cpus')}
        memory = { check_max( 80.GB * task.attempt, 'memory' ) }
        time = { check_max( 8.h * task.attempt, 'time' ) }
    withLabel:process_high {
        cpus = { check_max( 12 * task.attempt, 'cpus' ) }
        memory = { check_max( 72.GB * task.attempt, 'memory' ) }
        time = { check_max( 16.h * task.attempt, 'time' ) }
    }

}

// NOTE(review): removed-side params block; the labels below it sit outside
// any process scope in this residue.
params {
    // Defaults only, expecting to be overwritten
    max_memory = 128.GB
    max_cpus = 16
    max_time = 240.h
    igenomes_base = 's3://ngi-igenomes/igenomes/'
}
withLabel:process_long {
    time = { check_max( 20.h * task.attempt, 'time' ) }
}
withLabel:error_ignore {
    errorStrategy = 'ignore'
}
withLabel:error_retry {
    errorStrategy = 'retry'
    maxRetries = 2
}
// NOTE(review): PLASMIDID looks carried over from another nf-core pipeline's
// template (e.g. viralrecon) -- confirm it belongs in scrnaseq.
withName:PLASMIDID {
    errorStrategy = 'ignore'
}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/usr/bin/env nextflow

/*
 * ---------------------------------------------------------
 * Nextflow config file for module specific default options
 * ---------------------------------------------------------
 * Each entry supplies per-module defaults: extra command-line
 * arguments (args) and output publishing options (publish_dir /
 * publish_files).
 */

params {
    modules {
        'cellranger_mkgtf' {
            publish_dir = "cellranger/mkgtf"
            // --attribute filters passed to mkgtf for the listed
            // gene_biotype values.
            args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene"
        }
        'cellranger_mkref' {
            publish_dir = "cellranger/mkref"
            // Reference outputs are not published.
            publish_files = false
        }
        'cellranger_count' {
            publish_dir = "cellranger/count"
            // Publish only .gz files, into filtered_feature_bc_matrix.
            publish_files = ['gz':'filtered_feature_bc_matrix']
        }
        'gffread_tx2pgene' {
            // Emit a transcript_id -> gene_id table (tx2gene mapping).
            args = "--table transcript_id,gene_id"
        }
        // Modules below take no extra arguments by default.
        'salmon_alevin' {
            args = ""
        }
        'salmon_index' {
            args = ""
        }
        'alevinqc' {
            args = ""
        }
        'multiqc_alevin' {
            args = ""
        }
        'star_genomegenerate' {
            args = ""
        }
        'star_align' {
            // STARsolo defaults: simple CB+UMI chemistry, gzipped input,
            // two-pass mapping, coordinate-sorted BAM plus bedGraph signal.
            args = "--soloType CB_UMI_Simple --readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate"
        }
    }
}
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.