-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Busco subworkflow #28
Changes from all commits
4bfbbe4
b9958fd
81350bf
c593f27
3e16ed9
5ccd52f
bdc4795
3f2d23a
156673e
3ce4a04
cbd4068
39f58da
cd82840
e4b2cfb
110bb40
043dbab
621136f
fcbb381
4545974
a97172e
85f82ab
bc41e77
7599aa9
3a4f97c
64be240
ae83fb7
092724f
8fa2602
ffe31ce
f554e82
11c7447
e791849
401a572
edc91c2
df34fe2
1421d16
1f1cd3c
050c2eb
ae30a7d
ad8efe4
0e1b1df
8fbef6f
7b4e7e3
178a8f2
afd328b
905139b
d97d32b
a129609
10f0933
548de2e
5df7f6e
b7128ff
dc68ff2
1baa58a
b07355d
2f8bd02
2ace6b3
efe3398
4fac860
0a89c32
ff9b2ae
db0bf9a
95778be
e0f9d39
9225b02
1e2c570
45b424e
993f358
c2da4ba
dd50ee4
1538041
75ba460
48550d2
684effd
0870bf4
69a1f44
9957d8d
9878ab4
80fff6b
efb4345
e0660a4
230f57c
5d8fc53
4ff30de
58cb0e2
6bc1772
16c042c
301fa8d
1e6307d
cc9a5d7
2d0146f
35f96db
88495ff
a29a8a9
939e99e
dae3427
a017a26
3a1be38
9360d60
01f1f59
0e19534
386aefd
ab65ae0
3722aad
9802d97
05e3c8b
b4f92e2
07fc3db
8376b56
f38104c
91fd40d
208e5c3
eed44c7
fe5402c
d2aa6f2
d9a36ec
ce30e21
123e1c6
30e0b3b
10be5ad
a210a28
ee0d388
fbd2409
cbca827
e84ec13
2ea4ddc
72b4b23
6aa8065
d2b330c
ed6a1b4
8658908
12b5cee
d84dc3d
d0c04b4
e559e86
adfbbd1
63a2a5c
ce3ee0f
d01f4ef
b5e73f6
ce59104
0f0cc17
fdc2072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
sample,fastq_1,fastq_2 | ||
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz | ||
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, | ||
sample,datatype,datafile | ||
mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram | ||
mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram | ||
mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram | ||
mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Builds a BED file from a tab-separated table: keeps columns 1-3 and drops
// the first (header) line. One `<prefix>.bed` is written per input table.
process CREATE_BED {
    tag "${meta.id}"
    label 'process_single'

    conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
        'quay.io/biocontainers/gawk:5.1.0' }"

    input:
    // meta: sample metadata map (only `meta.id` is read here)
    // tsv : path to the tsv output from fasta windows
    tuple val(meta), path(tsv)

    output:
    path '*.bed'       , emit: bed
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Both values can be overridden per-process through `task.ext`.
    def out_prefix = task.ext.prefix ?: "${meta.id}"
    def extra_args = task.ext.args ?: ''
    """
    cut -f 1,2,3 $tsv | sed '1d' $extra_args > ${out_prefix}.bed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        create_bed: 1.01
    END_VERSIONS
    """
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Runs `btk pipeline extract-busco-genes` over three BUSCO inputs and writes
// the result to `<prefix>_busco_genes.fasta`.
// NOTE(review): the input names suggest archaea / bacteria / eukaryota BUSCO
// results for the same sample — confirm against the calling subworkflow.
process EXTRACT_BUSCO_GENES {
    tag "$meta.id"

    container "genomehubs/blobtoolkit-blobtools:3.3.4"

    input:
    tuple val(meta), path(arc), path(bac), path(euk)

    output:
    tuple val(meta), path('*_busco_genes.fasta'), emit: fasta
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Give each input its own quoted `--busco PATH` argument. Interpolating a
    // Groovy list directly renders it as `[a, b, c]`, which leaks the brackets
    // and commas into the shell command line and corrupts the paths.
    def busco_args = [arc, bac, euk].collect { "--busco \"${it}\"" }.join(' ')
    """
    btk pipeline extract-busco-genes \\
        $busco_args \\
        --out ${prefix}_busco_genes.fasta

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blobtoolkit: \$(btk --version | cut -d' ' -f2 | sed 's/v//')
    END_VERSIONS
    """
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
|
||
// Runs `goat-cli taxon search` for either a single taxon identifier or a file
// of identifiers, then derives a list of BUSCO lineages from the result.
//
// Outputs:
//   taxonsearch    - the raw search result, `<prefix>.tsv`
//   busco_lineages - `<prefix>.txt`, one lineage per line (see script notes)
process GOAT_TAXONSEARCH {
    tag "$meta.id"

    conda (params.enable_conda ? "bioconda::goat=0.2.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/goat:0.2.0--h92d785c_0':
        'quay.io/biocontainers/goat:0.2.0--h92d785c_0' }"

    input:
    tuple val(meta), val(taxon), path(taxa_file)

    output:
    path "*.tsv"                  , emit: taxonsearch
    tuple val(meta), path("*.txt"), emit: busco_lineages
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exactly one of `taxon` / `taxa_file` must be supplied; check before
    // building the command line.
    if (!taxon && !taxa_file) error "No input. Valid input: single taxon identifier or a .txt file with identifiers"
    if (taxon && taxa_file ) error "Only one input is required: a single taxon identifier or a .txt file with identifiers"
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Declared with `def` so it stays local to this task. Only the value is
    // quoted: quoting flag and value together would hand goat-cli one argv
    // element ("-f file.txt") instead of a flag followed by its value.
    def query  = taxa_file ? "-f \"${taxa_file}\"" : "-t \"${taxon}\""
    // ${prefix}.txt contains the list of BUSCO (odb10) lineages, one lineage per line without empty lines.
    // It is column 5 of the search result minus the header row, with
    // bacteria_odb10 and archaea_odb10 always appended.
    // NOTE(review): reviewers asked for this shell post-processing to move
    // eventually to Nextflow channel operations (e.g. `splitCsv` with a tab
    // separator) so that the nf-core goat/taxonsearch module can be used.
    """
    goat-cli taxon search \\
        $args \\
        $query > ${prefix}.tsv

    cut -f5 ${prefix}.tsv | sed '1d' | grep . > ${prefix}.txt
    echo "bacteria_odb10" >> ${prefix}.txt
    echo "archaea_odb10" >> ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        goat: \$(goat-cli --version | cut -d' ' -f2)
    END_VERSIONS
    """
}
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider the `--tar` option for BUSCO