generated from snakemake-workflows/snakemake-workflow-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from metagenome-atlas/test
test: Add testing in ci
- Loading branch information
Showing
5 changed files
with
93 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ logs/** | |
.snakemake/** | ||
.test/** | ||
!.test/dryrun.sh | ||
!.test/copy_example_data.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/usr/bin/env bash | ||
|
||
set -euo pipefail | ||
|
||
git clone https://github.com/metagenome-atlas/example_data.git | ||
|
||
src_dir="example_data/reads/stub" | ||
|
||
test_dir="$1" | ||
|
||
mkdir -p "$test_dir"/.. | ||
mkdir "$test_dir" | ||
|
||
|
||
|
||
# move the R1/R2 stub reads of each sample into its quality-control folder | ||
for sample in Mycoplasma Streptococcus; do | ||
|
||
qc_folder="$test_dir/$sample/sequence_quality_control/" | ||
mkdir -p "$qc_folder" | ||
for fraction in R1 R2 ; | ||
do | ||
mv $src_dir/${sample}_${fraction}.fastq.gz "$qc_folder/${sample}_QC_${fraction}.fastq.gz" | ||
done | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,90 @@ | ||
from pathlib import Path | ||
import pandas as pd | ||
import os,sys | ||
import os, sys | ||
|
||
# absolute path to snakemake dir | ||
snakemake_dir = Path(workflow.snakefile).parent.resolve() | ||
|
||
|
||
# include default config values | ||
configfile: snakemake_dir.parent /"config/default_confg.yaml" | ||
configfile: snakemake_dir.parent / "config/default_confg.yaml" | ||
|
||
|
||
# global variables | ||
PAIRED_END = bool(config["paired_end"]) | ||
|
||
logger.debug(f"Reads are: {'paired' if PAIRED_END else 'single-end'}") | ||
|
||
MULTIFILE_FRACTIONS= ["R1", "R2", "se"] if PAIRED_END else ["se"] | ||
MULTIFILE_FRACTIONS = ["R1", "R2"] if PAIRED_END else ["se"] | ||
|
||
|
||
include: "rules/functions.smk" | ||
|
||
|
||
## Input | ||
#Functions to define input files | ||
# Functions to define input files | ||
|
||
def get_quality_controlled_reads(wildcards): | ||
|
||
def get_quality_controlled_reads(wildcards): | ||
return expand( | ||
"{sample}/sequence_quality_control/{sample}_QC_{fraction}.fastq.gz", | ||
fraction=MULTIFILE_FRACTIONS, | ||
sample=wildcards.sample, | ||
) | ||
|
||
|
||
|
||
def get_long_reads(wildcards): | ||
"Define the long read files to be used for assembly" | ||
return "{sample}/sequence_quality_control/{sample}_QC_naopore.fastq.gz".format(**wildcards) | ||
|
||
return "{sample}/sequence_quality_control/{sample}_QC_naopore.fastq.gz".format( | ||
**wildcards | ||
) | ||
|
||
|
||
### Target rules | ||
|
||
final_assembly = "Assembly/assemblies/{sample}.fasta" | ||
|
||
|
||
rule all: | ||
input: | ||
assemblies= expand(final_assembly, sample=get_all_samples()), | ||
bams = expand("Assembly/alignments/{sample}.bam",sample=get_all_samples()), | ||
genes = expand("Assembly/annotations/genes/{sample}{ext}" , ext=[".faa", ".fna",".tsv"],sample=get_all_samples()), | ||
stats= "stats/combined_contig_stats.tsv" | ||
assemblies=expand(final_assembly, sample=get_all_samples()), | ||
bams=expand("Assembly/alignments/{sample}.bam", sample=get_all_samples()), | ||
genes=expand( | ||
"Assembly/annotations/genes/{sample}{ext}", | ||
ext=[".faa", ".fna", ".tsv"], | ||
sample=get_all_samples(), | ||
), | ||
stats="stats/combined_contig_stats.tsv", | ||
output: | ||
temp(touch("finished_assembly")), | ||
|
||
|
||
|
||
# decide which assembler to use | ||
|
||
if config["assembler"] == "megahit": | ||
|
||
include: "rules/megahit.smk" | ||
raw_assembly = "Intermediate/Assembly/megahit/{sample}/{sample}_prefilter.contigs.fa" | ||
raw_assembly = ( | ||
"Intermediate/Assembly/megahit/{sample}/{sample}_prefilter.contigs.fa" | ||
) | ||
|
||
elif config["assembler"] == "spades": | ||
include: "rules/spades.smk" | ||
raw_assembly = "Intermediate/Assembly/spades/{sample}/scaffolds.fasta" | ||
|
||
|
||
else: | ||
raise ValueError("Invalid assembler specified in config file") | ||
|
||
### Rules | ||
include: "rules/pre_assembly.smk" | ||
include: "rules/annotations.smk" | ||
include: "rules/post_assembly.smk" | ||
|
||
|
||
if config["assembler"] == "megahit": | ||
|
||
include: "rules/megahit.smk" | ||
|
||
elif config["assembler"] == "spades": | ||
|
||
include: "rules/spades.smk" | ||
|
||
|
||
### Rules | ||
|
||
|
||
include: "rules/pre_assembly.smk" | ||
include: "rules/annotations.smk" | ||
include: "rules/post_assembly.smk" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters