Skip to content

Commit

Permalink
Merge pull request #1 from metagenome-atlas/test
Browse files Browse the repository at this point in the history
test: Add testing in ci
  • Loading branch information
SilasK committed Aug 4, 2023
2 parents 399b5a5 + 2584d8f commit e7c28bb
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 43 deletions.
43 changes: 28 additions & 15 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
Linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Lint workflow
uses: snakemake/snakemake-github-action@v1.24.0
with:
Expand All @@ -35,36 +35,49 @@ jobs:
Dryrun:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2.2.0
with:
channels: conda-forge,bioconda,defaults
- run: conda install -y snakemake
- run: snakemake --version
- name: Dryrun
run: .test/dryrun.sh
run: bash .test/dryrun.sh



Testing:
runs-on: ubuntu-latest
needs:
# - Linting
- Dryrun
- Formatting
steps:
- uses: actions/checkout@v2

- name: Test workflow
- uses: actions/checkout@v3
- run: pwd; ls -la
- name: Test workflow with spades
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: .test
with:
stagein: .test/copy_example_data.sh .test/spades
directory: .test/spades
snakefile: workflow/Snakefile
args: "--use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp"

- name: Test report

- name: Test report spades
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: .test/spades
snakefile: workflow/Snakefile
args: "--report report.zip"

- name: Test workflow with megahit
uses: snakemake/snakemake-github-action@v1.24.0
with:
stagein: .test/copy_example_data.sh .test/megahit
directory: .test/megahit
snakefile: workflow/Snakefile
args: "--use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp --config assembler=megahit "

- name: Test report megahit
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: .test
directory: .test/megahit
snakefile: workflow/Snakefile
args: "--report report.zip"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ logs/**
.snakemake/**
.test/**
!.test/dryrun.sh
!.test/copy_example_data.sh
25 changes: 25 additions & 0 deletions .test/copy_example_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# Stage the stub example reads into a test directory, laid out as
#   <test_dir>/<sample>/sequence_quality_control/<sample>_QC_<fraction>.fastq.gz
#
# Usage: copy_example_data.sh <test_dir>
#
# The example data repository is cloned into ./example_data (relative to the
# current working directory) and the read files are moved out of that clone.

set -euo pipefail

# Fail with a readable usage message before doing any work, instead of the
# terse "unbound variable" error that `set -u` would raise later on.
if [[ $# -lt 1 || -z "$1" ]]; then
    echo "Usage: $0 <test_dir>" >&2
    exit 1
fi

test_dir="$1"

src_dir="example_data/reads/stub"

# The target directory must not exist yet; check before cloning so that a
# stale directory is reported without touching the network.
if [[ -e "$test_dir" ]]; then
    echo "Error: test directory '$test_dir' already exists" >&2
    exit 1
fi

# Only the current files are needed, so a shallow clone is sufficient.
git clone --depth 1 https://github.com/metagenome-atlas/example_data.git

# Create the parent first, then the test directory itself.
# Note: `mkdir -p "$test_dir"/..` would create "$test_dir" as an intermediate
# path component, which makes the following plain `mkdir` fail with
# "File exists"; `dirname` yields the parent without that side effect.
mkdir -p -- "$(dirname -- "$test_dir")"
mkdir -- "$test_dir"

# Move the paired-end reads of every sample into the expected QC layout.
for sample in Mycoplasma Streptococcus; do

    qc_folder="$test_dir/$sample/sequence_quality_control/"
    mkdir -p -- "$qc_folder"

    for fraction in R1 R2; do
        mv -- "$src_dir/${sample}_${fraction}.fastq.gz" \
            "$qc_folder/${sample}_QC_${fraction}.fastq.gz"
    done
done
55 changes: 32 additions & 23 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,81 +1,90 @@
from pathlib import Path
import pandas as pd
import os,sys
import os, sys

# absolute path to snakemake dir
snakemake_dir = Path(workflow.snakefile).parent.resolve()


# include default config values
configfile: snakemake_dir.parent /"config/default_confg.yaml"
configfile: snakemake_dir.parent / "config/default_confg.yaml"


# global variables
PAIRED_END = bool(config["paired_end"])

logger.debug(f"Reads are: {'paired' if PAIRED_END else 'single-end'}")

MULTIFILE_FRACTIONS= ["R1", "R2", "se"] if PAIRED_END else ["se"]
MULTIFILE_FRACTIONS = ["R1", "R2"] if PAIRED_END else ["se"]


include: "rules/functions.smk"


## Input
#Functions to define input files
# Functions to define input files

def get_quality_controlled_reads(wildcards):

def get_quality_controlled_reads(wildcards):
return expand(
"{sample}/sequence_quality_control/{sample}_QC_{fraction}.fastq.gz",
fraction=MULTIFILE_FRACTIONS,
sample=wildcards.sample,
)



def get_long_reads(wildcards):
"Define the long read files to be used for assembly"
return "{sample}/sequence_quality_control/{sample}_QC_naopore.fastq.gz".format(**wildcards)

return "{sample}/sequence_quality_control/{sample}_QC_naopore.fastq.gz".format(
**wildcards
)


### Target rules

final_assembly = "Assembly/assemblies/{sample}.fasta"


rule all:
input:
assemblies= expand(final_assembly, sample=get_all_samples()),
bams = expand("Assembly/alignments/{sample}.bam",sample=get_all_samples()),
genes = expand("Assembly/annotations/genes/{sample}{ext}" , ext=[".faa", ".fna",".tsv"],sample=get_all_samples()),
stats= "stats/combined_contig_stats.tsv"
assemblies=expand(final_assembly, sample=get_all_samples()),
bams=expand("Assembly/alignments/{sample}.bam", sample=get_all_samples()),
genes=expand(
"Assembly/annotations/genes/{sample}{ext}",
ext=[".faa", ".fna", ".tsv"],
sample=get_all_samples(),
),
stats="stats/combined_contig_stats.tsv",
output:
temp(touch("finished_assembly")),



# decide which assembler to use

if config["assembler"] == "megahit":

include: "rules/megahit.smk"
raw_assembly = "Intermediate/Assembly/megahit/{sample}/{sample}_prefilter.contigs.fa"
raw_assembly = (
"Intermediate/Assembly/megahit/{sample}/{sample}_prefilter.contigs.fa"
)

elif config["assembler"] == "spades":
include: "rules/spades.smk"
raw_assembly = "Intermediate/Assembly/spades/{sample}/scaffolds.fasta"


else:
raise ValueError("Invalid assembler specified in config file")

### Rules
include: "rules/pre_assembly.smk"
include: "rules/annotations.smk"
include: "rules/post_assembly.smk"


if config["assembler"] == "megahit":

include: "rules/megahit.smk"

elif config["assembler"] == "spades":

include: "rules/spades.smk"


### Rules


include: "rules/pre_assembly.smk"
include: "rules/annotations.smk"
include: "rules/post_assembly.smk"
12 changes: 7 additions & 5 deletions workflow/rules/post_assembly.smk
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ else:
query=get_quality_controlled_reads,
target=raw_assembly,
output:
bam=temp("Intermediate/Assembly/filtering/{sample}_alignment_to_prefilter_contigs.bam"),
bam=temp(
"Intermediate/Assembly/filtering/{sample}_alignment_to_prefilter_contigs.bam"
),
params:
extra= config["minimap_extra"],
extra=config["minimap_extra"],
log:
"{sample}/logs/assembly/post_process/align_reads_to_prefiltered_contigs.log",
threads: config["threads"]
Expand All @@ -23,8 +25,8 @@ else:

rule pileup_prefilter:
input:
fasta= raw_assembly,
bam= rules.align_reads_to_prefilter_contigs.output.bam,
fasta=raw_assembly,
bam=rules.align_reads_to_prefilter_contigs.output.bam,
output:
covstats="Intermediate/Assembly/filtering/{sample}_prefilter_coverage_stats.txt",
params:
Expand All @@ -48,7 +50,7 @@ else:

rule filter_by_coverage:
input:
fasta= raw_assembly,
fasta=raw_assembly,
covstats="Intermediate/Assembly/filtering/{sample}_prefilter_coverage_stats.txt",
output:
fasta=temp("Intermediate/Assembly/filtering/{sample}_final_contigs.fasta"),
Expand Down

0 comments on commit e7c28bb

Please sign in to comment.