Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added bioinformatics/data/chr20.vcf.gz
Binary file not shown.
Binary file added bioinformatics/data/chr20.vcf.gz.tbi
Binary file not shown.
4 changes: 4 additions & 0 deletions bioinformatics/data/get_bam.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
#
# Download the HG00154 chromosome 20 low-coverage alignment (BAM) and its
# .bai index from the 1000 Genomes FTP server into the current directory.

# Stop at the first failed download so a missing BAM is not masked by a
# successful download of the index (or the other way round).
set -euo pipefail

readonly BASE_URL="ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00154/alignment"
readonly BAM_NAME="HG00154.chrom20.ILLUMINA.bwa.GBR.low_coverage.20101123.bam"

# The index lives next to the BAM and has the same name plus ".bai".
for file in "${BAM_NAME}" "${BAM_NAME}.bai"; do
    wget "${BASE_URL}/${file}"
done
29 changes: 29 additions & 0 deletions bioinformatics/tools/bcftools_filter.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env cwl-runner
# Tool wrapper: `bcftools filter -i <condition> <input_file>`, with standard
# output captured into the file named by `output_filename`.

cwlVersion: v1.0
class: CommandLineTool
label: "filter variants"

baseCommand: bcftools
# These arguments carry no explicit position, so they precede the positioned
# inputs below; `condition` therefore becomes the value of `-i`.
arguments:
  - "filter"
  - "-i"
stdout: $(inputs.output_filename)

inputs:
  condition:
    # Expression handed to `-i`.
    type: string
    inputBinding:
      position: 1

  input_file:
    type: File
    streamable: true
    inputBinding:
      position: 2

  output_filename:
    # Not bound to the command line; it only names the captured stdout file.
    type: string?
    default: output_filtered.vcf

outputs:
  subset:
    type: stdout
25 changes: 25 additions & 0 deletions bioinformatics/tools/samtools_merge.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env cwl-runner
# Tool wrapper: `samtools merge <output_bam> <input_bams...>`.

cwlVersion: v1.0
class: CommandLineTool
label: "merge bams"

baseCommand: samtools
arguments:
  - "merge"

inputs:
  output_bam:
    # File name given to samtools for the merged result; the same name is
    # used by the output glob below to collect it.
    type: string
    inputBinding:
      position: 2

  input_bams:
    # All input BAMs are appended after the output name.
    type: File[]
    inputBinding:
      position: 3

outputs:
  merged_bam:
    type: File
    outputBinding:
      glob: $(inputs.output_bam)
25 changes: 25 additions & 0 deletions bioinformatics/tools/samtools_sort.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env cwl-runner
# Tool wrapper: `samtools sort -o <output_bam> <input_bam>`.

cwlVersion: v1.0
class: CommandLineTool
label: "sort bam"

baseCommand: samtools
arguments:
  - "sort"
  - "-o"

inputs:
  output_bam:
    # Value of `-o`: it is the first positioned input, so it directly follows
    # the un-positioned arguments above. Also drives the output glob.
    type: string
    inputBinding:
      position: 3

  input_bam:
    type: File
    inputBinding:
      position: 4

outputs:
  sorted_bam:
    type: File
    outputBinding:
      glob: $(inputs.output_bam)
28 changes: 28 additions & 0 deletions bioinformatics/tools/samtools_view.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env cwl-runner
# Tool wrapper around samtools_view_region.sh: invoked as
# `samtools_view_region.sh <region file> <input_bam>`, with standard output
# captured into the file named by `output_bam`.

cwlVersion: v1.0
class: CommandLineTool
label: "filter bam"

# The helper script must be resolvable on PATH when the tool runs.
baseCommand: samtools_view_region.sh
stdout: $(inputs.output_bam)

inputs:
  region:
    # Text file whose content is the region string (one region per file).
    type: File
    inputBinding:
      position: 1

  input_bam:
    type: File
    # Index that accompanies the BAM. The data fetched by data/get_bam.sh is
    # `<name>.bam.bai`, so the pattern is `.bai`; the previous `.csi` pattern
    # did not match any file that script provides.
    secondaryFiles: .bai
    inputBinding:
      position: 2

  output_bam:
    # Not bound to the command line; it only names the captured stdout file.
    type: string?
    default: out.bam

outputs:
  filtered_bam:
    type: stdout
8 changes: 8 additions & 0 deletions bioinformatics/tools/samtools_view_region.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Run `samtools view -b` on a BAM file, restricted to the region whose
# specification is stored in a text file. The BAM output is written to
# standard output.
#
# Usage: samtools_view_region.sh REGION_FILE [INPUT_BAM]
#   REGION_FILE  file whose content is the region string passed to samtools
#   INPUT_BAM    BAM file to read (default: /dev/stdin)
#
# Any further arguments are ignored, as before.

# Fail on the first error instead of calling samtools with a bad region.
set -euo pipefail

if [[ $# -lt 1 ]]; then
    echo "usage: $(basename "$0") REGION_FILE [INPUT_BAM]" >&2
    exit 1
fi

readonly REGION_FILE="${1}"
readonly INPUTFILE="${2:-/dev/stdin}"

# Command substitution strips the trailing newline of the region file.
region=$(< "${REGION_FILE}")
if [[ -z "${region}" ]]; then
    echo "error: region file '${REGION_FILE}' is empty" >&2
    exit 1
fi

# Quote the input path so file names containing spaces survive word splitting.
samtools view -b "${INPUTFILE}" "${region}"
29 changes: 29 additions & 0 deletions bioinformatics/tools/tabix_extract.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env cwl-runner
# Tool wrapper: `tabix -h <input_file> <region>`, with standard output
# captured into the file named by `output_filename`.

cwlVersion: v1.0
class: CommandLineTool
label: "extract region"

baseCommand: tabix
arguments:
  - "-h"  # include header
stdout: $(inputs.output_filename)

inputs:
  input_file:
    type: File
    # The `.tbi` index is expected alongside the input file.
    secondaryFiles: .tbi
    inputBinding:
      position: 1

  region:
    type: string
    inputBinding:
      position: 2

  output_filename:
    # Not bound to the command line; it only names the captured stdout file.
    type: string?
    default: output_tabix.vcf

outputs:
  subset_by_region:
    type: stdout
24 changes: 24 additions & 0 deletions bioinformatics/tools/tabix_index.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env cwl-runner
# Tool wrapper: `tabix -p vcf <vcf basename>`, run on a copy of the VCF that
# is staged into the working directory.

cwlVersion: v1.0
class: CommandLineTool
label: "vcf index"

baseCommand: tabix
arguments:
  - "-p"
  - "vcf"

requirements:
  # Stage the input into the working directory -- presumably so that the
  # index tabix creates ends up next to it and can be collected as output.
  InitialWorkDirRequirement:
    listing:
      - $(inputs.vcf)

inputs:
  vcf:
    type: File
    inputBinding:
      position: 1
      # Pass only the file name, i.e. refer to the staged copy.
      valueFrom: $(self.basename)

outputs:
  indexed_vcf:
    # The staged VCF, returned together with its `.tbi` index.
    type: File
    secondaryFiles: .tbi
    outputBinding:
      glob: $(inputs.vcf.basename)
28 changes: 28 additions & 0 deletions bioinformatics/tools/vcf_to_region.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env cwl-runner
# Tool wrapper around vcf_to_region.sh: invoked as
# `vcf_to_region.sh <windowsize> <input_file>`; every file matching
# `regions.*.txt` in the working directory is collected as output.

cwlVersion: v1.0
class: CommandLineTool
label: "VCF to regions"

# The helper script must be resolvable on PATH when the tool runs.
baseCommand: vcf_to_region.sh

inputs:
  windowsize:
    type: int
    default: 50
    inputBinding:
      position: 1

  input_file:
    type: File
    streamable: true
    inputBinding:
      position: 2

outputs:
  regions:
    type: File[]
    outputBinding:
      glob: "regions.*.txt"
26 changes: 26 additions & 0 deletions bioinformatics/tools/vcf_to_region.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Write one region file per data line of a VCF into the current directory.
#
# Usage: vcf_to_region.sh [WINDOWSIZE] [INPUT_VCF]
#   WINDOWSIZE  number of positions added on each side of a record's
#               position (default: 50)
#   INPUT_VCF   VCF to read (default: /dev/stdin)
#
# For a record with first column CHROM and second column POS the file
# regions.CHROM_POS.txt is created, containing the single line
# "CHROM:START-END" where START = max(1, POS - WINDOWSIZE) and
# END = POS + WINDOWSIZE.

# Propagate failures (e.g. an unreadable input) instead of exiting 0 with
# no region files written.
set -euo pipefail

# vcf_to_region WINDOW INPUT
#   Emit the region files described above for every line of INPUT that does
#   not start with '#'. Returns non-zero if WINDOW is not a non-negative
#   integer or if awk fails.
function vcf_to_region {
    local window="$1"
    local input="$2"

    if ! [[ "$window" =~ ^[0-9]+$ ]]; then
        echo "vcf_to_region: window size must be a non-negative integer, got '${window}'" >&2
        return 1
    fi

    # awk writes each region file directly, so no scratch file is left in the
    # output directory. close() after every record keeps the number of open
    # files constant; a repeated CHROM/POS pair overwrites the earlier file,
    # matching the previous behaviour.
    awk -v window="$window" '/^[^#]/{
        s = $2 - window;
        e = $2 + window;
        s = (s < 1) ? 1 : s;
        out = sprintf("regions.%s_%d.txt", $1, $2);
        printf "%s:%d-%d\n", $1, s, e > out;
        close(out);
    }' "$input"
}

readonly WINDOWSIZE=${1:-50}
readonly INPUTFILE="${2:-/dev/stdin}"

vcf_to_region "$WINDOWSIZE" "$INPUTFILE"
85 changes: 85 additions & 0 deletions bioinformatics/tools/workflow.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env cwl-runner
# Workflow: extract a region from a VCF, filter the extracted records, turn
# the remaining records into region files, run samtools_view.cwl on the BAM
# once per region file (scattered), then merge and sort the resulting BAMs.
cwlVersion: v1.0
class: Workflow

requirements:
  ScatterFeatureRequirement: {}
  StepInputExpressionRequirement: {}

inputs:
  input_vcf_file: File
  input_bam_file: File
  region: string
  condition: string
  windowsize:
    type: int
    default: 50
  output_filename: string

outputs:
  smallsortbam:
    type: File
    outputSource: samtools_sort/sorted_bam

steps:
  extract:
    run: tabix_extract.cwl
    in:
      input_file: input_vcf_file
      region: region
    out:
      - subset_by_region

  filter:
    run: bcftools_filter.cwl
    in:
      input_file: extract/subset_by_region
      condition: condition
    out:
      - subset

  regions:
    run: vcf_to_region.cwl
    in:
      windowsize: windowsize
      input_file: filter/subset
    out:
      - regions

  samtools_view:
    run: samtools_view.cwl
    # One invocation per region file produced by the `regions` step.
    scatter: region
    in:
      input_bam: input_bam_file
      region: regions/regions
      # Name each per-region BAM after its region file.
      output_bam:
        valueFrom: $(inputs.region.nameroot).bam
    out:
      - filtered_bam

  # Note: the merge and sort steps both name their output `output_filename`.
  samtools_merge:
    run: samtools_merge.cwl
    in:
      input_bams: samtools_view/filtered_bam
      output_bam: output_filename
    out:
      - merged_bam

  samtools_sort:
    run: samtools_sort.cwl
    in:
      input_bam: samtools_merge/merged_bam
      output_bam: output_filename
    out:
      - sorted_bam
13 changes: 13 additions & 0 deletions bioinformatics/work-inputs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Job inputs for tools/workflow.cwl.
# `windowsize` is not set here, so the workflow's default applies.

# Input files.
input_vcf_file:
  class: File
  path: data/chr20.vcf.gz

input_bam_file:
  class: File
  path: data/HG00154.chrom20.ILLUMINA.bwa.GBR.low_coverage.20101123.bam

# Region passed to the `extract` step.
region: "20:50000-350000"

# Expression passed to the `filter` step.
condition: "INFO/AF < 0.05"

# File name used for the merged and the sorted BAM.
output_filename: sorted.bam