Added example for how to run paired end reads

ssadedin · Feb 21, 2013 · 14c9a50 · 14c9a50
1 parent 9d2751e
commit 14c9a50
Show file tree

Hide file tree

Showing 4 changed files with 104 additions and 0 deletions.
diff --git a/examples/paired_end_alignment/pipeline.groovy b/examples/paired_end_alignment/pipeline.groovy
@@ -0,0 +1,61 @@
+// 
+// ----------- Example: Paired End Alignment
+//
+// Illustrates how to align paired end reads from eg. Illumina
+// using BWA.
+//
+// Notes:
+//
+// 1. to run this example you need a human reference genome
+//    downloaded and indexed in ~/hg19/gatk.ucsc.hg19.fasta"
+//    or you can change the reference below to point to one
+//    you already have.
+//
+// 2. the magic $threads variable used below will (by default) use
+//    50% of all the available cores on your computer for each
+//    alignment command (thus taking 100% of available threads). You can 
+//    control it by running using the -n command, eg, to run using 4 cores 
+//    in total:
+//
+//      bpipe run -n 4 pipeline.groovy 
+//
+// ----------- Reference
+//
+// The reference that we are aligning to
+// This must have already been indexed using something like:
+//
+//     bwa index -a bwtsw ~/hg19/gatk.ucsc.hg19.fasta
+//
+REF="~/hg19/gatk.ucsc.hg19.fasta"
+
+align_bwa = { 
+
+    // We are going to transform FASTQ into two .sai files and a .bam file
+    transform("sai","sai","bam") {
+
+        // Alignment with bwa consists of two separate commands:
+        //
+        //   1. finding the SA coordinates by running bwa aln
+        //   2. converting coordinates to actual read alignments and
+        //      matching ends together with bwa sampe
+
+        // Step 1 - bwa aln
+        multi "gunzip -c $input1.gz |  bwa aln -t $threads $REF - > $output1",
+              "gunzip -c $input2.gz |  bwa aln -t $threads $REF - > $output2"
+
+        // Step 2 - bwa sampe
+        exec """
+            bwa sampe $REF $output1 $output2 $input1.gz $input2.gz | 
+            samtools view -bSu - | 
+            samtools sort - $output.bam.prefix
+        """
+    }
+}
+
+// Single sample, simple execution
+run { align_bwa }
+
+// Multiple samples where file names begin with sample
+// name and are separated by underscore from the rest of the 
+// file name
+run { "%_*.fastq.gz" * [ align_bwa ] }
diff --git a/examples/paired_end_alignment/sample1_r1.fastq.gz b/examples/paired_end_alignment/sample1_r1.fastq.gz
diff --git a/examples/paired_end_alignment/sample1_r2.fastq.gz b/examples/paired_end_alignment/sample1_r2.fastq.gz
diff --git a/src/misc/create_examples.groovy b/src/misc/create_examples.groovy
@@ -0,0 +1,43 @@
+//
+// A hack script that converts "live" examples into Wiki format
+// so that examples can be edited in place and updated on the wiki
+// easily.
+//
+new File("../../examples").eachDir { exampleDir ->
+  println " Processing $exampleDir ".center(80,'-')
+  println "\n"
+
+  def lines = new File(exampleDir, "pipeline.groovy").readLines()
+  int headerIndex = lines.findIndexOf { it.startsWith("// ------") }
+
+  int endHeader = lines.findIndexOf(headerIndex+1) { !it.startsWith("//") } - 1
+
+
+  def exampleHumanName = (lines[headerIndex] =~ '^[/ ]*-{1,} (.*$)')[0][1].replaceAll("Example: *","")
+  def exampleWikiName = exampleHumanName.replaceAll(" ","")+".wiki"
+
+  println "Example name is $exampleWikiName"
+
+  println "Header is from $headerIndex - $endHeader"
+
+  boolean firstHeader = true
+
+  // Now transform to simple wiki syntax
+  output =  ["#summary Bpipe Example : $exampleHumanName",
+             "#sidebar Reference" ] +             
+            lines[headerIndex..endHeader].collect { line ->
+                 line = line.replaceAll("^// *","")
+                            .replaceAll('^-{1,} (.*$)', (firstHeader ? '== $1 ==' : '=== $1 ==='))
+                 firstHeader = false    
+                 return line
+            } +
+            ["{{{"] +
+            lines[(endHeader+1)..-1] +
+            ["}}}"]
+
+  println output.join("\n")
+
+ new File(exampleWikiName).text = output.join("\n") 
+
+}
+