Merge pgenome:~/project/moa11 into moa.0.11

mfiers · Aug 3, 2012 · 1669cee · 1669cee
2 parents 56e620b + 623463f
commit 1669cee
Show file tree

Hide file tree

Showing 6 changed files with 251 additions and 33 deletions.
diff --git a/bin/fastq2contigs b/bin/fastq2contigs
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('fastq')
+parser.add_argument('output')
+parser.add_argument('-n', dest='non', help='nubmer NNNs to cut a fastq file',
+                    type=int, default=10)
+
+args = parser.parse_args()
+
+F =  open(args.fastq)
+G = open(args.output, 'w')
+
+splitter = re.compile('n{'+str(args.non)+',}', re.I)
+
+header = F.readline()
+assert(header[0] == '@')
+header = header[1:].split()[0]
+print 'header', header
+
+seq = []
+for line in F:
+    if line[0] == '+': break
+    seq.append(line.strip())
+seq = "".join(seq)
+print 'read %d nt' % len(seq)
+
+contigs = splitter.split(seq)
+for i, c in enumerate(contigs):
+    G.write(">%s_%s\n" % (header, i))
+    while c:
+        G.write("%s\n" % c[:80])
+        c = c[80:]
+
+print "wrote %d contigs to %s" %  (len(contigs), sys.argv[2])
+
+F.close()
+G.close()
+
diff --git a/bin/gff_regionify b/bin/gff_regionify
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import gzip
+import time
+import cPickle
+import argparse
+import collections
+import numpy as np
+
+seqs = {}
+types = []
+
+parser = argparse.ArgumentParser(description='Do fancy region stuff based on GFFs.')
+
+parser.add_argument('-b', dest='base', help='basename for output', default='gff_regionify')
+parser.add_argument('lengths', help='file with the lengths of all sequences')
+parser.add_argument('gff', help='input gff')
+parser.add_argument('--vcf', dest='vcf', help='vcf file to categorize')
+
+args = parser.parse_args()
+
+starttime= time.time()
+
+print 'creating numpy structure'
+totn = 0
+with open(args.lengths) as F:
+    for i, line in enumerate(F):
+        si, ln = line.split()
+        ln = int(ln)
+        totn += ln
+        seqs[si] = np.zeros(ln, dtype=np.uint16)
+        if (i+1) % 72 == 0:
+            sys.stdout.write('\r%d                 ' % i)
+            sys.stdout.flush()
+    print
+print 'generated space for %d nt' % totn
+
+print 'loading annotations'
+with open(args.gff) as F:
+    for i, line in enumerate(F):
+        line = line.strip()
+        if not line: continue
+        if line[0] == '#': continue
+        ls = line.strip().split()
+        atype = ls[2]
+
+        if not atype in types:
+            print 'adding type', atype
+            types.append(atype)
+
+        sqid = ls[0]
+        start = int(ls[3])
+        stop = int(ls[4])
+        seqs[sqid][start-1:stop-1] = \
+            np.bitwise_or(seqs[sqid][start-1:stop-1], 2 ** types.index(atype))
+
+        if i % (72*9) == 0 and i > 1000:
+            sys.stdout.write('\r%d                 ' % i)
+            sys.stdout.flush()
+    print
+
+print 'summarizing'
+summ = collections.defaultdict(int)
+tlen = 0
+for j, sid in enumerate(seqs):
+    tlen += len(seqs[sid])
+    for i, t in enumerate(types):
+        summ[t] += len(np.nonzero(np.bitwise_and(seqs[sid], 2 ** i))[0])
+    if j % 72 == 0:
+        sys.stdout.write('\r%d                 ' % i)
+        sys.stdout.flush()
+print
+
+with open(args.base + '.wg.stats', 'w') as F:
+    print "total\t%s" % (tlen)
+    F.write("total\t%s\n" % (tlen))
+    for t in summ:
+        print "%s\t%s" % (t, summ[t])
+        F.write("%s\t%s\n" % (t, summ[t]))
+
+if not args.vcf:
+    sys.exit()
+
+def vcfreader(filename):
+    with open(filename) as F:
+        for line in F: 
+            line = line.strip()
+            if not line: continue
+            if line[0] == '#': continue
+            ls = line.split()
+            if len(ls) < 2: continue
+            yield  ls[0], int(ls[1])
+
+
+vsumm = collections.defaultdict(int)
+vcfi = 0
+print '\nProcessing VCF %s' % args.vcf
+for sid, pos in vcfreader(args.vcf):
+    pos = pos -1 #vcf is 1 based!!
+    try:
+        m = seqs[sid][pos]
+    except IndexError:
+        print "cannot find mask value for %s/%s" % pos, sid
+        raise
+    vcfi += 1
+    for i, t in enumerate(types):
+        inni = np.bitwise_and(m, 2 ** i)
+        if inni > 0:
+            #print "%s:%s mask %2d type %2d/%10s inni %d" % (
+            #    sid, pos, m, i, t, inni)
+            vsumm[t] += 1
+
+    if vcfi % 72 == 0:
+        sys.stdout.write('\r%d' % vcfi)
+        sys.stdout.flush()
+
+with open(args.base + '.vcf.stats', 'w') as F:
+    print "total\t%s" % (vcfi)
+    F.write("total\t%s\n" % (vcfi))
+    for t in vsumm:
+        print "%s\t%s" % (t, vsumm[t])
+        F.write("%s\t%s\n" % (t, vsumm[t]))
+
+
+
+
diff --git a/lib/python/moa/plugin/system/doc.py b/lib/python/moa/plugin/system/doc.py
@@ -24,17 +24,21 @@
 import moa.logger as l
 from moa.sysConf import sysConf
 
+# def hook_prepare_3():
+#     if not job.template.parameters.has_key('title'):
+#         job.template.parameters.title = {
+#             'optional' : False,
+#             'help' : 'A short and consise title for this job',
+#             'type' : 'string',
+#             'recursive' : False,
+#             }
+
 def hook_finish():
 
     job = sysConf['job']
-
-    if not job.template.parameters.has_key('title'):
-        job.template.parameters.title = {
-            'optional' : False,
-            'help' : 'A short and consise title for this job',
-            'type' : 'string',
-            'recursive' : False,
-            }
+
+    if not sysConf.commands[sysConf.args.command]['logJob']:
+        return
 
     message = moa.ui._textFormattedMessage(
         [sysConf.args.changeMessage,
@@ -76,8 +80,11 @@ def _readFromuser(job, header, fileName):
     """
     gather Blog or CHANGELOG information
     """
+
     #moa.utils.moaDirOrExit(job)
 
+    oldstdin = sys.stdin
+    sys.stdin = open('/dev/tty')
     txt = []
     print header, "..."
     while True:
@@ -88,6 +95,7 @@ def _readFromuser(job, header, fileName):
             break
 
     _appendMessage(fileName, txt)
+    sys.stdin = oldstdin
     return txt
 
 @moa.args.command
@@ -111,13 +119,33 @@ def blog(job, args):
 
     .. _Markdown: http://daringfireball.net/projects/markdown/ markdown.
     """
+
+    sin = _getFromStdin()
+
+
     message = _readFromuser(
         job, 
         header="Enter your blog message (ctrl-d on an empty line to finish)",
         fileName="BLOG.md")
+
+    if sin:
+        message.append("\nStdin:\n")
+        message.extend(["   " + x for x in sin.split("\n")])
+
     moa.ui.message("Created a blog entry", store=False)
     sysConf.doc.blog = message
 
+def _getFromStdin():
+    import re
+
+    if not sys.stdin.isatty():
+        m = sys.stdin.read()
+        #print m
+        m = re.sub(r'\x1b\[[^m]*m', '', m)
+        #print m
+        return m
+    return ""
+
 @moa.args.command
 def change(job, args):
     """
@@ -144,16 +172,31 @@ def change(job, args):
 
     `moa -m 'intelligent remark' set ...`
 
+    Note. It is also possible to cat some text into moa change:
+
+    wc -l | moa change 
+
+    Moa will still query you for a message and append the data from
+    stdin to the message
+    
+
     """
 
+    sin = _getFromStdin()
+
     message = _readFromuser(
         job, 
         header="Enter your CHANGELOG message (ctrl-d on an empty " +
         "line to finish)", fileName="CHANGELOG.md")
 
+    if sin:
+        message.append("\nStdin:\n")
+        message.extend(["   " + x for x in sin.split("\n")])
+
     moa.ui.message("Created a changelog entry", store=False)
     sysConf.doc.changeMessage = "\n".join(message)
 
+
 @moa.args.command
 def readme(job, args):
     """

diff --git a/lib/python/moa/plugin/system/info.py b/lib/python/moa/plugin/system/info.py
@@ -135,8 +135,8 @@ def version(job, args):
     print sysConf.getVersion()
 
 @moa.args.private
-@moa.args.command
 @moa.args.doNotLog
+@moa.args.command
 def rehash(job, args):
     """
     cache a list of variables for command line completion
@@ -159,8 +159,8 @@ def rehash(job, args):
 
 
 @moa.args.private
-@moa.args.command
 @moa.args.doNotLog
+@moa.args.command
 def raw_commands(job, args):
     """
     return a list available commands
@@ -176,8 +176,8 @@ def raw_commands(job, args):
     print ' '.join(c)
 
 @moa.args.private
-@moa.args.command
 @moa.args.doNotLog
+@moa.args.command
 def raw_parameters(job, args):
     """
     Print a list of all known parameters

diff --git a/template2/recursive_map.jinja2 b/template2/recursive_map.jinja2
@@ -5,33 +5,34 @@ lastreference=$reference
 lasti=0
 
 {% for i in range(iterations) %}
-    {% if i <= (iterations / 2) %}{% set aln_param=param_strict %}
-    {% else %}{% set aln_param=param_loose %}
+
+
+    {% if i <= (iterations / 2) %}{% set aln_param=param_first %}
+    {% else %}{% set aln_param=param_second %}
     {% endif %}
     echo "Iteration {{i}} ---"
     echo "With reference: ${reference}"
-    if [[ ! -f "db.{{i}}.ann" ]]
+    if [[ ! -f "db.{{i}}.1.bt2" ]]
     then
-        echo "create bwa database {{i}}"
-        bwa index -p db.{{i}} ${reference}
+        echo "create bowtie2 database {{i}}"
+        bowtie2-build ${reference} db.{{i}}
     fi
 
     echo "start aligning $i"
+
     bamfiles=""
     {% for ffq in fq_forward %}
        {% set rfq =  fq_reverse[loop.index0] %}
        base=`basename {{ ffq }} .fq`
-       [[ -f "${base}__{{i}}.f.sai" ]] || \
-           bwa aln {{ aln_param }} -t {{ threads }} \
-               -f ${base}__{{i}}.f.sai db.{{i}} {{ ffq }}
-       [[ -f "${base}__{{i}}.r.sai" ]] || \
-           bwa aln {{ aln_param }} -t {{ threads }} \
-               -f ${base}__{{i}}.r.sai db.{{i}} {{ rfq }} 
-       [[ -f ${base}__{{i}}.bam ]] || \
-           bwa sampe db.{{i}} ${base}__{{i}}.f.sai ${base}__{{i}}.r.sai \
-           {{ ffq }} {{ rfq }} \
-           | samtools view -Sb -f 2 - \
-           | samtools sort - ${base}__{{i}}
+       if [[ ! -f "${base}__{{i}}.bam" ]]
+       then
+           echo "start align {{i}} - "
+           bowtie2 --reorder -p {{ threads }} {{ aln_param }} \
+               -x db.{{i}} \
+               -1 {{ ffq }} -2 {{ rfq }} \
+               | samtools view -Sb -f 2 - \
+               | samtools sort - ${base}__{{i}}
+       fi
        bamfiles="${bamfiles} ${base}__{{i}}.bam"
     {% endfor %}
 
@@ -54,7 +55,9 @@ lasti=0
     fi
 
     lastreference=$reference
+
     reference=consensus.{{i}}.fasta
+
     lasti={{i}}
 {% endfor %}