Skip to content

Commit

Permalink
Merge pgenome:~/project/moa11 into moa.0.11
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Fiers committed Aug 3, 2012
2 parents 56e620b + 623463f commit 1669cee
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 33 deletions.
44 changes: 44 additions & 0 deletions bin/fastq2contigs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python

import os
import re
import sys
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('fastq')
parser.add_argument('output')
parser.add_argument('-n', dest='non', help='nubmer NNNs to cut a fastq file',
type=int, default=10)

args = parser.parse_args()

F = open(args.fastq)
G = open(args.output, 'w')

splitter = re.compile('n{'+str(args.non)+',}', re.I)

header = F.readline()
assert(header[0] == '@')
header = header[1:].split()[0]
print 'header', header

seq = []
for line in F:
if line[0] == '+': break
seq.append(line.strip())
seq = "".join(seq)
print 'read %d nt' % len(seq)

contigs = splitter.split(seq)
for i, c in enumerate(contigs):
G.write(">%s_%s\n" % (header, i))
while c:
G.write("%s\n" % c[:80])
c = c[80:]

print "wrote %d contigs to %s" % (len(contigs), sys.argv[2])

F.close()
G.close()

128 changes: 128 additions & 0 deletions bin/gff_regionify
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python

import os
import sys
import gzip
import time
import cPickle
import argparse
import collections
import numpy as np

seqs = {}
types = []

parser = argparse.ArgumentParser(description='Do fancy region stuff based on GFFs.')

parser.add_argument('-b', dest='base', help='basename for output', default='gff_regionify')
parser.add_argument('lengths', help='file with the lengths of all sequences')
parser.add_argument('gff', help='input gff')
parser.add_argument('--vcf', dest='vcf', help='vcf file to categorize')

args = parser.parse_args()

starttime= time.time()

print 'creating numpy structure'
totn = 0
with open(args.lengths) as F:
for i, line in enumerate(F):
si, ln = line.split()
ln = int(ln)
totn += ln
seqs[si] = np.zeros(ln, dtype=np.uint16)
if (i+1) % 72 == 0:
sys.stdout.write('\r%d ' % i)
sys.stdout.flush()
print
print 'generated space for %d nt' % totn

print 'loading annotations'
with open(args.gff) as F:
for i, line in enumerate(F):
line = line.strip()
if not line: continue
if line[0] == '#': continue
ls = line.strip().split()
atype = ls[2]

if not atype in types:
print 'adding type', atype
types.append(atype)

sqid = ls[0]
start = int(ls[3])
stop = int(ls[4])
seqs[sqid][start-1:stop-1] = \
np.bitwise_or(seqs[sqid][start-1:stop-1], 2 ** types.index(atype))

if i % (72*9) == 0 and i > 1000:
sys.stdout.write('\r%d ' % i)
sys.stdout.flush()
print

print 'summarizing'
summ = collections.defaultdict(int)
tlen = 0
for j, sid in enumerate(seqs):
tlen += len(seqs[sid])
for i, t in enumerate(types):
summ[t] += len(np.nonzero(np.bitwise_and(seqs[sid], 2 ** i))[0])
if j % 72 == 0:
sys.stdout.write('\r%d ' % i)
sys.stdout.flush()
print

with open(args.base + '.wg.stats', 'w') as F:
print "total\t%s" % (tlen)
F.write("total\t%s\n" % (tlen))
for t in summ:
print "%s\t%s" % (t, summ[t])
F.write("%s\t%s\n" % (t, summ[t]))

if not args.vcf:
sys.exit()

def vcfreader(filename):
with open(filename) as F:
for line in F:
line = line.strip()
if not line: continue
if line[0] == '#': continue
ls = line.split()
if len(ls) < 2: continue
yield ls[0], int(ls[1])


vsumm = collections.defaultdict(int)
vcfi = 0
print '\nProcessing VCF %s' % args.vcf
for sid, pos in vcfreader(args.vcf):
pos = pos -1 #vcf is 1 based!!
try:
m = seqs[sid][pos]
except IndexError:
print "cannot find mask value for %s/%s" % pos, sid
raise
vcfi += 1
for i, t in enumerate(types):
inni = np.bitwise_and(m, 2 ** i)
if inni > 0:
#print "%s:%s mask %2d type %2d/%10s inni %d" % (
# sid, pos, m, i, t, inni)
vsumm[t] += 1

if vcfi % 72 == 0:
sys.stdout.write('\r%d' % vcfi)
sys.stdout.flush()

with open(args.base + '.vcf.stats', 'w') as F:
print "total\t%s" % (vcfi)
F.write("total\t%s\n" % (vcfi))
for t in vsumm:
print "%s\t%s" % (t, vsumm[t])
F.write("%s\t%s\n" % (t, vsumm[t]))




59 changes: 51 additions & 8 deletions lib/python/moa/plugin/system/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,21 @@
import moa.logger as l
from moa.sysConf import sysConf

# def hook_prepare_3():
# if not job.template.parameters.has_key('title'):
# job.template.parameters.title = {
# 'optional' : False,
# 'help' : 'A short and consise title for this job',
# 'type' : 'string',
# 'recursive' : False,
# }

def hook_finish():

job = sysConf['job']

if not job.template.parameters.has_key('title'):
job.template.parameters.title = {
'optional' : False,
'help' : 'A short and consise title for this job',
'type' : 'string',
'recursive' : False,
}

if not sysConf.commands[sysConf.args.command]['logJob']:
return

message = moa.ui._textFormattedMessage(
[sysConf.args.changeMessage,
Expand Down Expand Up @@ -76,8 +80,11 @@ def _readFromuser(job, header, fileName):
"""
gather Blog or CHANGELOG information
"""

#moa.utils.moaDirOrExit(job)

oldstdin = sys.stdin
sys.stdin = open('/dev/tty')
txt = []
print header, "..."
while True:
Expand All @@ -88,6 +95,7 @@ def _readFromuser(job, header, fileName):
break

_appendMessage(fileName, txt)
sys.stdin = oldstdin
return txt

@moa.args.command
Expand All @@ -111,13 +119,33 @@ def blog(job, args):
.. _Markdown: http://daringfireball.net/projects/markdown/ markdown.
"""

sin = _getFromStdin()


message = _readFromuser(
job,
header="Enter your blog message (ctrl-d on an empty line to finish)",
fileName="BLOG.md")

if sin:
message.append("\nStdin:\n")
message.extend([" " + x for x in sin.split("\n")])

moa.ui.message("Created a blog entry", store=False)
sysConf.doc.blog = message

def _getFromStdin():
import re

if not sys.stdin.isatty():
m = sys.stdin.read()
#print m
m = re.sub(r'\x1b\[[^m]*m', '', m)
#print m
return m
return ""

@moa.args.command
def change(job, args):
"""
Expand All @@ -144,16 +172,31 @@ def change(job, args):
`moa -m 'intelligent remark' set ...`
Note. It is also possible to cat some text into moa change:
wc -l | moa change
Moa will still query you for a message and append the data from
stdin to the message
"""

sin = _getFromStdin()

message = _readFromuser(
job,
header="Enter your CHANGELOG message (ctrl-d on an empty " +
"line to finish)", fileName="CHANGELOG.md")

if sin:
message.append("\nStdin:\n")
message.extend([" " + x for x in sin.split("\n")])

moa.ui.message("Created a changelog entry", store=False)
sysConf.doc.changeMessage = "\n".join(message)


@moa.args.command
def readme(job, args):
"""
Expand Down
6 changes: 3 additions & 3 deletions lib/python/moa/plugin/system/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ def version(job, args):
print sysConf.getVersion()

@moa.args.private
@moa.args.command
@moa.args.doNotLog
@moa.args.command
def rehash(job, args):
"""
cache a list of variables for command line completion
Expand All @@ -159,8 +159,8 @@ def rehash(job, args):


@moa.args.private
@moa.args.command
@moa.args.doNotLog
@moa.args.command
def raw_commands(job, args):
"""
return a list available commands
Expand All @@ -176,8 +176,8 @@ def raw_commands(job, args):
print ' '.join(c)

@moa.args.private
@moa.args.command
@moa.args.doNotLog
@moa.args.command
def raw_parameters(job, args):
"""
Print a list of all known parameters
Expand Down
35 changes: 19 additions & 16 deletions template2/recursive_map.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,34 @@ lastreference=$reference
lasti=0

{% for i in range(iterations) %}
{% if i <= (iterations / 2) %}{% set aln_param=param_strict %}
{% else %}{% set aln_param=param_loose %}


{% if i <= (iterations / 2) %}{% set aln_param=param_first %}
{% else %}{% set aln_param=param_second %}
{% endif %}
echo "Iteration {{i}} ---"
echo "With reference: ${reference}"
if [[ ! -f "db.{{i}}.ann" ]]
if [[ ! -f "db.{{i}}.1.bt2" ]]
then
echo "create bwa database {{i}}"
bwa index -p db.{{i}} ${reference}
echo "create bowtie2 database {{i}}"
bowtie2-build ${reference} db.{{i}}
fi

echo "start aligning $i"

bamfiles=""
{% for ffq in fq_forward %}
{% set rfq = fq_reverse[loop.index0] %}
base=`basename {{ ffq }} .fq`
[[ -f "${base}__{{i}}.f.sai" ]] || \
bwa aln {{ aln_param }} -t {{ threads }} \
-f ${base}__{{i}}.f.sai db.{{i}} {{ ffq }}
[[ -f "${base}__{{i}}.r.sai" ]] || \
bwa aln {{ aln_param }} -t {{ threads }} \
-f ${base}__{{i}}.r.sai db.{{i}} {{ rfq }}
[[ -f ${base}__{{i}}.bam ]] || \
bwa sampe db.{{i}} ${base}__{{i}}.f.sai ${base}__{{i}}.r.sai \
{{ ffq }} {{ rfq }} \
| samtools view -Sb -f 2 - \
| samtools sort - ${base}__{{i}}
if [[ ! -f "${base}__{{i}}.bam" ]]
then
echo "start align {{i}} - "
bowtie2 --reorder -p {{ threads }} {{ aln_param }} \
-x db.{{i}} \
-1 {{ ffq }} -2 {{ rfq }} \
| samtools view -Sb -f 2 - \
| samtools sort - ${base}__{{i}}
fi
bamfiles="${bamfiles} ${base}__{{i}}.bam"
{% endfor %}

Expand All @@ -54,7 +55,9 @@ lasti=0
fi

lastreference=$reference

reference=consensus.{{i}}.fasta

lasti={{i}}
{% endfor %}

Expand Down

0 comments on commit 1669cee

Please sign in to comment.