Skip to content

Commit

Permalink
merged
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Fiers committed Jun 15, 2012
2 parents 6d4cb33 + d1c7a06 commit 361e9e6
Show file tree
Hide file tree
Showing 14 changed files with 440 additions and 174 deletions.
161 changes: 80 additions & 81 deletions bin/fastaExtractMerge
Original file line number Diff line number Diff line change
Expand Up @@ -187,91 +187,90 @@ reGC = None
if options.gapConvert:
reGC = re.compile("N{6,}", re.I)

GFFFILE = open('%s.gff' % outId, 'w')
AGPFILE = open('%s.agp' % outId, 'w')
SCFFILE = open('%s.fasta' % outId, 'w')

if options.organism:
AGPFILE.write("# ORGANISM: %s \n" % options.organism)
if options.taxid:
AGPFILE.write("# TAX_ID: %s \n" % options.taxid)
if options.genomecenter:
AGPFILE.write("# GENOME CENTER: %s \n" % options.genomecenter)

GFFFILE.write("##gff-version 3\n")
allseq = []
i = 0
cp = 1

#run through the input sequences

for seqId in ids:
cleanSeqId = re.sub('\|.*$', '', seqId)
print seqId, cleanSeqId
_i, header, seq = output[seqId]

if i > 0:
#create a 'gap'
i += 1
#print "print gap", seqId, seqId in mappedContigs
if seqId in mappedContigs:
hasLink = "yes"
else:
hasLink = "no"

#write the gap
AGPFILE.write(
"\t".join(
map( str, [ outId, cp, cp + len(linker)-1, i,
'N', len(linker), 'clone', hasLink, ''
]))+ "\n")
#write the GFF
GFFFILE.write(
"%s\n" %
"\t".join(map(str, [
outId, options.source, 'gap',
cp, cp+len(options.linker)-1, '.',
',', '.', "ID=%s_linker_%s" % (cleanSeqId,i)])))

cp += len(linker) # correct for linker sequence

i += 1

with open('%s.gff' % outId, 'w') as G:

with open('%s.agp' % outId, 'w') as H:

if options.organism:
H.write("# ORGANISM: %s \n" % options.organism)
if options.taxid:
H.write("# TAX_ID: %s \n" % options.taxid)
if options.genomecenter:
H.write("# GENOME CENTER: %s \n" % options.genomecenter)

G.write("##gff-version 3\n")
allseq = []
i = 0
cp = 1

#run through the input sequences

for seqId in ids:
_i, header, seq = output[seqId]

if i > 0:
#create a 'gap'
i += 1
print "print gap", seqId, seqId in mappedContigs
if seqId in mappedContigs:
hasLink = "yes"
else:
hasLink = "no"
H.write(
"\t".join(
map(
str, [
outId,
cp,
cp + len(linker)-1,
i,
'N',
len(linker),
'clone',
hasLink,
''
]))+ "\n")

G.write(
"%s\n" %
"\t".join(map(str, [
outId, options.source, 'gap',
cp, cp+len(options.linker)-1, '.',
',', '.', "ID=%s_linker_%s" % (seqId,i)])))

cp += len(linker) # correct for linker sequence

i += 1


if options.gapConvert:
def _replacer(o):
return options.linker

seq = reGC.sub(_replacer, seq)

G.write(
"%s\n" %
"\t".join(map(str, [
outId, options.source, 'scaffold',
cp, cp+len(seq)-1, '.',
'-' if seqId in revcomp else '+',
'.', "ID=%s" % seqId])))

if options.genomecenter: agpSeqId = "gnl|pflnz|%s" % seqId
else: agpSeqId = seqId

H.write(
"\t".join(map(str, [
outId, cp, cp + len(seq) -1, i, 'W',
agpSeqId,
1, len(seq),

if options.gapConvert:
def _replacer(o):
return options.linker

seq = reGC.sub(_replacer, seq)

GFFFILE.write(
"%s\n" %
"\t".join(map(str, [
outId, options.source, 'scaffold',
cp, cp+len(seq)-1, '.',
'-' if seqId in revcomp else '+',
] ) ) + "\n" )
'.', "ID=%s" % cleanSeqId])))


if options.genomecenter:
agpSeqId = "gnl|pflnz|%s" % cleanSeqId
fastaheader = "gnl|pflnz|%s" % cleanSeqId
else:
fastaheader = cleanSeqId
agpSeqId = cleanSeqId

AGPFILE.write(
"\t".join(map(str, [
outId, cp, cp + len(seq) -1, i, 'W',
agpSeqId,
1, len(seq),
'-' if seqId in revcomp else '+',
] ) ) + "\n" )


cp += len(seq)
allseq.append(seq)
cp += len(seq)
allseq.append(seq)

with open('%s.fasta' % outId, 'w') as F:
F.write(">%s\n" % outId)
Expand Down
48 changes: 48 additions & 0 deletions bin/vcf_applicator
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python

import os
import sys

import argparse

# Apply the variants listed in a VCF file to a sequence in a FASTA file and
# write the modified sequence to a new FASTA file.
#
# Three positional arguments: the input FASTA, the VCF with the variants to
# apply, and the path of the FASTA file to write.
parser = argparse.ArgumentParser('vcf applicator')
parser.add_argument('seq')
parser.add_argument('vcf')
parser.add_argument('out')
args = parser.parse_args()

# Read the input sequence. Only the first line is kept as the header; every
# following line is treated as sequence data and lower-cased, so that the
# upper-cased alternative alleles applied below stand out in the output.
# NOTE(review): a multi-record FASTA would have its later '>' lines merged
# into the sequence - presumably the input is always single-record; confirm.
seq = []
head = ''
with open(args.seq) as F:
    head = F.readline().strip()
    for line in F:
        seq.append(line.strip().lower())

# One list element per base. Variants are applied by replacing elements with
# strings of arbitrary length, so the indices of untouched bases never shift.
seq = list("".join(seq))

print('Input sequence length %d' % len(seq))

# Read the whole VCF up front (it is walked back to front) and close the
# handle right away instead of leaking it.
with open(args.vcf) as V:
    vcf_lines = V.readlines()

# Walk the records last to first; when records overlap, the record that
# appears earliest in the file is applied last and therefore wins.
for line in reversed(vcf_lines):
    if line[:1] == '#':
        # header / meta-information line
        continue
    ls = line.split()
    if len(ls) < 8:
        # blank or truncated record (fewer than the 8 fixed VCF columns)
        continue

    pos = int(ls[1]) - 1  # VCF positions are 1-based, the list is 0-based
    ref = ls[3]
    alt = ls[4]

    # NOTE(review): the CHROM column (ls[0]) is not checked and a
    # multi-allelic ALT such as "A,G" is inserted verbatim, comma included.

    # Blank out every base covered by the reference allele, then put the
    # complete alternative allele in the first of those slots.
    for j in range(pos, pos + len(ref)):
        seq[j] = ''
    seq[pos] = alt.upper()

seq = "".join(seq)
print('Output sequence length %d' % len(seq))

# Write the header followed by the sequence wrapped at 60 characters per
# line. Slicing by offset avoids re-copying the remaining sequence for every
# output line.
with open(args.out, 'w') as F:
    F.write(head + "\n")
    for start in range(0, len(seq), 60):
        F.write("%s\n" % seq[start:start + 60])

2 changes: 1 addition & 1 deletion lib/python/moa/backend/ruff/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def writeScript(self):
tmpfile = tempfile.NamedTemporaryFile(dir=tmpdir, prefix='moa.',
delete=False, suffix='.sh')
#tmpfile.write("\n".join(sc))
self.scriptFile = os.path.realpath(os.path.abspath(tmpfile.name))
self.scriptFile = os.path.abspath(tmpfile.name)

script = self.commands.render(self.command, self.jobData)
tmpfile.write(script + "\n\n")
Expand Down
33 changes: 31 additions & 2 deletions lib/python/moa/plugin/job/metavar.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@
directory names. `9.dir` comes after `10.dir`. (so use `09.dir`).
The latter definitions override the earlier ones.
Another feature of metavar is that it tries to parse out variables from
the directory name. So if a directory name is:
./test__qual_13__cutoff_0.12
Note that
* variables are delimited by a double underscore
* Key/values are separated by a single underscore
* the item before the first __ is not interpreted
* items without a '_' are ignored
Then this plugin will set the variable qual to 13 and cutoff to 0.12.
"""
import re
import os
Expand All @@ -111,7 +124,7 @@
import moa.logger

l = moa.logger.getLogger(__name__)
l.setLevel(moa.logger.DEBUG)
#l.setLevel(moa.logger.DEBUG)
import moa.ui

class MoaPathParser(Extension):
Expand Down Expand Up @@ -148,6 +161,18 @@ def hook_prepare(job):
sysConf.jinja2.extensions = []
sysConf.jinja2.extensions += [MoaPathParser]

def _varparser(conf, name):
    """
    Parse variables out of a directory name and store them in conf.

    The name is split on double underscores ('__'); the part before the
    first '__' is not interpreted. Every remaining item is split on its
    first single underscore into a key and a value, and the value is
    stored (as a string) under that key in conf. Items without an
    underscore are ignored.

    For example, the name 'test__qual_13__cutoff_0.12' results in
    conf['qual'] = '13' and conf['cutoff'] = '0.12'.

    conf -- object supporting item assignment; modified in place
    name -- the directory name to parse
    """
    l.debug('parsing parameters from directory name %s', name)
    for item in name.split('__')[1:]:
        if '_' not in item:
            # no key/value separator - nothing to set
            continue
        # split on the first underscore only: values may contain underscores
        k, v = item.split('_', 1)
        # pass arguments to the logger so formatting is deferred until the
        # message is actually emitted (matches the debug call above)
        l.debug('setting %s to %s', k, v)
        conf[k] = v

def hook_pre_filesets(job):

wd = job.wd
Expand All @@ -163,7 +188,11 @@ def hook_pre_filesets(job):

while dirparts:
cp = os.path.sep.join(dirparts)
p = dirparts.pop()
p = dirparts.pop()
if i == 1:
#top level - parse parameters from the dirname
_varparser(job.conf, p)

clean_p = re.sub("^[0-9]+\.+", "", p).replace('.', '_')

#print i, clean_p, p, cp
Expand Down
10 changes: 8 additions & 2 deletions lib/python/moa/plugin/system/openLavaActor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def hook_defineCommandOptions(job, parser):
parser.add_argument( '--oln', default=1, type=int, dest='openlavaSlots',
help='The number of cores the jobs requires')

parser.add_argument( '--olm', default=1, dest='openlavaHost',
help='The host to use for openlava')

def openlavaRunner(wd, cl, conf={}, **kwargs):
"""
Run the job using OPENLAVA
Expand All @@ -54,7 +57,7 @@ def openlavaRunner(wd, cl, conf={}, **kwargs):

l.debug("starting openlava actor for %s" % command)

outDir = os.path.realpath(os.path.abspath(os.path.join(wd, '.moa', 'log.latest')))
outDir = os.path.abspath(os.path.join(wd, '.moa', 'log.latest'))
if not os.path.exists(outDir):
try:
os.makedirs(outDir)
Expand Down Expand Up @@ -85,6 +88,9 @@ def s(*cl):

bsub_cl.extend(["-n", slots])

if '--olm' in sys.argv:
bsub_cl.extend(["-m", sysConf.args.openlavaHost])

lastJids = []

#if len(sysConf.job.data.openlava.get('jidlist', [])) > 1:
Expand Down Expand Up @@ -152,7 +158,7 @@ def s(*cl):
tmpfile = tempfile.NamedTemporaryFile(dir=tmpdir, prefix='openlava.',
delete=False, suffix='.sh')
tmpfile.write("\n".join(sc))
tmpfilename = os.path.realpath(os.path.abspath(tmpfile.name))
tmpfilename = os.path.abspath(tmpfile.name)
tmpfile.close()
os.chmod(tmpfile.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

Expand Down

0 comments on commit 361e9e6

Please sign in to comment.