Skip to content

Commit

Permalink
actually splitting now
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Fiers committed Aug 2, 2012
1 parent da1354a commit 35cb9ad
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions bin/fastq2contigs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python

import os
import re
import sys
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('fastq')
parser.add_argument('output')
parser.add_argument('-n', dest='non', help='nubmer NNNs to cut a fastq file',
type=int, default=10)

args = parser.parse_args()

F = open(args.fastq)
G = open(args.output, 'w')

splitter = re.compile('n{'+str(args.non)+',}', re.I)

header = F.readline()
assert(header[0] == '@')
header = header[1:].split()[0]
print 'header', header

seq = []
for line in F:
if line[0] == '+': break
seq.append(line.strip())
seq = "".join(seq)
print 'read %d nt' % len(seq)

contigs = splitter.split(seq)
for i, c in enumerate(contigs):
G.write(">%s_%s\n" % (header, i))
while c:
G.write("%s\n" % c[:80])
c = c[80:]

print "wrote %d contigs to %s" % (len(contigs), sys.argv[2])

F.close()
G.close()

0 comments on commit 35cb9ad

Please sign in to comment.