Skip to content

Commit

Permalink
applies the variants in a vcf file to the reference fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Fiers committed Jun 11, 2012
1 parent 14612b2 commit 58ea57d
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions bin/vcf_applicator
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python

import os
import sys

import argparse

parser = argparse.ArgumentParser('vcf applicator')
parser.add_argument('seq')
parser.add_argument('vcf')
parser.add_argument('out')
args = parser.parse_args()

seq = []
head = ''
with open(args.seq) as F:
head = F.readline().strip()
for line in F:
seq.append(line.strip().lower())

seq = list("".join(seq))

print 'Input sequence length', len(seq)
for i, line in enumerate(reversed(open(args.vcf).readlines())):
if line[:1] == '#': continue
ls = line.split()
if not ls: continue
if len(ls) < 8: continue

pos = int(ls[1])-1
ref = ls[3]
alt = ls[4]

#print pos, ref, alt,
#print seq[pos-2:pos+len(ref)+2], '-->',
for j in range(pos, pos+len(ref)):
seq[j] = ''
seq[pos] = alt.upper()
#print seq[pos-2:pos+len(ref)+2]

seq = "".join(seq)
print 'Output sequence length', len(seq)
with open(args.out, 'w') as F:
F.write(head+"\n")
while seq:
F.write("%s\n" % seq[:60])
seq = seq[60:]

0 comments on commit 58ea57d

Please sign in to comment.