Skip to content

Commit

Permalink
interpro_tsv2*.py reporting also annotated fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
lpryszcz committed Feb 8, 2017
1 parent fe452e2 commit 2863bfc
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions interpro_tsv2function.py
@@ -1,10 +1,16 @@
#!/usr/bin/env python
"""Parse InterProScan tsv output and report predicted function for each gene.
"""Parse InterProScan tsv output and report predicted function for each gene/protein.
If a FASTA file is given, an annotated copy of it is also written (<input name without extension>.with_functions.fasta).
cat tsv | python tsv2function.py
cat tsv | python interpro_tsv2function.py [fasta]
"""

import os
import sys

from Bio import SeqIO

# Optional first command-line argument: path of the FASTA file to annotate.
# Stays an empty string when no argument is given, which disables FASTA output.
fastafn = sys.argv[1] if len(sys.argv) > 1 else ""

protid2function = {}
for l in sys.stdin:
Expand Down Expand Up @@ -36,5 +42,14 @@

# Summary on stderr: number of annotated proteins and the total number of
# functions assigned across all of them.
# .values() replaces the Python 2-only .itervalues(); the resulting count is
# the same and the line also runs under Python 3.
sys.stderr.write(
    "#%s unique proteins annotated with %s functions\n"
    % (len(protid2function), sum(len(x) for x in protid2function.values()))
)


# Optional step: re-write the input FASTA with each record's description set
# to its predicted functions. Skipped when no FASTA path was given.
if fastafn:
    # Strip only the final extension. Splitting on every '.' and re-joining
    # the pieces drops the dots from names such as "sp.v2.fasta" and turns a
    # relative path like "./proteins.fasta" into "/proteins".
    outfn = os.path.splitext(fastafn)[0] + ".with_functions.fasta"
    # The context manager closes (and flushes) the output even if parsing
    # raises part-way through.
    with open(outfn, "w") as out:
        for r in SeqIO.parse(fastafn, "fasta"):
            if r.id in protid2function:
                r.description = "; ".join(protid2function[r.id])
            else:
                # No annotation for this record: blank the description.
                r.description = ""
            out.write(r.format("fasta"))
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print("Saved annotated FastA as: %s" % outfn)

0 comments on commit 2863bfc

Please sign in to comment.