Skip to content

Commit

Permalink
Merge pull request #52 from standage/fix/fasta
Browse files Browse the repository at this point in the history
Fix missing ##FASTA directives, better handling of feature scores
  • Loading branch information
standage committed Jan 15, 2017
2 parents 40daa4b + 8bd99a3 commit 4cd885d
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 8 deletions.
1 change: 1 addition & 0 deletions tag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from tag.range import Range
from tag.reader import GFF3Reader
from tag.writer import GFF3Writer
from tag.score import Score
from tag import cli
from tag import select
from tag import mrna
Expand Down
16 changes: 8 additions & 8 deletions tag/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tag.directive import Directive
from tag.range import Range
from tag.sequence import Sequence
from tag.score import Score


class Feature(object):
Expand Down Expand Up @@ -59,10 +60,7 @@ def __init__(self, data):
self._source = fields[1]
self._type = fields[2]
self._range = Range(int(fields[3]) - 1, int(fields[4]))
if fields[5] == '.':
self.score = None
else:
self.score = float(fields[5])
self._score = Score(fields[5])
self._strand = fields[6]
if fields[7] == '.':
self.phase = None
Expand All @@ -78,15 +76,13 @@ def __init__(self, data):

def __str__(self):
"""String representation of the feature, sans children."""
score = '.'
if self.score is not None:
score = "{:.3f}".format(self.score)
phase = '.'
if self.phase is not None:
phase = str(self.phase)
return '\t'.join([
self.seqid, self.source, self.type, str(self.start + 1),
str(self.end), score, self.strand, phase, self.attributes
str(self.end), str(self._score), self.strand, phase,
self.attributes
])

def __repr__(self):
Expand Down Expand Up @@ -236,6 +232,10 @@ def num_children(self):
def fid(self):
return self.get_attribute('ID')

@property
def score(self):
    """Value held by this feature's score object (None when undefined)."""
    parsed = self._score
    return parsed.value

@property
def slug(self):
"""
Expand Down
35 changes: 35 additions & 0 deletions tag/score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2017 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

from __future__ import print_function
import re


class Score(object):
    """
    Score field of a feature, parsed from its textual representation.

    The string '.' denotes an undefined score (`value` is None). A string
    consisting only of digits with an optional leading minus sign is stored
    as an int; anything else is handed to `float()`, which raises
    ValueError if the text is not a valid number.

    Converting the object back to a string yields '.', the plain integer,
    or a float with three decimal places. Floats use fixed-point notation
    when they are zero or their magnitude lies strictly between 1e-4 and
    1e6, and scientific notation otherwise.
    """

    # Raw string so that `\d` is a regex class rather than a (deprecated)
    # string escape; a single optional minus sign instead of `-*`.
    _INT_PATTERN = re.compile(r'^-?\d+$')

    def __init__(self, datastr):
        """Parse `datastr`, the score column of a feature line."""
        self._type = None
        if datastr == '.':
            self.value = None
        elif self._INT_PATTERN.search(datastr):
            self.value = int(datastr)
            self._type = int
        else:
            self.value = float(datastr)
            self._type = float

    def __str__(self):
        """Format the score for output, mirroring the input notation."""
        if self.value is None:
            return '.'
        if self._type is int:
            return '{:d}'.format(self.value)
        magnitude = abs(self.value)
        # Zero is handled explicitly: it fails the lower-bound test below
        # and would otherwise be printed as '0.000E+00'.
        if magnitude == 0 or 1e-4 < magnitude < 1e6:
            return '{:1.3f}'.format(self.value)
        return '{:1.3E}'.format(self.value)
5 changes: 5 additions & 0 deletions tag/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import sys
import tag
from tag import Feature
from tag import Sequence
from tag import GFF3Reader


Expand Down Expand Up @@ -45,6 +46,7 @@ def __init__(self, instream, outfile='-'):
self.outfile = tag.open(outfile, 'w')
self.retainids = False
self.feature_counts = defaultdict(int)
self._seq_written = False

def __del__(self):
if self.outfilename != '-' and not isinstance(self.outfile, StringIO):
Expand All @@ -62,4 +64,7 @@ def write(self):
feature.add_attribute('ID', fid)
else:
feature.drop_attribute('ID')
if isinstance(entry, Sequence) and not self._seq_written:
print('##FASTA', file=self.outfile)
self._seq_written = True
print(repr(entry), file=self.outfile)
18 changes: 18 additions & 0 deletions tests/test_score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2017 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

from tag import Score


def test_basic():
    """Scores must round-trip, or be normalized to three decimal places."""
    unchanged = ('.', '10', '12.345', '-11', '-98765.432', '-4.555E+09')
    for text in unchanged:
        assert str(Score(text)) == text

    # Inputs whose formatted output differs from the text that was parsed.
    normalized = (
        ('10.0', '10.000'),
        ('1.32e12', '1.320E+12'),
        ('1.2e-16', '1.200E-16'),
    )
    for text, expected in normalized:
        assert str(Score(text)) == expected
9 changes: 9 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,12 @@ def test_write_file():
with open(output, 'r') as testout:
testoutput2 = testout.read()
assert testoutput1 == testoutput2, (testoutput1, testoutput2)


def test_write_minimus():
    """Writing the parsed minimus.gff3 must reproduce the file verbatim."""
    source = GFF3Reader(tag.pkgdata('minimus.gff3'))
    sink = StringIO()
    GFF3Writer(source, sink).write()

    written = sink.getvalue()
    expected = tag.pkgdata('minimus.gff3').read()
    assert written == expected
6 changes: 6 additions & 0 deletions tests/testdata/minimus.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
##gff-version 3
##sequence-region minimus 1 10
minimus vim region 4 6 9000 + . Note=lolwut
##FASTA
>minimus
GATTACANNN

0 comments on commit 4cd885d

Please sign in to comment.