Skip to content

Commit

Permalink
Merge pull request #49 from standage/feature/mrna
Browse files Browse the repository at this point in the history
New mRNA module, generator cognate for AEGeAn's pmrna command
  • Loading branch information
standage committed Jan 5, 2017
2 parents 16135be + 26a265e commit 40daa4b
Show file tree
Hide file tree
Showing 12 changed files with 129 additions and 3 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]
## Changed
### Added
- Module for mRNA handling, with a function for selecting the primary mRNA from
a gene or other feature.
- New CLI command `tag pmrna`.

### Changed
- Modules focused on classes / data structures now support more concise imports
(for example, `from tag import Feature` and `tag.Feature` are now supported and
preferred over `from tag.feature import Feature` and `tag.feature.Feature`).
Expand Down
1 change: 1 addition & 0 deletions tag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from tag.writer import GFF3Writer
from tag import cli
from tag import select
from tag import mrna
from gzip import open as gzopen

from ._version import get_versions
Expand Down
1 change: 1 addition & 0 deletions tag/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Dispatch table: maps each CLI subcommand name to the `main` function of the
# corresponding `tag.cli` module.
mains = {
'gff3': tag.cli.gff3.main,
'occ': tag.cli.occ.main,
'pmrna': tag.cli.pmrna.main,
}


Expand Down
4 changes: 3 additions & 1 deletion tag/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import tag
from . import gff3
from . import occ
from . import pmrna


def parser():
Expand All @@ -21,8 +22,9 @@ def parser():
parser.add_argument('-l', '--logfile', metavar='FILE', default=sys.stderr,
type=argparse.FileType('w'))
subparsers = parser.add_subparsers(dest='cmd', metavar='cmd',
help='gff3 | occ')
help='gff3 | occ | pmrna')
tag.cli.gff3.subparser(subparsers)
tag.cli.occ.subparser(subparsers)
tag.cli.pmrna.subparser(subparsers)

return parser
27 changes: 27 additions & 0 deletions tag/cli/pmrna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2015 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

from __future__ import print_function
import argparse
from collections import defaultdict
from intervaltree import IntervalTree
import tag


def subparser(subparsers):
    """
    Register the `pmrna` command on the given argparse subparsers object.

    The command takes one positional argument (`gff3`, the input file) and an
    optional `-r/--relax` flag that sets `strict` to False.
    """
    pmrna_parser = subparsers.add_parser('pmrna')
    # Strict parsing is the default; the flag only ever switches it off.
    pmrna_parser.add_argument(
        '-r', '--relax', dest='strict', default=True, action='store_false',
        help='relax parsing stringency'
    )
    pmrna_parser.add_argument('gff3', help='input file')


def main(args):
    """
    Entry point for the `pmrna` command.

    Reads the GFF3 file named by `args.gff3` (honoring `args.strict`), passes
    the entry stream through `tag.mrna.primary`, and writes the result with a
    `GFF3Writer` constructed with its default output.
    """
    instream = tag.GFF3Reader(infilename=args.gff3, strict=args.strict)
    primary_only = tag.mrna.primary(instream)
    tag.GFF3Writer(primary_only).write()
12 changes: 12 additions & 0 deletions tag/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,15 @@ def parse_attributes(self, attrstring):
valdict = dict((val, True) for val in values)
attributes[key] = valdict
return attributes

@property
def cdslen(self):
    """
    Translated length of this feature.

    Computed as the summed length of this feature's direct children of type
    `CDS`. Undefined (`None`) for non-mRNA features; 0 for an mRNA that has
    no CDS children.
    """
    if self.type != 'mRNA':
        return None

    # NOTE(review): `children` is presumably unset (None) on a feature that
    # has no sub-features -- confirm against the constructor. Falling back to
    # an empty tuple makes that case report 0 instead of raising TypeError.
    children = self.children or ()
    return sum(len(c) for c in children if c.type == 'CDS')
24 changes: 24 additions & 0 deletions tag/mrna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2016 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

import tag


def primary(entrystream, parenttype='gene'):
    """
    Select a single primary mRNA for each parent feature in the stream.

    Entries that are not features are passed through untouched. For every
    feature of type `parenttype` found within a feature entry, the child mRNA
    with the greatest `cdslen` is kept (ties are broken by the `ID`
    attribute, highest value winning) and all other child mRNAs are removed
    from the parent's children. Children of other types are left in place.
    Parents without any mRNA children are left unmodified.

    Note that parent features are modified in place.

    :param entrystream: iterable of entries, such as a GFF3Reader
    :param parenttype: type of the features whose mRNAs are to be filtered
    :returns: generator yielding every entry of the input stream
    """
    for entry in entrystream:
        if not isinstance(entry, tag.feature.Feature):
            yield entry
            continue

        for feature in tag.select.features(entry, parenttype, traverse=True):
            if not feature.children:
                # Nothing to filter on a childless parent.
                continue
            mrnas = list(tag.select.features(feature.children, 'mRNA'))
            if not mrnas:
                # E.g. a non-coding gene: without this guard the `pop()`
                # below raises IndexError and aborts the whole stream.
                continue
            mrnas.sort(key=lambda m: (m.cdslen, m.get_attribute('ID')))
            # The last element after sorting is the primary mRNA; everything
            # left in `mrnas` is to be discarded.
            mrnas.pop()
            feature.children = [c for c in feature.children if c not in mrnas]
        yield entry
2 changes: 1 addition & 1 deletion tag/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __del__(self):
if self.outfilename != '-' and not isinstance(self.outfile, StringIO):
self.outfile.close()

def write(self, relax=False):
def write(self):
"""Pull features from the instream and write them to the output."""
for entry in self._instream:
if isinstance(entry, Feature):
Expand Down
14 changes: 14 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,17 @@ def test_occ(gff3, ftype, expected_output):
assert sys.stdout.getvalue() == expected_output

sys.stdout = oldstdout


def test_pmrna():
    """
    Run the `pmrna` command on the nanosplice test data and compare what it
    prints to standard output against the expected primary-mRNA GFF3.
    """
    args = type('', (), {})()
    args.gff3 = 'tests/testdata/nanosplice.gff3'
    args.strict = True

    oldstdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tag.cli.pmrna.main(args)
        observed = sys.stdout.getvalue()
    finally:
        # Restore the real stdout even if the command raises, so a failure
        # here cannot leave stdout redirected for the tests that follow.
        sys.stdout = oldstdout

    testout = tag.pkgdata('nanosplice-primary.gff3').read()
    assert observed == testout
23 changes: 23 additions & 0 deletions tests/test_mrna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2016 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

import pytest
import tag


def test_primary():
    """
    Check that `tag.mrna.primary` leaves each gene with exactly one child,
    and that for the nanosplice data the retained mRNA is `mRNAsecond`.
    """
    nanostream = tag.reader.GFF3Reader(tag.pkgdata('nanosplice.gff3'))
    nanogenes = tag.select.features(tag.mrna.primary(nanostream), type='gene')
    gene = next(nanogenes)
    # cdslen is only defined for mRNA features.
    assert gene.cdslen is None
    assert gene.num_children == 1
    assert gene.children[0].get_attribute('ID') == 'mRNAsecond'

    pdomstream = tag.reader.GFF3Reader(tag.pkgdata('pdom-withseq.gff3'))
    pdomgenes = tag.select.features(tag.mrna.primary(pdomstream), type='gene')
    for gene in pdomgenes:
        assert gene.num_children == 1
8 changes: 8 additions & 0 deletions tests/testdata/nanosplice-primary.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##gff-version 3
##sequence-region 42 5001 12000
42 nano gene 5001 12000 . - . ID=gene1
42 nano mRNA 5001 12000 . - . ID=mRNA1;Parent=gene1
42 nano CDS 5001 6000 . - 1 Parent=mRNA1
42 nano CDS 9001 10000 . - 2 Parent=mRNA1
42 nano CDS 11001 12000 . - 0 Parent=mRNA1
###
9 changes: 9 additions & 0 deletions tests/testdata/nanosplice.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
42 nano gene 5001 12000 . - . ID=gene1
42 nano mRNA 5001 12000 . - . ID=mRNAfirst;Parent=gene1
42 nano CDS 5001 6000 . - 1 ID=CDS1;Parent=mRNAfirst
42 nano CDS 7001 8000 . - 2 ID=CDS1;Parent=mRNAfirst
42 nano CDS 11001 12000 . - 0 ID=CDS1;Parent=mRNAfirst
42 nano mRNA 5001 12000 . - . ID=mRNAsecond;Parent=gene1
42 nano CDS 5001 6000 . - 1 ID=CDS2;Parent=mRNAsecond
42 nano CDS 9001 10000 . - 2 ID=CDS2;Parent=mRNAsecond
42 nano CDS 11001 12000 . - 0 ID=CDS2;Parent=mRNAsecond

0 comments on commit 40daa4b

Please sign in to comment.