Skip to content

Commit

Permalink
Added a new GODag function which creates a hierarchy report for all t…
Browse files Browse the repository at this point in the history
…erms below a user specified GO Term.

If the user does not specify a GO Term, then a hierarchy report is generated for each: BP, MF, CC
  • Loading branch information
dvklopfenstein committed Apr 9, 2015
1 parent b3dd755 commit d5c98fa
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 2 deletions.
33 changes: 31 additions & 2 deletions goatools/obo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,22 @@ def get_all_child_edges(self):
all_child_edges |= p.get_all_child_edges()
return all_child_edges

def write_hier_rec(self, out=sys.stdout,
                   len_dash=1, max_depth=None, num_child=None,
                   depth=1, dp="-"):
    """Write this GO Term and, recursively, the terms below it.

    One line is written per term. Children are visited through
    ``self.children`` with no record of what was already printed, so a
    term reachable along several paths is written once per path.

    Args:
        out: Stream with a ``write`` method that receives the report.
        len_dash: Minimum width of the leading dashes column, which holds
            one dash per depth. ``None`` omits the column entirely.
        max_depth: Deepest depth to print, counting the starting term as
            depth 1. ``None`` prints the whole hierarchy.
        num_child: Any value other than ``None`` adds a right-aligned
            column holding ``len(self.get_all_children())``.
        depth: Depth of this term; the recursion increments it.
        dp: Kept for backward compatibility. The dash prefix is rebuilt
            from ``depth`` whenever it is printed, so a value passed in
            is never written; it is only forwarded to the children.
    """
    if len_dash is not None:
        # One dash per depth; len_dash only pads, it never truncates.
        dp = '-' * depth
        out.write('{DASHES:{N}} '.format(DASHES=dp, N=len_dash))
    if num_child is not None:
        out.write('{N:>5} '.format(N=len(self.get_all_children())))
    out.write('{GO}\n'.format(GO=self))
    depth += 1
    # Stop descending once the children would exceed the requested depth.
    if max_depth is not None and depth > max_depth:
        return
    for p in self.children:
        p.write_hier_rec(out, len_dash, max_depth, num_child, depth, dp)


class GODag(dict):

Expand Down Expand Up @@ -206,7 +222,8 @@ def _init_depth(rec):
# populate children and levels
for rec in self.values():
for p in rec.parents:
p.children.append(rec)
if rec not in p.children:
p.children.append(rec)

if rec.level is None:
_init_level(rec)
Expand All @@ -215,9 +232,21 @@ def _init_depth(rec):
_init_depth(rec)

def write_dag(self, out=sys.stdout):
    """Write info for all GO Terms in obo file, sorted by GO id.

    Each record is printed on its own line, in ascending order of the
    key it is stored under in this mapping.

    Args:
        out: Stream the records are printed to.
    """
    # Sorting the (key, record) pairs orders by key; the key itself is
    # not part of the output.
    for _, rec in sorted(self.items()):
        print(rec, file=out)

def write_hier_all(self, out=sys.stdout,
                   len_dash=1, max_depth=None, num_child=None):
    """Write the hierarchy under each of the three GO namespace roots.

    The reports are written back to back by delegating to ``write_hier``
    once per root, with the formatting arguments passed through unchanged.
    """
    namespace_roots = (
        'GO:0008150',  # biological_process
        'GO:0003674',  # molecular_function
        'GO:0005575',  # cellular_component
    )
    for root_id in namespace_roots:
        self.write_hier(root_id, out, len_dash, max_depth, num_child)

def write_hier(self, GO_id, out=sys.stdout,
               len_dash=1, max_depth=None, num_child=None):
    """Write the hierarchy rooted at the record stored under GO_id.

    The record is looked up in this mapping and the formatting arguments
    are handed, unchanged, to its ``write_hier_rec`` method.
    """
    top_rec = self[GO_id]
    top_rec.write_hier_rec(out, len_dash, max_depth, num_child)

def write_summary_cnts(self, GO_ids, out=sys.stdout):
"""Write summary of level and depth counts for specific GO ids."""
Expand Down
92 changes: 92 additions & 0 deletions scripts/write_hierarchy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from __future__ import print_function

"""
Print the hierarchy below Term, GO:0030663
>>> python {SCR} GO:0030663
- GO:0030663 level-05 depth-07 COPI-coated vesicle membrane [cellular_component]
-- GO:0012508 level-05 depth-08 Golgi to ER transport vesicle membrane [cellular_component]
-- GO:0012509 level-05 depth-08 inter-Golgi transport vesicle membrane [cellular_component]
Write the hierarchy below Term, GO:0030663 into a file
>>> python {SCR} GO:0030663 --o=hier_GO_0030663.rpt
WROTE: hier_GO_0030663.rpt
Print the hierarchy for biological process, molecular_function, and cellular_component:
>>> python {SCR} --o=hier_BP_MF_CC.rpt
Print hierarchy for BP, MF, CC only printing the first 2 levels.
>>> python {SCR} --max_depth=2
>>> python {SCR} --max_depth=2 --dash_len=2 --num_child
Print hierarchy
- 26894 GO:0008150 level-00 depth-00 biological_process [biological_process]
-- 30 GO:0001906 level-01 depth-01 cell killing [biological_process]
-- 555 GO:0002376 level-01 depth-01 immune system process [biological_process]
-- 11208 GO:0065007 level-01 depth-01 biological regulation [biological_process]
>>> python {SCR}
This program prints the hierarchy for all GO terms, if no argument is provided.
If a GO term is provided as an argument, then the hierarchy of all children
for that term is printed.
""".format(SCR=__file__)

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from goatools.obo_parser import GODag


if __name__ == "__main__":

import argparse
p = argparse.ArgumentParser(__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)

p.add_argument('go_ids', type=str, nargs='*',
help='GO Term, e.g. GO:0070458')
p.add_argument('--o', default=None, type=str,
help="Specifies the name of the output file")
p.add_argument('--no_indent', default=False,
help="Do not indent GO terms", action='store_true')
p.add_argument('--obo', default="go-basic.obo", type=str,
help="Location and name of the obo file")
p.add_argument('--dash_len', default=1, type=int,
help="Printed width of the dashes column")
p.add_argument('--max_depth', default=None, type=int,
help="max depth for printing relative to GO Term")
p.add_argument('--num_child', default=None, action='store_true',
help="Print total number of children for each GO")

args = p.parse_args()

obo_dag = GODag(obo_file=args.obo)

file_out = sys.stdout if args.o is None else open(args.o, 'w')
lenprt = args.dash_len if not args.no_indent else None

if args.go_ids:
for go_id in args.go_ids:
obo_dag.write_hier(
go_id,
file_out,
len_dash=lenprt,
max_depth=args.max_depth,
num_child=args.num_child)
else:
obo_dag.write_hier_all(
file_out,
len_dash=lenprt,
max_depth=args.max_depth,
num_child=args.num_child)

if args.o is not None:
file_out.close()
sys.stdout.write(" WROTE: {}\n".format(args.o))

0 comments on commit d5c98fa

Please sign in to comment.