Skip to content

Commit

Permalink
Move wr_hier script implementation out of godag and into its own modules.
Browse files Browse the repository at this point in the history
  • Loading branch information
dvklopfenstein committed Jun 24, 2018
1 parent 0d88ad3 commit 1b2584d
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 222 deletions.
18 changes: 7 additions & 11 deletions goatools/cli/wr_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
--no_indent Do not indent GO terms
--max_indent=<int> max indent depth for printing relative to GO Term
--num_child=<int> Print count of total number of children for each GO
--short If a branch has already been printed, do not re-print.
--concise If a branch has already been printed, do not re-print.
Print '===' instead of dashes to note the point of compression
--dash_len=<int> Printed width of the dashes column [default: 6]
-r --relationship Load and use the 'relationship' field
"""

Expand Down Expand Up @@ -48,22 +48,21 @@ def cli():
objcli.wrtxt_hier(fout_txt)
else:
objcli.prt_hier(sys.stdout)
print(objcli.kws)


class WrHierCli(object):
"""Write hierarchy cli."""

kws_set_all = set(['relationship', 'up', 'f'])
kws_dct_all = set(['GO', 'dag', 'i', 'o', 'max_indent', 'num_child', 'no_indent', 'short',
'gaf', 'gene2go'])
kws_dct_wr = set(['max_indent', 'num_child', 'no_indent', 'short', 'relationship'])
kws_dct_all = set(['GO', 'dag', 'i', 'o', 'max_indent', 'no_indent', 'concise',
'gaf', 'gene2go', 'dash_len', 'include_only'])
kws_dct_wr = set(['max_indent', 'no_indent', 'concise', 'relationship', 'dash_len'])

def __init__(self, args=None, prt=sys.stdout):
self.kws = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all).get_docargs(
args, intvals=set(['max_indent', 'num_child']))
args, intvals=set(['max_indent', 'dash_len']))
opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
# goids_usr = None if 'GO' not in self.kws else self.kws['GO']
self.gosubdag = GoSubDag(godag.keys(), godag,
relationships='relationship' in opt_attrs,
tcntobj=get_tcntobj(godag, **self.kws),
Expand Down Expand Up @@ -97,9 +96,6 @@ def prt_hier(self, prt=sys.stdout):
"""Write hierarchy below specfied GO IDs."""
objwr = WrHierGO(self.gosubdag, **self.kws)
assert self.goids, "NO VALID GO IDs WERE PROVIDED"
# kws = {k:v for k, v in self.kws.items() if k in self.kws_dct_wr}
# objwr.write_hier_all(prt=prt, **kws)
# max_indent=None, num_child=None, short_prt=False):
if 'up' not in objwr.usrset:
for goid in self.goids:
objwr.prt_hier_down(goid, prt)
Expand Down
62 changes: 41 additions & 21 deletions goatools/gosubdag/rpt/write_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
class WrHierGO(object):
"""Write hierarchy object."""

kws_dct = set(['max_indent', 'num_child'])
kws_set = set(['no_indent', 'short'])
kws_dct = set(['max_indent'])
kws_set = set(['no_indent', 'concise'])
consts = Consts()

def __init__(self, gosubdag, **kws):
Expand All @@ -25,27 +25,41 @@ def __init__(self, gosubdag, **kws):
def prt_hier_all(self, prt=sys.stdout):
"""Write hierarchy for all GO Terms in obo file."""
# Print: [biological_process, molecular_function, and cellular_component]
gos_printed = set()
for goid in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
self.prt_hier_down(goid, prt)
gos_printed.update(self.prt_hier_down(goid, prt))
return gos_printed

def prt_hier_down(self, goid, prt=sys.stdout):
"""Write hierarchy for all GO IDs below GO ID in arg, goid."""
obj = _WrHierPrt(self, prt)
obj.prt_hier_rec(goid)
return obj.gos_printed

def prt_hier_up(self, goids, prt=sys.stdout):
"""Write hierarchy for all GO IDs below GO ID in arg, goid."""
go2goterm_all = {go:self.gosubdag.go2obj[go] for go in goids}
objp = GoPaths()
gos_printed = set()
for namespace, go2term_ns in self._get_namespace2go2term(go2goterm_all).items():
go_root = self.consts.NAMESPACE2GO[namespace]
goids_all = set()
goids_all = set() # GO IDs from user-specified GO to root
for goid, goterm in go2term_ns.items():
goids_all.add(goid)
paths = objp.get_paths_from_to(goterm, goid_end=None, dn0_up1=True)
goids_all.update(set(o.id for p in paths for o in p))
obj = _WrHierPrt(self, prt, goids_all, set(go2term_ns.keys()))
# Only include GO IDs from user-specified GO to the root
if 'include_only' not in self.usrdct:
self.usrdct['include_only'] = set()
self.usrdct['include_only'].update(goids_all)
# Mark the user-specified GO term
if 'go_marks' not in self.usrdct:
self.usrdct['go_marks'] = set()
self.usrdct['go_marks'].update(go2term_ns.keys())
obj = _WrHierPrt(self, prt) # , goids_all, set(go2term_ns.keys()))
gos_printed.update(obj.gos_printed)
obj.prt_hier_rec(go_root)
return gos_printed

@staticmethod
def _get_namespace2go2term(go2terms):
Expand All @@ -56,40 +70,40 @@ def _get_namespace2go2term(go2terms):
return namespace2go2term


# pylint: disable=too-many-instance-attributes,too-few-public-methods
class _WrHierPrt(object):
"""Print GO hierarchy."""

def __init__(self, obj, prt=sys.stdout, include_only=None, go_marks=None):
def __init__(self, obj, prt=sys.stdout):
self.gosubdag = obj.gosubdag
self.max_indent = obj.usrdct.get('max_indent')
# self.num_child = num_child
self.include_only = include_only # if include_only else set()
self.go_marks = go_marks if go_marks else set()
self.short_prt = 'short' in obj.usrset
self.include_only = obj.usrdct['include_only'] if 'include_only' in obj.usrdct else None
self.go_marks = obj.usrdct['go_marks'] if 'go_marks' in obj.usrdct else set()
self.concise_prt = 'concise' in obj.usrset
self.indent = 'no_indent' not in obj.usrset
# vars
self.prt = prt
self.gos_printed = set()
self.prtfmt = self.gosubdag.prt_attr['fmta'].replace('{GO}{alt:1} # ', '')
self.prtfmt = self._init_prtfmt()
self.dash_len = obj.usrdct.get('dash_len', 6) + 12

def prt_hier_rec(self, goid, depth=1):
"""Write hierarchy for a GO Term record and all GO IDs down to the leaf level."""
ntgo = self.gosubdag.go2nt[goid]
ntobj = self.gosubdag.go2obj[goid]
# Shortens hierarchy report by only printing the hierarchy
# for the sub-set of user-specified GO terms which are connected.
if self.include_only is not None and goid not in self.include_only:
if self.include_only and goid not in self.include_only:
return
nrp = self.short_prt and goid in self.gos_printed
nrp = self.concise_prt and goid in self.gos_printed
if self.go_marks:
self.prt.write('{} '.format('>' if goid in self.go_marks else ' '))

# '-' is default character indicating hierarchy level
# '=' is used to indicate a hierarchical path printed in detail previously.
dashgo = self._str_dashgoid(ntgo, depth, not nrp or not ntobj.children)
self.prt.write('{DASHGO:{N}}'.format(DASHGO=dashgo, N=17))
self.prt.write('{DASHGO:{N}}'.format(DASHGO=dashgo, N=self.dash_len))

# if num_child is not None:
# self.prt.write('{N:>5} '.format(N=len(self.get_all_children())))
self.prt.write("{GO_INFO}\n".format(GO_INFO=self.prtfmt.format(**ntgo._asdict())))
self.gos_printed.add(goid)
# Do not print hierarchy below this term if it has already been printed
Expand All @@ -111,9 +125,15 @@ def _str_dash(depth, single_or_double):

def _str_dashgoid(self, ntgo, depth, single_or_double):
"""Return a string containing dashes (optional) and GO ID."""
dashes = self._str_dash(depth, single_or_double)
dashes = self._str_dash(depth, single_or_double) if self.indent else ""
return "{DASHES} {GO}{alt:1}".format(DASHES=dashes, GO=ntgo.GO, alt=ntgo.alt)

def _init_prtfmt(self):
"""Initialize print format."""
prtfmt = self.gosubdag.prt_attr['fmt']
prtfmt = prtfmt.replace('{GO} # ', '')
prtfmt = prtfmt.replace('{D1:5} ', '')
return prtfmt

#### Examples:
####
Expand All @@ -135,20 +155,20 @@ def _str_dashgoid(self, ntgo, depth, single_or_double):
####
#### Print hierarchy for BP, MF, CC only printing the first 2 levels.
#### >>> python {SCR} --max_indent=2
#### >>> python {SCR} --max_indent=2 --dash_len=2 --num_child
#### >>> python {SCR} --max_indent=2 --dash_len=2
####
####
#### Print a shortened version of the hierarchy for BP, MF, and CC.
#### Print a concise version of the hierarchy for BP, MF, and CC.
#### This will only print a path to a leaf GO Term once.
#### If the path appears a second time, the term is printed again, but its path is not.
#### The presence of compressed (unprinted) paths is marked by using '=' instead of '-'.
####
#### $ wc -l hier_BP_MF_CC*.rpt
####
#### 789583 hier_BP_MF_CC.rpt
#### 70152 hier_BP_MF_CC_short.rpt
#### 70152 hier_BP_MF_CC_concise.rpt
####
#### >>> python {SCR} --o=hier_BP_MF_CC_short.rpt --short
#### >>> python {SCR} --o=hier_BP_MF_CC_concise.rpt --concise
####
#### Print hierarchy
#### - 26894 GO:0008150 level-00 depth-00 biological_process [biological_process]
Expand Down
102 changes: 51 additions & 51 deletions goatools/obo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,43 +260,43 @@ def get_goterms_lower(self):
return set.union(self.children, *self.relationship_rev.values())


def write_hier_rec(self, gos_printed, out=sys.stdout,
len_dash=1, max_depth=None, num_child=None, short_prt=False,
include_only=None, go_marks=None,
depth=1, depth_dashes="-"):
"""Write hierarchy for a GO Term record."""
# Added by DV Klopfenstein
goid = self.id
# Shortens hierarchy report by only printing the hierarchy
# for the sub-set of user-specified GO terms which are connected.
if include_only is not None and goid not in include_only:
return
nrp = short_prt and goid in gos_printed
if go_marks is not None:
out.write('{} '.format('>' if goid in go_marks else ' '))
if len_dash is not None:
# Default character indicating hierarchy level is '-'.
# '=' is used to indicate a hierarchical path printed in detail previously.
letter = '-' if not nrp or not self.children else '='
depth_dashes = ''.join([letter]*depth)
out.write('{DASHES:{N}} '.format(DASHES=depth_dashes, N=len_dash))
if num_child is not None:
out.write('{N:>5} '.format(N=len(self.get_all_children())))
out.write('{GO}\tL-{L:>02}\tD-{D:>02}\t{desc}\n'.format(
GO=self.id, L=self.level, D=self.depth, desc=self.name))
# Track GOs previously printed only if needed
if short_prt:
gos_printed.add(goid)
# Do not print hierarchy below this turn if it has already been printed
if nrp:
return
depth += 1
if max_depth is not None and depth > max_depth:
return
for child in self.children:
child.write_hier_rec(gos_printed, out, len_dash, max_depth, num_child, short_prt,
include_only, go_marks,
depth, depth_dashes)
#### def write_hier_rec(self, gos_printed, out=sys.stdout,
#### len_dash=1, max_depth=None, num_child=None, short_prt=False,
#### include_only=None, go_marks=None,
#### depth=1, depth_dashes="-"):
#### """Write hierarchy for a GO Term record."""
#### # Added by DV Klopfenstein
#### goid = self.id
#### # Shortens hierarchy report by only printing the hierarchy
#### # for the sub-set of user-specified GO terms which are connected.
#### if include_only is not None and goid not in include_only:
#### return
#### nrp = short_prt and goid in gos_printed
#### if go_marks is not None:
#### out.write('{} '.format('>' if goid in go_marks else ' '))
#### if len_dash is not None:
#### # Default character indicating hierarchy level is '-'.
#### # '=' is used to indicate a hierarchical path printed in detail previously.
#### letter = '-' if not nrp or not self.children else '='
#### depth_dashes = ''.join([letter]*depth)
#### out.write('{DASHES:{N}} '.format(DASHES=depth_dashes, N=len_dash))
#### if num_child is not None:
#### out.write('{N:>5} '.format(N=len(self.get_all_children())))
#### out.write('{GO}\tL-{L:>02}\tD-{D:>02}\t{desc}\n'.format(
#### GO=self.id, L=self.level, D=self.depth, desc=self.name))
#### # Track GOs previously printed only if needed
#### if short_prt:
#### gos_printed.add(goid)
#### # Do not print hierarchy below this turn if it has already been printed
#### if nrp:
#### return
#### depth += 1
#### if max_depth is not None and depth > max_depth:
#### return
#### for child in self.children:
#### child.write_hier_rec(gos_printed, out, len_dash, max_depth, num_child, short_prt,
#### include_only, go_marks,
#### depth, depth_dashes)


class GODag(dict):
Expand Down Expand Up @@ -435,20 +435,20 @@ def write_dag(self, out=sys.stdout):
for rec in sorted(self.values()):
print(rec, file=out)

def write_hier_all(self, out=sys.stdout,
len_dash=1, max_depth=None, num_child=None, short_prt=False):
"""Write hierarchy for all GO Terms in obo file."""
# Print: [biological_process, molecular_function, and cellular_component]
for go_id in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
self.write_hier(go_id, out, len_dash, max_depth, num_child, short_prt, None)

def write_hier(self, go_id, out=sys.stdout,
len_dash=1, max_depth=None, num_child=None, short_prt=False,
include_only=None, go_marks=None):
"""Write hierarchy for a GO Term."""
gos_printed = set()
self[go_id].write_hier_rec(gos_printed, out, len_dash, max_depth, num_child,
short_prt, include_only, go_marks)
#### def write_hier_all(self, out=sys.stdout,
#### len_dash=1, max_depth=None, num_child=None, short_prt=False):
#### """Write hierarchy for all GO Terms in obo file."""
#### # Print: [biological_process, molecular_function, and cellular_component]
#### for go_id in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
#### self.write_hier(go_id, out, len_dash, max_depth, num_child, short_prt, None)
####
#### def write_hier(self, go_id, out=sys.stdout,
#### len_dash=1, max_depth=None, num_child=None, short_prt=False,
#### include_only=None, go_marks=None):
#### """Write hierarchy for a GO Term."""
#### gos_printed = set()
#### self[go_id].write_hier_rec(gos_printed, out, len_dash, max_depth, num_child,
#### short_prt, include_only, go_marks)

@staticmethod
def id2int(go_id):
Expand Down

0 comments on commit 1b2584d

Please sign in to comment.