-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3489301
commit b3fe1cb
Showing
24 changed files
with
2,887 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
"""Command-line interface to create an initial Python sections file | ||
Usage: | ||
wr_sections.py [GO_FILE] | ||
wr_sections.py [GO_FILE] [options] | ||
Options: | ||
-h --help show this help message and exit | ||
-i <file.txt>, --ifile=<sections_in.txt> Read or Write file name [default: sections_in.txt] | ||
-o <file.txt>, --ofile=<sections.txt> write file name [default: sections.txt] | ||
--txt=<file.txt> Write file name [default: grouped_gos.txt] | ||
--py=<file.py> Write the sections list into a Python file | ||
--xlsx=<file.xlsx> Group user GO IDs and write the results into an xlsx file | ||
--obo=<file.obo> Ontologies in obo file [default: go-basic.obo]. | ||
--slims=<file.obo> GO slims in obo file [default: goslim_generic.obo]. | ||
--gaf=<file.gaf> Annotations from a gaf file | ||
--gene2go=<gene2go> Annotations from a gene2go file downloaded from NCBI | ||
""" | ||
|
||
from __future__ import print_function | ||
|
||
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved." | ||
__author__ = "DV Klopfenstein" | ||
|
||
|
||
import os | ||
import sys | ||
|
||
from goatools.base import get_godag | ||
from goatools.associations import get_tcntobj | ||
|
||
from goatools.cli.docopt_parse import DocOptParse | ||
from goatools.cli.gos_get import GetGOs | ||
|
||
from goatools.gosubdag.gosubdag import GoSubDag | ||
|
||
from goatools.grouper.read_goids import read_sections | ||
from goatools.grouper.grprdflts import GrouperDflts | ||
from goatools.grouper.hdrgos import HdrgosSections | ||
from goatools.grouper.grprobj import Grouper | ||
from goatools.grouper.wr_sections import WrSections | ||
from goatools.grouper.wr_sections import WrPySections | ||
from goatools.grouper.sorter import Sorter | ||
from goatools.grouper.wrxlsx import WrXlsxSortedGos | ||
|
||
|
||
# pylint: disable=too-few-public-methods | ||
class SectionsWr(object):
    """Command-line interface for creating an initial GO sections file."""

    # Option names handed to DocOptParse as its second and third arguments.
    kws_dict = {'GO_FILE', 'obo', 'slims',
                'ifile', 'ofile', 'txt',
                'py', 'xlsx',
                'gaf', 'gene2go', 'taxid'}
    kws_set = set()

    def __init__(self, gosubdag=None):
        """Build the docopt parser from the module docstring.

        Args:
            gosubdag: Optional pre-built GoSubDag; ``cli`` replaces it with
                the one it builds from the user GO IDs.
        """
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self, prt=sys.stdout):
        """Read user GO IDs, group them into sections, and write the results.

        Args:
            prt: Writable stream that receives the version lines and the
                final count of user GO IDs.
        """
        kws = self.objdoc.get_docargs(prt=None)
        godag = get_godag(kws['obo'], prt=None, loading_bar=False,
                          optional_attrs=['relationship'])
        usrgos = GetGOs(godag, max_gos=200).get_usrgos(kws.get('GO_FILE'), prt)
        # TermCounts when an annotation file was given, otherwise None
        tcntobj = self._get_tcntobj(usrgos, godag, **kws)
        self.gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=tcntobj, prt=None)
        grprdflt = GrouperDflts(self.gosubdag, kws['slims'])
        ver_list = [godag.version, grprdflt.ver_goslims]
        prt.write("{VER}\n".format(VER="\n".join(ver_list)))
        sections = read_sections(kws['ifile'], exclude_ungrouped=True, prt=None)
        hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
        grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)
        # Write sections; the input sections file is created only if absent
        objsecwr = WrSections(grprobj, ver_list)
        if not os.path.exists(kws['ifile']):
            objsecwr.wr_txt_section_hdrgos(kws['ifile'])
        objsecwr.wr_txt_section_hdrgos(kws['ofile'])
        objsecpy = WrPySections(grprobj, ver_list)
        if 'py' in kws:
            objsecpy.wr_py_sections(kws['py'], sections, doc=godag.version)
        # Write user GO IDs in sections
        sortobj = Sorter(grprobj)
        objgowr = WrXlsxSortedGos("init", sortobj, ver_list)
        objgowr.wr_txt_gos(kws['txt'], sortby=objsecpy.fncsortnt)
        # Report to the caller-supplied stream, like the version lines above
        self._prt_cnt_usrgos(usrgos, prt)

    def _prt_cnt_usrgos(self, usrgos_read, prt):
        """Write how many user GO IDs are in the GoSubDag.

        The number of GO IDs read is appended only when it differs from
        the number of GO sources held by ``self.gosubdag``.

        Args:
            usrgos_read: Sized collection of the GO IDs that were read.
            prt: Writable stream that receives the one-line report.
        """
        num_usrgos = len(self.gosubdag.go_sources)
        num_read = len(usrgos_read)
        msg = "{GOs:6} user GO IDs".format(GOs=num_usrgos)
        if num_read != num_usrgos:
            msg += " of {M} GO IDs read".format(M=num_read)
        prt.write(msg + "\n")

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Get a TermCounts object if the user provides an annotation file, otherwise None.

        Args:
            goids: User GO IDs used to build a reduced GoSubDag.
            go2obj: GO DAG passed to GoSubDag.
            **kws: Parsed options; 'gaf' or 'gene2go' triggers TermCounts creation.
        """
        # kws: gaf (gene2go taxid)
        if 'gaf' in kws or 'gene2go' in kws:
            # Get a reduced go2obj set for TermCounts
            _gosubdag = GoSubDag(goids, go2obj, rcntobj=False, prt=None)
            return get_tcntobj(_gosubdag.go2obj, **kws)  # TermCounts
        return None
|
||
|
||
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"""Various methods to estimating if a GO term is more specific than another GO term.""" | ||
|
||
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved." | ||
__author__ = "DV Klopfenstein" | ||
|
||
|
||
def get_most_specific_dcnt(goids, go2nt):
    """Return the GO ID whose record has the smallest descendants count (dcnt).

    Raises ValueError if goids is empty (nothing to take the minimum of).
    """
    candidates = _get_go2nt(goids, go2nt)
    goid_best, _ = min(candidates, key=lambda go_nt: go_nt[1].dcnt)
    return goid_best
|
||
def get_most_specific_tinfo(goids, go2nt):
    """Return the GO ID whose record has the largest annotation information value (tinfo).

    Raises ValueError if goids is empty (nothing to take the maximum of).
    """
    candidates = _get_go2nt(goids, go2nt)
    goid_best, _ = max(candidates, key=lambda go_nt: go_nt[1].tinfo)
    return goid_best
|
||
def get_most_specific_tinfo_dcnt(goids, go2nt):
    """Return the GO ID with the largest tinfo, comparing dcnt when tinfo ties.

    Records are ranked by (tinfo, dcnt), so among equal tinfo values the
    record with the larger dcnt is selected.
    Raises ValueError if goids is empty (nothing to take the maximum of).
    """
    candidates = _get_go2nt(goids, go2nt)
    goid_best, _ = max(candidates, key=lambda go_nt: (go_nt[1].tinfo, go_nt[1].dcnt))
    return goid_best
|
||
def _get_go2nt(goids, go2nt_all):
    """Collect (main GO ID, record) pairs for the user GO IDs, without duplicates.

    Each user GO ID is looked up in go2nt_all and the record's ``id``
    attribute is used as the GO ID in the result, so several user IDs that
    resolve to the same ``id`` (e.g. alt IDs) yield one pair. Pairs keep
    the order in which their ``id`` was first encountered.
    """
    pairs = []
    seen_main = set()
    for goid in goids:
        nt_cur = go2nt_all[goid]
        if nt_cur.id in seen_main:
            continue
        seen_main.add(nt_cur.id)
        pairs.append((nt_cur.id, nt_cur))
    return pairs
|
||
|
||
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
# Group GO Terms under researcher-defined sections | ||
# Directory for grouper functions and classes |
Oops, something went wrong.