Skip to content

Commit

Permalink
Read a sections file from file: text, Python text, Python module.
Browse files Browse the repository at this point in the history
  • Loading branch information
dvklopfenstein committed Jul 8, 2018
1 parent ff4410a commit 8be824e
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 16 deletions.
50 changes: 47 additions & 3 deletions goatools/grouper/read_goids.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import os
import sys
import re
import pkgutil
import importlib
from goatools.gosubdag.go_tasks import chk_goids
from goatools.grouper.hdrgos import HdrgosSections
from goatools.grouper.grprobj import Grouper
def _import_module_or_none(modstr):
    """Import the module named by modstr; return None if no such module exists."""
    # Empty, relative, or path-like strings can never be absolute module names.
    # NOTE: '\\' is ONE backslash; the raw string r'\\' would be two and would
    # therefore miss ordinary Windows paths such as 'data\sections.txt'.
    if not modstr or modstr.startswith('.') or '/' in modstr or '\\' in modstr:
        return None
    try:
        return importlib.import_module(modstr)
    except ImportError as err:
        # A missing file like 'sections.txt' looks like module 'txt' in package
        # 'sections'; the failed import of 'sections' means "not a module".
        missing = getattr(err, 'name', None)
        if missing is not None and modstr != missing and \
                not modstr.startswith(missing + '.'):
            # The module itself exists, but one of ITS imports failed:
            # surface the real error rather than hiding it.
            raise
        return None


def read_sections(sections_file, exclude_ungrouped=False, prt=sys.stdout):
    """Get sections and GO grouping hdrgos from a file or module, if sections exist.

    Args:
        sections_file: None, a path to an existing sections file, or an
            importable module string whose module defines 'SECTIONS'.
        exclude_ungrouped: Passed through to ReadGoids.read_sections when
            sections_file is an existing path.
        prt: Stream which receives a "CANNOT READ" message when sections_file
            is neither an existing path nor an importable module; falsy to
            suppress the message.

    Returns:
        None if sections_file is None or cannot be read; the value returned by
        ReadGoids.read_sections for an existing path; the module's 'SECTIONS'
        variable for a module string.

    Raises:
        AssertionError: sections_file is neither None nor a str.
        RuntimeError: The module was imported, but has no 'SECTIONS' variable.
    """
    if sections_file is None:
        return None
    assert isinstance(sections_file, str), "BAD SECTIONS FILENAME({S})".format(
        S=sections_file)
    if os.path.exists(sections_file):
        return ReadGoids().read_sections(sections_file, False, exclude_ungrouped)
    # Is 'sections_file' a module string?
    mod = _import_module_or_none(sections_file)
    if mod is not None:
        var = getattr(mod, 'SECTIONS', None)
        if var is not None:
            return var
        raise RuntimeError("NO 'SECTIONS' VARIABLE FOUND IN MODULE({M})".format(M=sections_file))
    if prt:
        prt.write("CANNOT READ: {SEC}\n".format(SEC=sections_file))
    return None

Expand All @@ -33,6 +45,13 @@ class ReadGoids(object):
"""Get user list of GO IDs either from a list or from GO IDs on the command-line"""

srch_section = re.compile(r'^#?\s*SECTION:\s*(\S.*\S)\s*$', flags=re.IGNORECASE)
# For reading SECTIONS from a Python file as text (without importing)
# ("cell death", [ # 6 GO-headers
# ("viral/bacteria", [ # 4 GO-headers
srch_py_section = re.compile(r'^\s*(\(|\[)\s*(\'|")(.*)\s*(\'|")\s*,\s*\[')
# "GO:0002376", # BP 564 L01 D01 M immune system process
# "GO:0002682", # BP 1,183 L02 D02 AB regulation of immune system process
srch_py_goids = re.compile(r'^\s*(\'|")(GO:\d{7})(\'|")\s*,?')

def __init__(self):
self.goids_fin = []
Expand All @@ -50,7 +69,7 @@ def read_txt(self, fin_txt, get_goids_only, exclude_ungrouped, prt=sys.stdout):

def read_py(self, fin_txt, get_goids_only, exclude_ungrouped, prt=sys.stdout):
"""Read GO IDs or sections data from a Python file."""
goids_fin = self._read_txt(fin_txt, get_goids_only, exclude_ungrouped)
goids_fin = self._read_py(fin_txt, get_goids_only, exclude_ungrouped)
sections = self._read_finish(goids_fin, prt)
# Print summary of GO IDs read
if prt is not None:
Expand All @@ -59,9 +78,9 @@ def read_py(self, fin_txt, get_goids_only, exclude_ungrouped, prt=sys.stdout):

def read_sections(self, sections_file, get_goids_only, exclude_ungrouped):
"""Read sections variable from a text file or from a Python file."""
ext = os.path.splitext(sections_file)
ext = os.path.splitext(sections_file)[1]
file_contents = None
if ext and ext == ".py":
if ext == '.py':
file_contents = self.read_py(sections_file, get_goids_only, exclude_ungrouped)
else:
file_contents = self.read_txt(sections_file, get_goids_only, exclude_ungrouped)
Expand Down Expand Up @@ -110,6 +129,31 @@ def _read_txt(self, fin_txt, get_goids_only, exclude_ungrouped):
self.section2goids[section_name] = goids_sec
return goids_sec

def _read_py(self, fin_py, get_goids_only, exclude_ungrouped):
    """Parse a Python sections file as plain text (the file is not imported).

    Section names are appended to self.sections_seen and every section which
    has at least one GO ID is stored in self.section2goids.

    Returns the GO IDs collected since the last section header; this is every
    GO ID in the file when no section header was parsed.
    """
    current_name = None
    current_goids = []
    with open(fin_py) as handle:
        for text in handle:
            # A quoted GO ID at the start of a line belongs to the current section
            go_match = self.srch_py_goids.search(text)
            if go_match:
                current_goids.append(go_match.group(2))
                continue
            # Section headers are only examined when sections were requested
            if get_goids_only or "[" not in text:
                continue
            sec_match = self.srch_py_section.search(text)
            if not sec_match:
                continue
            # New section header: save the section which just ended, if any
            if current_name is not None and current_goids:
                self.section2goids[current_name] = current_goids
            title = sec_match.group(3)
            if exclude_ungrouped and title == HdrgosSections.secdflt:
                # The default section is neither recorded nor stored
                current_name = None
            else:
                current_name = title
                self.sections_seen.append(current_name)
            current_goids = []
    # Save the final section in the file
    if current_name is not None and current_goids:
        self.section2goids[current_name] = current_goids
    return current_goids

def _rpt_unused_sections(self, prt):
"""Report unused sections."""
sections_unused = set(self.sections_seen).difference(self.section2goids.keys())
Expand Down
38 changes: 25 additions & 13 deletions tests/test_wr_sections_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,45 @@ def test_wr_sections_txt():
# Print usrgos in txt (Do not use sections containing hdrgos)
# ------------------------------------------------------------------
# Show GO grouping hdrgos and usrgos to show how usrgos are grouped
_wr_sections_txt("a0_hdr1.txt", usrgos, sections_file=None, grprdflt=grprdflt)
_wr_sections_txt("a0_hdr1.txt", usrgos, sections=None, grprdflt=grprdflt)

# ------------------------------------------------------------------
# Print usrgos in txt using sections containing hdrgos
# ------------------------------------------------------------------
sec1 = _read_sections("./data/gjoneska/sections_in.txt")
# Print usrgos in sections, showing how they were grouped under hdrgos
_wr_sections_txt("a_ec0_hdr1.txt", usrgos, sec1, grprdflt=grprdflt)

# ------------------------------------------------------------------
# Print usrgos in txt using sections containing hdrgos
# ------------------------------------------------------------------
# sec2 = _read_sections("goatools/test_data/sections/gjoneska_pfenning.py")
# # Print usrgos in sections, showing how they were grouped under hdrgos
# _wr_sections_txt("b_sec0_hdr1.txt", usrgos, sec2, grprdflt=grprdflt)
sec2a = _read_sections("goatools/test_data/sections/gjoneska_pfenning.py")
_wr_sections_txt("b_sec0_hdr1.txt", usrgos, sec2a, grprdflt=grprdflt)

sec2b = _read_sections("goatools.test_data.sections.gjoneska_pfenning")
_wr_sections_txt("c_sec0_hdr1.txt", usrgos, sec2b, grprdflt=grprdflt)
# print("@@@@@@@@@ SECTIONS READ AS TEXT", sec1)
# print("@@@@@@@@@ SECTIONS READ AS TEXT", sec2)
# print("@@@@@@@@@ SECTIONS READ AS TEXT", sec3)
_chk_sections(sec2a, sec2b)


def _chk_sections(sec_a, sec_b):
    """Assert that two sections variables hold the same names and GO IDs, in order."""
    num_a = len(sec_a)
    num_b = len(sec_b)
    assert num_a == num_b, "LENGTH MISMATCH: {A} != {B}".format(A=num_a, B=num_b)
    # Compare the sections pairwise: first the section name, then its GO IDs
    for item_a, item_b in zip(sec_a, sec_b):
        name_a, gos_a = item_a
        name_b, gos_b = item_b
        assert name_a == name_b, "NAME MISMATCH: {A} != {B}".format(A=name_a, B=name_b)
        assert gos_a == gos_b, "GO IDs MISMATCH: {A} != {B}".format(A=gos_a, B=gos_b)

def _read_sections(fin):
def _read_sections(sec):
"""Get sections variable from file."""
sec = os.path.join(REPO, fin)
assert read_sections(sec), "EMPTY SECTIONS FILE({})".format(sec)
return sec
if '/' in sec:
sec = os.path.join(REPO, sec)
var = read_sections(sec)
assert var, "EMPTY SECTIONS FILE({})".format(sec)
return var

def _wr_sections_txt(fout_txt, usrgos, sections_file, grprdflt):
def _wr_sections_txt(fout_txt, usrgos, sections, grprdflt):
"""Given a list of usrgos and sections, write text file."""
try:
sections = read_sections(sections_file)
hdrobj = HdrgosSections(grprdflt.gosubdag, grprdflt.hdrgos_dflt, sections=sections)
grprobj = Grouper(fout_txt, usrgos, hdrobj, grprdflt.gosubdag, go2nt=None)
full_txt = os.path.join(REPO, fout_txt)
Expand Down

0 comments on commit 8be824e

Please sign in to comment.