Merge pull request #24 from stuppie/master

Python3 (thanks stuppie)
tanghaibao · Feb 6, 2015 · e7656d0 · e7656d0
2 parents eae14d7 + cab7ac2
commit e7656d0
Show file tree

Hide file tree

Showing 11 changed files with 73 additions and 64 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*.png
+*.obo
diff --git a/goatools/.gitignore b/goatools/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/goatools/__init__.py b/goatools/__init__.py
@@ -1,6 +1,7 @@
+from __future__ import absolute_import
 from .version import __version__
 
 # make the module importable
-from go_enrichment import *
-import multiple_testing
-import obo_parser
+from goatools.go_enrichment import *
+from . import multiple_testing
+from . import obo_parser
diff --git a/goatools/go_enrichment.py b/goatools/go_enrichment.py
@@ -8,12 +8,12 @@
 study genes using Fisher's exact test, and corrected for multiple testing
 (including Bonferroni, Holm, Sidak, and false discovery rate)
 """
-
+from __future__ import absolute_import
 import sys
 import collections
 import os.path as op
 import fisher
-from multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval
+from .multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval
 
 
 class GOEnrichmentRecord(object):
@@ -26,7 +26,7 @@ def __init__(self, **kwargs):
         for f in self._fields:
             self.__setattr__(f, "n.a.")
 
-        for k, v in kwargs.iteritems():
+        for k, v in kwargs.items():
             assert k in self._fields, "invalid field name %s" % k
             self.__setattr__(k, v)
 
@@ -57,12 +57,12 @@ def __repr__(self):
         return "GOEnrichmentRecord(%s)" % self.id
 
     def find_goterm(self, go):
-        if self.id in go.keys():
+        if self.id in list(go.keys()):
             self.goterm = go[self.id]
             self.description = self.goterm.name
 
     def update_fields(self, **kwargs):
-        for k, v in kwargs.iteritems():
+        for k, v in kwargs.items():
             assert k in self._fields, "invalid field name %s" % k
             self.__setattr__(k, v)
 
@@ -101,7 +101,7 @@ def run_study(self, study):
 
         pop_n, study_n = len(self.pop), len(study)
 
-        for term, study_count in term_study.items():
+        for term, study_count in list(term_study.items()):
             pop_count = self.term_pop[term]
             p = fisher.pvalue_population(study_count, study_n,
                                          pop_count, pop_n)
@@ -160,7 +160,7 @@ def update_results(self, method, corrected_pvals):
 
     def print_summary(self, min_ratio=None, indent=False, pval=0.05):
         # field names for output
-        print "\t".join(GOEnrichmentRecord()._fields)
+        print("\t".join(GOEnrichmentRecord()._fields))
 
         for rec in self.results:
             # calculate some additional statistics
@@ -171,7 +171,7 @@ def print_summary(self, min_ratio=None, indent=False, pval=0.05):
                 continue
 
             if rec.is_ratio_different:
-                print rec.__str__(indent=indent)
+                print(rec.__str__(indent=indent))
 
 
 def count_terms(geneset, assoc, obo_dag):

diff --git a/goatools/mapslim.py b/goatools/mapslim.py
@@ -12,7 +12,7 @@
     For now this does not implement Bucket Terms.
 """
 
-from obo_parser import GODag
+from .obo_parser import GODag
 
 
 def mapslim(go_term, go_dag, goslim_dag):

diff --git a/goatools/multiple_testing.py b/goatools/multiple_testing.py
@@ -4,12 +4,13 @@
 """
 A list of commonly used multiple correction routines
 """
-
+from __future__ import print_function
+from __future__ import absolute_import
 import sys
 import random
 import fisher
 import numpy as np
-import go_enrichment
+import goatools.go_enrichment
 
 
 class AbstractCorrection(object):
@@ -61,14 +62,14 @@ class HolmBonferroni(AbstractCorrection):
     """
     def set_correction(self):
         if len(self.pvals):
-            idxs, correction = zip(*self.generate_significant())
+            idxs, correction = list(zip(*self.generate_significant()))
             idxs = list(idxs)
             self.corrected_pvals[idxs] *= correction
 
     def generate_significant(self):
 
         pvals = self.pvals
-        pvals_idxs = zip(pvals, xrange(len(pvals)))
+        pvals_idxs = list(zip(pvals, list(range(len(pvals)))))
         pvals_idxs.sort()
 
         lp = len(self.pvals)
@@ -99,16 +100,16 @@ def __init__(self, p_val_distribution, results, a=.05):
 
 def calc_qval(study_count, study_n, pop_count, pop_n,
               pop, assoc, term_pop, obo_dag):
-    print >>sys.stderr, ("generating p-value distribution for FDR "
-                         "calculation (this might take a while)")
+    print(("generating p-value distribution for FDR "
+                         "calculation (this might take a while)"), file=sys.stderr)
     T = 1000    # number of samples
     distribution = []
-    for i in xrange(T):
+    for i in range(T):
         new_study = random.sample(pop, study_n)
         new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag)
 
         smallest_p = 1
-        for term, study_count in new_term_study.items():
+        for term, study_count in list(new_term_study.items()):
             pop_count = term_pop[term]
             p = fisher.pvalue_population(study_count,
                                          study_n,
@@ -118,7 +119,7 @@ def calc_qval(study_count, study_n, pop_count, pop_n,
                 smallest_p = p.two_tail
 
         distribution.append(smallest_p)
-        print >>sys.stderr, i, smallest_p
+        print(i, smallest_p, file=sys.stderr)
     return distribution
 
 

diff --git a/goatools/obo_parser.py b/goatools/obo_parser.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-
+from __future__ import print_function
 import sys
-from exceptions import EOFError
+try:
+    from exceptions import EOFError
+except ImportError:
+    pass
 
 typedef_tag, term_tag = "[Typedef]", "[Term]"
 
@@ -40,11 +43,11 @@ class OBOReader:
     def __init__(self, obo_file="go-basic.obo"):
 
         try:
-            self._handle = file(obo_file)
+            self._handle = open(obo_file)
         except:
-            print >>sys.stderr, ("download obo file first\n "
+            print(("download obo file first\n "
                                  "[http://purl.obolibrary.org/obo/"
-                                 "go/go-basic.obo]")
+                                 "go/go-basic.obo]"), file=sys.stderr)
             sys.exit(1)
 
     def __iter__(self):
@@ -53,9 +56,9 @@ def __iter__(self):
         if not line.startswith(term_tag):
             read_until(self._handle, term_tag)
         while 1:
-            yield self.next()
+            yield self.__next__()
 
-    def next(self):
+    def __next__(self):
 
         lines = []
         line = self._handle.readline()
@@ -164,15 +167,15 @@ def __init__(self, obo_file="go-basic.obo"):
 
     def load_obo_file(self, obo_file):
 
-        print >>sys.stderr, "load obo file %s" % obo_file
+        print("load obo file %s" % obo_file, file=sys.stderr)
         obo_reader = OBOReader(obo_file)
         for rec in obo_reader:
             self[rec.id] = rec
             for alt in rec.alt_ids:
                 self[alt] = rec
 
         self.populate_terms()
-        print >>sys.stderr, len(self), "nodes imported"
+        print(len(self), "nodes imported", file=sys.stderr)
 
     def populate_terms(self):
 
@@ -185,11 +188,11 @@ def depth(rec):
             return rec.level
 
         # make the parents references to the GO terms
-        for rec in self.itervalues():
+        for rec in self.values():
             rec.parents = [self[x] for x in rec._parents]
 
         # populate children and levels
-        for rec in self.itervalues():
+        for rec in self.values():
             for p in rec.parents:
                 p.children.append(rec)
 
@@ -198,18 +201,18 @@ def depth(rec):
 
     def write_dag(self, out=sys.stdout):
         for rec_id, rec in sorted(self.items()):
-            print >>out, rec
+            print(rec, file=out)
 
     def query_term(self, term, verbose=False):
         if term not in self:
-            print >>sys.stderr, "Term %s not found!" % term
+            print("Term %s not found!" % term, file=sys.stderr)
             return
 
         rec = self[term]
-        print >>sys.stderr, rec
+        print(rec, file=sys.stderr)
         if verbose:
-            print >>sys.stderr, "all parents:", rec.get_all_parents()
-            print >>sys.stderr, "all children:", rec.get_all_children()
+            print("all parents:", rec.get_all_parents(), file=sys.stderr)
+            print("all children:", rec.get_all_children(), file=sys.stderr)
 
         return rec
 
@@ -232,7 +235,7 @@ def paths_to_top(self, term, verbose=False):
         """
         # error handling consistent with original authors
         if term not in self:
-            print >>sys.stderr, "Term %s not found!" % term
+            print("Term %s not found!" % term, file=sys.stderr)
             return
 
         def _paths_to_top_recursive(rec):
@@ -262,8 +265,8 @@ def draw_lineage(self, recs, nodecolor="mediumseagreen",
         try:
             import pygraphviz as pgv
         except:
-            print >>sys.stderr, "pygraphviz not installed, lineage not drawn!"
-            print >>sys.stderr, "try `easy_install pygraphviz`"
+            print("pygraphviz not installed, lineage not drawn!", file=sys.stderr)
+            print("try `easy_install pygraphviz`", file=sys.stderr)
             return
 
         G = pgv.AGraph(name="GO tree")
@@ -311,20 +314,20 @@ def draw_lineage(self, recs, nodecolor="mediumseagreen",
             gmlfile = pf + ".gml"
             nx.write_gml(NG, gmlfile)
 
-        print >>sys.stderr, ("lineage info for terms %s written to %s" %
-                             ([rec.id for rec in recs], lineage_img))
+        print(("lineage info for terms %s written to %s" %
+                             ([rec.id for rec in recs], lineage_img)), file=sys.stderr)
 
         G.draw(lineage_img, prog="dot")
 
     def update_association(self, association):
         bad_terms = set()
-        for key, terms in association.items():
+        for key, terms in list(association.items()):
             parents = set()
             for term in terms:
                 try:
                     parents.update(self[term].get_all_parents())
                 except:
-                    bad_terms.add(term)
+                    bad_terms.add(term.strip())
             terms.update(parents)
         if bad_terms:
-            print >>sys.stderr, "terms not found: %s", bad_terms
+            print("terms not found: %s" % (bad_terms,), file=sys.stderr)
diff --git a/run.sh b/run.sh
@@ -11,7 +11,7 @@ GOSLIM_OBO_DOWNLOAD=http://www.geneontology.org/ontology/subsets/goslim_generic.
 if [ ! -f $GO_OBO_FILE ]
 then
     echo "downloading GO file: $GO_OBO_FILE"
-    wget -O $GOSLIM_OBO_FILE $GO_OBO_DOWNLOAD
+    wget -O $GO_OBO_FILE $GO_OBO_DOWNLOAD
 fi
 
 if [ ! -f $GOSLIM_OBO_FILE ]
@@ -29,19 +29,19 @@ do
 case $REPLY in
 
 1)
-python scripts/find_enrichment.py --alpha=0.05 --indent data/study data/population data/association
+python3 scripts/find_enrichment.py --alpha=0.05 --indent data/study data/population data/association
 ;;
 
 2)
-python scripts/plot_go_term.py --term=GO:0008135
+python3 scripts/plot_go_term.py --term=GO:0008135
 ;;
 
 3)
-python 'tests/test_mapslim.py'
+python3 'tests/test_mapslim.py'
 ;;
 
 4)
-python scripts/map_to_slim.py --association_file=data/association --slim_out=direct $GO_OBO_FILE $GOSLIM_OBO_FILE
+python3 scripts/map_to_slim.py --association_file=data/association --slim_out=direct $GO_OBO_FILE $GOSLIM_OBO_FILE
 ;;
 
 

diff --git a/scripts/find_enrichment.py b/scripts/find_enrichment.py
@@ -14,7 +14,7 @@
 --pval: experiment-wise alpha; for the entire experiment, what significance
         level to apply after Bonferroni correction
 """
-
+from __future__ import print_function
 import sys
 import os.path as op
 sys.path.insert(0, op.join(op.dirname(__file__), ".."))
@@ -33,9 +33,9 @@ def read_geneset(study_fn, pop_fn, compare=False):
         pop |= study
         pop -= common
         study -= common
-        print >>sys.stderr, "removed %d overlapping items" % (len(common), )
-        print >>sys.stderr, "Set 1: {0}, Set 2: {1}".\
-            format(len(study), len(pop))
+        print("removed %d overlapping items" % (len(common), ), file=sys.stderr)
+        print("Set 1: {0}, Set 2: {1}".\
+            format(len(study), len(pop)), file=sys.stderr)
 
     return study, pop
 
@@ -102,7 +102,7 @@ def check_bad_args(args):
     (opts, args) = p.parse_args()
     bad = check_bad_args(args)
     if bad:
-        print bad
+        print(bad)
         sys.exit(p.print_help())
 
     min_ratio = opts.ratio

diff --git a/scripts/map_to_slim.py b/scripts/map_to_slim.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 
+from __future__ import print_function
 import os
 import os.path as op
 import sys
@@ -80,7 +81,7 @@ def read_associations(assoc_fn):
     # in case a single term is given as input:
     if opts.term:
         if opts.term not in go_dag:
-            print >> sys.stderr, ("term %s not found!" % opts.term)
+            print(("term %s not found!" % opts.term), file=sys.stderr)
             sys.exit(1)
         direct_anc, all_anc = mapslim(opts.term, go_dag, goslim_dag)
         # output either all or only direct slims, depending on user command
@@ -95,7 +96,7 @@ def read_associations(assoc_fn):
         assert os.path.exists(opts.ass_file_name), ("file %s not found!"
                                                     % opts.ass_file_name)
         assocs = read_associations(opts.ass_file_name)
-        for protein_product, go_terms in assocs.iteritems():
+        for protein_product, go_terms in assocs.items():
             all_direct_anc = set()
             all_covered_anc = set()
             all_all_anc = set()
@@ -113,4 +114,4 @@ def read_associations(assoc_fn):
                 slim_terms_str = ";".join(all_direct_anc)
             else:
                 slim_terms_str = ";".join(all_all_anc)
-            print(protein_product + "\t" + slim_terms_str)
+            print((protein_product + "\t" + slim_terms_str))