Skip to content

Commit

Permalink
Merge pull request #24 from stuppie/master
Browse files: browse the repository at this point in the history
Python3 (thanks stuppie)
  • Loading branch information
tanghaibao committed Feb 6, 2015
2 parents: eae14d7 + cab7ac2; commit e7656d0
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 64 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.png
*.obo
1 change: 1 addition & 0 deletions goatools/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pyc
7 changes: 4 additions & 3 deletions goatools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import absolute_import
from .version import __version__

# make the module importable
from go_enrichment import *
import multiple_testing
import obo_parser
from goatools.go_enrichment import *
from . import multiple_testing
from . import obo_parser
16 changes: 8 additions & 8 deletions goatools/go_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
study genes using Fisher's exact test, and corrected for multiple testing
(including Bonferroni, Holm, Sidak, and false discovery rate)
"""

from __future__ import absolute_import
import sys
import collections
import os.path as op
import fisher
from multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval
from .multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval


class GOEnrichmentRecord(object):
Expand All @@ -26,7 +26,7 @@ def __init__(self, **kwargs):
for f in self._fields:
self.__setattr__(f, "n.a.")

for k, v in kwargs.iteritems():
for k, v in kwargs.items():
assert k in self._fields, "invalid field name %s" % k
self.__setattr__(k, v)

Expand Down Expand Up @@ -57,12 +57,12 @@ def __repr__(self):
return "GOEnrichmentRecord(%s)" % self.id

def find_goterm(self, go):
if self.id in go.keys():
if self.id in list(go.keys()):
self.goterm = go[self.id]
self.description = self.goterm.name

def update_fields(self, **kwargs):
for k, v in kwargs.iteritems():
for k, v in kwargs.items():
assert k in self._fields, "invalid field name %s" % k
self.__setattr__(k, v)

Expand Down Expand Up @@ -101,7 +101,7 @@ def run_study(self, study):

pop_n, study_n = len(self.pop), len(study)

for term, study_count in term_study.items():
for term, study_count in list(term_study.items()):
pop_count = self.term_pop[term]
p = fisher.pvalue_population(study_count, study_n,
pop_count, pop_n)
Expand Down Expand Up @@ -160,7 +160,7 @@ def update_results(self, method, corrected_pvals):

def print_summary(self, min_ratio=None, indent=False, pval=0.05):
# field names for output
print "\t".join(GOEnrichmentRecord()._fields)
print("\t".join(GOEnrichmentRecord()._fields))

for rec in self.results:
# calculate some additional statistics
Expand All @@ -171,7 +171,7 @@ def print_summary(self, min_ratio=None, indent=False, pval=0.05):
continue

if rec.is_ratio_different:
print rec.__str__(indent=indent)
print(rec.__str__(indent=indent))


def count_terms(geneset, assoc, obo_dag):
Expand Down
2 changes: 1 addition & 1 deletion goatools/mapslim.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
For now this does not implement Bucket Terms.
"""

from obo_parser import GODag
from .obo_parser import GODag


def mapslim(go_term, go_dag, goslim_dag):
Expand Down
19 changes: 10 additions & 9 deletions goatools/multiple_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
"""
A list of commonly used multiple correction routines
"""

from __future__ import print_function
from __future__ import absolute_import
import sys
import random
import fisher
import numpy as np
import go_enrichment
import goatools.go_enrichment


class AbstractCorrection(object):
Expand Down Expand Up @@ -61,14 +62,14 @@ class HolmBonferroni(AbstractCorrection):
"""
def set_correction(self):
if len(self.pvals):
idxs, correction = zip(*self.generate_significant())
idxs, correction = list(zip(*self.generate_significant()))
idxs = list(idxs)
self.corrected_pvals[idxs] *= correction

def generate_significant(self):

pvals = self.pvals
pvals_idxs = zip(pvals, xrange(len(pvals)))
pvals_idxs = list(zip(pvals, list(range(len(pvals)))))
pvals_idxs.sort()

lp = len(self.pvals)
Expand Down Expand Up @@ -99,16 +100,16 @@ def __init__(self, p_val_distribution, results, a=.05):

def calc_qval(study_count, study_n, pop_count, pop_n,
pop, assoc, term_pop, obo_dag):
print >>sys.stderr, ("generating p-value distribution for FDR "
"calculation (this might take a while)")
print(("generating p-value distribution for FDR "
"calculation (this might take a while)"), file=sys.stderr)
T = 1000 # number of samples
distribution = []
for i in xrange(T):
for i in range(T):
new_study = random.sample(pop, study_n)
new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag)

smallest_p = 1
for term, study_count in new_term_study.items():
for term, study_count in list(new_term_study.items()):
pop_count = term_pop[term]
p = fisher.pvalue_population(study_count,
study_n,
Expand All @@ -118,7 +119,7 @@ def calc_qval(study_count, study_n, pop_count, pop_n,
smallest_p = p.two_tail

distribution.append(smallest_p)
print >>sys.stderr, i, smallest_p
print(i, smallest_p, file=sys.stderr)
return distribution


Expand Down
51 changes: 27 additions & 24 deletions goatools/obo_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

from __future__ import print_function
import sys
from exceptions import EOFError
try:
from exceptions import EOFError
except ImportError:
pass

typedef_tag, term_tag = "[Typedef]", "[Term]"

Expand Down Expand Up @@ -40,11 +43,11 @@ class OBOReader:
def __init__(self, obo_file="go-basic.obo"):

try:
self._handle = file(obo_file)
self._handle = open(obo_file)
except:
print >>sys.stderr, ("download obo file first\n "
print(("download obo file first\n "
"[http://purl.obolibrary.org/obo/"
"go/go-basic.obo]")
"go/go-basic.obo]"), file=sys.stderr)
sys.exit(1)

def __iter__(self):
Expand All @@ -53,9 +56,9 @@ def __iter__(self):
if not line.startswith(term_tag):
read_until(self._handle, term_tag)
while 1:
yield self.next()
yield self.__next__()

def next(self):
def __next__(self):

lines = []
line = self._handle.readline()
Expand Down Expand Up @@ -164,15 +167,15 @@ def __init__(self, obo_file="go-basic.obo"):

def load_obo_file(self, obo_file):

print >>sys.stderr, "load obo file %s" % obo_file
print("load obo file %s" % obo_file, file=sys.stderr)
obo_reader = OBOReader(obo_file)
for rec in obo_reader:
self[rec.id] = rec
for alt in rec.alt_ids:
self[alt] = rec

self.populate_terms()
print >>sys.stderr, len(self), "nodes imported"
print(len(self), "nodes imported", file=sys.stderr)

def populate_terms(self):

Expand All @@ -185,11 +188,11 @@ def depth(rec):
return rec.level

# make the parents references to the GO terms
for rec in self.itervalues():
for rec in self.values():
rec.parents = [self[x] for x in rec._parents]

# populate children and levels
for rec in self.itervalues():
for rec in self.values():
for p in rec.parents:
p.children.append(rec)

Expand All @@ -198,18 +201,18 @@ def depth(rec):

def write_dag(self, out=sys.stdout):
for rec_id, rec in sorted(self.items()):
print >>out, rec
print(rec, file=out)

def query_term(self, term, verbose=False):
if term not in self:
print >>sys.stderr, "Term %s not found!" % term
print("Term %s not found!" % term, file=sys.stderr)
return

rec = self[term]
print >>sys.stderr, rec
print(rec, file=sys.stderr)
if verbose:
print >>sys.stderr, "all parents:", rec.get_all_parents()
print >>sys.stderr, "all children:", rec.get_all_children()
print("all parents:", rec.get_all_parents(), file=sys.stderr)
print("all children:", rec.get_all_children(), file=sys.stderr)

return rec

Expand All @@ -232,7 +235,7 @@ def paths_to_top(self, term, verbose=False):
"""
# error handling consistent with original authors
if term not in self:
print >>sys.stderr, "Term %s not found!" % term
print("Term %s not found!" % term, file=sys.stderr)
return

def _paths_to_top_recursive(rec):
Expand Down Expand Up @@ -262,8 +265,8 @@ def draw_lineage(self, recs, nodecolor="mediumseagreen",
try:
import pygraphviz as pgv
except:
print >>sys.stderr, "pygraphviz not installed, lineage not drawn!"
print >>sys.stderr, "try `easy_install pygraphviz`"
print("pygraphviz not installed, lineage not drawn!", file=sys.stderr)
print("try `easy_install pygraphviz`", file=sys.stderr)
return

G = pgv.AGraph(name="GO tree")
Expand Down Expand Up @@ -311,20 +314,20 @@ def draw_lineage(self, recs, nodecolor="mediumseagreen",
gmlfile = pf + ".gml"
nx.write_gml(NG, gmlfile)

print >>sys.stderr, ("lineage info for terms %s written to %s" %
([rec.id for rec in recs], lineage_img))
print(("lineage info for terms %s written to %s" %
([rec.id for rec in recs], lineage_img)), file=sys.stderr)

G.draw(lineage_img, prog="dot")

def update_association(self, association):
bad_terms = set()
for key, terms in association.items():
for key, terms in list(association.items()):
parents = set()
for term in terms:
try:
parents.update(self[term].get_all_parents())
except:
bad_terms.add(term)
bad_terms.add(term.strip())
terms.update(parents)
if bad_terms:
print >>sys.stderr, "terms not found: %s", bad_terms
print("terms not found: %s" % (bad_terms,), file=sys.stderr)
10 changes: 5 additions & 5 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ GOSLIM_OBO_DOWNLOAD=http://www.geneontology.org/ontology/subsets/goslim_generic.
if [ ! -f $GO_OBO_FILE ]
then
echo "downloading GO file: $GO_OBO_FILE"
wget -O $GOSLIM_OBO_FILE $GO_OBO_DOWNLOAD
wget -O $GO_OBO_FILE $GO_OBO_DOWNLOAD
fi

if [ ! -f $GOSLIM_OBO_FILE ]
Expand All @@ -29,19 +29,19 @@ do
case $REPLY in

1)
python scripts/find_enrichment.py --alpha=0.05 --indent data/study data/population data/association
python3 scripts/find_enrichment.py --alpha=0.05 --indent data/study data/population data/association
;;

2)
python scripts/plot_go_term.py --term=GO:0008135
python3 scripts/plot_go_term.py --term=GO:0008135
;;

3)
python 'tests/test_mapslim.py'
python3 'tests/test_mapslim.py'
;;

4)
python scripts/map_to_slim.py --association_file=data/association --slim_out=direct $GO_OBO_FILE $GOSLIM_OBO_FILE
python3 scripts/map_to_slim.py --association_file=data/association --slim_out=direct $GO_OBO_FILE $GOSLIM_OBO_FILE
;;


Expand Down
10 changes: 5 additions & 5 deletions scripts/find_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
--pval: experiment-wise alpha; for the entire experiment, what significance
level to apply after Bonferroni correction
"""

from __future__ import print_function
import sys
import os.path as op
sys.path.insert(0, op.join(op.dirname(__file__), ".."))
Expand All @@ -33,9 +33,9 @@ def read_geneset(study_fn, pop_fn, compare=False):
pop |= study
pop -= common
study -= common
print >>sys.stderr, "removed %d overlapping items" % (len(common), )
print >>sys.stderr, "Set 1: {0}, Set 2: {1}".\
format(len(study), len(pop))
print("removed %d overlapping items" % (len(common), ), file=sys.stderr)
print("Set 1: {0}, Set 2: {1}".\
format(len(study), len(pop)), file=sys.stderr)

return study, pop

Expand Down Expand Up @@ -102,7 +102,7 @@ def check_bad_args(args):
(opts, args) = p.parse_args()
bad = check_bad_args(args)
if bad:
print bad
print(bad)
sys.exit(p.print_help())

min_ratio = opts.ratio
Expand Down
7 changes: 4 additions & 3 deletions scripts/map_to_slim.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

from __future__ import print_function
import os
import os.path as op
import sys
Expand Down Expand Up @@ -80,7 +81,7 @@ def read_associations(assoc_fn):
# in case a single term is given as input:
if opts.term:
if opts.term not in go_dag:
print >> sys.stderr, ("term %s not found!" % opts.term)
print(("term %s not found!" % opts.term), file=sys.stderr)
sys.exit(1)
direct_anc, all_anc = mapslim(opts.term, go_dag, goslim_dag)
# output either all or only direct slims, depending on user command
Expand All @@ -95,7 +96,7 @@ def read_associations(assoc_fn):
assert os.path.exists(opts.ass_file_name), ("file %s not found!"
% opts.ass_file_name)
assocs = read_associations(opts.ass_file_name)
for protein_product, go_terms in assocs.iteritems():
for protein_product, go_terms in assocs.items():
all_direct_anc = set()
all_covered_anc = set()
all_all_anc = set()
Expand All @@ -113,4 +114,4 @@ def read_associations(assoc_fn):
slim_terms_str = ";".join(all_direct_anc)
else:
slim_terms_str = ";".join(all_all_anc)
print(protein_product + "\t" + slim_terms_str)
print((protein_product + "\t" + slim_terms_str))

0 comments on commit e7656d0

Please sign in to comment.