fix cyclic imports in goatools.go_enrichment

tanghaibao · Mar 10, 2015 · 540935f
1 parent fe91e7e
commit 540935f
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 37 deletions.
diff --git a/goatools/go_enrichment.py b/goatools/go_enrichment.py
@@ -9,11 +9,9 @@
 (including Bonferroni, Holm, Sidak, and false discovery rate)
 """
 from __future__ import absolute_import
-import sys
-import collections
-import os.path as op
 import fisher
 from .multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval
+from .ratio import count_terms, is_ratio_different
 
 
 class GOEnrichmentRecord(object):
@@ -172,29 +170,3 @@ def print_summary(self, min_ratio=None, indent=False, pval=0.05):
 
             if rec.is_ratio_different:
                 print(rec.__str__(indent=indent))
-
-
-def count_terms(geneset, assoc, obo_dag):
-    """count the number of terms in the study group
-    """
-    term_cnt = collections.defaultdict(int)
-    for gene in (g for g in geneset if g in assoc):
-        for x in assoc[gene]:
-            if x in obo_dag:
-                term_cnt[obo_dag[x].id] += 1
-
-    return term_cnt
-
-
-def is_ratio_different(min_ratio, study_go, study_n, pop_go, pop_n):
-    """
-    check if the ratio go /n is different between the study group and
-    the population
-    """
-    if min_ratio is None:
-        return True
-    s = float(study_go) / study_n
-    p = float(pop_go) / pop_n
-    if s > p:
-        return s / p > min_ratio
-    return p / s > min_ratio
diff --git a/goatools/multiple_testing.py b/goatools/multiple_testing.py
@@ -10,7 +10,7 @@
 import random
 import fisher
 import numpy as np
-import goatools.go_enrichment
+from .ratio import count_terms
 
 
 class AbstractCorrection(object):
@@ -99,14 +99,13 @@ def __init__(self, p_val_distribution, results, a=.05):
 
 
 def calc_qval(study_count, study_n, pop_count, pop_n,
-              pop, assoc, term_pop, obo_dag):
-    print(("generating p-value distribution for FDR "
-                         "calculation (this might take a while)"), file=sys.stderr)
-    T = 1000    # number of samples
+              pop, assoc, term_pop, obo_dag, T=500):
+    print(("Generate p-value distribution for FDR "
+           "based on resampling (this might take a while)"), file=sys.stderr)
     distribution = []
     for i in range(T):
         new_study = random.sample(pop, study_n)
-        new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag)
+        new_term_study = count_terms(new_study, assoc, obo_dag)
 
         smallest_p = 1
         for term, study_count in list(new_term_study.items()):
@@ -119,7 +118,9 @@ def calc_qval(study_count, study_n, pop_count, pop_n,
                 smallest_p = p.two_tail
 
         distribution.append(smallest_p)
-        print(i, smallest_p, file=sys.stderr)
+        if i % 10  == 0:
+            print("Sample {0} / {1}: p-value {2}".\
+                        format(i, T, smallest_p), file=sys.stderr)
     return distribution
 
 

diff --git a/goatools/ratio.py b/goatools/ratio.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+
+from collections import defaultdict
+
+
+def count_terms(geneset, assoc, obo_dag):
+    """count the number of terms in the study group
+    """
+    term_cnt = defaultdict(int)
+    for gene in (g for g in geneset if g in assoc):
+        for x in assoc[gene]:
+            if x in obo_dag:
+                term_cnt[obo_dag[x].id] += 1
+
+    return term_cnt
+
+
+def is_ratio_different(min_ratio, study_go, study_n, pop_go, pop_n):
+    """
+    check if the ratio go /n is different between the study group and
+    the population
+    """
+    if min_ratio is None:
+        return True
+    s = float(study_go) / study_n
+    p = float(pop_go) / pop_n
+    if s > p:
+        return s / p > min_ratio
+    return p / s > min_ratio
diff --git a/goatools/version.py b/goatools/version.py
@@ -1 +1 @@
-__version__ = "0.5.2"
+__version__ = "0.5.3"