Skip to content

Commit

Permalink
fix cyclic imports in goatools.go_enrichment
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Mar 10, 2015
1 parent fe91e7e commit 540935f
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 37 deletions.
30 changes: 1 addition & 29 deletions goatools/go_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
(including Bonferroni, Holm, Sidak, and false discovery rate)
"""
from __future__ import absolute_import
import sys
import collections
import os.path as op
import fisher
from .multiple_testing import Bonferroni, Sidak, HolmBonferroni, FDR, calc_qval
from .ratio import count_terms, is_ratio_different


class GOEnrichmentRecord(object):
Expand Down Expand Up @@ -172,29 +170,3 @@ def print_summary(self, min_ratio=None, indent=False, pval=0.05):

if rec.is_ratio_different:
print(rec.__str__(indent=indent))


def count_terms(geneset, assoc, obo_dag):
    """Tally how many genes of `geneset` are annotated with each term.

    Genes that are not keys of `assoc` are ignored, and so are annotation
    entries that are not present in `obo_dag`. Every remaining annotation
    adds one to the counter stored under ``obo_dag[entry].id`` (the id
    attribute of the looked-up record, not the raw annotation key).

    Returns a ``collections.defaultdict(int)`` mapping term id -> count.
    """
    tally = collections.defaultdict(int)
    for gene in geneset:
        # skip genes that carry no annotation at all
        if gene not in assoc:
            continue
        for term in assoc[gene]:
            # skip annotations the DAG does not know about
            if term not in obo_dag:
                continue
            tally[obo_dag[term].id] += 1

    return tally


def is_ratio_different(min_ratio, study_go, study_n, pop_go, pop_n):
    """
    check if the ratio go / n is different between the study group and
    the population

    The two ratios ``study_go / study_n`` and ``pop_go / pop_n`` are
    compared by dividing the larger one by the smaller one; the result is
    True when that fold change is strictly greater than `min_ratio`.

    min_ratio -- fold-change threshold; None disables the check (always True)
    study_go, study_n -- term count and group size for the study group
    pop_go, pop_n -- term count and group size for the population

    A term that is absent from exactly one of the two groups (ratio 0 on
    one side) is treated as an infinite fold change instead of raising
    ZeroDivisionError; when both ratios are 0 the fold change is taken
    to be 1, the same value two equal non-zero ratios produce.

    Raises ZeroDivisionError if `study_n` or `pop_n` is 0, because the
    ratio of an empty group is undefined.
    """
    if min_ratio is None:
        return True

    study_ratio = float(study_go) / study_n
    pop_ratio = float(pop_go) / pop_n
    larger = max(study_ratio, pop_ratio)
    smaller = min(study_ratio, pop_ratio)

    if smaller == 0:
        # dividing by a zero ratio used to crash here
        fold = 1.0 if larger == 0 else float("inf")
    else:
        fold = larger / smaller
    return fold > min_ratio
15 changes: 8 additions & 7 deletions goatools/multiple_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import random
import fisher
import numpy as np
import goatools.go_enrichment
from .ratio import count_terms


class AbstractCorrection(object):
Expand Down Expand Up @@ -99,14 +99,13 @@ def __init__(self, p_val_distribution, results, a=.05):


def calc_qval(study_count, study_n, pop_count, pop_n,
pop, assoc, term_pop, obo_dag):
print(("generating p-value distribution for FDR "
"calculation (this might take a while)"), file=sys.stderr)
T = 1000 # number of samples
pop, assoc, term_pop, obo_dag, T=500):
print(("Generate p-value distribution for FDR "
"based on resampling (this might take a while)"), file=sys.stderr)
distribution = []
for i in range(T):
new_study = random.sample(pop, study_n)
new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag)
new_term_study = count_terms(new_study, assoc, obo_dag)

smallest_p = 1
for term, study_count in list(new_term_study.items()):
Expand All @@ -119,7 +118,9 @@ def calc_qval(study_count, study_n, pop_count, pop_n,
smallest_p = p.two_tail

distribution.append(smallest_p)
print(i, smallest_p, file=sys.stderr)
if i % 10 == 0:
print("Sample {0} / {1}: p-value {2}".\
format(i, T, smallest_p), file=sys.stderr)
return distribution


Expand Down
31 changes: 31 additions & 0 deletions goatools/ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-


from collections import defaultdict


def count_terms(geneset, assoc, obo_dag):
    """Count, per term id, the annotated genes found in `geneset`.

    Only genes that are keys of `assoc` contribute. For such a gene, each
    of its annotation entries that is present in `obo_dag` increments the
    counter keyed by ``obo_dag[entry].id``.

    Returns a ``defaultdict(int)`` mapping term id -> number of hits.
    """
    counts = defaultdict(int)
    for gene in geneset:
        if gene in assoc:
            # keep only the annotations that resolve to a DAG record
            resolvable = (go_id for go_id in assoc[gene] if go_id in obo_dag)
            for go_id in resolvable:
                counts[obo_dag[go_id].id] += 1

    return counts


def is_ratio_different(min_ratio, study_go, study_n, pop_go, pop_n):
    """
    check if the ratio go / n is different between the study group and
    the population

    Computes ``study_go / study_n`` and ``pop_go / pop_n`` and returns
    True when the larger ratio divided by the smaller one is strictly
    greater than `min_ratio`. Passing ``min_ratio=None`` disables the
    check and always returns True.

    Zero ratios are handled explicitly rather than raising
    ZeroDivisionError: if exactly one ratio is 0 the fold change is
    treated as infinite, and if both are 0 it is treated as 1 (the same
    value two equal non-zero ratios give).

    Raises ZeroDivisionError if `study_n` or `pop_n` is 0, because the
    ratio of an empty group is undefined.
    """
    if min_ratio is None:
        return True

    s = float(study_go) / study_n
    p = float(pop_go) / pop_n
    if s == p:
        # identical ratios, including the 0-vs-0 case that used to crash
        return 1.0 > min_ratio

    high, low = (s, p) if s > p else (p, s)
    if low == 0:
        # term missing from one group only: unbounded fold change
        return float("inf") > min_ratio
    return high / low > min_ratio
2 changes: 1 addition & 1 deletion goatools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.2"
__version__ = "0.5.3"

0 comments on commit 540935f

Please sign in to comment.