Skip to content

Commit

Permalink
address issue #41 to also test for terms missing in 'study' - NOTE th…
Browse files Browse the repository at this point in the history
…is change

increases the number of statistical tests performed, so p-values after multiple
testing correction will be numerically different from results before
  • Loading branch information
tanghaibao committed Jul 11, 2015
1 parent 58c97d3 commit cd65dcb
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 17 deletions.
20 changes: 11 additions & 9 deletions goatools/go_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,20 @@ def __init__(self, pop, assoc, obo_dag, alpha=.05, study=None,
self.results = []

obo_dag.update_association(assoc)
self.term_study = count_terms(study, assoc, obo_dag)
self.term_pop = count_terms(pop, assoc, obo_dag)

if study:
self.run_study(study)

def run_study(self, study):
results = self.results

term_study = count_terms(study, self.assoc, self.obo_dag)

pop_n, study_n = len(self.pop), len(study)
allterms = set(self.term_study.keys() + self.term_pop.keys())

# Init study_count and pop_count to handle empty sets
study_count = pop_count = 0
for term, study_count in list(term_study.items()):
pop_count = self.term_pop[term]
for term in allterms:
study_count = self.term_study.get(term, 0)
pop_count = self.term_pop.get(term, 0)
p = fisher.pvalue_population(study_count, study_n,
pop_count, pop_n)

Expand Down Expand Up @@ -159,7 +157,11 @@ def update_results(self, method, corrected_pvals):
rec.__setattr__("p_"+method, val)

def print_summary(self, min_ratio=None, indent=False, pval=0.05):
# Header contains parameters
from .version import __version__ as version
from datetime import date

# Header contains provenance and parameters
print("# Generated by GOATOOLS v{0} ({1})".format(version, date.today()))
print("# min_ratio={0} pval={1}".format(min_ratio, pval))

# field names for output
Expand All @@ -170,7 +172,7 @@ def print_summary(self, min_ratio=None, indent=False, pval=0.05):
# (over_under, is_ratio_different)
rec.update_remaining_fields(min_ratio=min_ratio)

if pval is not None and rec.p_bonferroni > pval:
if pval is not None and rec.p_uncorrected >= pval:
continue

if rec.is_ratio_different:
Expand Down
5 changes: 2 additions & 3 deletions goatools/multiple_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def __init__(self, pvals, a=.05):
self.a = a # type-1 error cutoff for each test

self.set_correction()
# Reset all pvals > 1 to 1
self.corrected_pvals[self.corrected_pvals > 1] = 1

def set_correction(self):
# the purpose of multiple correction is to lower the alpha
Expand All @@ -36,9 +38,6 @@ class Bonferroni(AbstractCorrection):
def set_correction(self):
self.corrected_pvals *= self.n

# reset all pvals > 1 to 1
self.corrected_pvals[self.corrected_pvals > 1] = 1


class Sidak(AbstractCorrection):
"""http://en.wikipedia.org/wiki/Bonferroni_correction
Expand Down
2 changes: 1 addition & 1 deletion goatools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.5"
__version__ = "0.5.6"
2 changes: 1 addition & 1 deletion run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ do
case $REPLY in

1)
python scripts/find_enrichment.py --alpha=0.05 --indent data/study data/population data/association
python scripts/find_enrichment.py --pval=0.05 --indent data/study data/population data/association
;;

2)
Expand Down
5 changes: 2 additions & 3 deletions scripts/find_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@ def check_input_files(ns, p):
help='data/study data/population data/association')
p.add_argument('--alpha', default=0.05, type=float,
help="Test-wise alpha for multiple testing ")
p.add_argument('--pval', default=None, type=float,
help="Family-wise alpha (whole experiment), only print out "
"Bonferroni p-value is less than this value. ")
p.add_argument('--pval', default=.05, type=float,
help="Only print out when uncorrected p-value < this value.")
p.add_argument('--compare', dest='compare', default=False,
action='store_true',
help="the population file as a comparison group. if this "
Expand Down

0 comments on commit cd65dcb

Please sign in to comment.