Skip to content

Commit

Permalink
Merge pull request #29 from dvklopfenstein/master
Browse files Browse the repository at this point in the history
Updated to argparse from deprecated optparse.
  • Loading branch information
tanghaibao committed Mar 23, 2015
2 parents d611c25 + b7e72fa commit 75ed821
Showing 1 changed file with 40 additions and 41 deletions.
81 changes: 40 additions & 41 deletions scripts/find_enrichment.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from __future__ import print_function

"""
python %prog study.file population.file gene-association.file
python {} study.file population.file gene-association.file
This program returns P-values for functional enrichment in a cluster of study
genes using Fisher's exact test, and corrected for multiple testing (including
Expand All @@ -13,8 +14,8 @@
(most often you don't need to change this other than 0.05 or 0.01)
--pval: experiment-wise alpha; for the entire experiment, what significance
level to apply after Bonferroni correction
"""
from __future__ import print_function
""".format(__file__)

import sys
import os.path as op
sys.path.insert(0, op.join(op.dirname(__file__), ".."))
Expand Down Expand Up @@ -56,76 +57,74 @@ def read_associations(assoc_fn):
return assoc


def check_bad_args(args):
"""check args. otherwise if one of the 3 args is bad
def check_input_files(ns, p):
"""check filename args. otherwise if one of the 3 filenames is bad
it's hard to tell which one"""
import os
if not len(args) == 3:
return "please send in 3 file names"
for arg in args[:-1]:
if not os.path.exists(arg):
return "*%s* does not exist" % arg
if not len(ns.filenames) == 3:
p.print_help()
msg = """
3 Expected files; Expected content: study population association",
{} Actual files: {}""".format(len(ns.filenames), ' '.join(ns.filenames))
raise Exception(msg)
for fin in ns.filenames:
if not os.path.exists(fin):
return "*{}* does not exist".format(fin)

return False


if __name__ == "__main__":

import optparse
p = optparse.OptionParser(__doc__)
import argparse
p = argparse.ArgumentParser(__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)

p.add_option('--alpha', default=0.05, type="float",
help="Test-wise alpha for multiple testing "
"[default: %default]")
p.add_option('--pval', default=None, type="float",
p.add_argument('filenames', type=str, nargs='+',
help='data/study data/population data/association')
p.add_argument('--alpha', default=0.05, type=float,
help="Test-wise alpha for multiple testing ")
p.add_argument('--pval', default=None, type=float,
help="Family-wise alpha (whole experiment), only print out "
"Bonferroni p-value is less than this value. "
"[default: %default]")
p.add_option('--compare', dest='compare', default=False,
"Bonferroni p-value is less than this value. ")
p.add_argument('--compare', dest='compare', default=False,
action='store_true',
help="the population file as a comparison group. if this "
"flag is specified, the population is used as the study "
"plus the `population/comparison`")
p.add_option('--ratio', dest='ratio', type='float', default=None,
p.add_argument('--ratio', dest='ratio', type=float, default=None,
help="only show values where the difference between study "
"and population ratios is greater than this. useful for "
"excluding GO categories with small differences, but "
"containing large numbers of genes. should be a value "
"between 1 and 2. ")
p.add_option('--fdr', dest='fdr', default=False,
p.add_argument('--fdr', dest='fdr', default=False,
action='store_true',
help="Calculate the false discovery rate (alt. to the "
"Bonferroni but slower)")
p.add_option('--indent', dest='indent', default=False,
p.add_argument('--indent', dest='indent', default=False,
action='store_true', help="indent GO terms")
p.add_option('--obo', default="go-basic.obo", type="string",
help="Specifies location and name of the obo file"
"[default: %default]")
p.add_argument('--obo', default="go-basic.obo", type=str,
help="Specifies location and name of the obo file")

(opts, args) = p.parse_args()
bad = check_bad_args(args)
if bad:
print(bad)
sys.exit(p.print_help())
args = p.parse_args()
check_input_files(args, p)

min_ratio = opts.ratio
min_ratio = args.ratio
if min_ratio is not None:
assert 1 <= min_ratio <= 2

assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"
assert 0 < args.alpha < 1, "Test-wise alpha must fall between (0, 1)"

study_fn, pop_fn, assoc_fn = args
study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
study_fn, pop_fn, assoc_fn = args.filenames
study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
assoc = read_associations(assoc_fn)
#print("{}".format(assoc))
#print("{}".format(study))
#print("{}".format(pop))

methods = ["bonferroni", "sidak", "holm"]
if opts.fdr:
if args.fdr:
methods.append("fdr")

obo_dag = GODag(obo_file=opts.obo)
g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha,
obo_dag = GODag(obo_file=args.obo)
g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=args.alpha,
study=study, methods=methods)
g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)

0 comments on commit 75ed821

Please sign in to comment.