Merge pull request #29 from dvklopfenstein/master

Updated to argparse from deprecated optparse.
tanghaibao · Mar 23, 2015 · 75ed821 · 75ed821
2 parents d611c25 + b7e72fa
commit 75ed821
Showing 1 changed file with 40 additions and 41 deletions.
diff --git a/scripts/find_enrichment.py b/scripts/find_enrichment.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
+from __future__ import print_function
 
 """
-python %prog study.file population.file gene-association.file
+python {} study.file population.file gene-association.file
 
 This program returns P-values for functional enrichment in a cluster of study
 genes using Fisher's exact test, and corrected for multiple testing (including
@@ -13,8 +14,8 @@
         (most often you don't need to change this other than 0.05 or 0.01)
 --pval: experiment-wise alpha; for the entire experiment, what significance
         level to apply after Bonferroni correction
-"""
-from __future__ import print_function
+""".format(__file__)
+
 import sys
 import os.path as op
 sys.path.insert(0, op.join(op.dirname(__file__), ".."))
@@ -56,76 +57,74 @@ def read_associations(assoc_fn):
     return assoc
 
 
-def check_bad_args(args):
-    """check args. otherwise if one of the 3 args is bad
+def check_input_files(ns, p):
+    """check filename args. otherwise if one of the 3 filenames is bad
     it's hard to tell which one"""
     import os
-    if not len(args) == 3:
-        return "please send in 3 file names"
-    for arg in args[:-1]:
-        if not os.path.exists(arg):
-            return "*%s* does not exist" % arg
+    if not len(ns.filenames) == 3:
+        p.print_help()
+        msg = """
+  3 Expected files; Expected content: study population association",
+  {} Actual   files: {}""".format(len(ns.filenames), ' '.join(ns.filenames))
+        raise Exception(msg)
+    for fin in ns.filenames:
+        if not os.path.exists(fin):
+            return "*{}* does not exist".format(fin)
 
     return False
 
 
 if __name__ == "__main__":
 
-    import optparse
-    p = optparse.OptionParser(__doc__)
+    import argparse
+    p = argparse.ArgumentParser(__doc__,
+                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-    p.add_option('--alpha', default=0.05, type="float",
-                 help="Test-wise alpha for multiple testing "
-                 "[default: %default]")
-    p.add_option('--pval', default=None, type="float",
+    p.add_argument('filenames', type=str, nargs='+',
+                 help='data/study data/population data/association')
+    p.add_argument('--alpha', default=0.05, type=float,
+                 help="Test-wise alpha for multiple testing ")
+    p.add_argument('--pval', default=None, type=float,
                  help="Family-wise alpha (whole experiment), only print out "
-                 "Bonferroni p-value is less than this value. "
-                 "[default: %default]")
-    p.add_option('--compare', dest='compare', default=False,
+                 "Bonferroni p-value is less than this value. ")
+    p.add_argument('--compare', dest='compare', default=False,
                  action='store_true',
                  help="the population file as a comparison group. if this "
                  "flag is specified, the population is used as the study "
                  "plus the `population/comparison`")
-    p.add_option('--ratio', dest='ratio', type='float', default=None,
+    p.add_argument('--ratio', dest='ratio', type=float, default=None,
                  help="only show values where the difference between study "
                  "and population ratios is greater than this. useful for "
                  "excluding GO categories with small differences, but "
                  "containing large numbers of genes. should be a value "
                  "between 1 and 2. ")
-    p.add_option('--fdr', dest='fdr', default=False,
+    p.add_argument('--fdr', dest='fdr', default=False,
                  action='store_true',
                  help="Calculate the false discovery rate (alt. to the "
                  "Bonferroni but slower)")
-    p.add_option('--indent', dest='indent', default=False,
+    p.add_argument('--indent', dest='indent', default=False,
                  action='store_true', help="indent GO terms")
-    p.add_option('--obo', default="go-basic.obo", type="string",
-                 help="Specifies location and name of the obo file"
-                 "[default: %default]")
+    p.add_argument('--obo', default="go-basic.obo", type=str,
+                 help="Specifies location and name of the obo file")
 
-    (opts, args) = p.parse_args()
-    bad = check_bad_args(args)
-    if bad:
-        print(bad)
-        sys.exit(p.print_help())
+    args = p.parse_args()
+    check_input_files(args, p)
 
-    min_ratio = opts.ratio
+    min_ratio = args.ratio
     if min_ratio is not None:
         assert 1 <= min_ratio <= 2
 
-    assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"
+    assert 0 < args.alpha < 1, "Test-wise alpha must fall between (0, 1)"
 
-    study_fn, pop_fn, assoc_fn = args
-    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
+    study_fn, pop_fn, assoc_fn = args.filenames
+    study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
     assoc = read_associations(assoc_fn)
-    #print("{}".format(assoc))
-    #print("{}".format(study))
-    #print("{}".format(pop))
 
     methods = ["bonferroni", "sidak", "holm"]
-    if opts.fdr:
+    if args.fdr:
         methods.append("fdr")
 
-    obo_dag = GODag(obo_file=opts.obo)
-    g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha,
+    obo_dag = GODag(obo_file=args.obo)
+    g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=args.alpha,
                           study=study, methods=methods)
-    g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
+    g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)