/
utils.py
134 lines (109 loc) · 4.69 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from glob import glob
import os
import argparse
from sourmash.logging import notify
from sourmash.sourmash_args import check_scaled_bounds, check_num_bounds
def add_moltype_args(parser):
    """Register the paired molecule-type selection flags on ``parser``.

    For each of ``protein``, ``dayhoff``, ``hp`` and ``dna`` an enabling
    ``store_true`` option and a disabling ``--no-*`` ``store_false`` option
    are added, both writing to the same destination.  ``protein``,
    ``dayhoff`` and ``hp`` default to ``False``; ``dna`` defaults to ``None``.
    """

    def _add_toggle(dest, default, on_flags, on_help, off_flags, off_help):
        # An on/off pair shares one destination; set_defaults() afterwards
        # pins the same default on both actions.
        parser.add_argument(
            *on_flags, dest=dest, default=default, action='store_true',
            help=on_help,
        )
        parser.add_argument(
            *off_flags, dest=dest, action='store_false', help=off_help,
        )
        parser.set_defaults(**{dest: default})

    _add_toggle(
        'protein', False,
        ('--protein',),
        'choose a protein signature; by default, a nucleotide signature is used',
        ('--no-protein',),
        'do not choose a protein signature',
    )
    _add_toggle(
        'dayhoff', False,
        ('--dayhoff',),
        'choose Dayhoff-encoded amino acid signatures',
        ('--no-dayhoff',),
        'do not choose Dayhoff-encoded amino acid signatures',
    )
    _add_toggle(
        'hp', False,
        ('--hp', '--hydrophobic-polar'),
        'choose hydrophobic-polar-encoded amino acid signatures',
        ('--no-hp', '--no-hydrophobic-polar'),
        'do not choose hydrophobic-polar-encoded amino acid signatures',
    )
    _add_toggle(
        'dna', None,
        ('--dna', '--rna', '--nucleotide'),
        'choose a nucleotide signature (default: True)',
        ('--no-dna', '--no-rna', '--no-nucleotide'),
        'do not choose a nucleotide signature',
    )
def add_construct_moltype_args(parser):
    """Add the molecule-type flags, with ``dna`` defaulting to ``True``.

    Delegates to ``add_moltype_args`` and then overrides only the ``dna``
    default; every other flag keeps the default set there.
    """
    add_moltype_args(parser)
    # Override the default installed by add_moltype_args for 'dna'.
    parser.set_defaults(dna=True)
def add_ksize_arg(parser, *, default=None):
    """Add -k/--ksize to argparse parsers, with specified default.

    Args:
        parser: argparse parser (or argument group) to extend.
        default: value used when ``-k`` is not given; ``None`` means the
            option has no default.

    The help text states the default.  The check is ``is not None`` rather
    than truthiness so that an explicit ``default=0`` is reported as the
    default instead of being described as "no default".
    """
    if default is not None:
        message = f"k-mer size to select; default={default}"
    else:
        message = "k-mer size to select; no default."

    parser.add_argument(
        '-k', '--ksize', metavar='K', default=default, type=int,
        help=message,
    )
# See https://stackoverflow.com/questions/55324449/how-to-specify-a-minimum-or-maximum-float-value-with-argparse#55410582
def range_limited_float_type(arg):
    """Type function for argparse - a float within some predefined bounds.

    Args:
        arg: the raw command-line string to convert.

    Returns:
        ``arg`` converted to ``float``, guaranteed to lie in ``[0, 1]``.

    Raises:
        argparse.ArgumentTypeError: if ``arg`` is not a floating point
            number, is NaN, or falls outside the bounds.
    """
    min_val = 0
    max_val = 1
    try:
        f = float(arg)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "\n\tERROR: Must be a floating point number."
        ) from None
    # Written as a positive containment test so that NaN is rejected: every
    # comparison with NaN is False, so "f < min_val or f > max_val" would let
    # it through.  NOTE: both bounds are accepted (inclusive), although the
    # message below is worded with strict inequalities.
    if not (min_val <= f <= max_val):
        raise argparse.ArgumentTypeError(
            f"\n\tERROR: Argument must be >{str(min_val)} and <{str(max_val)}."
        )
    return f
def add_tax_threshold_arg(parser, containment_default=0.1, ani_default=None):
    """Add the classification threshold options to ``parser``.

    Registers ``--containment-threshold`` and ``--ani-threshold`` (alias
    ``--aai-threshold``).  Both values are converted and bounds-checked by
    ``range_limited_float_type``; the given defaults are used when the
    options are absent and are echoed in the help text.
    """
    containment_help = f'minimum containment threshold for classification; default={containment_default}'
    ani_help = f'minimum ANI threshold (nucleotide gather) or AAI threshold (protein gather) for classification; default={ani_default}'

    parser.add_argument(
        '--containment-threshold',
        default=containment_default,
        type=range_limited_float_type,
        help=containment_help,
    )
    parser.add_argument(
        '--ani-threshold', '--aai-threshold',
        default=ani_default,
        type=range_limited_float_type,
        help=ani_help,
    )
def add_picklist_args(parser):
    """Add the ``--picklist`` and ``--picklist-require-all`` options.

    ``--picklist`` takes a string value (default ``None``);
    ``--picklist-require-all`` is a boolean switch that is off by default.
    """
    option_specs = (
        ('--picklist', {
            'default': None,
            'help': "select signatures based on a picklist, i.e. 'file.csv:colname:coltype'",
        }),
        ('--picklist-require-all', {
            'default': False,
            'action': 'store_true',
            'help': "require that all picklist values be found or else fail",
        }),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
def add_pattern_args(parser):
    """Add the ``--include-db-pattern`` / ``--exclude-db-pattern`` options.

    Both options take a string value and default to ``None``.
    """
    pattern_options = (
        ('--include-db-pattern',
         'search only signatures that match this pattern in name, filename, or md5'),
        ('--exclude-db-pattern',
         'search only signatures that do not match this pattern in name, filename, or md5'),
    )
    for flag, help_text in pattern_options:
        parser.add_argument(flag, default=None, help=help_text)
def opfilter(path):
    """Return True unless ``path`` starts with ``'__'`` or equals ``'utils'``."""
    if path.startswith('__'):
        return False
    return path != 'utils'
def command_list(dirpath):
    """Return the sorted module names of the ``*.py`` files in ``dirpath``.

    Files whose name starts with ``'__'`` are skipped, and the remaining
    extension-less names are kept only if ``opfilter`` accepts them.
    """
    selected = []
    for pyfile in glob(os.path.join(dirpath, '*.py')):
        filename = os.path.basename(pyfile)
        if filename.startswith('__'):
            continue
        stem, _ext = os.path.splitext(filename)
        if opfilter(stem):
            selected.append(stem)
    selected.sort()
    return selected
def add_scaled_arg(parser, default=None):
    """Add the ``--scaled`` option to ``parser``.

    Args:
        parser: argparse parser (or argument group) to extend.
        default: value used when ``--scaled`` is not given.  It is passed
            to ``add_argument`` in the same way ``add_num_arg`` passes its
            own default; previously it was accepted but silently ignored.

    Values given on the command line are converted by
    ``check_scaled_bounds``.
    """
    parser.add_argument(
        '--scaled', metavar='FLOAT', type=check_scaled_bounds, default=default,
        help='scaled value should be between 100 and 1e6'
    )
def add_num_arg(parser, default=0):
    """Add the ``-n`` / ``--num-hashes`` / ``--num`` option to ``parser``.

    Values given on the command line are converted by ``check_num_bounds``;
    ``default`` is used when the option is absent.
    """
    flags = ('-n', '--num-hashes', '--num')
    parser.add_argument(
        *flags,
        metavar='N',
        type=check_num_bounds,
        default=default,
        help='num value should be between 50 and 50000',
    )