-
Notifications
You must be signed in to change notification settings - Fork 80
/
index.py
83 lines (71 loc) · 2.9 KB
/
index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""index signatures for rapid search"""

# Usage text for the `index` subcommand.  `subparser()` below hands this to
# `add_parser(usage=...)`, and the module docstring above is passed as the
# parser description, so both strings are user-facing output.
usage = """
sourmash index -k 31 dbname *.sig
Create an on-disk database of signatures that can be searched in low
memory with 'search' and 'gather'. All signatures must be the same
k-mer size, molecule type, and num/scaled; the standard signature
selectors (-k/--ksize, --scaled, --dna/--protein) choose which
signatures to be added.
The key options for index are:
* `-k/--ksize <int>`: k-mer size to select
* `--dna` or `--protein`: nucleotide or protein signatures (default `--dna`)
* `--traverse-directory`: load all signatures underneath any directories
If `dbname` ends with `.sbt.json`, index will create the database as a
collection of multiple files, with an index `dbname.sbt.json` and a
subdirectory `.sbt.dbname`. If `dbname` ends with `.sbt.zip`, index
will create a zip archive containing the multiple files. For sourmash
v2 and v3, `sbt.json` will be added automatically; this behavior will
change in sourmash v4 to default to `.sbt.zip`.
---
"""
from sourmash.cli.utils import add_moltype_args, add_ksize_arg
def subparser(subparsers):
    """Register the ``index`` subcommand and its options on *subparsers*.

    The new parser takes this module's docstring as its description and the
    module-level ``usage`` text as its usage message.  Arguments are added in
    a fixed order: the first group, then ``add_ksize_arg(parser, 31)``, then
    the second group, and finally ``add_moltype_args(parser)``.
    """
    parser = subparsers.add_parser('index', description=__doc__, usage=usage)

    # Each entry is (names/flags, keyword options) for one add_argument call.
    # Arguments registered before the k-mer size option.
    leading_specs = (
        (('sbt_name',), {
            'help': 'name to save index into; .sbt.zip or .sbt.json file',
        }),
        (('signatures',), {
            'nargs': '+',
            'help': 'signatures to load into SBT',
        }),
        (('--from-file',), {
            'help': 'a file containing a list of signatures file to load',
        }),
        (('-q', '--quiet'), {
            'action': 'store_true',
            'help': 'suppress non-error output',
        }),
    )

    # Arguments registered after the k-mer size option.
    trailing_specs = (
        (('-d', '--n_children'), {
            'metavar': 'D',
            'type': int,
            'default': 2,
            'help': 'number of children for internal nodes; default=2',
        }),
        (('--traverse-directory',), {
            'action': 'store_true',
            'help': 'load all signatures underneath any directories',
        }),
        (('--append',), {
            'action': 'store_true',
            'default': False,
            'help': 'add signatures to an existing SBT',
        }),
        (('-x', '--bf-size'), {
            'metavar': 'S',
            'type': float,
            'default': 1e5,
            'help': 'Bloom filter size used for internal nodes',
        }),
        (('-f', '--force'), {
            'action': 'store_true',
            'help': 'try loading all files with --traverse-directory',
        }),
        (('-s', '--sparseness'), {
            'metavar': 'FLOAT',
            'type': float,
            'default': 0.0,
            'help': ('What percentage of internal nodes will not be saved; '
                     'ranges from 0.0 (save all nodes) to 1.0 '
                     '(no nodes saved)'),
        }),
        (('--scaled',), {
            'metavar': 'FLOAT',
            'type': float,
            'default': 0,
            'help': 'downsample signatures to the specified scaled factor',
        }),
    )

    for names, options in leading_specs:
        parser.add_argument(*names, **options)

    add_ksize_arg(parser, 31)

    for names, options in trailing_specs:
        parser.add_argument(*names, **options)

    add_moltype_args(parser)
def main(args):
    """Run the ``index`` command with the parsed arguments *args*.

    ``sourmash`` is imported inside the function rather than at module
    level, and whatever ``sourmash.commands.index(args)`` returns is passed
    back to the caller unchanged.
    """
    import sourmash

    run_index = sourmash.commands.index
    return run_index(args)