-
Notifications
You must be signed in to change notification settings - Fork 77
/
extract.py
80 lines (60 loc) · 2.47 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""extract one or more signatures"""
# Long-form, Markdown-formatted help text for the `extract` subcommand.
# It is handed to argparse as the `usage=` argument when the subcommand's
# parser is created, so edits here change what users see at runtime.
usage="""
### `sourmash signature extract` - extract signatures from a collection
Extract the specified signature(s) from a collection of signatures.
For example,
sourmash signature extract *.sig -k 21 --dna -o extracted.sig
will extract all nucleotide signatures calculated at k=21 from all
.sig files in the current directory.
There are currently two other useful selectors for `extract`: you can specify
(part of) an md5sum, as output in the CSVs produced by `search` and `gather`;
and you can specify (part of) a name.
For example,
sourmash signature extract tests/test-data/*.fa.sig --md5 09a0869
will extract the signature from `47.fa.sig` which has an md5sum of
`09a08691ce52952152f0e866a59f6261`; and
sourmash signature extract tests/test-data/*.fa.sig --name NC_009665
will extract the same signature, which has an accession number of
`NC_009665.1`.
#### Using picklists with `sourmash sig extract`
As of sourmash 4.2.0, `extract` also supports picklists, a feature by
which you can select signatures based on values in a CSV file. See
[the command line docs](https://sourmash.readthedocs.io/en/latest/command-line.html) for more information.
"""
from sourmash.cli.utils import (add_moltype_args, add_ksize_arg,
add_picklist_args, add_pattern_args)
def subparser(subparsers):
    """Register the ``extract`` subcommand and its options on *subparsers*.

    The new parser takes its description from this module's docstring and
    its usage text from the module-level ``usage`` string. Nothing is
    returned; the parser is attached to *subparsers* as a side effect.
    """
    parser = subparsers.add_parser(
        'extract',
        description=__doc__,
        usage=usage,
    )

    # Positional argument: accepts zero or more values.
    parser.add_argument('signatures', nargs='*')

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='suppress non-error output',
    )
    parser.add_argument(
        '-o', '--output',
        metavar='FILE',
        default='-',
        help='output signature to this file (default stdout)',
    )

    # Substring selectors; both default to None (no filtering requested).
    parser.add_argument(
        '--md5',
        default=None,
        help='select signatures whose md5 contains this substring',
    )
    parser.add_argument(
        '--name',
        default=None,
        help='select signatures whose name contains this substring',
    )

    parser.add_argument(
        '-f', '--force',
        action='store_true',
        help='try to load all files as signatures',
    )
    parser.add_argument(
        '--from-file',
        help='a text file containing a list of files to load signatures from',
    )

    # Shared option groups from sourmash.cli.utils, applied in the same
    # order as before: ksize, molecule type, pattern, picklist.
    shared_option_helpers = (
        add_ksize_arg,
        add_moltype_args,
        add_pattern_args,
        add_picklist_args,
    )
    for add_options in shared_option_helpers:
        add_options(parser)
def main(args):
    """Entry point for the ``extract`` subcommand.

    Passes the parsed command-line arguments *args* to
    ``sourmash.sig.__main__.extract`` and returns its result unchanged.
    """
    # sourmash is imported at call time, not when this module is loaded.
    import sourmash

    extract_command = sourmash.sig.__main__.extract
    return extract_command(args)