-
Notifications
You must be signed in to change notification settings - Fork 77
/
split.py
73 lines (56 loc) · 2.34 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""split signature files"""
# Long-form, Markdown-formatted help text for `sourmash signature split`.
# subparser() below hands this string to argparse as the subcommand's
# `usage` text, so its contents are user-facing.
usage="""
### `sourmash signature split` - split signatures into individual files
Split each signature in the input file(s) into individual files, with
standardized names.
For example,
sourmash signature split tests/test-data/2.fa.sig
will create 3 files,
`f372e478.k=21.scaled=1000.DNA.dup=0.2.fa.sig`,
`f3a90d4e.k=31.scaled=1000.DNA.dup=0.2.fa.sig`, and
`43f3b48e.k=51.scaled=1000.DNA.dup=0.2.fa.sig`, representing the three
different DNA signatures at different ksizes created from the input file
`2.fa`.
The format of the names of the output files is standardized and stable
for major versions of sourmash: currently, they are period-separated
with fields:
* `md5sum` - a unique hash value based on the contents of the signature.
* `k=<ksize>` - k-mer size.
* `scaled=<scaled>` or `num=<num>` - scaled or num value for MinHash.
* `<moltype>` - the molecule type (DNA, protein, dayhoff, or hp)
* `dup=<n>` - a non-negative integer that prevents duplicate signatures from colliding.
* `basename` - basename of first input file used to create signature; if none provided, or stdin, this is `none`.
If `--outdir` is specified, all of the signatures are placed in outdir.
Note: `split` only saves files in the JSON `.sig` format.
"""
from sourmash.cli.utils import (add_moltype_args, add_ksize_arg,
add_picklist_args)
def subparser(subparsers):
    """Register the ``split`` subcommand and its command-line options.

    Adds a ``split`` parser to `subparsers` (using this module's docstring
    as the description and the module-level `usage` string as the usage
    text), then declares the positional ``signatures`` argument, the
    subcommand-specific options, and the shared ksize / moltype / picklist
    options provided by ``sourmash.cli.utils``.
    """
    split_parser = subparsers.add_parser(
        'split', description=__doc__, usage=usage
    )
    split_parser.add_argument('signatures', nargs='*')

    # (flags, keyword settings) for each option, in registration order;
    # the order determines how the options are listed in the help output.
    option_table = (
        (('-q', '--quiet'),
         dict(action='store_true', help='suppress non-error output')),
        (('--output-dir', '--outdir'),
         dict(help='output signatures to this directory')),
        (('-f', '--force'),
         dict(action='store_true',
              help='try to load all files as signatures')),
        (('--from-file',),
         dict(help='a text file containing a list of files to load '
                   'signatures from')),
        (('-E', '--extension'),
         dict(type=str, default='.sig',
              help="write files with this extension ('.sig' by default)")),
    )
    for flags, settings in option_table:
        split_parser.add_argument(*flags, **settings)

    # Shared option groups imported from sourmash.cli.utils.
    for add_shared_options in (add_ksize_arg, add_moltype_args,
                               add_picklist_args):
        add_shared_options(split_parser)
def main(args):
    """Run the ``split`` subcommand with the parsed arguments.

    Passes `args` to ``sourmash.sig.__main__.split`` and returns whatever
    that call returns. The ``sourmash`` package is imported here, when the
    command runs, rather than when this module is loaded.
    """
    import sourmash

    split_command = sourmash.sig.__main__.split
    return split_command(args)