/
Parameters.cpp
2183 lines (1981 loc) · 118 KB
/
Parameters.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include "Parameters.h"
#include "Util.h"
#include "DistanceCalculator.h"
#include "Debug.h"
#include "CommandCaller.h"
#include "ByteParser.h"
#include "FileUtil.h"
#include <map>
#include <iomanip>
#include <regex.h>
#include <unistd.h>
#ifdef __CYGWIN__
#include <sys/cygwin.h>
#endif
#ifdef OPENMP
#include <omp.h>
#endif
// Program name and version strings: only declared here, the definitions live in
// another translation unit (hence `extern`).
extern const char* version;
extern const char* binary_name;

// Out-of-class definition of the static Parameters::instance pointer; it starts as NULL.
// NOTE(review): presumably the handle of a singleton Parameters object -- confirm in Parameters.h.
Parameters *Parameters::instance = NULL;
Parameters::Parameters():
scoringMatrixFile("INVALID", "INVALID"),
seedScoringMatrixFile("INVALID", "INVALID"),
PARAM_S(PARAM_S_ID,"-s", "Sensitivity","sensitivity: 1.0 faster; 4.0 fast default; 7.5 sensitive (range 1.0-7.5)", typeid(float), (void *) &sensitivity, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PREFILTER),
PARAM_K(PARAM_K_ID,"-k", "K-mer size", "k-mer size in the range (0: set automatically to optimum)",typeid(int), (void *) &kmerSize, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
PARAM_THREADS(PARAM_THREADS_ID,"--threads", "Threads", "number of cores used for the computation (uses all cores by default)",typeid(int), (void *) &threads, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_COMMON),
PARAM_COMPRESSED(PARAM_COMPRESSED_ID,"--compressed", "Compressed", "write results in compressed format",typeid(int), (void *) &compressed, "^[0-1]{1}$", MMseqsParameter::COMMAND_COMMON),
PARAM_ALPH_SIZE(PARAM_ALPH_SIZE_ID,"--alph-size", "Alphabet size", "alphabet size (range 2-21)",typeid(int),(void *) &alphabetSize, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
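// Max sequence length is documented as 1-32768, but the pattern below only checks
// for leading digits (it is not end-anchored and does not enforce the range itself).
// If a range-restricted regex is reintroduced, do not edit it by hand; regenerate it with a tool,
// e.g.: http://gamon.webfactional.com/regexnumericrangegenerator/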
PARAM_MAX_SEQ_LEN(PARAM_MAX_SEQ_LEN_ID,"--max-seq-len","Max sequence length", "maximum sequence length (range 1-32768])",typeid(int), (void *) &maxSeqLen, "^[0-9]{1}[0-9]*", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_EXPERT),
PARAM_DIAGONAL_SCORING(PARAM_DIAGONAL_SCORING_ID,"--diag-score", "Diagonal scoring", "Use ungapped diagonal scoring during prefilter", typeid(bool), (void *) &diagonalScoring, "", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_EXACT_KMER_MATCHING(PARAM_EXACT_KMER_MATCHING_ID,"--exact-kmer-matching", "Exact k-mer matching", "only exact k-mer matching (range 0-1)", typeid(int),(void *) &exactKmerMatching, "^[0-1]{1}$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_MASK_RESIDUES(PARAM_MASK_RESIDUES_ID,"--mask", "Mask residues", "mask sequences in k-mer stage 0: w/o low complexity masking, 1: with low complexity masking", typeid(int),(void *) &maskMode, "^[0-1]{1}", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_MASK_LOWER_CASE(PARAM_MASK_LOWER_CASE_ID,"--mask-lower-case", "Mask lower case residues", "lowercase letters will be excluded from k-mer search 0: include region, 1: exclude region", typeid(int),(void *) &maskLowerCaseMode, "^[0-1]{1}", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_MIN_DIAG_SCORE(PARAM_MIN_DIAG_SCORE_ID,"--min-ungapped-score", "Minimum diagonal score", "accept only matches with ungapped alignment score above this threshold", typeid(int),(void *) &minDiagScoreThr, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_K_SCORE(PARAM_K_SCORE_ID,"--k-score", "K-score", "K-mer threshold for generating similar k-mer lists",typeid(int),(void *) &kmerScore, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_MAX_SEQS(PARAM_MAX_SEQS_ID,"--max-seqs", "Max results per query", "Maximum result sequences per query allowed to pass the prefilter (this parameter affects sensitivity)",typeid(int),(void *) &maxResListLen, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER),
PARAM_SPLIT(PARAM_SPLIT_ID,"--split", "Split database", "Splits input sets into N equally distributed chunks. The default value sets the best split automatically. createindex can only be used with split 1.",typeid(int),(void *) &split, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_SPLIT_MODE(PARAM_SPLIT_MODE_ID,"--split-mode", "Split mode", "0: split target db; 1: split query db; 2: auto, depending on main memory",typeid(int),(void *) &splitMode, "^[0-2]{1}$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_SPLIT_MEMORY_LIMIT(PARAM_SPLIT_MEMORY_LIMIT_ID, "--split-memory-limit", "Split memory limit", "Set max memory per split. E.g. 800B, 5K, 10M, 1G. Defaults (0) to all available system memory.", typeid(ByteParser), (void*) &splitMemoryLimit, "^(0|[1-9]{1}[0-9]*(B|K|M|G|T)?)$", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_DISK_SPACE_LIMIT(PARAM_DISK_SPACE_LIMIT_ID, "--disk-space-limit", "Disk space limit", "Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Defaults (0) to all available disk space in the temp folder.", typeid(ByteParser), (void*) &diskSpaceLimit, "^(0|[1-9]{1}[0-9]*(B|K|M|G|T)?)$", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_SPLIT_AMINOACID(PARAM_SPLIT_AMINOACID_ID,"--split-aa", "Split by amino acid","Try to find the best split for the target database by amino acid count instead",typeid(bool), (void *) &splitAA, "$", MMseqsParameter::COMMAND_EXPERT),
PARAM_SUB_MAT(PARAM_SUB_MAT_ID,"--sub-mat", "Substitution matrix", "amino acid substitution matrix file",typeid(ScoreMatrixFile),(void *) &scoringMatrixFile, "", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_EXPERT),
PARAM_SEED_SUB_MAT(PARAM_SEED_SUB_MAT_ID,"--seed-sub-mat", "Seed substitution matrix", "amino acid substitution matrix for kmer generation file",typeid(ScoreMatrixFile),(void *) &seedScoringMatrixFile, "", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_NO_COMP_BIAS_CORR(PARAM_NO_COMP_BIAS_CORR_ID,"--comp-bias-corr", "Compositional bias","correct for locally biased amino acid composition (range 0-1)",typeid(int), (void *) &compBiasCorrection, "^[0-1]{1}$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_SPACED_KMER_MODE(PARAM_SPACED_KMER_MODE_ID,"--spaced-kmer-mode", "Spaced k-mers", "0: use consecutive positions a k-mers; 1: use spaced k-mers",typeid(int), (void *) &spacedKmer, "^[0-1]{1}", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_REMOVE_TMP_FILES(PARAM_REMOVE_TMP_FILES_ID, "--remove-tmp-files", "Remove temporary files" , "Delete temporary files", typeid(bool), (void *) &removeTmpFiles, "",MMseqsParameter::COMMAND_MISC|MMseqsParameter::COMMAND_EXPERT),
PARAM_INCLUDE_IDENTITY(PARAM_INCLUDE_IDENTITY_ID,"--add-self-matches", "Include identical seq. id.","artificially add entries of queries with themselves (for clustering)",typeid(bool), (void *) &includeIdentity, "", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
PARAM_PRELOAD_MODE(PARAM_PRELOAD_MODE_ID, "--db-load-mode", "Preload mode", "Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch", typeid(int), (void*) &preloadMode, "[0-3]{1}", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_EXPERT),
PARAM_SPACED_KMER_PATTERN(PARAM_SPACED_KMER_PATTERN_ID, "--spaced-kmer-pattern", "Spaced k-mer pattern", "User-specified spaced k-mer pattern", typeid(std::string), (void *) &spacedKmerPattern, "^1[01]*1$", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
PARAM_LOCAL_TMP(PARAM_LOCAL_TMP_ID, "--local-tmp", "Local temporary path", "Path where some of the temporary files will be created", typeid(std::string), (void *) &localTmp, "", MMseqsParameter::COMMAND_PREFILTER|MMseqsParameter::COMMAND_EXPERT),
// alignment
PARAM_ALIGNMENT_MODE(PARAM_ALIGNMENT_MODE_ID,"--alignment-mode", "Alignment mode", "How to compute the alignment: 0: automatic; 1: only score and end_pos; 2: also start_pos and cov; 3: also seq.id; 4: only ungapped alignment",typeid(int), (void *) &alignmentMode, "^[0-4]{1}$", MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
PARAM_E(PARAM_E_ID,"-e", "E-value threshold", "list matches below this E-value (range 0.0-inf)",typeid(float), (void *) &evalThr, "^([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?)|[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN),
PARAM_C(PARAM_C_ID,"-c", "Coverage threshold", "list matches above this fraction of aligned (covered) residues (see --cov-mode)",typeid(float), (void *) &covThr, "^0(\\.[0-9]+)?|^1(\\.0+)?$", MMseqsParameter::COMMAND_ALIGN| MMseqsParameter::COMMAND_CLUSTLINEAR),
PARAM_COV_MODE(PARAM_COV_MODE_ID, "--cov-mode", "Coverage mode", "0: coverage of query and target, 1: coverage of target, 2: coverage of query 3: target seq. length needs to be at least x% of query length, 4: query seq. length needs to be at least x% of target length 5: short seq. needs to be at least x% of the other seq. length", typeid(int), (void *) &covMode, "^[0-5]{1}$", MMseqsParameter::COMMAND_ALIGN),
PARAM_SEQ_ID_MODE(PARAM_SEQ_ID_MODE_ID, "--seq-id-mode", "Seq. id. mode", "0: alignment length 1: shorter, 2: longer sequence", typeid(int), (void *) &seqIdMode, "^[0-2]{1}$", MMseqsParameter::COMMAND_ALIGN),
PARAM_MAX_REJECTED(PARAM_MAX_REJECTED_ID,"--max-rejected", "Max reject", "maximum rejected alignments before alignment calculation for a query is aborted",typeid(int),(void *) &maxRejected, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN),
PARAM_MAX_ACCEPT(PARAM_MAX_ACCEPT_ID,"--max-accept", "Max accept", "maximum accepted alignments before alignment calculation for a query is stopped",typeid(int),(void *) &maxAccept, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN),
PARAM_ADD_BACKTRACE(PARAM_ADD_BACKTRACE_ID, "-a", "Add backtrace", "add backtrace string (convert to alignments with mmseqs convertalis utility)", typeid(bool), (void *) &addBacktrace, "", MMseqsParameter::COMMAND_ALIGN),
PARAM_REALIGN(PARAM_REALIGN_ID, "--realign", "Realign hits", "compute more conservative, shorter alignments (scores and E-values not changed)", typeid(bool), (void *) &realign, "", MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
PARAM_MIN_SEQ_ID(PARAM_MIN_SEQ_ID_ID,"--min-seq-id", "Seq. id. threshold","list matches above this sequence identity (for clustering) (range 0.0-1.0)",typeid(float), (void *) &seqIdThr, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_ALIGN),
PARAM_MIN_ALN_LEN(PARAM_MIN_ALN_LEN_ID,"--min-aln-len", "Min. alignment length","minimum alignment length (range 0-INT_MAX)",typeid(int), (void *) &alnLenThr, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN),
PARAM_SCORE_BIAS(PARAM_SCORE_BIAS_ID,"--score-bias", "Score bias", "Score bias when computing the SW alignment (in bits)",typeid(float), (void *) &scoreBias, "^-?[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
PARAM_ALT_ALIGNMENT(PARAM_ALT_ALIGNMENT_ID,"--alt-ali", "Alternative alignments","Show up to this many alternative alignments",typeid(int), (void *) &altAlignment, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN),
PARAM_GAP_OPEN(PARAM_GAP_OPEN_ID,"--gap-open", "Gap open cost","Gap open cost",typeid(int), (void *) &gapOpen, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
PARAM_GAP_EXTEND(PARAM_GAP_EXTEND_ID,"--gap-extend", "Gap extension cost","Gap extension cost",typeid(int), (void *) &gapExtend, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_ALIGN|MMseqsParameter::COMMAND_EXPERT),
// clustering
PARAM_CLUSTER_MODE(PARAM_CLUSTER_MODE_ID,"--cluster-mode", "Cluster mode", "0: Setcover, 1: connected component, 2: Greedy clustering by sequence length 3: Greedy clustering by sequence length (low mem)",typeid(int), (void *) &clusteringMode, "[0-3]{1}$", MMseqsParameter::COMMAND_CLUST),
PARAM_CLUSTER_STEPS(PARAM_CLUSTER_STEPS_ID,"--cluster-steps", "Cascaded clustering steps", "cascaded clustering steps from 1 to -s",typeid(int), (void *) &clusterSteps, "^[1-9]{1}$", MMseqsParameter::COMMAND_CLUST|MMseqsParameter::COMMAND_EXPERT),
PARAM_CASCADED(PARAM_CASCADED_ID,"--single-step-clustering", "Single step clustering", "switches from cascaded to simple clustering workflow",typeid(bool), (void *) &cascaded, "", MMseqsParameter::COMMAND_CLUST),
PARAM_CLUSTER_REASSIGN(PARAM_CLUSTER_REASSIGN_ID,"--cluster-reassign", "Cluster reassign", "cascaded clustering can cluster sequence that not fulfill the clustering criteria. Cluster reassignment corrects this errors", typeid(int), (void *) &clusterReassignment, "[0-1]{1}$", MMseqsParameter::COMMAND_CLUST),
// affinity clustering
PARAM_MAXITERATIONS(PARAM_MAXITERATIONS_ID,"--max-iterations", "Max depth connected component", "maximum depth of breadth first search in connected component",typeid(int), (void *) &maxIteration, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUST|MMseqsParameter::COMMAND_EXPERT),
PARAM_SIMILARITYSCORE(PARAM_SIMILARITYSCORE_ID,"--similarity-type", "Similarity type", "type of score used for clustering (range 1,2). 1=alignment score. 2=sequence identity ",typeid(int),(void *) &similarityScoreType, "^[1-2]{1}$", MMseqsParameter::COMMAND_CLUST|MMseqsParameter::COMMAND_EXPERT),
// logging
PARAM_V(PARAM_V_ID,"-v", "Verbosity","verbosity level: 0=nothing, 1: +errors, 2: +warnings, 3: +info",typeid(int), (void *) &verbosity, "^[0-3]{1}$", MMseqsParameter::COMMAND_COMMON),
// create profile (HMM)
PARAM_PROFILE_TYPE(PARAM_PROFILE_TYPE_ID,"--profile-type", "Profile type", "0: HMM (HHsuite) 1: PSSM or 2: HMMER3",typeid(int),(void *) &profileMode, "^[0-2]{1}$"),
// convertalignments
PARAM_FORMAT_MODE(PARAM_FORMAT_MODE_ID,"--format-mode", "Alignment format", "Output format 0: BLAST-TAB, 1: SAM, 2: BLAST-TAB + query/db length", typeid(int), (void*) &formatAlignmentMode, "^[0-2]{1}$"),
PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID,"--format-output", "Format alignment output", "Choose output columns 'query,target,evalue,gapopen,pident,nident,qstart,qend,qlen,tstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov,qset,qsetid,tset,tsetid'", typeid(std::string), (void*) &outfmt, ""),
PARAM_DB_OUTPUT(PARAM_DB_OUTPUT_ID, "--db-output", "Database output", "Output a result db instead of a text file", typeid(bool), (void*) &dbOut, "", MMseqsParameter::COMMAND_EXPERT),
// --include-only-extendablediagonal
PARAM_RESCORE_MODE(PARAM_RESCORE_MODE_ID,"--rescore-mode", "Rescore mode", "Rescore diagonal with: 0: Hamming distance, 1: local alignment (score only), 2: local alignment, 3: global alignment or 4: longest alignment fullfilling window quality criterion", typeid(int), (void *) &rescoreMode, "^[0-4]{1}$"),
PARAM_WRAPPED_SCORING(PARAM_WRAPPED_SCORING_ID,"--wrapped-scoring", "Allow wrapped scoring","Double the (nucleotide) query sequence during the scoring process to allow wrapped diagonal scoring around end and start", typeid(bool), (void *) &wrappedScoring, "", MMseqsParameter::COMMAND_MISC),
PARAM_FILTER_HITS(PARAM_FILTER_HITS_ID,"--filter-hits", "Remove hits by seq. id. and coverage", "filter hits by seq.id. and coverage", typeid(bool), (void *) &filterHits, "", MMseqsParameter::COMMAND_EXPERT),
PARAM_SORT_RESULTS(PARAM_SORT_RESULTS_ID, "--sort-results", "Sort results", "Sort results: 0: no sorting, 1: sort by evalue (Alignment) or seq.id. (Hamming)", typeid(int), (void *) &sortResults, "^[0-1]{1}$", MMseqsParameter::COMMAND_EXPERT),
// result2msa
PARAM_ALLOW_DELETION(PARAM_ALLOW_DELETION_ID,"--allow-deletion", "Allow deletions", "allow deletions in a MSA", typeid(bool), (void*) &allowDeletion, ""),
PARAM_ADD_INTERNAL_ID(PARAM_ADD_INTERNAL_ID_ID,"--add-iternal-id", "Add internal id.", "add internal id as comment to MSA", typeid(bool), (void*) &addInternalId, "", MMseqsParameter::COMMAND_EXPERT),
PARAM_COMPRESS_MSA(PARAM_COMPRESS_MSA_ID,"--compress", "Compress MSA", "create MSA in ca3m format", typeid(bool), (void*) &compressMSA, ""),
PARAM_SUMMARIZE_HEADER(PARAM_SUMMARIZE_HEADER_ID,"--summarize", "Summarize headers", "summarize cluster headers into a single header description", typeid(bool), (void*) &summarizeHeader, ""),
PARAM_SUMMARY_PREFIX(PARAM_SUMMARY_PREFIX_ID, "--summary-prefix", "Summary prefix","sets the cluster summary prefix",typeid(std::string),(void *) &summaryPrefix, "", MMseqsParameter::COMMAND_EXPERT),
PARAM_OMIT_CONSENSUS(PARAM_OMIT_CONSENSUS_ID, "--omit-consensus", "Omit consensus", "Omit consensus sequence in alignment", typeid(bool), (void*) &omitConsensus, "", MMseqsParameter::COMMAND_EXPERT),
PARAM_SKIP_QUERY(PARAM_SKIP_QUERY_ID, "--skip-query", "Skip query", "Skip the query sequence", typeid(bool), (void*) &skipQuery, "", MMseqsParameter::COMMAND_EXPERT),
// convertmsa
PARAM_IDENTIFIER_FIELD(PARAM_IDENTIFIER_FIELD_ID, "--identifier-field", "Identifier field", "Field from STOCKHOLM comments for choosing the MSA identifier: 0: ID, 1: AC. If the respective comment does not exist, the name of the first sequence will become the identifier.", typeid(int), (void*) &identifierField, "^[0-1]{1}$", MMseqsParameter::COMMAND_COMMON),
// msa2profile
PARAM_MATCH_MODE(PARAM_MATCH_MODE_ID, "--match-mode", "Match mode", "0: Columns that have a residue in the first sequence are kept, 1: columns that have a residue in --match-ratio of all sequences are kept.", typeid(int), (void*)&matchMode, "^(0|1)$", MMseqsParameter::COMMAND_PROFILE),
PARAM_MATCH_RATIO(PARAM_MATCH_RATIO_ID, "--match-ratio", "Match ratio", "columns that have a residue in this ratio of all sequences are kept", typeid(float), (void*)&matchRatio, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_PROFILE),
// result2profile
PARAM_MASK_PROFILE(PARAM_MASK_PROFILE_ID,"--mask-profile", "Mask profile", "mask query sequence of profile using tantan [0,1]", typeid(int),(void *) &maskProfile, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE |MMseqsParameter::COMMAND_EXPERT),
PARAM_E_PROFILE(PARAM_E_PROFILE_ID,"--e-profile", "Profile e-value threshold", "includes sequences matches with < e-value thr. into the profile (>=0.0)", typeid(float), (void *) &evalProfile, "^([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?)|([0-9]*(\\.[0-9]+)?)$", MMseqsParameter::COMMAND_PROFILE),
PARAM_FILTER_MSA(PARAM_FILTER_MSA_ID,"--filter-msa", "Filter MSA", "filter msa: 0: do not filter, 1: filter", typeid(int), (void*) &filterMsa, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_FILTER_MAX_SEQ_ID(PARAM_FILTER_MAX_SEQ_ID_ID,"--max-seq-id", "Maximum seq. id. threshold", "reduce redundancy of output MSA using max. pairwise sequence identity [0.0,1.0]", typeid(float), (void*) &filterMaxSeqId, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_FILTER_QSC(PARAM_FILTER_QSC_ID, "--qsc", "Minimum score per column", "reduce diversity of output MSAs using min. score per aligned residue with query sequences [-50.0,100.0]", typeid(float), (void*) &qsc, "^\\-*[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_FILTER_QID(PARAM_FILTER_QID_ID, "--qid", "Minimum seq. id.", "reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]", typeid(float), (void*) &qid, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_FILTER_COV(PARAM_FILTER_COV_ID, "--cov", "Minimum coverage", "filter output MSAs using min. fraction of query residues covered by matched sequences [0.0,1.0]", typeid(float), (void*) &covMSAThr, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_FILTER_NDIFF(PARAM_FILTER_NDIFF_ID, "--diff", "Select N most diverse seqs", "filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50", typeid(int), (void*) &Ndiff, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_WG(PARAM_WG_ID, "--wg", "Use global sequence weighting", "use global sequence weighting for profile calculation", typeid(bool), (void*) &wg, "", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_PCA(PARAM_PCA_ID, "--pca", "Pseudo count a", "pseudo count admixture strength", typeid(float), (void*) &pca, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_PCB(PARAM_PCB_ID, "--pcb", "Pseudo count b", "pseudo counts: Neff at half of maximum admixture (range 0.0-inf)", typeid(float), (void*) &pcb, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
// sequence2profile
PARAM_NEFF(PARAM_NEFF_ID, "--neff", "Neff", "Neff included into context state profile (1.0,20.0)", typeid(float), (void*) &neff, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
PARAM_TAU(PARAM_TAU_ID, "--tau", "Tau", "Tau: context state pseudo count mixture (0.0,1.0)", typeid(float), (void*) &tau, "[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
// createtsv
PARAM_TARGET_COLUMN(PARAM_TARGET_COLUMN_ID, "--target-column", "Target column", "Select a target column (default 1), 0 if no target id exists.",typeid(int),(void *) &targetTsvColumn, "^[0-9]*$"),
PARAM_FIRST_SEQ_REP_SEQ(PARAM_FIRST_SEQ_REP_SEQ_ID, "--first-seq-as-repr", "First sequence as representative", "Use the first sequence of the clustering result as representative sequence", typeid(bool), (void*) &firstSeqRepr, "", MMseqsParameter::COMMAND_MISC),
PARAM_FULL_HEADER(PARAM_FULL_HEADER_ID, "--full-header", "Add full header", "Replace DB ID by its corresponding Full Header", typeid(bool), (void*) &fullHeader, ""),
PARAM_IDX_SEQ_SRC(PARAM_IDX_SEQ_SRC_ID, "--idx-seq-src", "Sequence source", "0: auto, 1: split/translated sequences, 2: input sequences", typeid(int), (void*) &idxSeqSrc, "^[0-2]{1}$", MMseqsParameter::COMMAND_MISC),
// result2stats
PARAM_STAT(PARAM_STAT_ID, "--stat", "Statistics to be computed", "can be one of: linecount, mean, doolittle, charges, seqlen, firstline.", typeid(std::string), (void*) &stat, ""),
// linearcluster
PARAM_KMER_PER_SEQ(PARAM_KMER_PER_SEQ_ID, "--kmer-per-seq", "K-mers per sequence", "kmer per sequence", typeid(int), (void*) &kmersPerSequence, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR),
PARAM_KMER_PER_SEQ_SCALE(PARAM_KMER_PER_SEQ_SCALE_ID, "--kmer-per-seq-scale", "scale k-mers per sequence", "scale kmer per sequence based on sequence length as kmer-per-seq val + scale x seqlen", typeid(float), (void*) &kmersPerSequenceScale, "^0(\\.[0-9]+)?|1(\\.0+)?$", MMseqsParameter::COMMAND_EXPERT),
PARAM_INCLUDE_ONLY_EXTENDABLE(PARAM_INCLUDE_ONLY_EXTENDABLE_ID, "--include-only-extendable", "Include only extendable", "Include only extendable", typeid(bool), (void*) &includeOnlyExtendable, "", MMseqsParameter::COMMAND_CLUSTLINEAR),
PARAM_IGNORE_MULTI_KMER(PARAM_IGNORE_MULTI_KMER_ID, "--ignore-multi-kmer", "Skip repeating k-mers", "Skip kmers occuring multiple times (>=2)", typeid(bool), (void*) &ignoreMultiKmer, "", MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
PARAM_HASH_SHIFT(PARAM_HASH_SHIFT_ID, "--hash-shift", "Shift hash", "Shift k-mer hash", typeid(int), (void*) &hashShift, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
PARAM_PICK_N_SIMILAR(PARAM_HASH_SHIFT_ID, "--pick-n-sim-kmer", "Add N similar to search", "adds N similar to search", typeid(int), (void*) &pickNbest, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
PARAM_ADJUST_KMER_LEN(PARAM_ADJUST_KMER_LEN_ID, "--adjust-kmer-len", "Adjust k-mer length", "adjust k-mer length based on specificity (only for nucleotides)", typeid(bool), (void*) &adjustKmerLength, "", MMseqsParameter::COMMAND_CLUSTLINEAR|MMseqsParameter::COMMAND_EXPERT),
// workflow
PARAM_RUNNER(PARAM_RUNNER_ID, "--mpi-runner", "MPI runner","Use MPI on compute grid with this MPI command (e.g. \"mpirun -np 42\")",typeid(std::string),(void *) &runner, "", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_EXPERT),
PARAM_REUSELATEST(PARAM_REUSELATEST_ID, "--force-reuse", "Force restart with latest tmp", "reuse tmp file in tmp/latest folder ignoring parameters and git version change", typeid(bool),(void *) &reuseLatest, "", MMseqsParameter::COMMAND_COMMON|MMseqsParameter::COMMAND_EXPERT),
// search workflow
PARAM_NUM_ITERATIONS(PARAM_NUM_ITERATIONS_ID, "--num-iterations", "Number search iterations","Search iterations",typeid(int),(void *) &numIterations, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PROFILE),
PARAM_START_SENS(PARAM_START_SENS_ID, "--start-sens", "Start sensitivity","start sensitivity",typeid(float),(void *) &startSens, "^[0-9]*(\\.[0-9]+)?$"),
PARAM_SENS_STEPS(PARAM_SENS_STEPS_ID, "--sens-steps", "Search steps","Search steps performed from --start-sense and -s.",typeid(int),(void *) &sensSteps, "^[1-9]{1}$"),
PARAM_SLICE_SEARCH(PARAM_SLICE_SEARCH_ID, "--slice-search", "Run a seq-profile search in slice mode", "For bigger profile DB, run iteratively the search by greedily swapping the search results.", typeid(bool),(void *) &sliceSearch, "", MMseqsParameter::COMMAND_PROFILE|MMseqsParameter::COMMAND_EXPERT),
PARAM_STRAND(PARAM_STRAND_ID, "--strand", "Strand selection", "Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both", typeid(int), (void *) &strand, "^[0-2]{1}$", MMseqsParameter::COMMAND_EXPERT),
// easysearch
PARAM_GREEDY_BEST_HITS(PARAM_GREEDY_BEST_HITS_ID, "--greedy-best-hits", "Greedy best hits", "Choose the best hits greedily to cover the query.", typeid(bool), (void*)&greedyBestHits, ""),
// extractorfs
PARAM_ORF_MIN_LENGTH(PARAM_ORF_MIN_LENGTH_ID, "--min-length", "Min codons in orf", "minimum codon number in open reading frames",typeid(int),(void *) &orfMinLength, "^[1-9]{1}[0-9]*$"),
PARAM_ORF_MAX_LENGTH(PARAM_ORF_MAX_LENGTH_ID, "--max-length", "Max codons in length", "maximum codon number in open reading frames",typeid(int),(void *) &orfMaxLength, "^[1-9]{1}[0-9]*$"),
PARAM_ORF_MAX_GAP(PARAM_ORF_MAX_GAP_ID, "--max-gaps", "Max orf gaps", "maximum number of codons with gaps or unknown residues before an open reading frame is rejected",typeid(int),(void *) &orfMaxGaps, "^(0|[1-9]{1}[0-9]*)$"),
PARAM_CONTIG_START_MODE(PARAM_CONTIG_START_MODE_ID,"--contig-start-mode", "Contig start mode", "Contig start can be 0: incomplete, 1: complete, 2: both",typeid(int),(void *) &contigStartMode, "^[0-2]{1}"),
PARAM_CONTIG_END_MODE(PARAM_CONTIG_END_MODE_ID,"--contig-end-mode", "Contig end mode", "Contig end can be 0: incomplete, 1: complete, 2: both ",typeid(int),(void *) &contigEndMode, "^[0-2]{1}"),
PARAM_ORF_START_MODE(PARAM_ORF_START_MODE_ID,"--orf-start-mode", "Orf start mode", "Orf fragment can be 0: from start to stop, 1: from any to stop, 2: from last encountered start to stop (no start in the middle)",typeid(int),(void *) &orfStartMode, "^[0-2]{1}"),
PARAM_ORF_FORWARD_FRAMES(PARAM_ORF_FORWARD_FRAMES_ID, "--forward-frames", "Forward frames", "comma-seperated list of ORF frames on the forward strand to be extracted", typeid(std::string), (void *) &forwardFrames, ""),
PARAM_ORF_REVERSE_FRAMES(PARAM_ORF_REVERSE_FRAMES_ID, "--reverse-frames", "Reverse frames", "comma-seperated list of ORF frames on the reverse strand to be extracted", typeid(std::string), (void *) &reverseFrames, ""),
PARAM_USE_ALL_TABLE_STARTS(PARAM_USE_ALL_TABLE_STARTS_ID,"--use-all-table-starts", "Use all table starts", "use all alteratives for a start codon in the genetic table, if false - only ATG (AUG)",typeid(bool),(void *) &useAllTableStarts, ""),
PARAM_TRANSLATE(PARAM_TRANSLATE_ID,"--translate", "Translate orf", "translate ORF to amino acid",typeid(int),(void *) &translate, "^[0-1]{1}"),
PARAM_CREATE_LOOKUP(PARAM_CREATE_LOOKUP_ID, "--create-lookup", "Create lookup", "Create database lookup file (can be very large)", typeid(int), (void *) &createLookup, "^[0-1]{1}", MMseqsParameter::COMMAND_EXPERT),
// indexdb
PARAM_CHECK_COMPATIBLE(PARAM_CHECK_COMPATIBLE_ID, "--check-compatible", "Check compatible", "0: Always recreate index, 1: Check if recreating index is needed, 2: Fail if index is incompatible", typeid(int), (void*) &checkCompatible, "^[0-2]{1}$", MMseqsParameter::COMMAND_MISC),
PARAM_SEARCH_TYPE(PARAM_SEARCH_TYPE_ID, "--search-type", "Search type", "search type 0: auto 1: amino acid, 2: translated, 3: nucleotide", typeid(int),(void *) &searchType, "^[0-3]{1}"),
// createdb
PARAM_USE_HEADER(PARAM_USE_HEADER_ID,"--use-fasta-header", "Use fasta header", "use the id parsed from the fasta header as the index key instead of using incrementing numeric identifiers",typeid(bool),(void *) &useHeader, ""),
PARAM_ID_OFFSET(PARAM_ID_OFFSET_ID, "--id-offset", "Offset of numeric ids", "numeric ids in index file are offset by this value ",typeid(int),(void *) &identifierOffset, "^(0|[1-9]{1}[0-9]*)$"),
PARAM_DB_TYPE(PARAM_DB_TYPE_ID,"--dbtype", "Database type", "Database type 0: auto, 1: amino acid 2: nucleotides",typeid(int),(void *) &dbType, "[0-2]{1}"),
PARAM_DONT_SPLIT_SEQ_BY_LEN(PARAM_DONT_SPLIT_SEQ_BY_LEN_ID,"--dont-split-seq-by-len", "Split seq. by length", "Dont split sequences by --max-seq-len",typeid(bool),(void *) &splitSeqByLen, ""),
PARAM_DONT_SHUFFLE(PARAM_DONT_SHUFFLE_ID,"--dont-shuffle", "Do not shuffle input database", "Do not shuffle input database",typeid(bool),(void *) &shuffleDatabase, ""),
PARAM_USE_HEADER_FILE(PARAM_USE_HEADER_FILE_ID, "--use-header-file", "Use ffindex header", "use the ffindex header file instead of the body to map the entry keys",typeid(bool),(void *) &useHeaderFile, ""),
// splitsequence
PARAM_SEQUENCE_OVERLAP(PARAM_SEQUENCE_OVERLAP_ID, "--sequence-overlap", "Overlap between sequences", "overlap between sequences",typeid(int),(void *) &sequenceOverlap, "^(0|[1-9]{1}[0-9]*)$"),
PARAM_SEQUENCE_SPLIT_MODE(PARAM_SEQUENCE_SPLIT_MODE_ID, "--sequence-split-mode", "Sequence split mode", "sequence split mode 0: soft link data write new index, 1: copy data",typeid(int),(void *) &sequenceSplitMode, "^[0-1]{1}$"),
// gff2db
PARAM_GFF_TYPE(PARAM_GFF_TYPE_ID,"--gff-type", "GFF type", "type in the GFF file to filter by",typeid(std::string),(void *) &gffType, ""),
// translatenucs
PARAM_TRANSLATION_TABLE(PARAM_TRANSLATION_TABLE_ID,"--translation-table", "Translation table", "1) CANONICAL, 2) VERT_MITOCHONDRIAL, 3) YEAST_MITOCHONDRIAL, 4) MOLD_MITOCHONDRIAL, 5) INVERT_MITOCHONDRIAL, 6) CILIATE, 9) FLATWORM_MITOCHONDRIAL, 10) EUPLOTID, 11) PROKARYOTE, 12) ALT_YEAST, 13) ASCIDIAN_MITOCHONDRIAL, 14) ALT_FLATWORM_MITOCHONDRIAL, 15) BLEPHARISMA, 16) CHLOROPHYCEAN_MITOCHONDRIAL, 21) TREMATODE_MITOCHONDRIAL, 22) SCENEDESMUS_MITOCHONDRIAL, 23) THRAUSTOCHYTRIUM_MITOCHONDRIAL, 24) PTEROBRANCHIA_MITOCHONDRIAL, 25) GRACILIBACTERIA, 26) PACHYSOLEN, 27) KARYORELICT, 28) CONDYLOSTOMA, 29) MESODINIUM, 30) PERTRICH, 31) BLASTOCRITHIDIA", typeid(int),(void *) &translationTable, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_MISC|MMseqsParameter::COMMAND_EXPERT),
// translatenucs
PARAM_ADD_ORF_STOP(PARAM_ADD_ORF_STOP_ID,"--add-orf-stop", "Add orf stop", "add * at complete start and end", typeid(bool),(void *) &addOrfStop, ""),
// createseqfiledb
PARAM_MIN_SEQUENCES(PARAM_MIN_SEQUENCES_ID,"--min-sequences", "Min sequences", "minimum number of sequences a cluster may contain", typeid(int),(void *) &minSequences,"^[1-9]{1}[0-9]*$"),
PARAM_MAX_SEQUENCES(PARAM_MAX_SEQUENCES_ID,"--max-sequences", "Max sequences", "maximum number of sequences a cluster may contain", typeid(int),(void *) &maxSequences,"^[1-9]{1}[0-9]*$"),
PARAM_HH_FORMAT(PARAM_HH_FORMAT_ID,"--hh-format", "HH format", "format entries to use with hhsuite (for singleton clusters)", typeid(bool), (void *) &hhFormat, ""),
// filterdb
PARAM_FILTER_COL(PARAM_FILTER_COL_ID,"--filter-column", "Filter column", "column", typeid(int),(void *) &filterColumn,"^[1-9]{1}[0-9]*$"),
PARAM_COLUMN_TO_TAKE(PARAM_COLUMN_TO_TAKE_ID,"--column-to-take", "Column to take", "column to take in join mode. If -1, the whole line is taken", typeid(int),(void *) &columnToTake,"^(-1|0|[1-9]{1}[0-9]*)$"),
PARAM_FILTER_REGEX(PARAM_FILTER_REGEX_ID,"--filter-regex", "Filter regex", "regex to select column (example float: [0-9]*(.[0-9]+)? int:[1-9]{1}[0-9])", typeid(std::string),(void *) &filterColumnRegex,"^.*$"),
PARAM_FILTER_POS(PARAM_FILTER_POS_ID,"--positive-filter", "Positive filter", "used in conjunction with --filter-file. If true, out = in \\intersect filter ; if false, out = in - filter", typeid(bool),(void *) &positiveFilter,""),
PARAM_FILTER_FILE(PARAM_FILTER_FILE_ID,"--filter-file", "Filter file", "specify a file that contains the filtering elements", typeid(std::string),(void *) &filteringFile,""),
PARAM_FILTER_EXPRESSION(PARAM_FILTER_EXPRESSION_ID, "--filter-expression", "Filter expression", "Specify a mathematical expression to filter lines", typeid(std::string), (void*) &filterExpression, ""),
PARAM_MAPPING_FILE(PARAM_MAPPING_FILE_ID,"--mapping-file", "Mapping file", "specify a file that translates the keys of a DB to new keys, TSV format", typeid(std::string),(void *) &mappingFile,""),
PARAM_TRIM_TO_ONE_COL(PARAM_TRIM_TO_ONE_COL_ID,"--trim-to-one-column", "Trim to one column","Output only the column specified by --filter-column.",typeid(bool), (void *) &trimToOneColumn, ""),
PARAM_EXTRACT_LINES(PARAM_EXTRACT_LINES_ID,"--extract-lines", "Extract N lines", "extract n lines of each entry.",typeid(int), (void *) &extractLines, "^[1-9]{1}[0-9]*$"),
PARAM_COMP_OPERATOR(PARAM_COMP_OPERATOR_ID, "--comparison-operator", "Numerical comparison operator", "Filter by comparing each entry row numerically by using the le) less-than-equal, ge) greater-than-equal or e) equal operator.", typeid(std::string), (void *) &compOperator, ""),
PARAM_COMP_VALUE(PARAM_COMP_VALUE_ID, "--comparison-value", "Numerical comparison value", "Filter by comparing each entry to this value.", typeid(float), (void *) &compValue, "^.*$"),
PARAM_SORT_ENTRIES(PARAM_SORT_ENTRIES_ID, "--sort-entries", "Sort entries", "Sort column set by --filter-column, by 0: no sorting, 1: increasing, 2: decreasing, 3: random shuffle.", typeid(int), (void *) &sortEntries, "^[1-9]{1}[0-9]*$"),
PARAM_BEATS_FIRST(PARAM_BEATS_FIRST_ID, "--beats-first", "Beats first", "Filter by comparing each entry to the first entry.", typeid(bool), (void*) &beatsFirst, ""),
PARAM_JOIN_DB(PARAM_JOIN_DB_ID, "--join-db","join to DB", "Join another database entry with respect to the database identifier in the chosen column", typeid(std::string), (void*) &joinDB, ""),
PARAM_COMPUTE_POSITIONS(PARAM_COMPUTE_POSITIONS_ID, "--compute-positions", "Compute positions", "Add the positions of he hit on the target genome", typeid(std::string), (void*) &compPos, ""),
PARAM_TRANSITIVE_REPLACE(PARAM_TRANSITIVE_REPLACE_ID, "--transitive-replace", "Replace transitively", "Replace cluster name in a search file by all genes in this cluster", typeid(std::string), (void*) &clusterFile, ""),
// besthitperset
PARAM_SIMPLE_BEST_HIT(PARAM_SIMPLE_BEST_HIT_ID, "--simple-best-hit", "Use simple best hit", "Update the p-value by a single best hit, or by best and second best hits", typeid(bool), (void*) &simpleBestHit, ""),
PARAM_ALPHA(PARAM_ALPHA_ID, "--alpha", "Alpha", "Set alpha for combining p-values during aggregation", typeid(float), (void*) &alpha, ""),
PARAM_SHORT_OUTPUT(PARAM_SHORT_OUTPUT_ID, "--short-output", "Short output", "The output database will contain only the spread p-value", typeid(bool), (void*) &shortOutput, ""),
PARAM_AGGREGATION_MODE(PARAM_AGGREGATION_MODE_ID, "--aggregation-mode", "Aggregation mode", "Combined P-values computed from 0: multi-hit, 1: minimum of all P-values, 2: product-of-P-values, 3: truncated product", typeid(int), (void*) &aggregationMode, "^[0-4]{1}$"),
// concatdb
PARAM_PRESERVEKEYS(PARAM_PRESERVEKEYS_ID,"--preserve-keys", "Preserve the keys", "the keys of the two DB should be distinct, and they will be preserved in the concatenation.",typeid(bool), (void *) &preserveKeysB, ""),
PARAM_TAKE_LARGER_ENTRY(PARAM_TAKE_LARGER_ENTRY_ID,"--take-larger-entry", "Take the larger entry", "only keeps the larger entry (dataSize >) in the concatenation, both databases need the same keys in the index",typeid(bool), (void *) &takeLargerEntry, ""),
// offsetalignment
PARAM_CHAIN_ALIGNMENT(PARAM_CHAIN_ALIGNMENT_ID,"--chain-alignments", "Chain overlapping alignments", "Chain overlapping alignments",typeid(int),(void *) &chainAlignment, "^[0-1]{1}", MMseqsParameter::COMMAND_EXPERT),
PARAM_MERGE_QUERY(PARAM_MERGE_QUERY_ID,"--merge-query", "Merge query", "combine ORFs/split sequences to a single entry",typeid(int),(void *) &mergeQuery, "^[0-1]{1}", MMseqsParameter::COMMAND_EXPERT),
// tsv2db
PARAM_OUTPUT_DBTYPE(PARAM_OUTPUT_DBTYPE_ID,"--output-dbtype", "Output database type", "Set database type for resulting database: Amino acid sequences 0, Nucl. seq. 1, Profiles 2, Alignment result 5, Clustering result 6, Prefiltering result 7, Taxonomy result 8, Indexed database 9, cA3M MSAs 10, FASTA or A3M MSAs 11, Generic database 12, Omic dbtype file 13, Bi-directional prefiltering result 14, Offsetted headers 15",typeid(int),(void *) &outputDbType, "^(0|[1-9]{1}[0-9]*)$"),
//diff
PARAM_USESEQID(PARAM_USESEQID_ID,"--use-seq-id", "Match sequences by their id.", "Sequence ID (Uniprot, GenBank, ...) is used for identifying matches between the old and the new DB.",typeid(bool), (void *) &useSequenceId, ""),
// prefixid
PARAM_PREFIX(PARAM_PREFIX_ID, "--prefix", "Prefix", "Use this prefix for all entries", typeid(std::string),(void *) &prefix,""),
PARAM_TSV(PARAM_TSV_ID,"--tsv", "Tsv", "should output be in TSV format",typeid(bool),(void *) &tsvOut, ""),
// summarize headers
PARAM_HEADER_TYPE(PARAM_HEADER_TYPE_ID,"--header-type", "Header type", "Header Type: 1 Uniclust, 2 Metaclust",typeid(int), (void *) &headerType, "[1-2]{1}"),
// mergedbs
PARAM_MERGE_PREFIXES(PARAM_MERGE_PREFIXES_ID, "--prefixes", "Merge prefixes", "Comma separated list of prefixes for each entry", typeid(std::string),(void *) &mergePrefixes,""),
// summarizeresult
PARAM_OVERLAP(PARAM_OVERLAP_ID, "--overlap", "Overlap threshold", "Maximum overlap of covered regions", typeid(float), (void*) &overlap, "^[0-9]*(\\.[0-9]+)?$"),
// msa2profile
PARAM_MSA_TYPE(PARAM_MSA_TYPE_ID,"--msa-type", "MSA type", "MSA Type: cA3M 0, A3M 1, FASTA 2", typeid(int), (void *) &msaType, "^[0-2]{1}$"),
// extractalignedregion
PARAM_EXTRACT_MODE(PARAM_EXTRACT_MODE_ID,"--extract-mode", "Extract mode", "Query 1, Target 2", typeid(int), (void *) &extractMode, "^[1-2]{1}$"),
// convertkb
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, ""),
PARAM_RECOVER_DELETED(PARAM_RECOVER_DELETED_ID, "--recover-deleted", "Recover deleted", "Indicates if sequences are allowed to be be removed during updating", typeid(bool), (void*) &recoverDeleted, ""),
// filtertaxdb
PARAM_TAXON_LIST(PARAM_TAXON_LIST_ID, "--taxon-list", "Selected taxons", "taxonomy ID, possibly multiple separated by ','", typeid(std::string), (void*) &taxonList, ""),
// view
PARAM_ID_LIST(PARAM_ID_LIST_ID, "--id-list", "Selected entries with key", "entries to be printed seperated by ','", typeid(std::string), (void*) &idList, ""),
PARAM_IDX_ENTRY_TYPE(PARAM_IDX_ENTRY_TYPE_ID, "--idx-entry-type", "Index entry type", "sequence; 0, src sequence 1: header: 2, src header :3 (default 0)", typeid(int), (void*) &idxEntryType, "^[0-3]{1}$"),
// lca and addtaxonomy
PARAM_PICK_ID_FROM(PARAM_PICK_ID_FROM_ID,"--pick-id-from", "Extract mode", "Query 1, Target 2", typeid(int), (void *) &pickIdFrom, "^[1-2]{1}$"),
PARAM_LCA_RANKS(PARAM_LCA_RANKS_ID, "--lca-ranks", "LCA ranks", "Add column with specified ranks (':' separated)", typeid(std::string), (void*) &lcaRanks, ""),
PARAM_BLACKLIST(PARAM_BLACKLIST_ID, "--blacklist", "Blacklisted taxa", "Comma separated list of ignored taxa in LCA computation", typeid(std::string), (void*)&blacklist, "([0-9]+,)?[0-9]+"),
PARAM_TAXON_ADD_LINEAGE(PARAM_TAXON_ADD_LINEAGE_ID, "--tax-lineage", "Show taxon lineage", "Add column with full taxonomy lineage", typeid(bool), (void*)&showTaxLineage, ""),
// taxonomyreport
PARAM_REPORT_MODE(PARAM_REPORT_MODE_ID,"--report-mode", "Report mode", "Taxonomy report mode 0: Kraken 1: Krona", typeid(int), (void *) &reportMode, "^[0-1]{1}$"),
// createtaxdb
PARAM_NCBI_TAX_DUMP(PARAM_NCBI_TAX_DUMP_ID, "--ncbi-tax-dump", "NCBI tax dump directory", "NCBI tax dump directory. The tax dump can be downloaded here \"ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz\"", typeid(std::string), (void*) &ncbiTaxDump, ""),
PARAM_TAX_MAPPING_FILE(PARAM_TAX_MAPPING_FILE_ID, "--tax-mapping-file", "Taxonomical mapping file", "File to map sequence identifer to taxonomical identifier", typeid(std::string), (void*) &taxMappingFile, ""),
// expandaln
PARAM_EXPANSION_MODE(PARAM_EXPANSION_MODE_ID, "--expansion-mode", "Expansion mode", "Which hits (still meeting the alignment criteria) to use when expanding the alignment results: 0 Use all hits, 1 Use only the best hit of each target", typeid(int), (void*) &expansionMode, "^[0-2]{1}$"),
// taxonomy
PARAM_LCA_MODE(PARAM_LCA_MODE_ID, "--lca-mode", "LCA mode", "LCA Mode 1: Single Search LCA , 2: 2bLCA, 3: approx. 2bLCA, 4: top hit", typeid(int), (void*) &taxonomySearchMode, "^[1-4]{1}$"),
PARAM_TAX_OUTPUT_MODE(PARAM_TAX_OUTPUT_MODE_ID, "--tax-output-mode", "Taxonomy output mode", "0: output LCA, 1: output alignment", typeid(int), (void*) &taxonomyOutpuMode, "^[0-1]{1}$"),
// createsubdb
PARAM_SUBDB_MODE(PARAM_SUBDB_MODE_ID, "--subdb-mode", "Subdb mode", "LCA Mode 0: copy data 1: soft link data", typeid(int), (void*) &subDbMode, "^[0-1]{1}$")
{
if (instance) {
Debug(Debug::ERROR) << "Parameter instance already exists!\n";
abort();
}
instance = this;
// onlyverbosity
onlyverbosity.push_back(&PARAM_V);
// verbandcompression
verbandcompression.push_back(&PARAM_COMPRESSED);
verbandcompression.push_back(&PARAM_V);
// onlythreads
onlythreads.push_back(&PARAM_THREADS);
onlythreads.push_back(&PARAM_V);
// threadsandcompression
threadsandcompression.push_back(&PARAM_THREADS);
threadsandcompression.push_back(&PARAM_COMPRESSED);
threadsandcompression.push_back(&PARAM_V);
// alignment
align.push_back(&PARAM_SUB_MAT);
align.push_back(&PARAM_ADD_BACKTRACE);
align.push_back(&PARAM_ALIGNMENT_MODE);
align.push_back(&PARAM_WRAPPED_SCORING);
align.push_back(&PARAM_E);
align.push_back(&PARAM_MIN_SEQ_ID);
align.push_back(&PARAM_MIN_ALN_LEN);
align.push_back(&PARAM_SEQ_ID_MODE);
align.push_back(&PARAM_ALT_ALIGNMENT);
align.push_back(&PARAM_C);
align.push_back(&PARAM_COV_MODE);
align.push_back(&PARAM_MAX_SEQ_LEN);
align.push_back(&PARAM_NO_COMP_BIAS_CORR);
align.push_back(&PARAM_REALIGN);
align.push_back(&PARAM_MAX_REJECTED);
align.push_back(&PARAM_MAX_ACCEPT);
align.push_back(&PARAM_INCLUDE_IDENTITY);
align.push_back(&PARAM_PRELOAD_MODE);
align.push_back(&PARAM_PCA);
align.push_back(&PARAM_PCB);
align.push_back(&PARAM_SCORE_BIAS);
align.push_back(&PARAM_GAP_OPEN);
align.push_back(&PARAM_GAP_EXTEND);
align.push_back(&PARAM_THREADS);
align.push_back(&PARAM_COMPRESSED);
align.push_back(&PARAM_V);
// prefilter
prefilter.push_back(&PARAM_SUB_MAT);
prefilter.push_back(&PARAM_SEED_SUB_MAT);
prefilter.push_back(&PARAM_S);
prefilter.push_back(&PARAM_K);
prefilter.push_back(&PARAM_K_SCORE);
prefilter.push_back(&PARAM_ALPH_SIZE);
prefilter.push_back(&PARAM_MAX_SEQ_LEN);
prefilter.push_back(&PARAM_MAX_SEQS);
prefilter.push_back(&PARAM_SPLIT);
prefilter.push_back(&PARAM_SPLIT_MODE);
prefilter.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
prefilter.push_back(&PARAM_C);
prefilter.push_back(&PARAM_COV_MODE);
prefilter.push_back(&PARAM_NO_COMP_BIAS_CORR);
prefilter.push_back(&PARAM_DIAGONAL_SCORING);
prefilter.push_back(&PARAM_EXACT_KMER_MATCHING);
prefilter.push_back(&PARAM_MASK_RESIDUES);
prefilter.push_back(&PARAM_MASK_LOWER_CASE);
prefilter.push_back(&PARAM_MIN_DIAG_SCORE);
prefilter.push_back(&PARAM_INCLUDE_IDENTITY);
prefilter.push_back(&PARAM_SPACED_KMER_MODE);
prefilter.push_back(&PARAM_PRELOAD_MODE);
prefilter.push_back(&PARAM_PCA);
prefilter.push_back(&PARAM_PCB);
prefilter.push_back(&PARAM_SPACED_KMER_PATTERN);
prefilter.push_back(&PARAM_LOCAL_TMP);
prefilter.push_back(&PARAM_THREADS);
prefilter.push_back(&PARAM_COMPRESSED);
prefilter.push_back(&PARAM_V);
// ungappedprefilter
ungappedprefilter.push_back(&PARAM_SUB_MAT);
ungappedprefilter.push_back(&PARAM_C);
ungappedprefilter.push_back(&PARAM_E);
ungappedprefilter.push_back(&PARAM_COV_MODE);
ungappedprefilter.push_back(&PARAM_NO_COMP_BIAS_CORR);
ungappedprefilter.push_back(&PARAM_MIN_DIAG_SCORE);
ungappedprefilter.push_back(&PARAM_THREADS);
ungappedprefilter.push_back(&PARAM_COMPRESSED);
ungappedprefilter.push_back(&PARAM_V);
// clustering
clust.push_back(&PARAM_CLUSTER_MODE);
clust.push_back(&PARAM_MAXITERATIONS);
clust.push_back(&PARAM_SIMILARITYSCORE);
clust.push_back(&PARAM_THREADS);
clust.push_back(&PARAM_COMPRESSED);
clust.push_back(&PARAM_V);
// rescorediagonal
rescorediagonal.push_back(&PARAM_SUB_MAT);
rescorediagonal.push_back(&PARAM_RESCORE_MODE);
rescorediagonal.push_back(&PARAM_WRAPPED_SCORING);
rescorediagonal.push_back(&PARAM_FILTER_HITS);
rescorediagonal.push_back(&PARAM_E);
rescorediagonal.push_back(&PARAM_C);
rescorediagonal.push_back(&PARAM_ADD_BACKTRACE);
rescorediagonal.push_back(&PARAM_COV_MODE);
rescorediagonal.push_back(&PARAM_MIN_SEQ_ID);
rescorediagonal.push_back(&PARAM_MIN_ALN_LEN);
rescorediagonal.push_back(&PARAM_SEQ_ID_MODE);
rescorediagonal.push_back(&PARAM_INCLUDE_IDENTITY);
rescorediagonal.push_back(&PARAM_SORT_RESULTS);
rescorediagonal.push_back(&PARAM_PRELOAD_MODE);
rescorediagonal.push_back(&PARAM_THREADS);
rescorediagonal.push_back(&PARAM_COMPRESSED);
rescorediagonal.push_back(&PARAM_V);
// alignbykmer
alignbykmer.push_back(&PARAM_SUB_MAT);
alignbykmer.push_back(&PARAM_K);
alignbykmer.push_back(&PARAM_SPACED_KMER_MODE);
alignbykmer.push_back(&PARAM_SPACED_KMER_PATTERN);
alignbykmer.push_back(&PARAM_ALPH_SIZE);
alignbykmer.push_back(&PARAM_FILTER_HITS);
alignbykmer.push_back(&PARAM_C);
alignbykmer.push_back(&PARAM_E);
alignbykmer.push_back(&PARAM_COV_MODE);
alignbykmer.push_back(&PARAM_MIN_SEQ_ID);
alignbykmer.push_back(&PARAM_MIN_ALN_LEN);
alignbykmer.push_back(&PARAM_INCLUDE_IDENTITY);
alignbykmer.push_back(&PARAM_GAP_OPEN);
alignbykmer.push_back(&PARAM_GAP_EXTEND);
alignbykmer.push_back(&PARAM_THREADS);
alignbykmer.push_back(&PARAM_COMPRESSED);
alignbykmer.push_back(&PARAM_V);
// convertprofiledb
convertprofiledb.push_back(&PARAM_SUB_MAT);
convertprofiledb.push_back(&PARAM_PROFILE_TYPE);
convertprofiledb.push_back(&PARAM_THREADS);
convertprofiledb.push_back(&PARAM_COMPRESSED);
convertprofiledb.push_back(&PARAM_V);
// sequence2profile
sequence2profile.push_back(&PARAM_PCA);
sequence2profile.push_back(&PARAM_PCB);
sequence2profile.push_back(&PARAM_NEFF);
sequence2profile.push_back(&PARAM_TAU);
sequence2profile.push_back(&PARAM_THREADS);
sequence2profile.push_back(&PARAM_SUB_MAT);
sequence2profile.push_back(&PARAM_COMPRESSED);
sequence2profile.push_back(&PARAM_V);
// create fasta
createFasta.push_back(&PARAM_V);
// result2profile
result2profile.push_back(&PARAM_SUB_MAT);
result2profile.push_back(&PARAM_E);
result2profile.push_back(&PARAM_MASK_PROFILE);
result2profile.push_back(&PARAM_E_PROFILE);
result2profile.push_back(&PARAM_NO_COMP_BIAS_CORR);
result2profile.push_back(&PARAM_WG);
result2profile.push_back(&PARAM_ALLOW_DELETION);
result2profile.push_back(&PARAM_FILTER_MSA);
result2profile.push_back(&PARAM_FILTER_MAX_SEQ_ID);
result2profile.push_back(&PARAM_FILTER_QID);
result2profile.push_back(&PARAM_FILTER_QSC);
result2profile.push_back(&PARAM_FILTER_COV);
result2profile.push_back(&PARAM_FILTER_NDIFF);
result2profile.push_back(&PARAM_PCA);
result2profile.push_back(&PARAM_PCB);
result2profile.push_back(&PARAM_OMIT_CONSENSUS);
result2profile.push_back(&PARAM_PRELOAD_MODE);
result2profile.push_back(&PARAM_GAP_OPEN);
result2profile.push_back(&PARAM_GAP_EXTEND);
result2profile.push_back(&PARAM_THREADS);
result2profile.push_back(&PARAM_COMPRESSED);
result2profile.push_back(&PARAM_V);
// result2pp
result2pp.push_back(&PARAM_SUB_MAT);
result2pp.push_back(&PARAM_E_PROFILE);
result2pp.push_back(&PARAM_E);
result2pp.push_back(&PARAM_NO_COMP_BIAS_CORR);
result2pp.push_back(&PARAM_WG);
result2pp.push_back(&PARAM_FILTER_MSA);
result2pp.push_back(&PARAM_FILTER_MAX_SEQ_ID);
result2pp.push_back(&PARAM_FILTER_QID);
result2pp.push_back(&PARAM_FILTER_QSC);
result2pp.push_back(&PARAM_FILTER_COV);
result2pp.push_back(&PARAM_FILTER_NDIFF);
result2pp.push_back(&PARAM_PCA);
result2pp.push_back(&PARAM_PCB);
result2pp.push_back(&PARAM_OMIT_CONSENSUS);
result2pp.push_back(&PARAM_PRELOAD_MODE);
result2pp.push_back(&PARAM_THREADS);
result2pp.push_back(&PARAM_COMPRESSED);
result2pp.push_back(&PARAM_V);
// createtsv
createtsv.push_back(&PARAM_FIRST_SEQ_REP_SEQ);
createtsv.push_back(&PARAM_TARGET_COLUMN);
createtsv.push_back(&PARAM_FULL_HEADER);
createtsv.push_back(&PARAM_IDX_SEQ_SRC);
createtsv.push_back(&PARAM_DB_OUTPUT);
createtsv.push_back(&PARAM_THREADS);
createtsv.push_back(&PARAM_COMPRESSED);
createtsv.push_back(&PARAM_V);
//result2stats
result2stats.push_back(&PARAM_STAT);
result2stats.push_back(&PARAM_TSV);
result2stats.push_back(&PARAM_COMPRESSED);
result2stats.push_back(&PARAM_THREADS);
result2stats.push_back(&PARAM_V);
// format alignment
convertalignments.push_back(&PARAM_SUB_MAT);
convertalignments.push_back(&PARAM_FORMAT_MODE);
convertalignments.push_back(&PARAM_FORMAT_OUTPUT);
convertalignments.push_back(&PARAM_TRANSLATION_TABLE);
convertalignments.push_back(&PARAM_GAP_OPEN);
convertalignments.push_back(&PARAM_GAP_EXTEND);
convertalignments.push_back(&PARAM_DB_OUTPUT);
convertalignments.push_back(&PARAM_PRELOAD_MODE);
convertalignments.push_back(&PARAM_SEARCH_TYPE);
convertalignments.push_back(&PARAM_THREADS);
convertalignments.push_back(&PARAM_COMPRESSED);
convertalignments.push_back(&PARAM_V);
// result2msa
result2msa.push_back(&PARAM_SUB_MAT);
result2msa.push_back(&PARAM_E_PROFILE);
result2msa.push_back(&PARAM_ALLOW_DELETION);
result2msa.push_back(&PARAM_ADD_INTERNAL_ID);
result2msa.push_back(&PARAM_NO_COMP_BIAS_CORR);
result2msa.push_back(&PARAM_FILTER_MSA);
result2msa.push_back(&PARAM_FILTER_MAX_SEQ_ID);
result2msa.push_back(&PARAM_FILTER_QID);
result2msa.push_back(&PARAM_FILTER_QSC);
result2msa.push_back(&PARAM_FILTER_COV);
result2msa.push_back(&PARAM_FILTER_NDIFF);
result2msa.push_back(&PARAM_THREADS);
result2msa.push_back(&PARAM_COMPRESS_MSA);
result2msa.push_back(&PARAM_SUMMARIZE_HEADER);
result2msa.push_back(&PARAM_SUMMARY_PREFIX);
result2msa.push_back(&PARAM_OMIT_CONSENSUS);
result2msa.push_back(&PARAM_SKIP_QUERY);
result2msa.push_back(&PARAM_GAP_OPEN);
result2msa.push_back(&PARAM_GAP_EXTEND);
result2msa.push_back(&PARAM_COMPRESSED);
//result2msa.push_back(&PARAM_FIRST_SEQ_REP_SEQ);
result2msa.push_back(&PARAM_V);
// convertmsa
convertmsa.push_back(&PARAM_IDENTIFIER_FIELD);
convertmsa.push_back(&PARAM_COMPRESSED);
convertmsa.push_back(&PARAM_V);
// msa2profile
msa2profile.push_back(&PARAM_MSA_TYPE);
msa2profile.push_back(&PARAM_SUB_MAT);
msa2profile.push_back(&PARAM_MATCH_MODE);
msa2profile.push_back(&PARAM_MATCH_RATIO);
msa2profile.push_back(&PARAM_PCA);
msa2profile.push_back(&PARAM_PCB);
msa2profile.push_back(&PARAM_NO_COMP_BIAS_CORR);
msa2profile.push_back(&PARAM_WG);
msa2profile.push_back(&PARAM_FILTER_MSA);
msa2profile.push_back(&PARAM_FILTER_COV);
msa2profile.push_back(&PARAM_FILTER_QID);
msa2profile.push_back(&PARAM_FILTER_QSC);
msa2profile.push_back(&PARAM_FILTER_MAX_SEQ_ID);
msa2profile.push_back(&PARAM_FILTER_NDIFF);
msa2profile.push_back(&PARAM_GAP_OPEN);
msa2profile.push_back(&PARAM_GAP_EXTEND);
msa2profile.push_back(&PARAM_THREADS);
msa2profile.push_back(&PARAM_COMPRESSED);
msa2profile.push_back(&PARAM_V);
// profile2pssm
profile2pssm.push_back(&PARAM_SUB_MAT);
profile2pssm.push_back(&PARAM_MAX_SEQ_LEN);
profile2pssm.push_back(&PARAM_NO_COMP_BIAS_CORR);
profile2pssm.push_back(&PARAM_DB_OUTPUT);
profile2pssm.push_back(&PARAM_THREADS);
profile2pssm.push_back(&PARAM_COMPRESSED);
profile2pssm.push_back(&PARAM_V);
// profile2seq (profile2consensus + profile2repseq)
profile2seq.push_back(&PARAM_SUB_MAT);
profile2seq.push_back(&PARAM_MAX_SEQ_LEN);
profile2seq.push_back(&PARAM_THREADS);
profile2seq.push_back(&PARAM_COMPRESSED);
profile2seq.push_back(&PARAM_V);
// profile2cs
profile2cs.push_back(&PARAM_SUB_MAT);
// profile2cs.push_back(&PARAM_ALPH_SIZE);
profile2cs.push_back(&PARAM_PCA);
profile2cs.push_back(&PARAM_PCB);
profile2cs.push_back(&PARAM_THREADS);
profile2cs.push_back(&PARAM_COMPRESSED);
profile2cs.push_back(&PARAM_V);
// extract orf
extractorfs.push_back(&PARAM_ORF_MIN_LENGTH);
extractorfs.push_back(&PARAM_ORF_MAX_LENGTH);
extractorfs.push_back(&PARAM_ORF_MAX_GAP);
extractorfs.push_back(&PARAM_CONTIG_START_MODE);
extractorfs.push_back(&PARAM_CONTIG_END_MODE);
extractorfs.push_back(&PARAM_ORF_START_MODE);
extractorfs.push_back(&PARAM_ORF_FORWARD_FRAMES);
extractorfs.push_back(&PARAM_ORF_REVERSE_FRAMES);
extractorfs.push_back(&PARAM_TRANSLATION_TABLE);
extractorfs.push_back(&PARAM_TRANSLATE);
extractorfs.push_back(&PARAM_USE_ALL_TABLE_STARTS);
extractorfs.push_back(&PARAM_ID_OFFSET);
extractorfs.push_back(&PARAM_CREATE_LOOKUP);
extractorfs.push_back(&PARAM_THREADS);
extractorfs.push_back(&PARAM_COMPRESSED);
extractorfs.push_back(&PARAM_V);
// extract frames
extractframes.push_back(&PARAM_ORF_FORWARD_FRAMES);
extractframes.push_back(&PARAM_ORF_REVERSE_FRAMES);
extractframes.push_back(&PARAM_CREATE_LOOKUP);
extractframes.push_back(&PARAM_THREADS);
extractframes.push_back(&PARAM_COMPRESSED);
extractframes.push_back(&PARAM_V);
// orf to contig
orftocontig.push_back(&PARAM_THREADS);
orftocontig.push_back(&PARAM_COMPRESSED);
orftocontig.push_back(&PARAM_V);
// reverseseq
reverseseq.push_back(&PARAM_THREADS);
reverseseq.push_back(&PARAM_COMPRESSED);
reverseseq.push_back(&PARAM_V);
// splitsequence
splitsequence.push_back(&PARAM_MAX_SEQ_LEN);
splitsequence.push_back(&PARAM_SEQUENCE_OVERLAP);
splitsequence.push_back(&PARAM_SEQUENCE_SPLIT_MODE);
splitsequence.push_back(&PARAM_CREATE_LOOKUP);
splitsequence.push_back(&PARAM_THREADS);
splitsequence.push_back(&PARAM_COMPRESSED);
splitsequence.push_back(&PARAM_V);
// splitdb
splitdb.push_back(&PARAM_SPLIT);
splitdb.push_back(&PARAM_SPLIT_AMINOACID);
splitdb.push_back(&PARAM_COMPRESSED);
splitdb.push_back(&PARAM_V);
// create index
indexdb.push_back(&PARAM_SEED_SUB_MAT);
indexdb.push_back(&PARAM_K);
indexdb.push_back(&PARAM_ALPH_SIZE);
indexdb.push_back(&PARAM_NO_COMP_BIAS_CORR);
indexdb.push_back(&PARAM_MAX_SEQ_LEN);
indexdb.push_back(&PARAM_MAX_SEQS);
indexdb.push_back(&PARAM_MASK_RESIDUES);
indexdb.push_back(&PARAM_MASK_LOWER_CASE);
indexdb.push_back(&PARAM_SPACED_KMER_MODE);
indexdb.push_back(&PARAM_SPACED_KMER_PATTERN);
indexdb.push_back(&PARAM_S);
indexdb.push_back(&PARAM_K_SCORE);
indexdb.push_back(&PARAM_CHECK_COMPATIBLE);
indexdb.push_back(&PARAM_SEARCH_TYPE);
indexdb.push_back(&PARAM_SPLIT);
indexdb.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
indexdb.push_back(&PARAM_THREADS);
indexdb.push_back(&PARAM_V);
// create kmer index
kmerindexdb.push_back(&PARAM_SEED_SUB_MAT);
kmerindexdb.push_back(&PARAM_K);
kmerindexdb.push_back(&PARAM_HASH_SHIFT);
kmerindexdb.push_back(&PARAM_KMER_PER_SEQ);
kmerindexdb.push_back(&PARAM_MIN_SEQ_ID);
kmerindexdb.push_back(&PARAM_ADJUST_KMER_LEN);
kmerindexdb.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
kmerindexdb.push_back(&PARAM_IGNORE_MULTI_KMER);
kmerindexdb.push_back(&PARAM_ALPH_SIZE);
kmerindexdb.push_back(&PARAM_MAX_SEQ_LEN);
kmerindexdb.push_back(&PARAM_MASK_RESIDUES);
kmerindexdb.push_back(&PARAM_MASK_LOWER_CASE);
kmerindexdb.push_back(&PARAM_CHECK_COMPATIBLE);
kmerindexdb.push_back(&PARAM_SEARCH_TYPE);
kmerindexdb.push_back(&PARAM_SPACED_KMER_MODE);
kmerindexdb.push_back(&PARAM_SPACED_KMER_PATTERN);
kmerindexdb.push_back(&PARAM_THREADS);
kmerindexdb.push_back(&PARAM_V);
// create db
createdb.push_back(&PARAM_MAX_SEQ_LEN);
createdb.push_back(&PARAM_DONT_SPLIT_SEQ_BY_LEN);
createdb.push_back(&PARAM_DB_TYPE);
createdb.push_back(&PARAM_DONT_SHUFFLE);
createdb.push_back(&PARAM_ID_OFFSET);
createdb.push_back(&PARAM_COMPRESSED);
createdb.push_back(&PARAM_V);
// convert2fasta
convert2fasta.push_back(&PARAM_USE_HEADER_FILE);
convert2fasta.push_back(&PARAM_V);
// result2flat
result2flat.push_back(&PARAM_USE_HEADER);
result2flat.push_back(&PARAM_V);
// gff2db
gff2ffindex.push_back(&PARAM_GFF_TYPE);
gff2ffindex.push_back(&PARAM_ID_OFFSET);
gff2ffindex.push_back(&PARAM_V);
// translate nucleotide
translatenucs.push_back(&PARAM_TRANSLATION_TABLE);
translatenucs.push_back(&PARAM_ADD_ORF_STOP);
translatenucs.push_back(&PARAM_V);
translatenucs.push_back(&PARAM_COMPRESSED);
translatenucs.push_back(&PARAM_THREADS);
// createseqfiledb
createseqfiledb.push_back(&PARAM_MIN_SEQUENCES);
createseqfiledb.push_back(&PARAM_MAX_SEQUENCES);
createseqfiledb.push_back(&PARAM_HH_FORMAT);
createseqfiledb.push_back(&PARAM_THREADS);
createseqfiledb.push_back(&PARAM_COMPRESSED);
createseqfiledb.push_back(&PARAM_V);
// filterDb
filterDb.push_back(&PARAM_FILTER_EXPRESSION);
filterDb.push_back(&PARAM_FILTER_COL);
filterDb.push_back(&PARAM_COLUMN_TO_TAKE);
filterDb.push_back(&PARAM_FILTER_REGEX);
filterDb.push_back(&PARAM_FILTER_POS);
filterDb.push_back(&PARAM_FILTER_FILE);
filterDb.push_back(&PARAM_BEATS_FIRST);
filterDb.push_back(&PARAM_MAPPING_FILE);
filterDb.push_back(&PARAM_THREADS);
filterDb.push_back(&PARAM_V);
filterDb.push_back(&PARAM_TRIM_TO_ONE_COL);
filterDb.push_back(&PARAM_EXTRACT_LINES);
filterDb.push_back(&PARAM_COMP_OPERATOR);
filterDb.push_back(&PARAM_COMP_VALUE);
filterDb.push_back(&PARAM_SORT_ENTRIES);
filterDb.push_back(&PARAM_INCLUDE_IDENTITY);
filterDb.push_back(&PARAM_JOIN_DB);
filterDb.push_back(&PARAM_COMPUTE_POSITIONS);
filterDb.push_back(&PARAM_COMPRESSED);
filterDb.push_back(&PARAM_TRANSITIVE_REPLACE);
// besthitperset
besthitbyset.push_back(&PARAM_SIMPLE_BEST_HIT);
besthitbyset.push_back(&PARAM_THREADS);
besthitbyset.push_back(&PARAM_COMPRESSED);
besthitbyset.push_back(&PARAM_V);
// combinepvalperset
combinepvalbyset.push_back(&PARAM_ALPHA);
combinepvalbyset.push_back(&PARAM_AGGREGATION_MODE);
// combinepvalperset.push_back(&PARAM_SHORT_OUTPUT);
combinepvalbyset.push_back(&PARAM_THREADS);
combinepvalbyset.push_back(&PARAM_COMPRESSED);
combinepvalbyset.push_back(&PARAM_V);
// offsetalignment
offsetalignment.push_back(&PARAM_CHAIN_ALIGNMENT);
offsetalignment.push_back(&PARAM_MERGE_QUERY);
offsetalignment.push_back(&PARAM_SEARCH_TYPE);
offsetalignment.push_back(&PARAM_THREADS);
offsetalignment.push_back(&PARAM_COMPRESSED);
offsetalignment.push_back(&PARAM_PRELOAD_MODE);
offsetalignment.push_back(&PARAM_V);
// tsv2db
tsv2db.push_back(&PARAM_INCLUDE_IDENTITY);
tsv2db.push_back(&PARAM_OUTPUT_DBTYPE);
tsv2db.push_back(&PARAM_COMPRESSED);
tsv2db.push_back(&PARAM_V);
// swap results
swapresult.push_back(&PARAM_SUB_MAT);
swapresult.push_back(&PARAM_E);
swapresult.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
swapresult.push_back(&PARAM_GAP_OPEN);
swapresult.push_back(&PARAM_GAP_EXTEND);
swapresult.push_back(&PARAM_THREADS);
swapresult.push_back(&PARAM_COMPRESSED);
swapresult.push_back(&PARAM_PRELOAD_MODE);
swapresult.push_back(&PARAM_V);
// swapdb
swapdb.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
swapdb.push_back(&PARAM_THREADS);
swapdb.push_back(&PARAM_COMPRESSED);
swapdb.push_back(&PARAM_V);
// subtractdbs
subtractdbs.push_back(&PARAM_THREADS);
subtractdbs.push_back(&PARAM_E_PROFILE);
subtractdbs.push_back(&PARAM_E);
subtractdbs.push_back(&PARAM_COMPRESSED);
subtractdbs.push_back(&PARAM_V);
// clusthash
clusthash.push_back(&PARAM_SUB_MAT);
clusthash.push_back(&PARAM_ALPH_SIZE);
clusthash.push_back(&PARAM_MIN_SEQ_ID);
clusthash.push_back(&PARAM_MAX_SEQ_LEN);
clusthash.push_back(&PARAM_THREADS);
clusthash.push_back(&PARAM_COMPRESSED);
clusthash.push_back(&PARAM_V);
// kmermatcher
kmermatcher.push_back(&PARAM_SUB_MAT);
kmermatcher.push_back(&PARAM_ALPH_SIZE);
kmermatcher.push_back(&PARAM_MIN_SEQ_ID);
kmermatcher.push_back(&PARAM_KMER_PER_SEQ);
kmermatcher.push_back(&PARAM_KMER_PER_SEQ_SCALE);
kmermatcher.push_back(&PARAM_ADJUST_KMER_LEN);
kmermatcher.push_back(&PARAM_MASK_RESIDUES);
kmermatcher.push_back(&PARAM_MASK_LOWER_CASE);
kmermatcher.push_back(&PARAM_COV_MODE);
kmermatcher.push_back(&PARAM_K);
kmermatcher.push_back(&PARAM_C);
kmermatcher.push_back(&PARAM_MAX_SEQ_LEN);
kmermatcher.push_back(&PARAM_HASH_SHIFT);
kmermatcher.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
kmermatcher.push_back(&PARAM_INCLUDE_ONLY_EXTENDABLE);
kmermatcher.push_back(&PARAM_IGNORE_MULTI_KMER);
kmermatcher.push_back(&PARAM_THREADS);
kmermatcher.push_back(&PARAM_COMPRESSED);
kmermatcher.push_back(&PARAM_V);
// kmersearch
kmersearch.push_back(&PARAM_SEED_SUB_MAT);
kmersearch.push_back(&PARAM_KMER_PER_SEQ);
kmersearch.push_back(&PARAM_MASK_RESIDUES);
kmersearch.push_back(&PARAM_MASK_LOWER_CASE);
kmersearch.push_back(&PARAM_COV_MODE);
kmersearch.push_back(&PARAM_C);
kmersearch.push_back(&PARAM_MAX_SEQ_LEN);
kmersearch.push_back(&PARAM_PICK_N_SIMILAR);
kmersearch.push_back(&PARAM_SPLIT_MEMORY_LIMIT);
kmersearch.push_back(&PARAM_THREADS);
kmersearch.push_back(&PARAM_COMPRESSED);
kmersearch.push_back(&PARAM_V);
// countkmer
countkmer.push_back(&PARAM_K);
countkmer.push_back(&PARAM_SPACED_KMER_MODE);
countkmer.push_back(&PARAM_SPACED_KMER_PATTERN);
countkmer.push_back(&PARAM_THREADS);
// mergedbs
mergedbs.push_back(&PARAM_MERGE_PREFIXES);
mergedbs.push_back(&PARAM_COMPRESSED);
mergedbs.push_back(&PARAM_V);
// summarizeheaders
summarizeheaders.push_back(&PARAM_SUMMARY_PREFIX);
summarizeheaders.push_back(&PARAM_HEADER_TYPE);
summarizeheaders.push_back(&PARAM_THREADS);
summarizeheaders.push_back(&PARAM_COMPRESSED);
summarizeheaders.push_back(&PARAM_V);
// diff
diff.push_back(&PARAM_USESEQID);
diff.push_back(&PARAM_THREADS);
diff.push_back(&PARAM_COMPRESSED);
diff.push_back(&PARAM_V);
// prefixid
prefixid.push_back(&PARAM_PREFIX);
prefixid.push_back(&PARAM_MAPPING_FILE);
prefixid.push_back(&PARAM_TSV);
prefixid.push_back(&PARAM_THREADS);
prefixid.push_back(&PARAM_COMPRESSED);
prefixid.push_back(&PARAM_V);
// summarizeresult
summarizeresult.push_back(&PARAM_ADD_BACKTRACE);
summarizeresult.push_back(&PARAM_OVERLAP);
summarizeresult.push_back(&PARAM_C);
summarizeresult.push_back(&PARAM_THREADS);
summarizeresult.push_back(&PARAM_COMPRESSED);
summarizeresult.push_back(&PARAM_V);
// summarizetabs
summarizetabs.push_back(&PARAM_OVERLAP);
summarizetabs.push_back(&PARAM_E);
summarizetabs.push_back(&PARAM_C);
summarizetabs.push_back(&PARAM_THREADS);
summarizetabs.push_back(&PARAM_COMPRESSED);
summarizetabs.push_back(&PARAM_V);
// extractdomains
extractdomains.push_back(&PARAM_SUB_MAT);
extractdomains.push_back(&PARAM_MSA_TYPE);
extractdomains.push_back(&PARAM_E);
extractdomains.push_back(&PARAM_C);
extractdomains.push_back(&PARAM_THREADS);
extractdomains.push_back(&PARAM_COMPRESSED);
extractdomains.push_back(&PARAM_V);
// concatdbs
concatdbs.push_back(&PARAM_COMPRESSED);
concatdbs.push_back(&PARAM_PRESERVEKEYS);
concatdbs.push_back(&PARAM_TAKE_LARGER_ENTRY);
concatdbs.push_back(&PARAM_THREADS);
concatdbs.push_back(&PARAM_V);
// extractalignedregion
extractalignedregion.push_back(&PARAM_COMPRESSED);
extractalignedregion.push_back(&PARAM_EXTRACT_MODE);
extractalignedregion.push_back(&PARAM_PRELOAD_MODE);
extractalignedregion.push_back(&PARAM_THREADS);
extractalignedregion.push_back(&PARAM_V);
// convertkb
convertkb.push_back(&PARAM_COMPRESSED);
convertkb.push_back(&PARAM_MAPPING_FILE);
convertkb.push_back(&PARAM_KB_COLUMNS);
convertkb.push_back(&PARAM_V);
// filtertaxdb
filtertaxdb.push_back(&PARAM_COMPRESSED);
filtertaxdb.push_back(&PARAM_TAXON_LIST);
// lca
lca.push_back(&PARAM_COMPRESSED);
lca.push_back(&PARAM_LCA_RANKS);
lca.push_back(&PARAM_BLACKLIST);
lca.push_back(&PARAM_TAXON_ADD_LINEAGE);
lca.push_back(&PARAM_THREADS);
lca.push_back(&PARAM_V);
// createsubdb
createsubdb.push_back(&PARAM_SUBDB_MODE);
createsubdb.push_back(&PARAM_V);
// createtaxdb
createtaxdb.push_back(&PARAM_NCBI_TAX_DUMP);
createtaxdb.push_back(&PARAM_TAX_MAPPING_FILE);
createtaxdb.push_back(&PARAM_THREADS);
createtaxdb.push_back(&PARAM_V);
// addtaxonomy
addtaxonomy.push_back(&PARAM_PICK_ID_FROM);
addtaxonomy.push_back(&PARAM_COMPRESSED);
addtaxonomy.push_back(&PARAM_TAXON_ADD_LINEAGE);
addtaxonomy.push_back(&PARAM_LCA_RANKS);
addtaxonomy.push_back(&PARAM_THREADS);
addtaxonomy.push_back(&PARAM_V);
// taxonomyreport
taxonomyreport.push_back(&PARAM_REPORT_MODE);
taxonomyreport.push_back(&PARAM_THREADS);
taxonomyreport.push_back(&PARAM_V);
// view
view.push_back(&PARAM_ID_LIST);
view.push_back(&PARAM_IDX_ENTRY_TYPE);
view.push_back(&PARAM_V);
// expandaln
expandaln.push_back(&PARAM_COMPRESSED);
expandaln.push_back(&PARAM_EXPANSION_MODE);
expandaln.push_back(&PARAM_SUB_MAT);
expandaln.push_back(&PARAM_GAP_OPEN);
expandaln.push_back(&PARAM_GAP_EXTEND);
expandaln.push_back(&PARAM_MAX_SEQ_LEN);
expandaln.push_back(&PARAM_SCORE_BIAS);
expandaln.push_back(&PARAM_NO_COMP_BIAS_CORR);
expandaln.push_back(&PARAM_E);
expandaln.push_back(&PARAM_MIN_SEQ_ID);
expandaln.push_back(&PARAM_SEQ_ID_MODE);
expandaln.push_back(&PARAM_C);
expandaln.push_back(&PARAM_COV_MODE);
expandaln.push_back(&PARAM_PCA);
expandaln.push_back(&PARAM_PCB);
expandaln.push_back(&PARAM_THREADS);
expandaln.push_back(&PARAM_V);
sortresult.push_back(&PARAM_COMPRESSED);
sortresult.push_back(&PARAM_THREADS);
sortresult.push_back(&PARAM_V);
// WORKFLOWS
searchworkflow = combineList(align, prefilter);
searchworkflow = combineList(searchworkflow, rescorediagonal);
searchworkflow = combineList(searchworkflow, result2profile);
searchworkflow = combineList(searchworkflow, extractorfs);
searchworkflow = combineList(searchworkflow, translatenucs);
searchworkflow = combineList(searchworkflow, offsetalignment);
// needed for slice search, however all its parameters are already present in searchworkflow
// searchworkflow = combineList(searchworkflow, sortresult);
searchworkflow.push_back(&PARAM_NUM_ITERATIONS);
searchworkflow.push_back(&PARAM_START_SENS);
searchworkflow.push_back(&PARAM_SENS_STEPS);
searchworkflow.push_back(&PARAM_SLICE_SEARCH);
searchworkflow.push_back(&PARAM_STRAND);
searchworkflow.push_back(&PARAM_DISK_SPACE_LIMIT);
searchworkflow.push_back(&PARAM_RUNNER);
searchworkflow.push_back(&PARAM_REUSELATEST);
searchworkflow.push_back(&PARAM_REMOVE_TMP_FILES);
linsearchworkflow = combineList(align, kmersearch);
linsearchworkflow = combineList(linsearchworkflow, swapresult);
linsearchworkflow = combineList(linsearchworkflow, extractorfs);
linsearchworkflow = combineList(linsearchworkflow, translatenucs);
linsearchworkflow = combineList(linsearchworkflow, offsetalignment);
linsearchworkflow.push_back(&PARAM_RUNNER);
linsearchworkflow.push_back(&PARAM_REUSELATEST);
linsearchworkflow.push_back(&PARAM_REMOVE_TMP_FILES);
// easylinsearch
easylinsearchworkflow = combineList(createlinindex, linsearchworkflow);
easylinsearchworkflow = combineList(easylinsearchworkflow, convertalignments);
// easysearch
easysearchworkflow = combineList(searchworkflow, convertalignments);
easysearchworkflow = combineList(easysearchworkflow, summarizeresult);
easysearchworkflow = combineList(easysearchworkflow, createdb);
easysearchworkflow.push_back(&PARAM_GREEDY_BEST_HITS);
// createindex workflow
createindex = combineList(indexdb, extractorfs);
createindex = combineList(createindex, translatenucs);
createindex = combineList(createindex, splitsequence);