Skip to content
Browse files

Cleaned up help message for many subprograms, mostly by adding default parameters.

Changed -t,--trim option in sga assemble to -x,--cut-terminal to avoid confusion with the -t,--threads option used elsewhere.
  • Loading branch information...
1 parent 6337c7b commit 566598bff4e6b81df91812b8d1ce1d39b169998e @jts committed Oct 15, 2010
View
2 src/SGA/SGACommon.h
@@ -25,7 +25,7 @@
#define RSAI_EXT ".rsai"
// Default values
-#define DEFAULT_MIN_OVERLAP 25
+#define DEFAULT_MIN_OVERLAP 45
#define DEFAULT_EXTRACT_LEN 100
#endif
View
41 src/SGA/assemble.cpp
@@ -37,53 +37,51 @@ static const char *ASSEMBLE_USAGE_MESSAGE =
" -o, --out=FILE write the contigs to FILE (default: contigs.fa)\n"
" -m, --min-overlap=LEN only use overlaps of at least LEN. This can be used to filter\n"
" the overlap set so that the overlap step only needs to be run once.\n"
-" -b, --bubble=N perform N bubble removal steps\n"
-" -s, --smooth perform variation smoothing algorithm\n"
-" -t, --trim=N trim terminal branches using N rounds\n"
-" -c, --coverage=N remove edges that have junction-sequence coverage less than N. This can be used\n"
-" to detect and remove chimeric reads\n"
+" -b, --bubble=N perform N bubble removal steps (default: 3)\n"
+//" -s, --smooth perform variation smoothing algorithm\n"
+" -x, --cut-terminal=N cut off terminal branches in N rounds (default: 10)\n"
+" -c, --coverage=N remove edges that have junction-sequence coverage less than N. This is used\n"
+" to detect and remove chimeric reads (default: not performed)\n"
" -r,--resolve-small=LEN resolve small repeats using spanning overlaps when the difference between the shortest\n"
-" and longest overlap is greater than LEN\n"
+" and longest overlap is greater than LEN (default: not performed)\n"
" -a, --asqg-outfile=FILE write the final graph to FILE\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
namespace opt
{
static unsigned int verbose;
static std::string asqgFile;
- static std::string prefix;
static std::string outFile;
static std::string debugFile;
static std::string asqgOutfile;
static unsigned int minOverlap;
static bool bEdgeStats = false;
static bool bSmoothGraph = false;
static int resolveSmallRepeatLen = -1;
- static int numTrimRounds = 0;
- static int numBubbleRounds = 0;
+ static int numTrimRounds = 10;
+ static int numBubbleRounds = 3;
static int coverageCutoff = 0;
static bool bValidate;
- static bool bExact = false;
+ static bool bExact = true;
}
-static const char* shortopts = "p:o:m:d:t:b:a:c:r:sv";
+static const char* shortopts = "p:o:m:d:b:a:c:r:x:sv";
-enum { OPT_HELP = 1, OPT_VERSION, OPT_VALIDATE };
+enum { OPT_HELP = 1, OPT_VERSION, OPT_VALIDATE, OPT_EDGESTATS, OPT_EXACT };
static const struct option longopts[] = {
{ "verbose", no_argument, NULL, 'v' },
- { "prefix", required_argument, NULL, 'p' },
{ "out", required_argument, NULL, 'o' },
{ "min-overlap", required_argument, NULL, 'm' },
{ "debug-file", required_argument, NULL, 'd' },
{ "bubble", required_argument, NULL, 'b' },
- { "trim", required_argument, NULL, 't' },
+ { "cut-terminal", required_argument, NULL, 'x' },
{ "asqg-outfile", required_argument, NULL, 'a' },
{ "resolve-small", required_argument, NULL, 'r' },
{ "coverage", required_argument, NULL, 'c' },
{ "smooth", no_argument, NULL, 's' },
- { "edge-stats", no_argument, NULL, 'x' },
- { "exact", no_argument, NULL, 'e' },
+ { "edge-stats", no_argument, NULL, OPT_EDGESTATS },
+ { "exact", no_argument, NULL, OPT_EXACT },
{ "help", no_argument, NULL, OPT_HELP },
{ "version", no_argument, NULL, OPT_VERSION },
{ "validate", no_argument, NULL, OPT_VALIDATE},
@@ -273,8 +271,8 @@ void assemble()
pGraph->renameVertices("contig-");
// Write the results
- pGraph->writeDot("final.dot");
- pGraph->writeASQG("final.asqg");
+ //pGraph->writeDot("final.dot");
+ pGraph->writeASQG("final-graph.asqg.gz");
SGFastaVisitor av(opt::outFile);
pGraph->visit(av);
if(!opt::asqgOutfile.empty())
@@ -299,20 +297,19 @@ void parseAssembleOptions(int argc, char** argv)
std::istringstream arg(optarg != NULL ? optarg : "");
switch (c)
{
- case 'p': arg >> opt::prefix; break;
case 'o': arg >> opt::outFile; break;
case 'm': arg >> opt::minOverlap; break;
case 'd': arg >> opt::debugFile; break;
case '?': die = true; break;
case 'v': opt::verbose++; break;
case 'b': arg >> opt::numBubbleRounds; break;
case 's': opt::bSmoothGraph = true; break;
- case 't': arg >> opt::numTrimRounds; break;
+ case 'x': arg >> opt::numTrimRounds; break;
case 'a': arg >> opt::asqgOutfile; break;
case 'c': arg >> opt::coverageCutoff; break;
case 'r': arg >> opt::resolveSmallRepeatLen; break;
- case 'x': opt::bEdgeStats = true; break;
- case 'e': opt::bExact = true; break;
+ case OPT_EXACT: opt::bExact = true; break;
+ case OPT_EDGESTATS: opt::bEdgeStats = true; break;
case OPT_VALIDATE: opt::bValidate = true; break;
case OPT_HELP:
std::cout << ASSEMBLE_USAGE_MESSAGE;
View
6 src/SGA/connect.cpp
@@ -44,8 +44,8 @@ static const char *CONNECT_USAGE_MESSAGE =
" --help display this help and exit\n"
" -v, --verbose display verbose output\n"
" -t, --threads=NUM use NUM threads to compute the overlaps (default: 1)\n"
-" -e, --error-rate the maximum error rate allowed between two sequences to consider them aligned\n"
-" -m, --min-overlap=LEN minimum overlap required between two reads\n"
+" -e, --error-rate the maximum error rate allowed between two sequences to consider them aligned (default: exact matches only)\n"
+" -m, --min-overlap=LEN minimum overlap required between two reads (default: 45)\n"
" -p, --prefix=PREFIX use PREFIX instead of the prefix of the reads filename for the input/output files\n"
" -o, --outfile=FILE write the connected reads to FILE\n"
" -l, --seed-length=LEN force the seed length to be LEN. By default, the seed length in the overlap step\n"
@@ -67,7 +67,7 @@ namespace opt
static std::string readsFile;
static std::string outFile;
- static double errorRate;
+ static double errorRate = 0.0f;
static unsigned int minOverlap = DEFAULT_MIN_OVERLAP;
static int seedLength = 0;
static int seedStride = 0;
View
18 src/SGA/correct.cpp
@@ -42,20 +42,20 @@ static const char *CORRECT_USAGE_MESSAGE =
" --help display this help and exit\n"
" -v, --verbose display verbose output\n"
" -p, --prefix=PREFIX use PREFIX for the names of the index files (default: prefix of the input file)\n"
-" -o, --outfile=FILE write the corrected reads to FILE\n"
+" -o, --outfile=FILE write the corrected reads to FILE (default: READSFILE.ec.fa)\n"
" -t, --threads=NUM use NUM threads to compute the overlaps (default: 1)\n"
" --discard detect and discard low-quality reads\n"
" -d, --sample-rate=N use occurrence array sample rate of N in the FM-index. Higher values use significantly\n"
-" less memory at the cost of higher runtime. This value must be a power of 2. Default is 128\n"
+" less memory at the cost of higher runtime. This value must be a power of 2 (default: 128)\n"
" -a, --algorithm=STR specify the correction algorithm to use. STR must be one of hybrid, kmer, overlap.\n"
" The default algorithm is hybrid which first attempts kmer correction, then performs\n"
" overlap correction on the remaining uncorrected reads.\n"
" --metrics=FILE collect error correction metrics (error rate by position in read, etc) and write\n"
" them to FILE\n"
"\nOverlap correction parameters:\n"
-" -e, --error-rate the maximum error rate allowed between two sequences to consider them overlapped\n"
-" -m, --min-overlap=LEN minimum overlap required between two reads\n"
-" -c, --conflict=INT use INT as the threshold to detect a conflicted base in the multi-overlap\n"
+" -e, --error-rate the maximum error rate allowed between two sequences to consider them overlapped (default: 0.04)\n"
+" -m, --min-overlap=LEN minimum overlap required between two reads (default: 45)\n"
+" -c, --conflict=INT use INT as the threshold to detect a conflicted base in the multi-overlap (default: 5)\n"
" -l, --seed-length=LEN force the seed length to be LEN. By default, the seed length in the overlap step\n"
" is calculated to guarantee all overlaps with --error-rate differences are found.\n"
" This option removes the guarantee but will be (much) faster. As SGA can tolerate some\n"
@@ -65,10 +65,10 @@ static const char *CORRECT_USAGE_MESSAGE =
" -b, --branch-cutoff=N stop the overlap search at N branches. This parameter is used to control the search time for\n"
" highly-repetitive reads. If the number of branches exceeds N, the search stops and the read\n"
" will not be corrected. This is not enabled by default.\n"
-" -r, --rounds=NUM iteratively correct reads up to a maximum of NUM rounds. Default: 1 round of correction\n"
+" -r, --rounds=NUM iteratively correct reads up to a maximum of NUM rounds (default: 1)\n"
"\nKmer correction parameters:\n"
-" -k, --kmer-size=N The length of the kmer to use.\n"
-" -x, --kmer-threshold=N Attempt to correct kmers that are seen less than N times.\n"
+" -k, --kmer-size=N The length of the kmer to use. (default: 41)\n"
+" -x, --kmer-threshold=N Attempt to correct kmers that are seen less than N times. (default: 3)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
static const char* PROGRAM_IDENT =
@@ -86,7 +86,7 @@ namespace opt
static std::string metricsFile;
static int sampleRate = BWT::DEFAULT_SAMPLE_RATE;
- static double errorRate;
+ static double errorRate = 0.04;
static unsigned int minOverlap = DEFAULT_MIN_OVERLAP;
static int seedLength = 0;
static int seedStride = 0;
View
2 src/SGA/merge.cpp
@@ -36,7 +36,7 @@ static const char *MERGE_USAGE_MESSAGE =
" -v, --verbose display verbose output\n"
" --help display this help and exit\n"
" -t, --threads=NUM use NUM threads to merge the indices (default: 1)\n"
-" -p, --prefix=PREFIX write final index to file using PREFIX (the default is to concatenate the input filenames)\n"
+" -p, --prefix=PREFIX write final index to files starting with PREFIX (the default is to concatenate the input filenames)\n"
" -r, --remove remove the original BWT, SAI and reads files after the merge\n"
" -g, --gap-array=N use N bits of storage for each element of the gap array. Acceptable values are 4,8,16 or 32. Lower\n"
" values can substantially reduce the amount of memory required at the cost of less predictable memory usage.\n"
View
6 src/SGA/overlap.cpp
@@ -61,8 +61,8 @@ static const char *OVERLAP_USAGE_MESSAGE =
" --help display this help and exit\n"
" -v, --verbose display verbose output\n"
" -t, --threads=NUM use NUM worker threads to compute the overlaps (default: no threading)\n"
-" -e, --error-rate the maximum error rate allowed to consider two sequences aligned\n"
-" -m, --min-overlap=LEN minimum overlap required between two reads\n"
+" -e, --error-rate the maximum error rate allowed to consider two sequences aligned (default: exact matches only)\n"
+" -m, --min-overlap=LEN minimum overlap required between two reads (default: 45)\n"
" -p, --prefix=PREFIX use PREFIX instead of the prefix of the reads filename for the input/output files\n"
" -x, --exhaustive output all overlaps, including transitive edges\n"
" -l, --seed-length=LEN force the seed length to be LEN. By default, the seed length in the overlap step\n"
@@ -72,7 +72,7 @@ static const char *OVERLAP_USAGE_MESSAGE =
" -s, --seed-stride=LEN force the seed stride to be LEN. This parameter will be ignored unless --seed-length\n"
" is specified (see above). This parameter defaults to the same value as --seed-length\n"
" -d, --sample-rate=N sample the symbol counts every N symbols in the FM-index. Higher values use significantly\n"
-" less memory at the cost of higher runtime. This value must be a power of 2. Default is 128\n"
+" less memory at the cost of higher runtime. This value must be a power of 2 (default: 128)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
static const char* PROGRAM_IDENT =
View
4 src/SGA/oview.cpp
@@ -38,8 +38,8 @@ static const char *OVIEW_USAGE_MESSAGE =
" -v, --verbose display verbose output\n"
" --help display this help and exit\n"
" -i, --id=ID only show overlaps for read with ID\n"
-" -m, --max-overhang=D only show D overhanging bases of the alignments (default=6)\n"
-" -d, --default-padding=D pad the overlap lines with D characters (default=20)\n"
+" -m, --max-overhang=D only show D overhanging bases of the alignments (default: 6)\n"
+" -d, --default-padding=D pad the overlap lines with D characters (default: 20)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
namespace opt
View
13 src/SGA/preprocess.cpp
@@ -38,8 +38,8 @@ static const char *PREPROCESS_USAGE_MESSAGE =
//" --quality-scale=STR Specify the quality scaling to use. This parameter is mandatory and acceptable\n"
//" strings are none, sanger, illumina1.3, illumina1.5. It is extremely important\n"
//" to set this correctly.\n"
-" -o, --out=FILE write the reads to FILE (default: basename(READS1).pp.fa)\n"
-" -p, --pe-mode=INT 0 - do not treat reads as paired\n"
+" -o, --out=FILE write the reads to FILE (default: stdout)\n"
+" -p, --pe-mode=INT 0 - do not treat reads as paired (default)\n"
" 1 - reads are paired with the first read in READS1 and the second\n"
" read in READS2. The paired reads will be interleaved in the output file\n"
" -q, --quality-trim=INT perform Heng Li's BWA quality trim algorithm. \n"
@@ -50,13 +50,12 @@ static const char *PREPROCESS_USAGE_MESSAGE =
" Bases with phred score <= 3 are considered low quality. Default: no filtering.\n"
" The filtering is applied after trimming so bases removed are not counted.\n"
" -m, --min-length=INT discard sequences that are shorter than INT\n"
-" this is most useful when used in conjunction with --quality-trim\n"
+" this is most useful when used in conjunction with --quality-trim. Default: 40\n"
" -h, --hard-clip=INT clip all reads to be length INT. In most cases it is better to use\n"
" the soft clip (quality-trim) option.\n"
-" --permute-ambiguous If an ambiguous basecall is found randomly change it to one of possible bases\n"
-" to allow the read to be kept. For example N will be changed to one of [ACGT],\n"
-" M will be changed to [AC] and so on.\n"
-" The quality value (if present) is not changed.\n"
+" --permute-ambiguous Randomly change ambiguous base calls to one of possible bases.\n"
+" For example M will be changed to A or C. If this option is not specified, the\n"
+" entire read will be discarded.\n"
" -s, --sample=FLOAT Randomly sample reads or pairs with acceptance probability FLOAT.\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
View
6 src/SGA/rmdup.cpp
@@ -43,10 +43,10 @@ static const char *RMDUP_USAGE_MESSAGE =
" --help display this help and exit\n"
" -o, --out=FILE write the output to FILE (default: READFILE.rmdup.fa)\n"
" -p, --prefix=PREFIX use PREFIX instead of the prefix of the reads filename for the input/output files\n"
-" -e, --error-rate the maximum error rate allowed to consider two sequences identical\n"
-" -t, --threads=NUM use NUM computation threads (default: 1)\n"
+" -e, --error-rate the maximum error rate allowed to consider two sequences identical (default: exact matches required)\n"
+" -t, --threads=N use N threads (default: 1)\n"
" -d, --sample-rate=N sample the symbol counts every N symbols in the FM-index. Higher values use significantly\n"
-" less memory at the cost of higher runtime. This value must be a power of 2. Default is 128\n"
+" less memory at the cost of higher runtime. This value must be a power of 2 (default: 128)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
static const char* PROGRAM_IDENT =
View
8 src/SGA/scaffold.cpp
@@ -26,12 +26,12 @@ static const char *SCAFFOLD_USAGE_MESSAGE =
"\n"
" --help display this help and exit\n"
" -v, --verbose display verbose output\n"
-" -m, --min-length=N only use contigs at least N bp in length to build scaffolds.\n"
-" -a, --astatistic-file=FILE load Myers' A-statistic values from FILE. This is used to\n"
+" -m, --min-length=N only use contigs at least N bp in length to build scaffolds (default: no minimum).\n"
+" -a, --astatistic-file=FILE load Myers' A-statistic values from FILE. This is used to\n"
" determine unique and repetitive contigs with the -u/--unique-astat\n"
" and -r/--repeat-astat parameters\n"
-" -u, --unique-astat=FLOAT Contigs with an a-statitic value about FLOAT will be considered unique\n"
-" -r, --repeat-astat=FLOAT Contigs with an a-statistic below FLOAT will be considered repetitive\n"
+" -u, --unique-astat=FLOAT Contigs with an a-statistic value above FLOAT will be considered unique (default: 20.0)\n"
+" -r, --repeat-astat=FLOAT Contigs with an a-statistic below FLOAT will be considered repetitive (default: 5.0)\n"
" Contigs with an a-statistic between these thresholds will not be\n"
" classified as unique or repetitive\n"
" -o, --outfile=FILE write the scaffolds to FILE (default: CONTIGSFILE.scaf)\n"
View
4 src/SGA/subgraph.cpp
@@ -39,8 +39,8 @@ static const char *SUBGRAPH_USAGE_MESSAGE =
" -v, --verbose display verbose output\n"
" --help display this help and exit\n"
" -o, --out=FILE write the subgraph to FILE (default: subgraph.asqg.gz)\n"
-" -s, --size=NUM the size of the subgraph to extract, all vertices that are at most NUM hops\n"
-" away from the root will be included. Default: 5\n"
+" -s, --size=N the size of the subgraph to extract, all vertices that are at most N hops\n"
+" away from the root will be included (default: 5)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
namespace opt
View
2 src/bin/sga-pipeline
@@ -403,7 +403,7 @@ if "overlap" not in arguments:
arguments["overlap"] = "-m %d -t %d" % (min_overlap, num_threads)
if "assemble" not in arguments:
- arguments["assemble"] = "--exact -t 10 -b 10 -r 20"
+ arguments["assemble"] = "--exact -x 10 -b 10 -r 20"
if "correct" not in arguments:
arguments["correct"] = "-m %d -e %f -l 16 -t %d" % (min_overlap, error_rate, num_threads)

0 comments on commit 566598b

Please sign in to comment.
Something went wrong with that request. Please try again.