Skip to content

Commit

Permalink
Merge pull request #19 from mh11/77a4bc7aacd7b838fc3097af515e2f27ffec…
Browse files Browse the repository at this point in the history
…de3e

Enable independent indexing (and merging) of forward and reverse index (and fastq) files
  • Loading branch information
jts committed May 23, 2012
2 parents cd7bcf0 + 77a4bc7 commit 73bfc59
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 38 deletions.
84 changes: 52 additions & 32 deletions src/SGA/index.cpp
Expand Up @@ -49,6 +49,7 @@ static const char *INDEX_USAGE_MESSAGE =
" -p, --prefix=PREFIX write index to file using PREFIX instead of prefix of READSFILE\n"
" --no-reverse suppress construction of the reverse BWT. Use this option when building the index\n"
" for reads that will be error corrected using the k-mer corrector, which only needs the forward index\n"
" --no-forward suppress construction of the forward BWT. Use this option when building the forward and reverse index separately\n"
" -g, --gap-array=N use N bits of storage for each element of the gap array. Acceptable values are 4,8,16 or 32. Lower\n"
" values can substantially reduce the amount of memory required at the cost of less predictable memory usage.\n"
" When this value is set to 32, the memory requirement is essentially deterministic and requires ~5N bytes where\n"
Expand All @@ -66,13 +67,14 @@ namespace opt
static int numThreads = 1;
static bool bDiskAlgo = false;
static bool bBuildReverse = true;
static bool bBuildForward = true;
static bool validate;
static int gapArrayStorage = 8;
}

static const char* shortopts = "p:a:m:t:d:g:cv";

enum { OPT_HELP = 1, OPT_VERSION, OPT_NO_REVERSE };
enum { OPT_HELP = 1, OPT_VERSION, OPT_NO_REVERSE,OPT_NO_FWD };

static const struct option longopts[] = {
{ "verbose", no_argument, NULL, 'v' },
Expand All @@ -83,6 +85,7 @@ static const struct option longopts[] = {
{ "gap-array", required_argument, NULL, 'g' },
{ "algorithm", required_argument, NULL, 'a' },
{ "no-reverse", no_argument, NULL, OPT_NO_REVERSE },
{ "no-forward", no_argument, NULL, OPT_NO_FWD },
{ "help", no_argument, NULL, OPT_HELP },
{ "version", no_argument, NULL, OPT_VERSION },
{ NULL, 0, NULL, 0 }
Expand Down Expand Up @@ -111,45 +114,57 @@ void indexInMemoryBCR()
{
std::cout << "Building index for " << opt::readsFile << " in memory using BCR\n";

// Parse the initial read table
std::vector<DNAEncodedString> readSequences;
SeqReader reader(opt::readsFile);
SeqRecord sr;
while(reader.get(sr))
readSequences.push_back(sr.seq.toString());
BWTCA::runBauerCoxRosone(&readSequences, opt::prefix + BWT_EXT, opt::prefix + SAI_EXT);
if(opt::bBuildForward || opt::bBuildReverse)
{
// Parse the initial read table
std::vector<DNAEncodedString> readSequences;
SeqReader reader(opt::readsFile);
SeqRecord sr;
while(reader.get(sr))
readSequences.push_back(sr.seq.toString());

if(opt::bBuildReverse)
{
// Reverse all the reads
for(size_t i = 0; i < readSequences.size(); ++i)
readSequences[i] = reverse(readSequences[i].toString());
BWTCA::runBauerCoxRosone(&readSequences, opt::prefix + RBWT_EXT, opt::prefix + RSAI_EXT);
}
if(opt::bBuildForward)
{
BWTCA::runBauerCoxRosone(&readSequences, opt::prefix + BWT_EXT, opt::prefix + SAI_EXT);
}


if(opt::bBuildReverse)
{
// Reverse all the reads
for(size_t i = 0; i < readSequences.size(); ++i)
readSequences[i] = reverse(readSequences[i].toString());
BWTCA::runBauerCoxRosone(&readSequences, opt::prefix + RBWT_EXT, opt::prefix + RSAI_EXT);
}
}
}

//
void indexInMemorySAIS()
{
std::cout << "Building index for " << opt::readsFile << " in memory\n";

// Parse the initial read table
ReadTable* pRT = new ReadTable(opt::readsFile);
if(opt::bBuildForward || opt::bBuildReverse){
// Parse the initial read table
ReadTable* pRT = new ReadTable(opt::readsFile);

// Create and write the suffix array for the forward reads
buildIndexForTable(opt::prefix, pRT, false);

if(opt::bBuildReverse)
{
// Reverse all the reads
pRT->reverseAll();
// Create and write the suffix array for the forward reads
if(opt::bBuildForward)
{
buildIndexForTable(opt::prefix, pRT, false);
}

// Build the reverse suffix array
buildIndexForTable(opt::prefix, pRT, true);
}
if(opt::bBuildReverse)
{
// Reverse all the reads
pRT->reverseAll();

delete pRT;
// Build the reverse suffix array
buildIndexForTable(opt::prefix, pRT, true);
}

delete pRT;
}
}

//
Expand All @@ -166,9 +181,13 @@ void indexOnDisk()
parameters.storageLevel = opt::gapArrayStorage;
parameters.bBuildReverse = false;
parameters.bUseBCR = (opt::algorithm == "bcr");
buildBWTDisk(parameters);

if(opt::bBuildReverse)

if(opt::bBuildForward)
{
buildBWTDisk(parameters);
}

if(opt::bBuildReverse)
{
parameters.bwtExtension = RBWT_EXT;
parameters.saiExtension = RSAI_EXT;
Expand Down Expand Up @@ -219,6 +238,7 @@ void parseIndexOptions(int argc, char** argv)
case 'a': arg >> opt::algorithm; break;
case 'v': opt::verbose++; break;
case OPT_NO_REVERSE: opt::bBuildReverse = false; break;
case OPT_NO_FWD: opt::bBuildForward = false; break;
case OPT_HELP:
std::cout << INDEX_USAGE_MESSAGE;
exit(EXIT_SUCCESS);
Expand Down
33 changes: 27 additions & 6 deletions src/SGA/merge.cpp
Expand Up @@ -44,6 +44,10 @@ static const char *MERGE_USAGE_MESSAGE =
" When this value is set to 32, the memory requirement is essentially deterministic and requires ~5N bytes where\n"
" N is the size of the FM-index of READS2.\n"
" The default value is 4.\n"
" --no-sequence Suppress merging of the sequence files. Use this option when merging the index(es) separate e.g. in parallel\n"
" --no-forward Suppress merging of the forward index. Use this option when merging the index(es) separate e.g. in parallel\n"
" --no-reverse Suppress merging of the reverse index. Use this option when merging the index(es) separate e.g. in parallel\n"
"\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";

namespace opt
Expand All @@ -53,18 +57,24 @@ namespace opt
static int numThreads = 1;
static bool bRemove;
static int gapArrayStorage = 4;
static bool bMergeSequence = true;
static bool bMergeForward = true;
static bool bMergeReverse = true;
}

static const char* shortopts = "p:m:t:g:vr";

enum { OPT_HELP = 1, OPT_VERSION };
enum { OPT_HELP = 1, OPT_VERSION, OPT_NO_SEQUENCE, OPT_NO_FWD, OPT_NO_REV };

static const struct option longopts[] = {
{ "verbose", no_argument, NULL, 'v' },
{ "prefix", required_argument, NULL, 'p' },
{ "remove", no_argument, NULL, 'r' },
{ "threads", required_argument, NULL, 't' },
{ "gap-array", required_argument, NULL, 'g' },
{ "no-sequence", no_argument, NULL, OPT_NO_SEQUENCE },
{ "no-forward", no_argument, NULL, OPT_NO_FWD },
{ "no-reverse", no_argument, NULL, OPT_NO_REV },
{ "help", no_argument, NULL, OPT_HELP },
{ "version", no_argument, NULL, OPT_VERSION },
{ NULL, 0, NULL, 0 }
Expand All @@ -91,7 +101,10 @@ int mergeMain(int argc, char** argv)
}

// Merge the indices
mergeIndependentIndices(inFiles[0], inFiles[1], opt::prefix, BWT_EXT, SAI_EXT, false, opt::numThreads, opt::gapArrayStorage);
if(opt::bMergeForward)
{
mergeIndependentIndices(inFiles[0], inFiles[1], opt::prefix, BWT_EXT, SAI_EXT, false, opt::numThreads, opt::gapArrayStorage);
}

// Skip merging the reverse indices if the reverse bwt file does not exist.
std::string rbwt_filename_1 = prefix1 + RBWT_EXT;
Expand All @@ -102,11 +115,16 @@ int mergeMain(int argc, char** argv)
int ret1 = stat(rbwt_filename_1.c_str(), &file_s_1);
int ret2 = stat(rbwt_filename_2.c_str(), &file_s_2);

if(ret1 == 0 || ret2 == 0)
mergeIndependentIndices(inFiles[0], inFiles[1], opt::prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads, opt::gapArrayStorage);

if((ret1 == 0 || ret2 == 0) && opt::bMergeReverse)
{
mergeIndependentIndices(inFiles[0], inFiles[1], opt::prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads, opt::gapArrayStorage);
}

// Merge the read files
mergeReadFiles(inFiles[0], inFiles[1], opt::prefix);
if(opt::bMergeSequence)
{
mergeReadFiles(inFiles[0], inFiles[1], opt::prefix);
}

if(opt::bRemove)
{
Expand Down Expand Up @@ -151,6 +169,9 @@ void parseMergeOptions(int argc, char** argv)
case 't': arg >> opt::numThreads; break;
case 'g': arg >> opt::gapArrayStorage; break;
case 'v': opt::verbose++; break;
case OPT_NO_SEQUENCE: opt::bMergeSequence = false; break;
case OPT_NO_FWD: opt::bMergeForward = false; break;
case OPT_NO_REV: opt::bMergeReverse = false; break;
case OPT_HELP:
std::cout << MERGE_USAGE_MESSAGE;
exit(EXIT_SUCCESS);
Expand Down

0 comments on commit 73bfc59

Please sign in to comment.