Skip to content

Commit

Permalink
[FEATURE] expose macro to use 32bit as position type for proteins
Browse files Browse the repository at this point in the history
  • Loading branch information
h-2 committed Nov 23, 2016
1 parent 2d508a1 commit dd800f9
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 16 deletions.
26 changes: 16 additions & 10 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,12 @@ message ("\n${ColourBold}Build configuration${ColourReset}")

message (STATUS "LAMBDA version is: ${SEQAN_APP_VERSION}")

option (LAMBDA_FASTBUILD "Build only blastp and blastx modes (speeds up build)." OFF)
option (LAMBDA_NATIVE_BUILD "Architecture-specific optimizations, i.e. g++ -march=native." ON)
option (LAMBDA_STATIC_BUILD "Include all libraries in the binaries." OFF)
option (LAMBDA_MMAPPED_DB "Use mmapped access to the database." OFF)
option (LAMBDA_LINGAPS_OPT "Add optimized codepaths for linear gap costs (inc. bin size and compile time)." OFF)
# User-configurable build options, listed alphabetically.
# Each description is the help text stored with the cache entry.
option (LAMBDA_FASTBUILD "Build only blastp and blastx modes (speeds up build)." OFF)
option (LAMBDA_LINGAPS_OPT "Add optimized codepaths for linear gap costs (increases bin size and compile time)." OFF)
option (LAMBDA_LONG_PROTEIN_SUBJ_SEQS "Make max protein sequence length == 4.3 billion instead of 65,535. INVALIDATES INDEXES!" OFF)
option (LAMBDA_MMAPPED_DB "Use mmapped access to the database." OFF)
option (LAMBDA_NATIVE_BUILD "Architecture-specific optimizations, i.e. g++ -march=native." ON)
option (LAMBDA_STATIC_BUILD "Include all libraries in the binaries." OFF)

if (LAMBDA_FASTBUILD)
add_definitions (-DFASTBUILD=1)
Expand Down Expand Up @@ -125,12 +126,17 @@ if (LAMBDA_LINGAPS_OPT)
add_definitions (-DLAMBDA_LINGAPS_OPT=1)
endif ()

# Pass the option on to the compiler as a preprocessor definition so the
# sources can test it with #ifdef LAMBDA_LONG_PROTEIN_SUBJ_SEQS.
if (LAMBDA_LONG_PROTEIN_SUBJ_SEQS)
add_definitions (-DLAMBDA_LONG_PROTEIN_SUBJ_SEQS=1)
endif ()

message(STATUS "The following options are selected for the build:")
message( " LAMBDA_FASTBUILD ${LAMBDA_FASTBUILD}")
message( " LAMBDA_LINGAPS_OPT ${LAMBDA_LINGAPS_OPT}")
message( " LAMBDA_MMAPPED_DB ${LAMBDA_MMAPPED_DB}")
message( " LAMBDA_NATIVE_BUILD ${LAMBDA_NATIVE_BUILD}")
message( " LAMBDA_STATIC_BUILD ${LAMBDA_STATIC_BUILD}")
message( " LAMBDA_FASTBUILD ${LAMBDA_FASTBUILD}")
message( " LAMBDA_LINGAPS_OPT ${LAMBDA_LINGAPS_OPT}")
message( " LAMBDA_LONG_PROTEIN_SUBJ_SEQS ${LAMBDA_LONG_PROTEIN_SUBJ_SEQS}")
message( " LAMBDA_MMAPPED_DB ${LAMBDA_MMAPPED_DB}")
message( " LAMBDA_NATIVE_BUILD ${LAMBDA_NATIVE_BUILD}")
message( " LAMBDA_STATIC_BUILD ${LAMBDA_STATIC_BUILD}")
message(STATUS "Run 'cmake -LH' to get a comment on each option.")
message(STATUS "Remove CMakeCache.txt and re-run cmake with -DOPTIONNAME=ON|OFF to change an option.")

Expand Down
29 changes: 27 additions & 2 deletions src/lambda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,15 @@ template <typename TRedAlph,
inline int
validateIndexOptions(LambdaOptions const & options)
{
//TODO verify that index dir exists
// Verify that the index path can be stat()'ed and refers to a directory.
// stat() returns non-zero on failure; thanks to short-circuit evaluation
// path_stat is only inspected after a successful call. A single stat() call
// suffices (the former extra call discarded its result).
struct stat path_stat;
if (stat(toCString(options.indexDir), &path_stat) != 0 || !S_ISDIR(path_stat.st_mode))
{
    std::cerr << "ERROR: Index directory does not exist or is not readable.\n";
    return -1;
}

std::string buffer;
readIndexOption(buffer, "alph_translated", options);
if (buffer != _alphName(TransAlph<p>()))
Expand Down Expand Up @@ -160,13 +168,30 @@ validateIndexOptions(LambdaOptions const & options)
{
buffer.clear();
readIndexOption(buffer, "genetic_code", options);
unsigned long b = 0;
b = 0;
if ((!lexicalCast(b, buffer)) || (b != static_cast<unsigned long>(options.geneticCode)))
{
std::cerr << "WARNING: The codon translation table used during indexing and during search are different. "
"This is not a problem per se, but is likely not what you want.\n\n";
}
}

// Compare the position-type bit width stored with the index against the width
// this executable was compiled with (sizeof(SizeTypePos_<TRedAlph>) * 8).
buffer.clear();
readIndexOption(buffer, "subj_seq_len_bits", options);
b = 0;
if ((!lexicalCast(b, buffer)) || (b != static_cast<unsigned long>(sizeof(SizeTypePos_<TRedAlph>) * 8)))
{
    // The macro being defined means THIS executable was built *with* long
    // protein subject sequence support, so the message must say so.
#ifdef LAMBDA_LONG_PROTEIN_SUBJ_SEQS
    std::cerr << "ERROR: Your lambda executable was built with LAMBDA_LONG_PROTEIN_SUBJ_SEQS,\n"
    " but the index was created by an executable that was built without it.\n";
#else
    std::cerr << "ERROR: Your lambda executable was built without LAMBDA_LONG_PROTEIN_SUBJ_SEQS,\n"
    " but the index was created by an executable that was built with it.\n";
#endif
    std::cerr << " You need to recreate the index or rebuild Lambda.\n";
    return -1;
}

return 0;
}

Expand Down
5 changes: 3 additions & 2 deletions src/lambda_indexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ realMain(LambdaIndexerOptions const & options,
dumpTranslatedSeqs(translatedSeqs, options);

// see if final sequence set actually fits into index
if (!checkIndexSize(translatedSeqs))
if (!checkIndexSize(translatedSeqs, BlastProgramSelector<p>()))
return -1;

if (options.dbIndexType == DbIndexType::FM_INDEX)
Expand Down Expand Up @@ -246,7 +246,8 @@ realMain(LambdaIndexerOptions const & options,
{ options.indexDir + "/option:alph_original", std::string(_alphName(OrigSubjAlph<p>())) },
{ options.indexDir + "/option:alph_translated", std::string(_alphName(TransAlph<p>())) },
{ options.indexDir + "/option:alph_reduced", std::string(_alphName(TRedAlph())) },
{ options.indexDir + "/option:genetic_code", std::to_string(options.geneticCode) }
{ options.indexDir + "/option:genetic_code", std::to_string(options.geneticCode) },
{ options.indexDir + "/option:subj_seq_len_bits", std::to_string(sizeof(SizeTypePos_<TRedAlph>) * 8)},
})
{
std::ofstream f{std::get<0>(s).c_str(), std::ios_base::out | std::ios_base::binary};
Expand Down
10 changes: 8 additions & 2 deletions src/lambda_indexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ dumpTranslatedSeqs(TCDStringSet<String<TTransAlph>> const & translatedSeqs,
// Function loadSubj()
// --------------------------------------------------------------------------

template <typename TRedAlph>
template <typename TRedAlph, BlastProgram p>
inline bool
checkIndexSize(TCDStringSet<String<TRedAlph>> const & seqs)
checkIndexSize(TCDStringSet<String<TRedAlph>> const & seqs, BlastProgramSelector<p> const &)
{
using SAV = typename SAValue<TCDStringSet<String<TRedAlph>>>::Type;
uint64_t curNumSeq = length(seqs);
Expand All @@ -303,6 +303,12 @@ checkIndexSize(TCDStringSet<String<TRedAlph>> const & seqs)
// Report the offending length and the maximum the index position type allows.
std::cerr << "Too long sequences to be indexed:\n "
          << "length " << maxLen << " present in file, but only "
          << maxLenSeq << " supported by index.\n";
#ifndef LAMBDA_LONG_PROTEIN_SUBJ_SEQS
// Only shown when built without the macro and the program mode is not
// BLASTN. Written to std::cerr so the hint stays with the error it explains.
if (p != BlastProgram::BLASTN)
    std::cerr << "You can recompile Lambda and add -DLAMBDA_LONG_PROTEIN_SUBJ_SEQS=1 to activate\n"
                 "support for longer protein sequences.\n";
#endif

return false;
}
return true;
Expand Down
4 changes: 4 additions & 0 deletions src/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ using SizeTypeNum_ = uint32_t;
// Primary template: selects the integer type exposed as ::Type.
// Without LAMBDA_LONG_PROTEIN_SUBJ_SEQS the type is 16 bit wide; defining the
// macro at compile time widens it to 32 bit.
template <typename T>
struct SizeTypePosMeta_
{
#ifndef LAMBDA_LONG_PROTEIN_SUBJ_SEQS
    using Type = uint16_t;  // default: compact 16 bit type
#else
    using Type = uint32_t;  // opt-in: 32 bit type
#endif
};

template <>
Expand Down

0 comments on commit dd800f9

Please sign in to comment.