Permalink
Browse files

Compressed databases with Zstd initial commit

Co-authored-by: Milot Mirdita <milot@mirdita.de>
  • Loading branch information...
martin-steinegger authored and milot-mirdita committed Dec 5, 2018
1 parent e2d04a3 commit 147d6a9d04ff314a8e6531fe63f401b5680ddf69
Showing with 1,739 additions and 1,055 deletions.
  1. +14 −1 CMakeLists.txt
  2. +1 −1 cmake/MMseqsSetupDerivedTarget.cmake
  3. +1 −1 data/blastn.sh
  4. +1 −1 data/blastp.sh
  5. +1 −1 data/blastpgp.sh
  6. +2 −0 data/cascaded_clustering.sh
  7. +1 −0 data/linclust.sh
  8. +1 −0 data/search2m.sh
  9. +1 −1 data/searchtargetprofile.sh
  10. +1 −0 data/taxonomy.sh
  11. +1 −1 data/translated_search.sh
  12. +25 −25 src/alignment/Alignment.cpp
  13. +4 −3 src/alignment/Alignment.h
  14. +3 −3 src/alignment/CompressedA3M.cpp
  15. +1 −1 src/alignment/CompressedA3M.h
  16. +7 −5 src/alignment/Matcher.cpp
  17. +55 −0 src/alignment/Matcher.h
  18. +12 −11 src/alignment/StripedSmithWaterman.cpp
  19. +14 −10 src/alignment/rescorediagonal.cpp
  20. +5 −1 src/clustering/AlignmentSymmetry.cpp
  21. +5 −5 src/clustering/Clustering.cpp
  22. +2 −1 src/clustering/Clustering.h
  23. +19 −3 src/clustering/ClusteringAlgorithms.cpp
  24. +1 −1 src/clustering/Main.cpp
  25. +6 −6 src/commons/DBConcat.cpp
  26. +101 −23 src/commons/DBReader.cpp
  27. +39 −9 src/commons/DBReader.h
  28. +151 −39 src/commons/DBWriter.cpp
  29. +9 −6 src/commons/DBWriter.h
  30. +5 −3 src/commons/IndexReader.h
  31. +2 −2 src/commons/Orf.cpp
  32. +1 −1 src/commons/Orf.h
  33. +56 −2 src/commons/Parameters.cpp
  34. +25 −0 src/commons/Parameters.h
  35. +10 −8 src/commons/Sequence.cpp
  36. +4 −7 src/commons/Sequence.h
  37. +9 −5 src/linclust/kmermatcher.cpp
  38. +9 −8 src/multihit/Aggregation.cpp
  39. +3 −2 src/multihit/Aggregation.h
  40. +6 −6 src/multihit/besthitperset.cpp
  41. +8 −8 src/multihit/combinepvalperset.cpp
  42. +12 −12 src/multihit/resultsbyset.cpp
  43. +20 −5 src/prefiltering/IndexBuilder.cpp
  44. +6 −6 src/prefiltering/IndexTable.h
  45. +4 −4 src/prefiltering/Main.cpp
  46. +44 −41 src/prefiltering/Prefiltering.cpp
  47. +1 −0 src/prefiltering/Prefiltering.h
  48. +31 −30 src/prefiltering/PrefilteringIndexReader.cpp
  49. +1 −1 src/prefiltering/QueryMatcher.cpp
  50. +4 −4 src/prefiltering/UngappedAlignment.cpp
  51. +13 −12 src/prefiltering/ungappedprefilter.cpp
  52. +3 −3 src/taxonomy/addtaxonomy.cpp
  53. +0 −5 src/taxonomy/createtaxdb.cpp
  54. +3 −3 src/taxonomy/filtertaxdb.cpp
  55. +3 −3 src/taxonomy/lca.cpp
  56. +1 −0 src/test/CMakeLists.txt
  57. +18 −18 src/test/TestDBReader.cpp
  58. +1 −1 src/test/TestDBReaderIndexSerialization.cpp
  59. +149 −0 src/test/TestDBReaderZstd.cpp
  60. +2 −2 src/test/TestIndexTable.cpp
  61. +1 −1 src/test/TestKmerGenerator.cpp
  62. +1 −1 src/test/TestMultipleAlignment.cpp
  63. +2 −2 src/test/TestProfileAlignment.cpp
  64. +7 −7 src/util/alignall.cpp
  65. +8 −8 src/util/alignbykmer.cpp
  66. +3 −3 src/util/apply.cpp
  67. +10 −6 src/util/clusthash.cpp
  68. +4 −4 src/util/convert2fasta.cpp
  69. +11 −11 src/util/convertalignments.cpp
  70. +4 −4 src/util/convertca3m.cpp
  71. +1 −1 src/util/convertkb.cpp
  72. +1 −1 src/util/convertmsa.cpp
  73. +7 −7 src/util/convertprofiledb.cpp
  74. +98 −120 src/util/createdb.cpp
  75. +48 −47 src/util/createseqfiledb.cpp
  76. +4 −4 src/util/createsubdb.cpp
  77. +9 −9 src/util/createtsv.cpp
  78. +41 −28 src/util/diffseqdbs.cpp
  79. +16 −16 src/util/expandaln.cpp
  80. +9 −10 src/util/extractalignedregion.cpp
  81. +11 −11 src/util/extractdomains.cpp
  82. +9 −7 src/util/extractframes.cpp
  83. +9 −9 src/util/extractorfs.cpp
  84. +11 −10 src/util/filterdb.cpp
  85. +2 −0 src/util/filterdb.h
  86. +6 −7 src/util/gff2db.cpp
  87. +6 −6 src/util/indexdb.cpp
  88. +4 −5 src/util/maskbygff.cpp
  89. +49 −36 src/util/mergeclusters.cpp
  90. +2 −2 src/util/mergedbs.cpp
  91. +5 −6 src/util/mergeresultsbyset.cpp
  92. +58 −51 src/util/msa2profile.cpp
  93. +97 −36 src/util/offsetalignment.cpp
  94. +4 −4 src/util/orftocontig.cpp
  95. +6 −6 src/util/prefixid.cpp
  96. +5 −5 src/util/profile2cs.cpp
  97. +4 −4 src/util/profile2pssm.cpp
  98. +7 −7 src/util/proteinaln2nucl.cpp
  99. +6 −6 src/util/result2flat.cpp
  100. +27 −24 src/util/result2msa.cpp
  101. +10 −10 src/util/result2pp.cpp
  102. +13 −12 src/util/result2profile.cpp
  103. +6 −6 src/util/result2repseq.cpp
  104. +10 −10 src/util/result2stats.cpp
  105. +2 −0 src/util/result2stats.h
  106. +4 −5 src/util/sequence2profile.cpp
  107. +3 −3 src/util/sortresult.cpp
  108. +3 −3 src/util/splitdb.cpp
  109. +9 −7 src/util/splitsequence.cpp
  110. +7 −7 src/util/subtractdbs.cpp
  111. +7 −7 src/util/summarizeheaders.cpp
  112. +4 −4 src/util/summarizeresult.cpp
  113. +4 −4 src/util/summarizetabs.cpp
  114. +69 −50 src/util/swapresults.cpp
  115. +1 −1 src/util/touchdb.cpp
  116. +5 −5 src/util/translateaa.cpp
  117. +6 −6 src/util/translatenucs.cpp
  118. +1 −1 src/util/tsv2db.cpp
  119. +2 −2 src/workflow/Cluster.cpp
  120. +2 −2 src/workflow/CreateIndex.cpp
  121. +2 −3 src/workflow/Linclust.cpp
  122. +16 −14 src/workflow/Search.cpp
  123. +2 −0 util/update_zstd.sh
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(MMseqs CXX)
project(MMseqs CXX C)
message("-- Source Directory: ${CMAKE_CURRENT_SOURCE_DIR}")
message("-- Project Directory: ${PROJECT_SOURCE_DIR}")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -50,6 +50,19 @@ if (CMAKE_COMPILER_IS_CLANG)
set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -stdlib=libc++")
endif ()
# zstd
# Build the zstd sources bundled under lib/zstd as part of this project.
SET(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd")
# NOTE(review): presumably consumed by zstd's own install rules so its library lands in bin - confirm
SET(CMAKE_INSTALL_LIBDIR bin)
# append the CMake helper modules shipped with zstd to the module search path
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd/build/cmake/CMakeModules")
# default values for zstd's build switches: only the static library is enabled,
# everything else (legacy support, shared lib, multithreading, programs, contrib, tests) is off
OPTION(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
OPTION(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
OPTION(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
OPTION(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" OFF)
OPTION(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
OPTION(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
OPTION(ZSTD_BUILD_TESTS "BUILD TESTS" OFF)
# EXCLUDE_FROM_ALL keeps zstd's targets out of the default build; they are built only when a target depends on them
add_subdirectory(lib/zstd/build/cmake/lib EXCLUDE_FROM_ALL)
include_directories(lib)
include_directories(lib/kseq)
include_directories(lib/simd)
@@ -6,7 +6,7 @@ function (mmseqs_setup_derived_target TARGET)
get_target_property(DEF_TMP mmseqs-framework COMPILE_DEFINITIONS)
get_target_property(INCL_TMP mmseqs-framework INCLUDE_DIRECTORIES)
target_link_libraries(${TARGET} mmseqs-framework)
target_link_libraries(${TARGET} mmseqs-framework libzstd_static)
append_target_property(${TARGET} COMPILE_FLAGS ${COMPILE_TMP})
append_target_property(${TARGET} LINK_FLAGS ${LINK_TMP})
set_property(TARGET ${TARGET} APPEND PROPERTY COMPILE_DEFINITIONS ${DEF_TMP})
@@ -64,7 +64,7 @@ if notExists "$4/aln_offset"; then
|| fail "Offset step died"
fi
(mv -f "$4/aln_offset" "$3" && mv -f "$4/aln_offset.index" "$3.index") \
(mv -f "$4/aln_offset" "$3" && mv -f "$4/aln_offset.dbtype" "$3.dbtype" && mv -f "$4/aln_offset.index" "$3.index") \
|| fail "Could not move result to $3"
if [ -n "$REMOVE_TMP" ]; then
@@ -75,7 +75,7 @@ while [ "$STEP" -lt "$STEPS" ]; do
done
# post processing
(mv -f "$TMP_PATH/aln_${SENSE_0}" "$3" && mv -f "$TMP_PATH/aln_${SENSE_0}.index" "$3.index" ) \
(mv -f "$TMP_PATH/aln_${SENSE_0}" "$3" && mv -f "$TMP_PATH/aln_${SENSE_0}.dbtype" "$3.dbtype" && mv -f "$TMP_PATH/aln_${SENSE_0}.index" "$3.index" ) \
|| fail "Could not move result to $3"
if [ -n "$REMOVE_TMP" ]; then
@@ -82,7 +82,7 @@ while [ $STEP -lt $NUM_IT ]; do
done
# post processing
STEP=$((STEP-1))
(mv -f "$TMP_PATH/aln_0" "$3" && mv -f "$TMP_PATH/aln_0.index" "$3.index") || fail "Could not move result to $3"
(mv -f "$TMP_PATH/aln_0" "$3" && mv -f "$TMP_PATH/aln_0.dbtype" "$3.dbtype" && mv -f "$TMP_PATH/aln_0.index" "$3.index") || fail "Could not move result to $3"
if [ -n "$REMOVE_TMP" ]; then
echo "Remove temporary files"
@@ -119,10 +119,12 @@ if [ -n "$REASSIGN" ]; then
|| fail "mergedbs reassign died"
# post processing
# Move the reassigned clustering (data, .dbtype and .index files) to the output path $2.
mv -f "${TMP_PATH}/clu_reassign" "$2" || fail "Could not move result to $2"
# the .dbtype companion must be written to "$2.dbtype"; moving it to "$2" would
# overwrite the data file moved on the previous line and leave no .dbtype file behind
mv -f "${TMP_PATH}/clu_reassign.dbtype" "$2.dbtype" || fail "Could not move result to $2"
mv -f "${TMP_PATH}/clu_reassign.index" "$2.index" || fail "Could not move result to $2"
else
# post processing
mv -f "${TMP_PATH}/clu" "$2" || fail "Could not move result to $2"
mv -f "${TMP_PATH}/clu.dbtype" "$2.dbtype" || fail "Could not move result to $2"
mv -f "${TMP_PATH}/clu.index" "$2.index" || fail "Could not move result to $2"
fi
@@ -86,6 +86,7 @@ fi
# post processing
mv -f "${TMP_PATH}/clu" "$2" || fail "Could not move result to $2"
mv -f "${TMP_PATH}/clu.dbtype" "$2.dbtype" || fail "Could not move result to $2"
mv -f "${TMP_PATH}/clu.index" "$2.index" || fail "Could not move result to $2"
if [ -n "$REMOVE_TMP" ]; then
@@ -69,6 +69,7 @@ if [ -n "${SEARCH2_PAR}" ]; then
fi
mv -f "${LCA_SOURCE}" "${RESULTS}"
mv -f "${LCA_SOURCE}.dbtype" "${RESULTS}.dbtype"
mv -f "${LCA_SOURCE}.index" "${RESULTS}.index"
if [ -n "${REMOVE_TMP}" ]; then
@@ -47,7 +47,7 @@ if notExists "$TMP_PATH/aln"; then
fi
# post processing
(mv -f "${TMP_PATH}/aln" "${RESULTS}"; mv -f "${TMP_PATH}/aln.index" "${RESULTS}.index") || fail "Could not move result to ${RESULTS}"
# Move data, .dbtype and .index to ${RESULTS}. The moves are chained with && so that the
# first failing mv stops the sequence and its status reaches the fail handler; with ';'
# only the exit status of the last mv would be checked.
(mv -f "${TMP_PATH}/aln" "${RESULTS}" && mv -f "${TMP_PATH}/aln.dbtype" "${RESULTS}.dbtype" && mv -f "${TMP_PATH}/aln.index" "${RESULTS}.index") || fail "Could not move result to ${RESULTS}"
if [ -n "${REMOVE_TMP}" ]; then
echo "Remove temporary files"
@@ -76,6 +76,7 @@ if [ -n "${LCA_PAR}" ]; then
|| fail "Lca died"
else
mv -f "${TMP_PATH}/taxa" "${RESULTS}"
mv -f "${TMP_PATH}/taxa.dbtype" "${RESULTS}.dbtype"
mv -f "${TMP_PATH}/taxa.index" "${RESULTS}.index"
fi
@@ -66,7 +66,7 @@ if notExists "$4/aln_offset"; then
|| fail "Offset step died"
fi
(mv -f "$4/aln_offset" "$3" && mv -f "$4/aln_offset.index" "$3.index") \
(mv -f "$4/aln_offset" "$3" && mv -f "$4/aln_offset.dbtype" "$3.dbtype" && mv -f "$4/aln_offset.index" "$3.index") \
|| fail "Could not move result to $3"
if [ -n "$REMOVE_TMP" ]; then
@@ -23,7 +23,7 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
covThr(par.covThr), canCovThr(par.covThr), covMode(par.covMode), seqIdMode(par.seqIdMode), evalThr(par.evalThr), seqIdThr(par.seqIdThr),
includeIdentity(par.includeIdentity), addBacktrace(par.addBacktrace), realign(par.realign), scoreBias(par.scoreBias),
threads(static_cast<unsigned int>(par.threads)), outDB(outDB), outDBIndex(outDBIndex),
threads(static_cast<unsigned int>(par.threads)), compressed(par.compressed), outDB(outDB), outDBIndex(outDBIndex),
maxSeqLen(par.maxSeqLen), compBiasCorrection(par.compBiasCorrection), altAlignment(par.altAlignment), qdbr(NULL), qSeqLookup(NULL),
tdbr(NULL), tidxdbr(NULL), tSeqLookup(NULL), templateDBIsIndex(false) {
@@ -49,7 +49,7 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
}
if (altAlignment > 0) {
if(querySeqType==Sequence::NUCLEOTIDES){
if(Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_NUCLEOTIDES)){
Debug(Debug::ERROR) << "Alternative alignments are not supported for nucleotides.\n";
EXIT(EXIT_FAILURE);
}
@@ -67,7 +67,7 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
if (indexDB.length() > 0) {
Debug(Debug::INFO) << "Use index " << indexDB << "\n";
tidxdbr = new DBReader<unsigned int>(indexDB.c_str(), (indexDB + ".index").c_str());
tidxdbr = new DBReader<unsigned int>(indexDB.c_str(), (indexDB + ".index").c_str(), threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX);
tidxdbr->open(DBReader<unsigned int>::NOSORT);
bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP);
@@ -96,7 +96,7 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
}
if (templateDBIsIndex == false) {
tdbr = new DBReader<unsigned int>(targetSeqDB.c_str(), targetSeqDBIndex.c_str());
tdbr = new DBReader<unsigned int>(targetSeqDB.c_str(), targetSeqDBIndex.c_str(), threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX);
tdbr->open(DBReader<unsigned int>::NOSORT);
}
@@ -107,7 +107,7 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
querySeqType = targetSeqType;
} else {
// open the sequence, prefiltering and output databases
qdbr = new DBReader<unsigned int>(querySeqDB.c_str(), querySeqDBIndex.c_str());
qdbr = new DBReader<unsigned int>(querySeqDB.c_str(), querySeqDBIndex.c_str(), par.threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX);
qdbr->open(DBReader<unsigned int>::NOSORT);
//size_t freeSpace = FileUtil::getFreeSpace(FileUtil::dirName(outDB).c_str());
@@ -142,27 +142,27 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &querySeqD
Debug(Debug::ERROR) << "Please recreate your database or add a .dbtype file to your sequence/profile database.\n";
EXIT(EXIT_FAILURE);
}
if (querySeqType == Sequence::HMM_PROFILE && targetSeqType == Sequence::HMM_PROFILE) {
if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE)) {
Debug(Debug::ERROR) << "Only the query OR the target database can be a profile database.\n";
EXIT(EXIT_FAILURE);
}
if (querySeqType != Sequence::HMM_PROFILE && targetSeqType == Sequence::PROFILE_STATE_SEQ) {
if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) == false && Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_PROFILE_STATE_SEQ)) {
Debug(Debug::ERROR) << "The query has to be a profile when using a target profile state database.\n";
EXIT(EXIT_FAILURE);
} else if (querySeqType == Sequence::HMM_PROFILE && targetSeqType == Sequence::PROFILE_STATE_SEQ) {
querySeqType = Sequence::PROFILE_STATE_PROFILE;
} else if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_PROFILE_STATE_SEQ)) {
querySeqType = Parameters::DBTYPE_PROFILE_STATE_PROFILE;
}
Debug(Debug::INFO) << "Query database type: " << DBReader<unsigned int>::getDbTypeName(querySeqType) << "\n";
Debug(Debug::INFO) << "Target database type: " << DBReader<unsigned int>::getDbTypeName(targetSeqType) << "\n";
prefdbr = new DBReader<unsigned int>(prefDB.c_str(), prefDBIndex.c_str());
prefdbr = new DBReader<unsigned int>(prefDB.c_str(), prefDBIndex.c_str(), threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX);
prefdbr->open(DBReader<unsigned int>::LINEAR_ACCCESS);
if (querySeqType == Sequence::NUCLEOTIDES) {
if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_NUCLEOTIDES)) {
m = new NucleotideMatrix(par.scoringMatrixFile.c_str(), 1.0, scoreBias);
gapOpen = 5;
gapExtend = 2;
} else if (querySeqType == Sequence::PROFILE_STATE_PROFILE){
} else if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_PROFILE_STATE_PROFILE)){
SubstitutionMatrix s(par.scoringMatrixFile.c_str(), 2.0, scoreBias);
this->m = new SubstitutionMatrixProfileStates(s.matrixName, s.probMatrix, s.pBack, s.subMatrixPseudoCounts, 2.0, scoreBias, 255);
gapOpen = par.gapOpen;
@@ -280,7 +280,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
const unsigned int maxAlnNum, const unsigned int maxRejected) {
size_t alignmentsNum = 0;
size_t totalPassedNum = 0;
DBWriter dbw(outDB.c_str(), outDBIndex.c_str(), threads);
DBWriter dbw(outDB.c_str(), outDBIndex.c_str(), threads, compressed, Parameters::DBTYPE_ALIGNMENT_RES);
dbw.open();
EvalueComputation evaluer(tdbr->getAminoAcidDBSize(), this->m, gapOpen, gapExtend);
@@ -317,9 +317,9 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
Debug::printProgress(id);
// get the prefiltering list
char *data = prefdbr->getData(id);
char *data = prefdbr->getData(id, thread_idx);
unsigned int queryDbKey = prefdbr->getDbKey(id);
setQuerySequence(qSeq, id, queryDbKey);
setQuerySequence(qSeq, id, queryDbKey, thread_idx);
matcher.initQuery(&qSeq);
// parse the prefiltering list and calculate a Smith-Waterman alignment for each sequence in the list
@@ -343,7 +343,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
diagonal = hit.diagonal;
}
setTargetSequence(dbSeq, dbKey);
setTargetSequence(dbSeq, dbKey, thread_idx);
// check if the sequences could pass the coverage threshold
if(Util::canBeCovered(canCovThr, covMode, static_cast<float>(qSeq.L), static_cast<float>(dbSeq.L)) == false )
{
@@ -375,15 +375,15 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
data = Util::skipLine(data);
}
if(altAlignment > 0 && realign == false ){
computeAlternativeAlignment(queryDbKey, dbSeq, swResults, matcher, evalThr, swMode);
computeAlternativeAlignment(queryDbKey, dbSeq, swResults, matcher, evalThr, swMode, thread_idx);
}
// write the results
std::sort(swResults.begin(), swResults.end(), Matcher::compareHits);
if (realign == true) {
realigner->initQuery(&qSeq);
for (size_t result = 0; result < swResults.size(); result++) {
setTargetSequence(dbSeq, swResults[result].dbKey);
setTargetSequence(dbSeq, swResults[result].dbKey, thread_idx);
const bool isIdentity = (queryDbKey == swResults[result].dbKey && (includeIdentity || sameQTDB)) ? true : false;
Matcher::result_t res = realigner->getSWResult(&dbSeq, INT_MAX, covMode, covThr, FLT_MAX,
Matcher::SCORE_COV_SEQID, seqIdMode, isIdentity);
@@ -403,7 +403,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
}
swResults = swRealignResults;
if(altAlignment> 0 ){
computeAlternativeAlignment(queryDbKey, dbSeq, swResults, matcher, FLT_MAX, Matcher::SCORE_COV_SEQID);
computeAlternativeAlignment(queryDbKey, dbSeq, swResults, matcher, FLT_MAX, Matcher::SCORE_COV_SEQID, thread_idx);
}
}
@@ -441,13 +441,13 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex,
Debug(Debug::INFO) << hits_f << " hits per query sequence.\n";
}
inline void Alignment::setQuerySequence(Sequence &seq, size_t id, unsigned int key) {
inline void Alignment::setQuerySequence(Sequence &seq, size_t id, unsigned int key, int thread_idx) {
if (qSeqLookup != NULL) {
std::pair<const unsigned char*, const unsigned int> sequence = qSeqLookup->getSequence(id);
seq.mapSequence(id, key, sequence);
} else {
// map the query sequence
char *querySeqData = qdbr->getDataByDBKey(key);
char *querySeqData = qdbr->getDataByDBKey(key, thread_idx);
if (querySeqData == NULL) {
#pragma omp critical
{
@@ -463,13 +463,13 @@ inline void Alignment::setQuerySequence(Sequence &seq, size_t id, unsigned int k
}
}
inline void Alignment::setTargetSequence(Sequence &seq, unsigned int key) {
inline void Alignment::setTargetSequence(Sequence &seq, unsigned int key, int thread_idx) {
if (tSeqLookup != NULL) {
size_t id = tdbr->getId(key);
std::pair<const unsigned char*, const unsigned int> sequence = tSeqLookup->getSequence(id);
seq.mapSequence(id, key, sequence);
} else {
char *dbSeqData = tdbr->getDataByDBKey(key);
char *dbSeqData = tdbr->getDataByDBKey(key, thread_idx);
if (dbSeqData == NULL) {
#pragma omp critical
{
@@ -511,7 +511,7 @@ bool Alignment::checkCriteria(Matcher::result_t &res, bool isIdentity, double ev
void Alignment::computeAlternativeAlignment(unsigned int queryDbKey, Sequence &dbSeq,
std::vector<Matcher::result_t> &swResults,
Matcher &matcher, float evalThr, int swMode) {
Matcher &matcher, float evalThr, int swMode, int thread_idx) {
int xIndex = m->aa2int[static_cast<int>('X')];
size_t firstItResSize = swResults.size();
for(size_t i = 0; i < firstItResSize; i++) {
@@ -520,7 +520,7 @@ void Alignment::computeAlternativeAlignment(unsigned int queryDbKey, Sequence &d
if (isIdentity == true) {
continue;
}
setTargetSequence(dbSeq, swResults[i].dbKey);
setTargetSequence(dbSeq, swResults[i].dbKey, thread_idx);
for (int pos = swResults[i].dbStartPos; pos < swResults[i].dbEndPos; ++pos) {
dbSeq.int_sequence[pos] = xIndex;
}
@@ -76,6 +76,7 @@ class Alignment {
// keeps state of the SW alignment mode (ALIGNMENT_MODE_SCORE_ONLY, ALIGNMENT_MODE_SCORE_COV or ALIGNMENT_MODE_SCORE_COV_SEQID)
unsigned int swMode;
unsigned int threads;
unsigned int compressed;
const std::string outDB;
const std::string outDBIndex;
@@ -110,15 +111,15 @@ class Alignment {
void initSWMode(unsigned int alignmentMode);
void setQuerySequence(Sequence &seq, size_t id, unsigned int key);
void setQuerySequence(Sequence &seq, size_t id, unsigned int key, int thread_idx);
void setTargetSequence(Sequence &seq, unsigned int key);
void setTargetSequence(Sequence &seq, unsigned int key, int thread_idx);
static size_t estimateHDDMemoryConsumption(int dbSize, int maxSeqs);
void computeAlternativeAlignment(unsigned int queryDbKey, Sequence &dbSeq,
std::vector<Matcher::result_t> &vector, Matcher &matcher,
float evalThr, int swMode);
float evalThr, int swMode, int thread_idx);
};
#endif
@@ -37,7 +37,7 @@ void readU32(const char **ptr, uint32_t &result) {
std::string CompressedA3M::extractA3M(const char *data, size_t data_size,
DBReader<unsigned int>& sequenceReader,
DBReader<unsigned int>& headerReader) {
DBReader<unsigned int>& headerReader, int thread_idx) {
std::ostringstream output;
//read stuff till compressed part
@@ -87,8 +87,8 @@ std::string CompressedA3M::extractA3M(const char *data, size_t data_size,
readU32(&data, entry_index);
index += 4;
std::string sequence = sequenceReader.getData(entry_index);
std::string header = headerReader.getData(entry_index);
std::string sequence = sequenceReader.getData(entry_index, thread_idx);
std::string header = headerReader.getData(entry_index, thread_idx);
// make sure we always have a valid fasta prefix
if (header[0] != '>') {
@@ -12,7 +12,7 @@ class CompressedA3M {
static std::string extractA3M(const char *data, size_t data_size,
DBReader<unsigned int>& sequenceReader,
DBReader<unsigned int>& headerReader);
DBReader<unsigned int>& headerReader, int thread_idx);
static void extractMatcherResults(unsigned int &key, std::vector<Matcher::result_t> &results,
const char *data, size_t dataSize, DBReader<unsigned int>& sequenceReader, bool skipFirst);
Oops, something went wrong.

0 comments on commit 147d6a9

Please sign in to comment.