Permalink
Browse files

Prefilter slicing stability

  • Loading branch information...
ClovisG committed Dec 7, 2018
1 parent a951e4d commit 2039a0a7ab113ae838edb5b523fe0ef15c4b7989
@@ -41,3 +41,4 @@ EliTestFiles/
NCBI_Bacteria_Orfs/
build.old/
.vscode/c_cpp_properties.json
vsc.code-workspace
@@ -145,9 +145,11 @@ while [ "${STEP}" -lt "${MAX_STEPS}" ] && [ "${NUM_PROFILES}" -gt 0 ]; do
# shellcheck disable=SC2086
"$MMSEQS" sortresult "${MERGED}" "${TMP_PATH}/aln_merged_trunc" ${SORTRESULT_PAR} \
|| fail "sortresult died"

mv -f "${TMP_PATH}/aln_merged_trunc" "${TMP_PATH}/aln_merged"
mv -f "${TMP_PATH}/aln_merged_trunc.index" "${TMP_PATH}/aln_merged.index"
mv -f "${TMP_PATH}/aln_merged_trunc.dbtype" "${TMP_PATH}/aln_merged.dbtype"


join "${TMP_PATH}/pref_keep.list" "${PROFILEDB}.index" > "${PROFILEDB}.index.tmp"
mv -f "${PROFILEDB}.index.tmp" "${PROFILEDB}.index"
@@ -90,6 +90,10 @@ class Matcher{
return true;
if(second.score > first.score )
return false;
if(first.dbKey < second.dbKey )
return true;
if(second.dbKey < first.dbKey )
return false;
return false;
}

@@ -811,7 +811,7 @@ bool Prefiltering::runSplit(DBReader<unsigned int>* qdbr, const std::string &res
QueryMatcher matcher(indexTable, sequenceLookup, kmerSubMat, ungappedSubMat,
evaluer, tdbr->getSeqLens() + dbFrom, kmerThr, kmerMatchProb,
kmerSize, dbSize, maxSeqLen, seq.getEffectiveKmerSize(),
maxResults, aaBiasCorrection, diagonalScoring, minDiagScoreThr, takeOnlyBestKmer);
maxResults, aaBiasCorrection, diagonalScoring, minDiagScoreThr, takeOnlyBestKmer,resListOffset);

if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) || Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_PROFILE_STATE_PROFILE)) {
matcher.setProfileMatrix(seq.profile_matrix);
@@ -838,7 +838,7 @@ bool Prefiltering::runSplit(DBReader<unsigned int>* qdbr, const std::string &res
std::pair<hit_t *, size_t> prefResults = matcher.matchQuery(&seq, targetSeqId);
size_t resultSize = prefResults.second;
// write
writePrefilterOutput(qdbr, &tmpDbw, thread_idx, id, prefResults, dbFrom, resListOffset, maxResults);
writePrefilterOutput(qdbr, &tmpDbw, thread_idx, id, prefResults, dbFrom, 0, maxResults);

// update statistics counters
if (resultSize != 0) {
@@ -1020,7 +1020,7 @@ double Prefiltering::setKmerThreshold(DBReader<unsigned int> *qdbr) {
EvalueComputation evaluer(tdbr->getAminoAcidDBSize(), ungappedSubMat);
QueryMatcher matcher(indexTable, sequenceLookup, kmerSubMat, ungappedSubMat, evaluer, tdbr->getSeqLens(), kmerThr, 1.0,
kmerSize, indexTable->getSize(), maxSeqLen, seq.getEffectiveKmerSize(),
150000, aaBiasCorrection, false, minDiagScoreThr, takeOnlyBestKmer);
150000, aaBiasCorrection, false, minDiagScoreThr, takeOnlyBestKmer,0);
if(Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) || Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_PROFILE_STATE_PROFILE) ){
matcher.setProfileMatrix(seq.profile_matrix);
} else {
@@ -31,7 +31,7 @@ QueryMatcher::QueryMatcher(IndexTable *indexTable, SequenceLookup *sequenceLooku
unsigned int maxSeqLen, unsigned int effectiveKmerSize,
size_t maxHitsPerQuery, bool aaBiasCorrection,
bool diagonalScoring, unsigned int minDiagScoreThr,
bool takeOnlyBestKmer)
bool takeOnlyBestKmer, size_t resListOffset)
: evaluer(evaluer)
{
this->kmerSubMat = kmerSubMat;
@@ -50,6 +50,7 @@ QueryMatcher::QueryMatcher(IndexTable *indexTable, SequenceLookup *sequenceLooku
this->counterResultSize = std::max((size_t)1000000, dbSize);
this->maxDbMatches = std::max((size_t)1000000, dbSize) * 2;
this->resList = (hit_t *) mem_align(ALIGN_INT, maxHitsPerQuery * sizeof(hit_t) );
this->resListOffset = resListOffset;
this->databaseHits = new(std::nothrow) IndexEntryLocal[maxDbMatches];
Util::checkAllocation(databaseHits, "Could not allocate databaseHits memory in QueryMatcher");
this->foundDiagonals = (CounterResult*)calloc(counterResultSize, sizeof(CounterResult));
@@ -377,7 +378,12 @@ std::pair<hit_t *, size_t> QueryMatcher::getResult(CounterResult * results,
// write result to list
//std::cout << i << "\t" << results[i].id << "\t" << (int)results[i].count << "\t" << results[i].diagonal << std::endl;
if(aboveThreshold && isNotQueryId){
hit_t *result = (resList + elementCounter);
if (elementCounter<resListOffset)
{
elementCounter++;
continue;
}
hit_t *result = (resList + elementCounter - resListOffset);
result->seqId = seqIdCurr;
result->prefScore = scoreCurr;
result->diagonal = diagCurr;
@@ -407,7 +413,7 @@ std::pair<hit_t *, size_t> QueryMatcher::getResult(CounterResult * results,
break;
}
}
std::pair<hit_t *, size_t> pair = std::make_pair(this->resList, elementCounter);
std::pair<hit_t *, size_t> pair = std::make_pair(this->resList, elementCounter - resListOffset);
return pair;
}

@@ -79,7 +79,7 @@ class QueryMatcher {
double kmerMatchProb, int kmerSize, size_t dbSize,
unsigned int maxSeqLen, unsigned int effectiveKmerSize,
size_t maxHitsPerQuery, bool aaBiasCorrection, bool diagonalScoring,
unsigned int minDiagScoreThr, bool takeOnlyBestKmer);
unsigned int minDiagScoreThr, bool takeOnlyBestKmer,size_t resListOffset);
~QueryMatcher();

// returns result for the sequence
@@ -253,6 +253,9 @@ class QueryMatcher {
// max seq. per query
size_t maxHitsPerQuery;

// offset in the result list
size_t resListOffset;

//pointer to seqLens
float *seqLens;

0 comments on commit 2039a0a

Please sign in to comment.