diff --git a/src/util/result2msa.cpp b/src/util/result2msa.cpp index 7f05f164b..5e8f02900 100644 --- a/src/util/result2msa.cpp +++ b/src/util/result2msa.cpp @@ -1,7 +1,7 @@ #include "MsaFilter.h" #include "Parameters.h" #include "PSSMCalculator.h" -#include "IndexReader.h" +#include "DBReader.h" #include "DBWriter.h" #include "Debug.h" #include "Util.h" @@ -25,46 +25,37 @@ int result2msa(int argc, const char **argv, const Command &command) { const bool isCA3M = par.msaFormatMode == Parameters::FORMAT_MSA_CA3M || par.msaFormatMode == Parameters::FORMAT_MSA_CA3M_CONSENSUS; const bool shouldWriteNullByte = par.msaFormatMode != Parameters::FORMAT_MSA_STOCKHOLM_FLAT; - DBReader *tDbr = NULL; - IndexReader *tDbrIdx = NULL; - DBReader *targetHeaderReader = NULL; - IndexReader *targetHeaderReaderIdx = NULL; - - int targetDbtype = FileUtil::parseDbType(par.db2.c_str()); - if (isCA3M == false && Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) { - bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); - tDbrIdx = new IndexReader(par.db2, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); - tDbr = tDbrIdx->sequenceReader; - targetHeaderReaderIdx = new IndexReader(par.db2, par.threads, IndexReader::HEADERS, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); - targetHeaderReader = targetHeaderReaderIdx->sequenceReader; - } else { - tDbr = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - tDbr->open(DBReader::NOSORT); - if (isCA3M == false) { - targetHeaderReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - targetHeaderReader->open(DBReader::NOSORT); - } + DBReader qDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qDbr.open(DBReader::NOSORT); + DBReader queryHeaderReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + queryHeaderReader.open(DBReader::NOSORT); + if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { + qDbr.readMmapedDataInMemory(); + queryHeaderReader.readMmapedDataInMemory(); } - DBReader *qDbr = NULL; - DBReader *queryHeaderReader = NULL; + DBReader *tDbr = &qDbr; + DBReader *targetHeaderReader = &queryHeaderReader; + unsigned int maxSequenceLength = qDbr.getMaxSeqLen(); + const bool sameDatabase = (par.db1.compare(par.db2) == 0) ? true : false; if (!sameDatabase) { - qDbr = new DBReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - qDbr->open(DBReader::NOSORT); + tDbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tDbr->open(DBReader::NOSORT); + maxSequenceLength = std::max(qDbr.getMaxSeqLen(), tDbr->getMaxSeqLen()); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { - qDbr->readMmapedDataInMemory(); + tDbr->readMmapedDataInMemory(); } - queryHeaderReader = new DBReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - queryHeaderReader->open(DBReader::NOSORT); - if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { - queryHeaderReader->readMmapedDataInMemory(); + + if (isCA3M == false) { + targetHeaderReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + targetHeaderReader->open(DBReader::NOSORT); + + if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { + targetHeaderReader->readMmapedDataInMemory(); + } } - } else { - qDbr = tDbr; - queryHeaderReader = targetHeaderReader; } - const unsigned int maxSequenceLength = std::max(tDbr->getMaxSeqLen(), qDbr->getMaxSeqLen()); DBConcat *seqConcat = NULL; DBReader *refReader = NULL; @@ -124,15 +115,15 @@ int result2msa(int argc, const char **argv, const Command &command) { // adjust score of each match state by -0.2 to trim alignment SubstitutionMatrix subMat(par.scoringMatrixFile.aminoacids, 2.0f, -0.2f); EvalueComputation evalueComputation(tDbr->getAminoAcidDBSize(), &subMat, par.gapOpen.aminoacids, par.gapExtend.aminoacids); - if (qDbr->getDbtype() == -1 || tDbr->getDbtype() == -1) { + if (qDbr.getDbtype() == -1 || tDbr->getDbtype() == -1) { Debug(Debug::ERROR) << "Please recreate your database or add a .dbtype file to your sequence/profile database\n"; return EXIT_FAILURE; } - if (Parameters::isEqualDbtype(qDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(tDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE)) { + if (Parameters::isEqualDbtype(qDbr.getDbtype(), Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(tDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE)) { Debug(Debug::ERROR) << "Only the query OR the target database can be a profile database\n"; return EXIT_FAILURE; } - Debug(Debug::INFO) << "Query database size: " << qDbr->getSize() << " type: " << qDbr->getDbTypeName() << "\n"; + Debug(Debug::INFO) << "Query database size: " << qDbr.getSize() << " type: " << qDbr.getDbTypeName() << "\n"; Debug(Debug::INFO) << "Target database size: " << tDbr->getSize() << " type: " << tDbr->getDbTypeName() << "\n"; const bool isFiltering = par.filterMsa != 0; @@ -144,12 +135,12 @@ int result2msa(int argc, const char **argv, const Command &command) { thread_idx = (unsigned int) omp_get_thread_num(); #endif - Matcher matcher(qDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.gapOpen.aminoacids, par.gapExtend.aminoacids); + Matcher matcher(qDbr.getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.gapOpen.aminoacids, par.gapExtend.aminoacids); MultipleAlignment aligner(maxSequenceLength, &subMat); PSSMCalculator calculator(&subMat, maxSequenceLength, maxSetSize, par.pca, par.pcb); MsaFilter filter(maxSequenceLength, maxSetSize, &subMat, par.gapOpen.aminoacids, par.gapExtend.aminoacids); UniprotHeaderSummarizer summarizer; - Sequence centerSequence(maxSequenceLength, qDbr->getDbtype(), &subMat, 0, false, par.compBiasCorrection); + Sequence centerSequence(maxSequenceLength, qDbr.getDbtype(), &subMat, 0, false, par.compBiasCorrection); Sequence edgeSequence(maxSequenceLength, tDbr->getDbtype(), &subMat, 0, false, false); // which sequences where kept after filtering @@ -182,12 +173,12 @@ int result2msa(int argc, const char **argv, const Command &command) { progress.updateProgress(); unsigned int queryKey = resultReader.getDbKey(id); - size_t queryId = qDbr->getId(queryKey); + size_t queryId = qDbr.getId(queryKey); if (queryId == UINT_MAX) { Debug(Debug::WARNING) << "Invalid query sequence " << queryKey << "\n"; continue; } - centerSequence.mapSequence(queryId, queryKey, qDbr->getData(queryId, thread_idx), qDbr->getSeqLen(queryId)); + centerSequence.mapSequence(queryId, queryKey, qDbr.getData(queryId, thread_idx), qDbr.getSeqLen(queryId)); // TODO: Do we still need this? // if (centerSequence.L) { @@ -197,13 +188,13 @@ int result2msa(int argc, const char **argv, const Command &command) { // } // } - size_t centerHeaderId = queryHeaderReader->getId(queryKey); + size_t centerHeaderId = queryHeaderReader.getId(queryKey); if (centerHeaderId == UINT_MAX) { Debug(Debug::WARNING) << "Invalid query header " << queryKey << "\n"; continue; } - char *centerSequenceHeader = queryHeaderReader->getData(centerHeaderId, thread_idx); - size_t centerHeaderLength = queryHeaderReader->getEntryLen(centerHeaderId) - 1; + char *centerSequenceHeader = queryHeaderReader.getData(centerHeaderId, thread_idx); + size_t centerHeaderLength = queryHeaderReader.getEntryLen(centerHeaderId) - 1; if (par.msaFormatMode == Parameters::FORMAT_MSA_STOCKHOLM_FLAT) { accession = Util::parseFastaHeader(centerSequenceHeader); @@ -458,23 +449,15 @@ int result2msa(int argc, const char **argv, const Command &command) { FileUtil::remove(resultWriter.getIndexFileName()); } resultReader.close(); - + queryHeaderReader.close(); + qDbr.close(); if (!sameDatabase) { - qDbr->close(); - delete qDbr; - queryHeaderReader->close(); - delete queryHeaderReader; - } - if (tDbrIdx == NULL) { - tDbr->close(); - delete tDbr; - if (targetHeaderReader != NULL) { + if (isCA3M == false) { targetHeaderReader->close(); delete targetHeaderReader; } - } else { - delete tDbrIdx; - delete targetHeaderReaderIdx; + tDbr->close(); + delete tDbr; } if (refReader != NULL) {