Skip to content

Commit

Permalink
Revert "result2msa now supports reading from index"
Browse files Browse the repository at this point in the history
This reverts commit 15fdf48.
  • Loading branch information
milot-mirdita committed Jul 26, 2021
1 parent 7ee3e79 commit ad5837b
Showing 1 changed file with 39 additions and 56 deletions.
95 changes: 39 additions & 56 deletions src/util/result2msa.cpp
@@ -1,7 +1,7 @@
#include "MsaFilter.h"
#include "Parameters.h"
#include "PSSMCalculator.h"
#include "IndexReader.h"
#include "DBReader.h"
#include "DBWriter.h"
#include "Debug.h"
#include "Util.h"
Expand All @@ -25,46 +25,37 @@ int result2msa(int argc, const char **argv, const Command &command) {
const bool isCA3M = par.msaFormatMode == Parameters::FORMAT_MSA_CA3M || par.msaFormatMode == Parameters::FORMAT_MSA_CA3M_CONSENSUS;
const bool shouldWriteNullByte = par.msaFormatMode != Parameters::FORMAT_MSA_STOCKHOLM_FLAT;

DBReader<unsigned int> *tDbr = NULL;
IndexReader *tDbrIdx = NULL;
DBReader<unsigned int> *targetHeaderReader = NULL;
IndexReader *targetHeaderReaderIdx = NULL;

int targetDbtype = FileUtil::parseDbType(par.db2.c_str());
if (isCA3M == false && Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) {
bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP);
tDbrIdx = new IndexReader(par.db2, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0);
tDbr = tDbrIdx->sequenceReader;
targetHeaderReaderIdx = new IndexReader(par.db2, par.threads, IndexReader::HEADERS, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0);
targetHeaderReader = targetHeaderReaderIdx->sequenceReader;
} else {
tDbr = new DBReader<unsigned int>(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
tDbr->open(DBReader<unsigned int>::NOSORT);
if (isCA3M == false) {
targetHeaderReader = new DBReader<unsigned int>(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
targetHeaderReader->open(DBReader<unsigned int>::NOSORT);
}
DBReader<unsigned int> qDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
qDbr.open(DBReader<unsigned int>::NOSORT);
DBReader<unsigned int> queryHeaderReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
queryHeaderReader.open(DBReader<unsigned int>::NOSORT);
if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) {
qDbr.readMmapedDataInMemory();
queryHeaderReader.readMmapedDataInMemory();
}

DBReader<unsigned int> *qDbr = NULL;
DBReader<unsigned int> *queryHeaderReader = NULL;
DBReader<unsigned int> *tDbr = &qDbr;
DBReader<unsigned int> *targetHeaderReader = &queryHeaderReader;
unsigned int maxSequenceLength = qDbr.getMaxSeqLen();

const bool sameDatabase = (par.db1.compare(par.db2) == 0) ? true : false;
if (!sameDatabase) {
qDbr = new DBReader<unsigned int>(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
qDbr->open(DBReader<unsigned int>::NOSORT);
tDbr = new DBReader<unsigned int>(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
tDbr->open(DBReader<unsigned int>::NOSORT);
maxSequenceLength = std::max(qDbr.getMaxSeqLen(), tDbr->getMaxSeqLen());
if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) {
qDbr->readMmapedDataInMemory();
tDbr->readMmapedDataInMemory();
}
queryHeaderReader = new DBReader<unsigned int>(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
queryHeaderReader->open(DBReader<unsigned int>::NOSORT);
if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) {
queryHeaderReader->readMmapedDataInMemory();

if (isCA3M == false) {
targetHeaderReader = new DBReader<unsigned int>(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
targetHeaderReader->open(DBReader<unsigned int>::NOSORT);

if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) {
targetHeaderReader->readMmapedDataInMemory();
}
}
} else {
qDbr = tDbr;
queryHeaderReader = targetHeaderReader;
}
const unsigned int maxSequenceLength = std::max(tDbr->getMaxSeqLen(), qDbr->getMaxSeqLen());

DBConcat *seqConcat = NULL;
DBReader<unsigned int> *refReader = NULL;
Expand Down Expand Up @@ -124,15 +115,15 @@ int result2msa(int argc, const char **argv, const Command &command) {
// adjust score of each match state by -0.2 to trim alignment
SubstitutionMatrix subMat(par.scoringMatrixFile.aminoacids, 2.0f, -0.2f);
EvalueComputation evalueComputation(tDbr->getAminoAcidDBSize(), &subMat, par.gapOpen.aminoacids, par.gapExtend.aminoacids);
if (qDbr->getDbtype() == -1 || tDbr->getDbtype() == -1) {
if (qDbr.getDbtype() == -1 || tDbr->getDbtype() == -1) {
Debug(Debug::ERROR) << "Please recreate your database or add a .dbtype file to your sequence/profile database\n";
return EXIT_FAILURE;
}
if (Parameters::isEqualDbtype(qDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(tDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE)) {
if (Parameters::isEqualDbtype(qDbr.getDbtype(), Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(tDbr->getDbtype(), Parameters::DBTYPE_HMM_PROFILE)) {
Debug(Debug::ERROR) << "Only the query OR the target database can be a profile database\n";
return EXIT_FAILURE;
}
Debug(Debug::INFO) << "Query database size: " << qDbr->getSize() << " type: " << qDbr->getDbTypeName() << "\n";
Debug(Debug::INFO) << "Query database size: " << qDbr.getSize() << " type: " << qDbr.getDbTypeName() << "\n";
Debug(Debug::INFO) << "Target database size: " << tDbr->getSize() << " type: " << tDbr->getDbTypeName() << "\n";

const bool isFiltering = par.filterMsa != 0;
Expand All @@ -144,12 +135,12 @@ int result2msa(int argc, const char **argv, const Command &command) {
thread_idx = (unsigned int) omp_get_thread_num();
#endif

Matcher matcher(qDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.gapOpen.aminoacids, par.gapExtend.aminoacids);
Matcher matcher(qDbr.getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.gapOpen.aminoacids, par.gapExtend.aminoacids);
MultipleAlignment aligner(maxSequenceLength, &subMat);
PSSMCalculator calculator(&subMat, maxSequenceLength, maxSetSize, par.pca, par.pcb);
MsaFilter filter(maxSequenceLength, maxSetSize, &subMat, par.gapOpen.aminoacids, par.gapExtend.aminoacids);
UniprotHeaderSummarizer summarizer;
Sequence centerSequence(maxSequenceLength, qDbr->getDbtype(), &subMat, 0, false, par.compBiasCorrection);
Sequence centerSequence(maxSequenceLength, qDbr.getDbtype(), &subMat, 0, false, par.compBiasCorrection);
Sequence edgeSequence(maxSequenceLength, tDbr->getDbtype(), &subMat, 0, false, false);

// which sequences where kept after filtering
Expand Down Expand Up @@ -182,12 +173,12 @@ int result2msa(int argc, const char **argv, const Command &command) {
progress.updateProgress();

unsigned int queryKey = resultReader.getDbKey(id);
size_t queryId = qDbr->getId(queryKey);
size_t queryId = qDbr.getId(queryKey);
if (queryId == UINT_MAX) {
Debug(Debug::WARNING) << "Invalid query sequence " << queryKey << "\n";
continue;
}
centerSequence.mapSequence(queryId, queryKey, qDbr->getData(queryId, thread_idx), qDbr->getSeqLen(queryId));
centerSequence.mapSequence(queryId, queryKey, qDbr.getData(queryId, thread_idx), qDbr.getSeqLen(queryId));

// TODO: Do we still need this?
// if (centerSequence.L) {
Expand All @@ -197,13 +188,13 @@ int result2msa(int argc, const char **argv, const Command &command) {
// }
// }

size_t centerHeaderId = queryHeaderReader->getId(queryKey);
size_t centerHeaderId = queryHeaderReader.getId(queryKey);
if (centerHeaderId == UINT_MAX) {
Debug(Debug::WARNING) << "Invalid query header " << queryKey << "\n";
continue;
}
char *centerSequenceHeader = queryHeaderReader->getData(centerHeaderId, thread_idx);
size_t centerHeaderLength = queryHeaderReader->getEntryLen(centerHeaderId) - 1;
char *centerSequenceHeader = queryHeaderReader.getData(centerHeaderId, thread_idx);
size_t centerHeaderLength = queryHeaderReader.getEntryLen(centerHeaderId) - 1;

if (par.msaFormatMode == Parameters::FORMAT_MSA_STOCKHOLM_FLAT) {
accession = Util::parseFastaHeader(centerSequenceHeader);
Expand Down Expand Up @@ -458,23 +449,15 @@ int result2msa(int argc, const char **argv, const Command &command) {
FileUtil::remove(resultWriter.getIndexFileName());
}
resultReader.close();

queryHeaderReader.close();
qDbr.close();
if (!sameDatabase) {
qDbr->close();
delete qDbr;
queryHeaderReader->close();
delete queryHeaderReader;
}
if (tDbrIdx == NULL) {
tDbr->close();
delete tDbr;
if (targetHeaderReader != NULL) {
if (isCA3M == false) {
targetHeaderReader->close();
delete targetHeaderReader;
}
} else {
delete tDbrIdx;
delete targetHeaderReaderIdx;
tDbr->close();
delete tDbr;
}

if (refReader != NULL) {
Expand Down

0 comments on commit ad5837b

Please sign in to comment.