From 91f2a6ac26003f83999b2d6549e218c8aa25e2b1 Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Tue, 1 Aug 2023 15:14:13 +0200 Subject: [PATCH] Handle _h the same as other DBs in createclusterdb --- src/commons/Parameters.cpp | 3 +++ src/util/createclusterdb.cpp | 38 ++---------------------------------- 2 files changed, 5 insertions(+), 36 deletions(-) diff --git a/src/commons/Parameters.cpp b/src/commons/Parameters.cpp index 30123626a..0acab122a 100644 --- a/src/commons/Parameters.cpp +++ b/src/commons/Parameters.cpp @@ -2458,6 +2458,9 @@ void Parameters::setDefaults() { overlap = 0.0f; msaType = 2; + // createclusterdb + dbSuffixList = "_h"; + // summarize header headerType = Parameters::HEADER_TYPE_UNICLUST; diff --git a/src/util/createclusterdb.cpp b/src/util/createclusterdb.cpp index fc806625b..3a8bdb60f 100644 --- a/src/util/createclusterdb.cpp +++ b/src/util/createclusterdb.cpp @@ -110,6 +110,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { dbrRep.close(); dbrSeq.close(); } + clusterReader.close(); DBReader::copyDb(par.db2, par.db3 + "_clu"); struct DBSuffix { @@ -118,7 +119,6 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { }; const DBSuffix suffices[] = { - {DBFiles::HEADER, "_h"}, {DBFiles::LOOKUP, ".lookup"}, {DBFiles::SOURCE, ".source"}, {DBFiles::TAX_MAPPING, "_mapping"}, @@ -128,41 +128,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { {DBFiles::TAX_MERGED, "_taxonomy"}, }; - Debug::Progress progress2(clusterReader.getSize()); - DBReader headerreader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, - DBReader::USE_DATA | DBReader::USE_INDEX); - headerreader.open(DBReader::NOSORT); - headerreader.readMmapedDataInMemory(); - - DBWriter dbwRep(par.hdr3.c_str(), par.hdr3Index.c_str(), static_cast(par.threads), par.compressed, - headerreader.getDbtype()); - dbwRep.open(); -#pragma omp parallel - { - unsigned int thread_idx = 0; -#ifdef OPENMP - thread_idx = static_cast(omp_get_thread_num()); -#endif -#pragma omp for schedule(dynamic, 1) - for (size_t id = 0; id < clusterReader.getSize(); id++) { - progress2.updateProgress(); - char *data = clusterReader.getData(id, thread_idx); - while (*data != '\0') { - // parse dbkey - size_t dbKey = Util::fast_atoi(data); - size_t readerId = headerreader.getId(dbKey); - dbwRep.writeData(headerreader.getData(readerId, thread_idx), - headerreader.getEntryLen(readerId) - 1, dbKey, thread_idx); - data = Util::skipLine(data); - } - } - } - dbwRep.close(true); - headerreader.close(); - clusterReader.close(); - - // dont copy header file - for (size_t i = 1; i < ARRAY_SIZE(suffices); ++i) { + for (size_t i = 0; i < ARRAY_SIZE(suffices); ++i) { std::string file = par.db1 + suffices[i].suffix; if (suffices[i].flag && FileUtil::fileExists(file.c_str())) { DBReader::copyDb(file, par.db3 + suffices[i].suffix);