Skip to content

Commit

Permalink
Handle _h the same as other DBs in createclusterdb
Browse files (browse the repository at this point in the history)
  • Loading branch information
martin-steinegger committed Aug 1, 2023
1 parent 8310cd6 commit 91f2a6a
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 36 deletions.
3 changes: 3 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2458,6 +2458,9 @@ void Parameters::setDefaults() {
overlap = 0.0f;
msaType = 2;

// createclusterdb
dbSuffixList = "_h";

// summarize header
headerType = Parameters::HEADER_TYPE_UNICLUST;

Expand Down
38 changes: 2 additions & 36 deletions src/util/createclusterdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) {
dbrRep.close();
dbrSeq.close();
}
clusterReader.close();
DBReader<unsigned int>::copyDb(par.db2, par.db3 + "_clu");

struct DBSuffix {
Expand All @@ -118,7 +119,6 @@ int createclusearchdb(int argc, const char **argv, const Command& command) {
};

const DBSuffix suffices[] = {
{DBFiles::HEADER, "_h"},
{DBFiles::LOOKUP, ".lookup"},
{DBFiles::SOURCE, ".source"},
{DBFiles::TAX_MAPPING, "_mapping"},
Expand All @@ -128,41 +128,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) {
{DBFiles::TAX_MERGED, "_taxonomy"},
};

Debug::Progress progress2(clusterReader.getSize());
DBReader<unsigned int> headerreader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads,
DBReader<unsigned int>::USE_DATA | DBReader<unsigned int>::USE_INDEX);
headerreader.open(DBReader<unsigned int>::NOSORT);
headerreader.readMmapedDataInMemory();

DBWriter dbwRep(par.hdr3.c_str(), par.hdr3Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed,
headerreader.getDbtype());
dbwRep.open();
#pragma omp parallel
{
unsigned int thread_idx = 0;
#ifdef OPENMP
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
#endif
#pragma omp for schedule(dynamic, 1)
for (size_t id = 0; id < clusterReader.getSize(); id++) {
progress2.updateProgress();
char *data = clusterReader.getData(id, thread_idx);
while (*data != '\0') {
// parse dbkey
size_t dbKey = Util::fast_atoi<unsigned int>(data);
size_t readerId = headerreader.getId(dbKey);
dbwRep.writeData(headerreader.getData(readerId, thread_idx),
headerreader.getEntryLen(readerId) - 1, dbKey, thread_idx);
data = Util::skipLine(data);
}
}
}
dbwRep.close(true);
headerreader.close();
clusterReader.close();

// dont copy header file
for (size_t i = 1; i < ARRAY_SIZE(suffices); ++i) {
for (size_t i = 0; i < ARRAY_SIZE(suffices); ++i) {
std::string file = par.db1 + suffices[i].suffix;
if (suffices[i].flag && FileUtil::fileExists(file.c_str())) {
DBReader<unsigned int>::copyDb(file, par.db3 + suffices[i].suffix);
Expand Down

0 comments on commit 91f2a6a

Please sign in to comment.