Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Cleanup in many different places
  • Loading branch information
milot-mirdita committed Dec 15, 2016
1 parent 317cf84 commit 707d9f2
Show file tree
Hide file tree
Showing 24 changed files with 580 additions and 518 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -31,3 +31,4 @@ src/workflow/time_test

build/
.idea/
cmake-build-*/
12 changes: 6 additions & 6 deletions src/alignment/BlastScoreUtils.cpp
Expand Up @@ -555,27 +555,27 @@ BlastScoreUtils::BlastStat BlastScoreUtils::getAltschulStatsForMatrix(std::strin
double (*mat)[10];
long val;

if (matrix.compare("blosum45") == 0) {
if (matrix.compare("blosum45.out") == 0) {
mat = blosum45_values;
val = BLOSUM45_VALUES_MAX;
}
else if (matrix.compare("blosum50") == 0) {
else if (matrix.compare("blosum50.out") == 0) {
mat = blosum50_values;
val = BLOSUM50_VALUES_MAX;
}
else if (matrix.compare("blosum62") == 0) {
else if (matrix.compare("blosum62.out") == 0) {
mat = blosum62_values;
val = BLOSUM62_VALUES_MAX;
}
else if (matrix.compare("blosum62_20") == 0) {
else if (matrix.compare("blosum62_20.out") == 0) {
mat = blosum62_20_values;
val = BLOSUM62_20_VALUES_MAX;
}
else if (matrix.compare("blosum80") == 0) {
else if (matrix.compare("blosum80.out") == 0) {
mat = blosum80_values;
val = BLOSUM80_VALUES_MAX;
}
else if (matrix.compare("blosum90") == 0) {
else if (matrix.compare("blosum90.out") == 0) {
mat = blosum90_values;
val = BLOSUM90_VALUES_MAX;
}
Expand Down
4 changes: 2 additions & 2 deletions src/commons/DBReader.h
Expand Up @@ -17,7 +17,7 @@ class DBReader {
// One index record: a key of type T plus the position of that key's entry.
struct Index {
T id;
// NOTE(review): presumably the byte offset of the entry inside the data file - confirm against the reader code.
size_t offset;
// Comparison callback that looks at the `id` field only; returns true when x.id <= y.id.
// NOTE(review): `<=` is not a strict weak ordering (it returns true for equal ids), which
// std::sort requires. Callers that pass this to a binary search may depend on the `<=`
// behaviour, so check every call site before changing it to `<`.
static bool compareById(Index x, Index y){
static bool compareById(const Index& x, const Index& y){
return (x.id <= y.id);
}
};
Expand Down Expand Up @@ -158,7 +158,7 @@ class DBReader {
bool dataMapped;
int accessType;

// needed to avoid compiler to optimize away the loop
// needed to prevent the compiler from optimizing away the loop
size_t magicBytes;
};

Expand Down
40 changes: 25 additions & 15 deletions src/commons/DBWriter.cpp
Expand Up @@ -101,20 +101,21 @@ void DBWriter::mergeFiles(DBReader<unsigned int> &qdbr,
}

for (size_t id = 0; id < qdbr.getSize(); id++) {
unsigned int key = qdbr.getDbKey(id);
std::ostringstream ss;
// get all data for the id from all files
for (size_t i = 0; i < fileCount; i++) {
char *data = filesToMerge[i]->getDataByDBKey(qdbr.getDbKey(id));
const char *data = filesToMerge[i]->getDataByDBKey(key);
if (data != NULL) {
if(i < prefixes.size()) {
ss << prefixes[i];
}
ss << filesToMerge[i]->getDataByDBKey(qdbr.getDbKey(id));
ss << data;
}
}
// write result
std::string result = ss.str();
writeData(result.c_str(), result.length(), SSTR(qdbr.getDbKey(id)).c_str(), 0);
writeData(result.c_str(), result.length(), SSTR(key).c_str(), 0);
}

// close all reader
Expand Down Expand Up @@ -288,20 +289,27 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
const char *inData2, const char *inIndex2) {
FILE *file1 = fopen(inData1, "r");
FILE *file2 = fopen(inData2, "r");

if (file1 == NULL || file2 == NULL) {
Debug(Debug::ERROR) << "Could not read merge input files!\n";
EXIT(EXIT_FAILURE);
}

#if HAVE_POSIX_FADVISE
    // Tell the kernel that both inputs are read strictly front to back. The call is
    // advisory only, so a failure is logged and the merge carries on.
    // posix_fadvise reports failure through its return value (an error number), not
    // through errno, which is why the return value is what gets passed to strerror.
    // The status is stored in a separate declaration: a declaration may not be wrapped
    // in parentheses inside an if-condition, so `if ((int status = ...) != 0)` does not compile.
    const int adviseStatus1 = posix_fadvise(fileno(file1), 0, 0, POSIX_FADV_SEQUENTIAL);
    if (adviseStatus1 != 0) {
        Debug(Debug::ERROR) << "posix_fadvise returned an error: " << strerror(adviseStatus1) << "\n";
    }
    const int adviseStatus2 = posix_fadvise(fileno(file2), 0, 0, POSIX_FADV_SEQUENTIAL);
    if (adviseStatus2 != 0) {
        Debug(Debug::ERROR) << "posix_fadvise returned an error: " << strerror(adviseStatus2) << "\n";
    }
#endif

int c1, c2;
char * buffer = dataFilesBuffer[0];
size_t writePos = 0;
int dataFilefd = fileno(dataFiles[0]);
while((c1=getc_unlocked(file1)) != EOF) {
if(c1 == '\0'){
while ((c1=getc_unlocked(file1)) != EOF) {
if (c1 == '\0'){
while((c2=getc_unlocked(file2)) != EOF && c2 != '\0') {
buffer[writePos] = (char) c2;
writePos++;
Expand All @@ -312,11 +320,11 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
}
buffer[writePos] = '\0';
writePos++;
if(writePos == bufferSize){
if (writePos == bufferSize){
write(dataFilefd, buffer, bufferSize);
writePos = 0;
}
}else{
} else {
buffer[writePos] = (char) c1;;
writePos++;
if(writePos == bufferSize){
Expand All @@ -325,12 +333,13 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
}
}
}
if(writePos != 0){ // if there are data in the buffer that are not yet written

if(writePos != 0) { // if there are data in the buffer that are not yet written
write(dataFilefd, (const void *) dataFilesBuffer[0], writePos);
}

fclose(file1);
fclose(file2);
fclose(file1);

Debug(Debug::WARNING) << "Merge file " << inData1 << " and " << inData2 << "\n";
DBReader<unsigned int> reader1(inIndex1, inIndex1,
DBReader<std::string>::USE_INDEX);
Expand All @@ -343,12 +352,13 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
unsigned int * seqLen1 = reader1.getSeqLens();
unsigned int * seqLen2 = reader2.getSeqLens();
for(size_t id = 0; id < reader1.getSize(); id++){
// add lenght for file1 and file2 and substrace -1 for one null byte
// add the lengths from file1 and file2 and subtract 1 for one null byte
size_t seqLen = seqLen1[id] + seqLen2[id] - 1;
seqLen1[id] = seqLen;
index1[id].offset = currOffset;
currOffset += seqLen;
}

writeIndex(indexFiles[0], reader1.getSize(), index1, seqLen1);
reader2.close();
reader1.close();
Expand Down
9 changes: 6 additions & 3 deletions src/commons/HeaderSummarizer.cpp
Expand Up @@ -6,8 +6,6 @@
#include <set>
#include <algorithm>

static PatternCompiler uninformative("hypothetical|unknown|putative|predicted|unnamed|probable|partial|possible|uncharacterized|fragment");

struct UniprotHeader {
std::string dbType;
std::string identifier;
Expand All @@ -29,10 +27,15 @@ struct UniprotHeader {
updatePriority();
};

// Returns the PatternCompiler built from the list of "uninformative" header keywords.
// updatePriority() calls isMatch() on it and leaves the priority at 0 when the
// identifier matches.
PatternCompiler& isUninformative() {
    // Function-local static: constructed the first time this function runs and
    // reused on every later call, instead of being a file-level global.
    static PatternCompiler keywordPattern(
        "hypothetical|unknown|putative|predicted|unnamed|probable|partial|possible|uncharacterized|fragment");
    return keywordPattern;
}

void updatePriority() {
priority = 0;

if(uninformative.isMatch(identifier.c_str()))
if(isUninformative().isMatch(identifier.c_str()))
return;

if(dbType == "sp") {
Expand Down
21 changes: 7 additions & 14 deletions src/commons/Parameters.cpp
Expand Up @@ -80,7 +80,7 @@ PARAM_PCA(PARAM_PCA_ID, "--pca", "Pseudo count a", "pseudo count admixture stren
PARAM_PCB(PARAM_PCB_ID, "--pcb", "Pseudo count b", "pseudo counts: Neff at half of maximum admixture (0.0,infinity)", typeid(float), (void*) &pcb, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
//PARAM_FIRST_SEQ_REP_SEQ(PARAM_FIRST_SEQ_REP_SEQ_ID, "--first-seq-as-repr", "first sequence as respresentative", "Use the first sequence of the clustering result as representative sequence", typeid(bool), (void*) &firstSeqRepr, "", MMseqsParameter::COMMAND_PROFILE),
// result2stats
PARAM_STAT(PARAM_STAT_ID, "--stat", "Statistics to be computed", "can be one of: linecount, mean, doolittle, charges, seqlen.", typeid(std::string), (void*) &stat, ""),
PARAM_STAT(PARAM_STAT_ID, "--stat", "Statistics to be computed", "can be one of: linecount, mean, doolittle, charges, seqlen, firstline.", typeid(std::string), (void*) &stat, ""),
// linearcluster
PARAM_KMER_PER_SEQ(PARAM_KMER_PER_SEQ_ID, "--kmer-per-seq", "Kmer per sequence", "kmer per sequence", typeid(int), (void*) &kmersPerSequence, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR),
// workflow
Expand Down Expand Up @@ -134,8 +134,7 @@ PARAM_MSA_TYPE(PARAM_MSA_TYPE_ID,"--msa-type", "MSA type", "MSA Type: cA3M 0 or
// extractalignedregion
PARAM_EXTRACT_MODE(PARAM_EXTRACT_MODE_ID,"--extract-mode", "Extract mode", "Query 1, Target 2", typeid(int), (void *) &extractMode, "^[1-2]{1}$"),
// convertkb
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB Columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, ""),
PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "character to count", typeid(std::string), (void *) &countCharacter, "")
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB Columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, "")
{
// alignment
Expand Down Expand Up @@ -224,6 +223,8 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
//result2stats
result2stats.push_back(PARAM_STAT);
result2stats.push_back(PARAM_THREADS);
result2stats.push_back(PARAM_V);
// format alignment
convertalignments.push_back(PARAM_FORMAT_MODE);
Expand Down Expand Up @@ -375,9 +376,6 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
linearfilter.push_back(PARAM_MAX_SEQ_LEN);
linearfilter.push_back(PARAM_THREADS);
linearfilter.push_back(PARAM_V);
// result2newick
result2newick.push_back(PARAM_THREADS);
result2newick.push_back(PARAM_V);
// mergedbs
mergedbs.push_back(PARAM_MERGE_PREFIXES);
Expand Down Expand Up @@ -420,11 +418,6 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
extractalignedregion.push_back(PARAM_THREADS);
extractalignedregion.push_back(PARAM_V);
// count
count.push_back(PARAM_COUNT_CHARACTER);
count.push_back(PARAM_THREADS);
count.push_back(PARAM_V);
// convertkb
convertkb.push_back(PARAM_KB_COLUMNS);
convertkb.push_back(PARAM_V);
Expand Down Expand Up @@ -937,9 +930,9 @@ void Parameters::setDefaults() {

// linearcluster
kmersPerSequence = 20;
// count
countCharacter = "\n";

// result2stats
stat = "";
}

std::vector<MMseqsParameter> Parameters::combineList(std::vector<MMseqsParameter> &par1,
Expand Down
10 changes: 1 addition & 9 deletions src/commons/Parameters.h
Expand Up @@ -253,9 +253,6 @@ class Parameters // Parameters for gap penalties and pseudocounts

// convertkb
std::string kbColumns;

//count
std::string countCharacter;

// concatdbs
bool preserveKeysB;
Expand Down Expand Up @@ -361,7 +358,7 @@ class Parameters // Parameters for gap penalties and pseudocounts
//PARAMETER(PARAM_FIRST_SEQ_REP_SEQ)
// PARAMETER(PARAM_NO_PRUNING)

// result2stat
// result2stats
PARAMETER(PARAM_STAT)

// linearcluster
Expand Down Expand Up @@ -439,9 +436,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
// convertkb
PARAMETER(PARAM_KB_COLUMNS)

// count
PARAMETER(PARAM_COUNT_CHARACTER)

std::vector<MMseqsParameter> empty;
std::vector<MMseqsParameter> rescorediagonal;
std::vector<MMseqsParameter> onlyverbosity;
Expand Down Expand Up @@ -470,7 +464,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
std::vector<MMseqsParameter> filterDb;
std::vector<MMseqsParameter> swapresults;
std::vector<MMseqsParameter> subtractdbs;
std::vector<MMseqsParameter> result2newick;
std::vector<MMseqsParameter> diff;
std::vector<MMseqsParameter> dbconcat;
std::vector<MMseqsParameter> mergedbs;
Expand All @@ -480,7 +473,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
std::vector<MMseqsParameter> summarizetabs;
std::vector<MMseqsParameter> extractdomains;
std::vector<MMseqsParameter> extractalignedregion;
std::vector<MMseqsParameter> count;
std::vector<MMseqsParameter> convertkb;

std::vector<MMseqsParameter> combineList(std::vector<MMseqsParameter> &par1,
Expand Down
Empty file added src/commons/ScoreMatrix.cpp
Empty file.
6 changes: 2 additions & 4 deletions src/commons/SubstitutionMatrix.cpp
Expand Up @@ -17,7 +17,6 @@ SubstitutionMatrix::SubstitutionMatrix(const char *scoringMatrixFileName_,
// read amino acid substitution matrix from file
std::string fileName(scoringMatrixFileName);
matrixName = Util::base_name(fileName, "/\\");
matrixName = Util::remove_extension(matrixName);
if (fileName.substr(fileName.length() - 4, 4).compare(".out") == 0){
std::ifstream in(fileName);
if (in.fail()) {
Expand All @@ -28,13 +27,12 @@ SubstitutionMatrix::SubstitutionMatrix(const char *scoringMatrixFileName_,
std::istreambuf_iterator<char>());
readProbMatrix(str);
in.close();
}
else {
} else {
Debug(Debug::ERROR) << "Invalid format of the substitution matrix input file! Only .out files are accepted.\n";
EXIT(EXIT_FAILURE);
}
}else{
matrixName = "blosum62";
matrixName = "blosum62.out";
std::string submat((const char*)blosum62_out,blosum62_out_len);
readProbMatrix(submat);
}
Expand Down
4 changes: 4 additions & 0 deletions src/commons/Util.cpp
Expand Up @@ -16,6 +16,10 @@

KSEQ_INIT(int, read)

// Pass-through overload for std::string arguments: hands back the very same string
// by reference, without copying or converting it. It sits next to std::to_string in
// the tostringidentity namespace so that the SSTR macro also accepts std::string values.
const std::string& tostringidentity::to_string(const std::string& s) {
return s;
}

size_t Util::countLines(const char *data, size_t length) {
size_t newlines = 0;
for (size_t i = 0; i < length; i++ ) {
Expand Down
12 changes: 10 additions & 2 deletions src/commons/Util.h
Expand Up @@ -16,12 +16,20 @@
#endif

#define CHECK_BIT(var,pos) ((var) & (1<<(pos)))

// SSTR(x): turn x into a std::string.
//  - When __cplusplus <= 199711L, x is streamed (in decimal) through a temporary
//    std::ostringstream.
//  - Otherwise the call goes to tostringidentity::to_string, i.e. std::to_string
//    (pulled in with a using-declaration) plus an extra overload that takes a
//    std::string; that overload is only declared here.
// The TOSTRINGIDENTITY guard keeps the namespace from being declared twice.
#if __cplusplus <= 199711L
#define SSTR( x ) \
dynamic_cast< std::ostringstream& >( \
( std::ostringstream().flush() << std::dec << x ) ).str()
( std::ostringstream().flush() << std::dec << (x) ).str()
#else
#define SSTR( x ) std::to_string(x)
#ifndef TOSTRINGIDENTITY
#define TOSTRINGIDENTITY
namespace tostringidentity {
using std::to_string;
const std::string& to_string(const std::string& s);
}
#endif
#define SSTR(x) tostringidentity::to_string((x))
#endif

#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
Expand Down
10 changes: 5 additions & 5 deletions src/prefiltering/IndexTable.h
Expand Up @@ -140,16 +140,16 @@ class IndexTable {
}

// init index table with external data (needed for index readin)
void initTableByExternalData(size_t sequenzeCount, size_t tableEntriesNum,
void initTableByExternalData(size_t sequenceCount, size_t tableEntriesNum,
char * entries, size_t * entriesSize, SequenceLookup * lookup) {
this->tableEntriesNum = tableEntriesNum;
this->size = sequenzeCount;
// initMemory(sequenzeCount, tableEntriesNum, seqDataSize);
this->size = sequenceCount;
// initMemory(sequenceCount, tableEntriesNum, seqDataSize);
if(lookup != NULL){
sequenceLookup = lookup;
}
this->entries = entries;
Debug(Debug::WARNING) << "Cache database \n";
Debug(Debug::INFO) << "Cache database \n";
char* it = this->entries;
// set the pointers in the index table to the start of the list for a certain k-mer
magicByte = 0; // read each entry to keep them in memory
Expand All @@ -161,7 +161,7 @@ class IndexTable {
}
table[tableSize] = it;
externalData = true;
Debug(Debug::WARNING) << "Read IndexTable ... Done\n";
Debug(Debug::INFO) << "Read IndexTable ... Done\n";
}

void revertPointer(){
Expand Down

0 comments on commit 707d9f2

Please sign in to comment.