Permalink
Browse files

Cleanup in many different places

  • Loading branch information...
milot-mirdita committed Dec 15, 2016
1 parent 317cf84 commit 707d9f2695b830c182b409eb894f1d0f3d67cae8
View
@@ -31,3 +31,4 @@ src/workflow/time_test
build/
.idea/
cmake-build-*/
@@ -555,27 +555,27 @@ BlastScoreUtils::BlastStat BlastScoreUtils::getAltschulStatsForMatrix(std::strin
double (*mat)[10];
long val;
if (matrix.compare("blosum45") == 0) {
if (matrix.compare("blosum45.out") == 0) {
mat = blosum45_values;
val = BLOSUM45_VALUES_MAX;
}
else if (matrix.compare("blosum50") == 0) {
else if (matrix.compare("blosum50.out") == 0) {
mat = blosum50_values;
val = BLOSUM50_VALUES_MAX;
}
else if (matrix.compare("blosum62") == 0) {
else if (matrix.compare("blosum62.out") == 0) {
mat = blosum62_values;
val = BLOSUM62_VALUES_MAX;
}
else if (matrix.compare("blosum62_20") == 0) {
else if (matrix.compare("blosum62_20.out") == 0) {
mat = blosum62_20_values;
val = BLOSUM62_20_VALUES_MAX;
}
else if (matrix.compare("blosum80") == 0) {
else if (matrix.compare("blosum80.out") == 0) {
mat = blosum80_values;
val = BLOSUM80_VALUES_MAX;
}
else if (matrix.compare("blosum90") == 0) {
else if (matrix.compare("blosum90.out") == 0) {
mat = blosum90_values;
val = BLOSUM90_VALUES_MAX;
}
View
@@ -17,7 +17,7 @@ class DBReader {
struct Index {
T id;
size_t offset;
static bool compareById(Index x, Index y){
static bool compareById(const Index& x, const Index& y){
return (x.id <= y.id);
}
};
@@ -158,7 +158,7 @@ class DBReader {
bool dataMapped;
int accessType;
// needed to avoid compiler to optimize away the loop
// needed to prevent the compiler from optimizing away the loop
size_t magicBytes;
};
View
@@ -101,20 +101,21 @@ void DBWriter::mergeFiles(DBReader<unsigned int> &qdbr,
}
for (size_t id = 0; id < qdbr.getSize(); id++) {
unsigned int key = qdbr.getDbKey(id);
std::ostringstream ss;
// get all data for the id from all files
for (size_t i = 0; i < fileCount; i++) {
char *data = filesToMerge[i]->getDataByDBKey(qdbr.getDbKey(id));
const char *data = filesToMerge[i]->getDataByDBKey(key);
if (data != NULL) {
if(i < prefixes.size()) {
ss << prefixes[i];
}
ss << filesToMerge[i]->getDataByDBKey(qdbr.getDbKey(id));
ss << data;
}
}
// write result
std::string result = ss.str();
writeData(result.c_str(), result.length(), SSTR(qdbr.getDbKey(id)).c_str(), 0);
writeData(result.c_str(), result.length(), SSTR(key).c_str(), 0);
}
// close all reader
@@ -288,20 +289,27 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
const char *inData2, const char *inIndex2) {
FILE *file1 = fopen(inData1, "r");
FILE *file2 = fopen(inData2, "r");
if (file1 == NULL || file2 == NULL) {
Debug(Debug::ERROR) << "Could not read merge input files!\n";
EXIT(EXIT_FAILURE);
}
#if HAVE_POSIX_FADVISE
if (posix_fadvise (fileno(file1), 0, 0, POSIX_FADV_SEQUENTIAL) != 0){
Debug(Debug::ERROR) << "posix_fadvise returned an error\n";
// Advise the kernel that file1 will be read sequentially. posix_fadvise
// returns the error number directly (zero on success), so the result is kept
// and handed to strerror. The variable is declared in the condition itself,
// which scopes it to this check; the parenthesized form
// "if ((int status = ...) != 0)" is not valid C++. A failure is only logged.
if (int status = posix_fadvise (fileno(file1), 0, 0, POSIX_FADV_SEQUENTIAL)){
    Debug(Debug::ERROR) << "posix_fadvise returned an error: " << strerror(status) << "\n";
}
if (posix_fadvise (fileno(file2), 0, 0, POSIX_FADV_SEQUENTIAL) != 0){
Debug(Debug::ERROR) << "posix_fadvise returned an error\n";
// Advise the kernel that file2 will be read sequentially. posix_fadvise
// returns the error number directly (zero on success), so the result is kept
// and handed to strerror. The variable is declared in the condition itself,
// which scopes it to this check; the parenthesized form
// "if ((int status = ...) != 0)" is not valid C++. A failure is only logged.
if (int status = posix_fadvise (fileno(file2), 0, 0, POSIX_FADV_SEQUENTIAL)){
    Debug(Debug::ERROR) << "posix_fadvise returned an error: " << strerror(status) << "\n";
}
#endif
int c1, c2;
char * buffer = dataFilesBuffer[0];
size_t writePos = 0;
int dataFilefd = fileno(dataFiles[0]);
while((c1=getc_unlocked(file1)) != EOF) {
if(c1 == '\0'){
while ((c1=getc_unlocked(file1)) != EOF) {
if (c1 == '\0'){
while((c2=getc_unlocked(file2)) != EOF && c2 != '\0') {
buffer[writePos] = (char) c2;
writePos++;
@@ -312,11 +320,11 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
}
buffer[writePos] = '\0';
writePos++;
if(writePos == bufferSize){
if (writePos == bufferSize){
write(dataFilefd, buffer, bufferSize);
writePos = 0;
}
}else{
} else {
buffer[writePos] = (char) c1;;
writePos++;
if(writePos == bufferSize){
@@ -325,12 +333,13 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
}
}
}
if(writePos != 0){ // if there are data in the buffer that are not yet written
if(writePos != 0) { // if there are data in the buffer that are not yet written
write(dataFilefd, (const void *) dataFilesBuffer[0], writePos);
}
fclose(file1);
fclose(file2);
fclose(file1);
Debug(Debug::WARNING) << "Merge file " << inData1 << " and " << inData2 << "\n";
DBReader<unsigned int> reader1(inIndex1, inIndex1,
DBReader<std::string>::USE_INDEX);
@@ -343,12 +352,13 @@ void DBWriter::mergeFilePair(const char *inData1, const char *inIndex1,
unsigned int * seqLen1 = reader1.getSeqLens();
unsigned int * seqLen2 = reader2.getSeqLens();
for(size_t id = 0; id < reader1.getSize(); id++){
// add lenght for file1 and file2 and substrace -1 for one null byte
// add length for file1 and file2 and subtract 1 for one null byte
size_t seqLen = seqLen1[id] + seqLen2[id] - 1;
seqLen1[id] = seqLen;
index1[id].offset = currOffset;
currOffset += seqLen;
}
writeIndex(indexFiles[0], reader1.getSize(), index1, seqLen1);
reader2.close();
reader1.close();
@@ -6,8 +6,6 @@
#include <set>
#include <algorithm>
static PatternCompiler uninformative("hypothetical|unknown|putative|predicted|unnamed|probable|partial|possible|uncharacterized|fragment");
struct UniprotHeader {
std::string dbType;
std::string identifier;
@@ -29,10 +27,15 @@ struct UniprotHeader {
updatePriority();
};
// Accessor for the pattern whose alternatives are the words treated as
// uninformative. The pattern object is a function-local static: it is
// constructed the first time this accessor runs and the same instance is
// returned by reference on every later call.
PatternCompiler& isUninformative() {
    static PatternCompiler uninformativeWords("hypothetical|unknown|putative|predicted|unnamed|probable|partial|possible|uncharacterized|fragment");
    return uninformativeWords;
}
void updatePriority() {
priority = 0;
if(uninformative.isMatch(identifier.c_str()))
if(isUninformative().isMatch(identifier.c_str()))
return;
if(dbType == "sp") {
View
@@ -80,7 +80,7 @@ PARAM_PCA(PARAM_PCA_ID, "--pca", "Pseudo count a", "pseudo count admixture stren
PARAM_PCB(PARAM_PCB_ID, "--pcb", "Pseudo count b", "pseudo counts: Neff at half of maximum admixture (0.0,infinity)", typeid(float), (void*) &pcb, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
//PARAM_FIRST_SEQ_REP_SEQ(PARAM_FIRST_SEQ_REP_SEQ_ID, "--first-seq-as-repr", "first sequence as respresentative", "Use the first sequence of the clustering result as representative sequence", typeid(bool), (void*) &firstSeqRepr, "", MMseqsParameter::COMMAND_PROFILE),
// result2stats
PARAM_STAT(PARAM_STAT_ID, "--stat", "Statistics to be computed", "can be one of: linecount, mean, doolittle, charges, seqlen.", typeid(std::string), (void*) &stat, ""),
PARAM_STAT(PARAM_STAT_ID, "--stat", "Statistics to be computed", "can be one of: linecount, mean, doolittle, charges, seqlen, firstline.", typeid(std::string), (void*) &stat, ""),
// linearcluster
PARAM_KMER_PER_SEQ(PARAM_KMER_PER_SEQ_ID, "--kmer-per-seq", "Kmer per sequence", "kmer per sequence", typeid(int), (void*) &kmersPerSequence, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR),
// workflow
@@ -134,8 +134,7 @@ PARAM_MSA_TYPE(PARAM_MSA_TYPE_ID,"--msa-type", "MSA type", "MSA Type: cA3M 0 or
// extractalignedregion
PARAM_EXTRACT_MODE(PARAM_EXTRACT_MODE_ID,"--extract-mode", "Extract mode", "Query 1, Target 2", typeid(int), (void *) &extractMode, "^[1-2]{1}$"),
// convertkb
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB Columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, ""),
PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "character to count", typeid(std::string), (void *) &countCharacter, "")
PARAM_KB_COLUMNS(PARAM_KB_COLUMNS_ID, "--kb-columns", "UniprotKB Columns", "list of indices of UniprotKB columns to be extracted", typeid(std::string), (void *) &kbColumns, "")
{
// alignment
@@ -224,6 +223,8 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
//result2stats
result2stats.push_back(PARAM_STAT);
result2stats.push_back(PARAM_THREADS);
result2stats.push_back(PARAM_V);
// format alignment
convertalignments.push_back(PARAM_FORMAT_MODE);
@@ -375,9 +376,6 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
linearfilter.push_back(PARAM_MAX_SEQ_LEN);
linearfilter.push_back(PARAM_THREADS);
linearfilter.push_back(PARAM_V);
// result2newick
result2newick.push_back(PARAM_THREADS);
result2newick.push_back(PARAM_V);
// mergedbs
mergedbs.push_back(PARAM_MERGE_PREFIXES);
@@ -420,11 +418,6 @@ PARAM_COUNT_CHARACTER(PARAM_COUNT_CHARACTER_ID, "--count-char", "Count Char", "c
extractalignedregion.push_back(PARAM_THREADS);
extractalignedregion.push_back(PARAM_V);
// count
count.push_back(PARAM_COUNT_CHARACTER);
count.push_back(PARAM_THREADS);
count.push_back(PARAM_V);
// convertkb
convertkb.push_back(PARAM_KB_COLUMNS);
convertkb.push_back(PARAM_V);
@@ -937,9 +930,9 @@ void Parameters::setDefaults() {
// linearcluster
kmersPerSequence = 20;
// count
countCharacter = "\n";
// result2stats
stat = "";
}
std::vector<MMseqsParameter> Parameters::combineList(std::vector<MMseqsParameter> &par1,
View
@@ -253,9 +253,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
// convertkb
std::string kbColumns;
//count
std::string countCharacter;
// concatdbs
bool preserveKeysB;
@@ -361,7 +358,7 @@ class Parameters // Parameters for gap penalties and pseudocounts
//PARAMETER(PARAM_FIRST_SEQ_REP_SEQ)
// PARAMETER(PARAM_NO_PRUNING)
// result2stat
// result2stats
PARAMETER(PARAM_STAT)
// linearcluster
@@ -439,9 +436,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
// convertkb
PARAMETER(PARAM_KB_COLUMNS)
// count
PARAMETER(PARAM_COUNT_CHARACTER)
std::vector<MMseqsParameter> empty;
std::vector<MMseqsParameter> rescorediagonal;
std::vector<MMseqsParameter> onlyverbosity;
@@ -470,7 +464,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
std::vector<MMseqsParameter> filterDb;
std::vector<MMseqsParameter> swapresults;
std::vector<MMseqsParameter> subtractdbs;
std::vector<MMseqsParameter> result2newick;
std::vector<MMseqsParameter> diff;
std::vector<MMseqsParameter> dbconcat;
std::vector<MMseqsParameter> mergedbs;
@@ -480,7 +473,6 @@ class Parameters // Parameters for gap penalties and pseudocounts
std::vector<MMseqsParameter> summarizetabs;
std::vector<MMseqsParameter> extractdomains;
std::vector<MMseqsParameter> extractalignedregion;
std::vector<MMseqsParameter> count;
std::vector<MMseqsParameter> convertkb;
std::vector<MMseqsParameter> combineList(std::vector<MMseqsParameter> &par1,
No changes.
@@ -17,7 +17,6 @@ SubstitutionMatrix::SubstitutionMatrix(const char *scoringMatrixFileName_,
// read amino acid substitution matrix from file
std::string fileName(scoringMatrixFileName);
matrixName = Util::base_name(fileName, "/\\");
matrixName = Util::remove_extension(matrixName);
if (fileName.substr(fileName.length() - 4, 4).compare(".out") == 0){
std::ifstream in(fileName);
if (in.fail()) {
@@ -28,13 +27,12 @@ SubstitutionMatrix::SubstitutionMatrix(const char *scoringMatrixFileName_,
std::istreambuf_iterator<char>());
readProbMatrix(str);
in.close();
}
else {
} else {
Debug(Debug::ERROR) << "Invalid format of the substitution matrix input file! Only .out files are accepted.\n";
EXIT(EXIT_FAILURE);
}
}else{
matrixName = "blosum62";
matrixName = "blosum62.out";
std::string submat((const char*)blosum62_out,blosum62_out_len);
readProbMatrix(submat);
}
View
@@ -16,6 +16,10 @@
KSEQ_INIT(int, read)
// Identity overload of to_string for std::string arguments: hands back the
// argument itself, without making a copy.
// The result is a reference to s, so it stays valid only as long as the
// object that was passed in is alive.
const std::string& tostringidentity::to_string(const std::string& s) {
return s;
}
size_t Util::countLines(const char *data, size_t length) {
size_t newlines = 0;
for (size_t i = 0; i < length; i++ ) {
View
@@ -16,12 +16,20 @@
#endif
#define CHECK_BIT(var,pos) ((var) & (1<<(pos)))
#if __cplusplus <= 199711L
#define SSTR( x ) \
dynamic_cast< std::ostringstream& >( \
( std::ostringstream().flush() << std::dec << x ) ).str()
( std::ostringstream().flush() << std::dec << (x) ).str()
#else
#define SSTR( x ) std::to_string(x)
#ifndef TOSTRINGIDENTITY
#define TOSTRINGIDENTITY
// Lookup scope in which to_string accepts a std::string in addition to
// everything std::to_string accepts.
namespace tostringidentity {
// make the std::to_string overloads visible inside this namespace
using std::to_string;
// additional overload for std::string arguments; only declared here, the
// definition lives outside this block
const std::string& to_string(const std::string& s);
}
#endif
#define SSTR(x) tostringidentity::to_string((x))
#endif
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
@@ -140,16 +140,16 @@ class IndexTable {
}
// init index table with external data (needed for index readin)
void initTableByExternalData(size_t sequenzeCount, size_t tableEntriesNum,
void initTableByExternalData(size_t sequenceCount, size_t tableEntriesNum,
char * entries, size_t * entriesSize, SequenceLookup * lookup) {
this->tableEntriesNum = tableEntriesNum;
this->size = sequenzeCount;
// initMemory(sequenzeCount, tableEntriesNum, seqDataSize);
this->size = sequenceCount;
// initMemory(sequenceCount, tableEntriesNum, seqDataSize);
if(lookup != NULL){
sequenceLookup = lookup;
}
this->entries = entries;
Debug(Debug::WARNING) << "Cache database \n";
Debug(Debug::INFO) << "Cache database \n";
char* it = this->entries;
// set the pointers in the index table to the start of the list for a certain k-mer
magicByte = 0; // read each entry to keep them in memory
@@ -161,7 +161,7 @@ class IndexTable {
}
table[tableSize] = it;
externalData = true;
Debug(Debug::WARNING) << "Read IndexTable ... Done\n";
Debug(Debug::INFO) << "Read IndexTable ... Done\n";
}
void revertPointer(){
Oops, something went wrong.

0 comments on commit 707d9f2

Please sign in to comment.