Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Only store command hashes in the build log.

.build_log load time 350ms -> 17ms, filesize 197MB -> 1.6MB on
Mac. On Windows, it's 500ms -> 20ms.

Makes the build log a lot less useful for scripts, but there could
be a tool for use cases that need log information. A prototype of
such a tool is in nico@1b243d3

The hash function is 64bit murmurhash2. Assuming that that different
commands get the same hash only by chance, it's is very unlikely
for two different commands to hash to the same value with a 64bit
hash.
  • Loading branch information...
commit 5be83d0b2e4909f39998c98dde9a1393d8e5f1c2 1 parent 8f686fa
@nico nico authored
View
64 src/build_log.cc
@@ -37,10 +37,57 @@
namespace {
const char kFileSignature[] = "# ninja log v%d\n";
-const int kCurrentVersion = 4;
+const int kCurrentVersion = 5;
+
+// 64bit MurmurHash2, by Austin Appleby
+#if defined(_MSC_VER)
+#define BIG_CONSTANT(x) (x)
+#else // defined(_MSC_VER)
+#define BIG_CONSTANT(x) (x##LLU)
+#endif // !defined(_MSC_VER)
+inline
+uint64_t MurmurHash64A(const void* key, int len) {
+ static const uint64_t seed = 0xDECAFBADDECAFBADull;
+ const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+ const int r = 47;
+ uint64_t h = seed ^ (len * m);
+ const uint64_t * data = (const uint64_t *)key;
+ const uint64_t * end = data + (len/8);
+ while(data != end) {
+ uint64_t k = *data++;
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+ h ^= k;
+ h *= m;
+ }
+ const unsigned char* data2 = (const unsigned char*)data;
+ switch(len & 7)
+ {
+ case 7: h ^= uint64_t(data2[6]) << 48;
+ case 6: h ^= uint64_t(data2[5]) << 40;
+ case 5: h ^= uint64_t(data2[4]) << 32;
+ case 4: h ^= uint64_t(data2[3]) << 24;
+ case 3: h ^= uint64_t(data2[2]) << 16;
+ case 2: h ^= uint64_t(data2[1]) << 8;
+ case 1: h ^= uint64_t(data2[0]);
+ h *= m;
+ };
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+ return h;
+}
+#undef BIG_CONSTANT
+
} // namespace
+// static
+uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
+ return MurmurHash64A(command.str_, command.len_);
+}
+
BuildLog::BuildLog()
: log_file_(NULL), config_(NULL), needs_recompaction_(false) {}
@@ -95,7 +142,7 @@ void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time,
log_entry->output = path;
log_.insert(Log::value_type(log_entry->output, log_entry));
}
- log_entry->command = command;
+ log_entry->command_hash = LogEntry::HashCommand(command);
log_entry->start_time = start_time;
log_entry->end_time = end_time;
log_entry->restat_mtime = restat_mtime;
@@ -239,7 +286,14 @@ bool BuildLog::Load(const string& path, string* err) {
entry->start_time = start_time;
entry->end_time = end_time;
entry->restat_mtime = restat_mtime;
- entry->command = string(start, end - start);
+ if (log_version >= 5) {
+ char c = *end; *end = '\0';
+ entry->command_hash = (uint64_t)strtoull(start, NULL, 10);
+ *end = c;
+ }
+ else
+ entry->command_hash = LogEntry::HashCommand(StringPiece(start,
+ end - start));
}
// Decide whether it's time to rebuild the log:
@@ -267,9 +321,9 @@ BuildLog::LogEntry* BuildLog::LookupByOutput(const string& path) {
}
void BuildLog::WriteEntry(FILE* f, const LogEntry& entry) {
- fprintf(f, "%d\t%d\t%ld\t%s\t%s\n",
+ fprintf(f, "%d\t%d\t%ld\t%s\t%llu\n",
entry.start_time, entry.end_time, (long) entry.restat_mtime,
- entry.output.c_str(), entry.command.c_str());
+ entry.output.c_str(), entry.command_hash);
}
bool BuildLog::Recompact(const string& path, string* err) {
View
6 src/build_log.h
@@ -49,14 +49,16 @@ struct BuildLog {
struct LogEntry {
string output;
- string command;
+ uint64_t command_hash;
int start_time;
int end_time;
TimeStamp restat_mtime;
+ static uint64_t HashCommand(StringPiece command);
+
// Used by tests.
bool operator==(const LogEntry& o) {
- return output == o.output && command == o.command &&
+ return output == o.output && command_hash == o.command_hash &&
start_time == o.start_time && end_time == o.end_time &&
restat_mtime == o.restat_mtime;
}
View
12 src/build_log_test.cc
@@ -110,7 +110,7 @@ TEST_F(BuildLogTest, DoubleEntry) {
BuildLog::LogEntry* e = log.LookupByOutput("out");
ASSERT_TRUE(e);
- ASSERT_EQ("command def", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command def", e->command_hash));
}
TEST_F(BuildLogTest, Truncate) {
@@ -164,7 +164,7 @@ TEST_F(BuildLogTest, UpgradeV3) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(0, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
}
TEST_F(BuildLogTest, SpacesInOutputV4) {
@@ -183,7 +183,7 @@ TEST_F(BuildLogTest, SpacesInOutputV4) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(456, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
}
TEST_F(BuildLogTest, DuplicateVersionHeader) {
@@ -207,14 +207,14 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(456, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
e = log.LookupByOutput("out2");
ASSERT_TRUE(e);
ASSERT_EQ(456, e->start_time);
ASSERT_EQ(789, e->end_time);
ASSERT_EQ(789, e->restat_mtime);
- ASSERT_EQ("command2", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash));
}
TEST_F(BuildLogTest, VeryLongInputLine) {
@@ -242,5 +242,5 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
ASSERT_EQ(456, e->start_time);
ASSERT_EQ(789, e->end_time);
ASSERT_EQ(789, e->restat_mtime);
- ASSERT_EQ("command2", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash));
}
View
6 src/build_test.cc
@@ -953,8 +953,10 @@ TEST_F(BuildWithLogTest, RspFileCmdLineChange) {
// (to simulate a change in the command line between 2 builds)
BuildLog::LogEntry * log_entry = build_log_.LookupByOutput("out");
ASSERT_TRUE(NULL != log_entry);
- ASSERT_EQ("cat out.rsp > out;rspfile=Original very long command", log_entry->command);
- log_entry->command = "cat out.rsp > out;rspfile=Altered very long command";
+ ASSERT_NO_FATAL_FAILURE(AssertHash(
+ "cat out.rsp > out;rspfile=Original very long command",
+ log_entry->command_hash));
+ log_entry->command_hash++; // Change the command hash to something else.
// Now expect the target to be rebuilt
commands_ran_.clear();
state_.Reset();
View
4 src/graph.cc
@@ -157,7 +157,7 @@ bool Edge::RecomputeOutputDirty(BuildLog* build_log,
// dirty.
if (!rule_->generator() && build_log &&
(entry || (entry = build_log->LookupByOutput(output->path())))) {
- if (command != entry->command) {
+ if (BuildLog::LogEntry::HashCommand(command) != entry->command_hash) {
EXPLAIN("command line changed for %s", output->path().c_str());
return true;
}
@@ -200,7 +200,7 @@ string EdgeEnv::LookupVariable(const string& var) {
} else if (edge_->env_) {
return edge_->env_->LookupVariable(var);
} else {
- // XXX shoudl we warn here?
+ // XXX should we warn here?
return string();
}
}
View
5 src/test.cc
@@ -18,6 +18,7 @@
#include <errno.h>
+#include "build_log.h"
#include "parsers.h"
#include "util.h"
@@ -89,6 +90,10 @@ void AssertParse(State* state, const char* input) {
ASSERT_EQ("", err);
}
+void AssertHash(const char* expected, uint64_t actual) {
+ ASSERT_EQ(BuildLog::LogEntry::HashCommand(expected), actual);
+}
+
void VirtualFileSystem::Create(const string& path, int time,
const string& contents) {
files_[path].mtime = time;
View
1  src/test.h
@@ -35,6 +35,7 @@ struct StateTestWithBuiltinRules : public testing::Test {
};
void AssertParse(State* state, const char* input);
+void AssertHash(const char* expected, uint64_t actual);
/// An implementation of DiskInterface that uses an in-memory representation
/// of disk state. It also logs file accesses and directory creations
View
1  src/util.h
@@ -77,6 +77,7 @@ double GetLoadAverage();
#define fileno _fileno
#define unlink _unlink
#define chdir _chdir
+#define strtoull _strtoui64
#endif
#ifdef _WIN32
Please sign in to comment.
Something went wrong with that request. Please try again.