Skip to content

Commit

Permalink
Fix performance issue with the disk serializer
Browse files Browse the repository at this point in the history
This is the issue noted in #76. Keeping all historical results of
queries in the HistoricalQueryResults struct makes serializing and
deserializing those structs very, very slow as time goes on. By only
storing the last execution of the query, we keep the performance
constant, but we kill the feature where osquery can rebuild timelines
without accessing logs. After talking it over, we decided that this
isn't actually that big of a deal because, if you really wanted to
rebuild the old data, you should be able to process the logs, similar
to binary log (binlog) replication in MySQL.
  • Loading branch information
marpaia committed Sep 2, 2014
1 parent debdb56 commit 66a2a6f
Show file tree
Hide file tree
Showing 9 changed files with 8 additions and 133 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,7 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}")
INCLUDE_DIRECTORIES("/usr/local/include")
LINK_DIRECTORIES("/usr/local/lib")

ENABLE_TESTING()

ADD_SUBDIRECTORY(osquery)
ADD_SUBDIRECTORY(tools)
15 changes: 0 additions & 15 deletions osquery/core/test_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,30 +161,15 @@ getSerializedHistoricalQueryResults() {
auto qd = getSerializedQueryData();
auto dr = getSerializedDiffResults();
HistoricalQueryResults r;
r.executions = std::deque<int>{2, 1};
r.mostRecentResults.first = 2;
r.mostRecentResults.second = qd.second;
r.pastResults[1] = dr.second;

pt::ptree root;

pt::ptree executions;
pt::ptree item1;
item1.put("", 2);
executions.push_back(std::make_pair("", item1));
pt::ptree item2;
item2.put("", 1);
executions.push_back(std::make_pair("", item2));
root.add_child("executions", executions);

pt::ptree mostRecentResults;
mostRecentResults.add_child("2", qd.first);
root.add_child("mostRecentResults", mostRecentResults);

pt::ptree pastResults;
pastResults.add_child("1", dr.first);
root.add_child("pastResults", pastResults);

return std::make_pair(root, r);
}

Expand Down
16 changes: 0 additions & 16 deletions osquery/database/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,6 @@ bool Query::isQueryNameInDatabase(std::shared_ptr<DBHandle> db) {
return std::find(names.begin(), names.end(), query_.name) != names.end();
}

// Convenience overload: forwards to the two-argument getExecutions(), passing
// the handle returned by DBHandle::getInstance() as the database argument.
Status Query::getExecutions(std::deque<int>& results) {
  auto db = DBHandle::getInstance();
  return getExecutions(results, db);
}

// Calls getHistoricalQueryResults() with the supplied database handle and,
// when that call reports success, copies the `executions` member of the
// loaded HistoricalQueryResults into `results`. The Status of the lookup is
// returned in both cases; on failure `results` is left unmodified.
Status Query::getExecutions(std::deque<int>& results,
                            std::shared_ptr<DBHandle> db) {
  HistoricalQueryResults historical;
  auto status = getHistoricalQueryResults(historical, db);
  if (!status.ok()) {
    // Lookup failed: hand the error back without touching the output.
    return status;
  }
  results = historical.executions;
  return status;
}

// Convenience overload: forwards `qd` and `unix_time` to the addNewResults()
// overload that also takes a database handle, passing the handle returned by
// DBHandle::getInstance().
Status Query::addNewResults(const osquery::db::QueryData& qd, int unix_time) {
  auto db = DBHandle::getInstance();
  return addNewResults(qd, unix_time, db);
}
Expand Down Expand Up @@ -112,10 +98,8 @@ osquery::Status Query::addNewResults(const osquery::db::QueryData& qd,
if (calculate_diff) {
dr = diff(hQR.mostRecentResults.second, qd);
}
hQR.pastResults[hQR.mostRecentResults.first] = dr;
hQR.mostRecentResults.first = unix_time;
hQR.mostRecentResults.second = qd;
hQR.executions.push_front(unix_time);
std::string json;
auto serialize_status = serializeHistoricalQueryResultsJSON(hQR, json);
if (!serialize_status.ok()) {
Expand Down
10 changes: 0 additions & 10 deletions osquery/database/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,6 @@ class Query {
private:
bool isQueryNameInDatabase(std::shared_ptr<DBHandle> db);

// getExecutions() returns a deque of timestamps of previous query
// executions. These timestamp values are used as the RocksDB sub-keys which
// represent the data stored as a result of those executions.
public:
osquery::Status getExecutions(std::deque<int>& results);

private:
osquery::Status getExecutions(std::deque<int>& results,
std::shared_ptr<DBHandle> db);

// addNewResults adds a new result set to the local data store. If you
// want the diff of the results you've just added, pass a reference to a
// diffResults struct
Expand Down
16 changes: 0 additions & 16 deletions osquery/database/query_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,22 +114,6 @@ TEST_F(QueryTests, test_get_stored_query_names) {
EXPECT_NE(in_vector, names.end());
}

// Stores a serialized HistoricalQueryResults fixture under the scheduled
// query's name, then checks that Query::getExecutions() succeeds and yields
// the execution timestamps {2, 1}.
TEST_F(QueryTests, test_get_executions) {
  auto fixture = getSerializedHistoricalQueryResultsJSON();
  auto scheduled = getOsqueryScheduledQuery();
  auto db = DBHandle::getInstanceAtPath("/tmp/rocksdb-osquery-test14");

  // Seed the database with the fixture's serialized form.
  auto stored = db->Put(kQueries, scheduled.name, fixture.first);
  EXPECT_TRUE(stored.ok());
  EXPECT_EQ(stored.toString(), "OK");

  // Read the executions back through the Query wrapper.
  auto cf = Query(scheduled);
  std::deque<int> actual;
  std::deque<int> wanted = {2, 1};
  auto fetched = cf.getExecutions(actual, db);
  EXPECT_TRUE(fetched.ok());
  EXPECT_EQ(fetched.toString(), "OK");
  EXPECT_EQ(actual, wanted);
}

TEST_F(QueryTests, test_get_current_results) {
auto hQR = getSerializedHistoricalQueryResultsJSON();
auto query = getOsqueryScheduledQuery();
Expand Down
58 changes: 1 addition & 57 deletions osquery/database/results.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,7 @@ Status serializeHistoricalQueryResultsJSON(const HistoricalQueryResults& r,
Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
pt::ptree& tree) {
try {
pt::ptree executions;
pt::ptree mostRecentResults;
pt::ptree pastResults;

for (const auto& e : r.executions) {
pt::ptree item;
item.put("", e);
executions.push_back(std::make_pair("", item));
}
tree.add_child("executions", executions);

pt::ptree most_recent_serialized;
auto mrr_status =
Expand All @@ -178,17 +169,6 @@ Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
boost::lexical_cast<std::string>(r.mostRecentResults.first),
most_recent_serialized);
tree.add_child("mostRecentResults", mostRecentResults);

for (const auto& i : r.pastResults) {
pt::ptree serialized_diff_results;
auto dr_status = serializeDiffResults(i.second, serialized_diff_results);
if (!dr_status.ok()) {
return dr_status;
}
pastResults.add_child(boost::lexical_cast<std::string>(i.first),
serialized_diff_results);
}
tree.add_child("pastResults", pastResults);
}
catch (const std::exception& e) {
return Status(1, e.what());
Expand All @@ -199,17 +179,6 @@ Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
Status deserializeHistoricalQueryResults(const pt::ptree& tree,
HistoricalQueryResults& r) {
try {
for (const auto& v : tree.get_child("executions")) {
try {
int execution =
boost::lexical_cast<int>(v.second.get_value<std::string>());
r.executions.push_back(execution);
}
catch (const boost::bad_lexical_cast& e) {
return Status(1, e.what());
}
}

for (const auto& v : tree.get_child("mostRecentResults")) {
try {
int execution = boost::lexical_cast<int>(v.first);
Expand All @@ -218,6 +187,7 @@ Status deserializeHistoricalQueryResults(const pt::ptree& tree,
catch (const boost::bad_lexical_cast& e) {
return Status(1, e.what());
}

QueryData q;
for (const auto& each : v.second) {
Row row_;
Expand All @@ -229,32 +199,6 @@ Status deserializeHistoricalQueryResults(const pt::ptree& tree,
r.mostRecentResults.second = q;
}

for (const auto& v : tree.get_child("pastResults")) {
int execution;
try {
execution = boost::lexical_cast<int>(v.first);
}
catch (const boost::bad_lexical_cast& e) {
return Status(1, e.what());
}
DiffResults dr;
for (const auto& a : v.second.get_child("added")) {
Row row_;
for (const auto& each : a.second) {
row_[each.first] = each.second.get_value<std::string>();
}
dr.added.push_back(row_);
}
for (const auto& r : v.second.get_child("removed")) {
Row row_;
for (const auto& each : r.second) {
row_[each.first] = each.second.get_value<std::string>();
}
dr.removed.push_back(row_);
}
r.pastResults[execution] = dr;
}

return Status(0, "OK");
}
catch (const std::exception& e) {
Expand Down
13 changes: 1 addition & 12 deletions osquery/database/results.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,14 @@ DiffResults diff(const QueryData& old_, const QueryData& new_);
// HistoricalQueryResults is a struct which represents a scheduled query's
// historical results on disk
struct HistoricalQueryResults {
// a deque of execution timestamps, sorted by time, starting with the most recent
std::deque<int> executions;

// mostRecentResults.first is the timestamp of the most recent results and
// mostRecentResults.second is the query result data of the most recent
// query
std::pair<int, QueryData> mostRecentResults;

// pastResults is a map of timestamps to data diffs. All timestamps can be
// found pre-sorted in executions. By taking mostRecentResults.second and
// applying the diffs from pastResults (in time order), you can reconstruct
// the entire history of a query's results
std::map<int, DiffResults> pastResults;

// equals operator
bool operator==(const HistoricalQueryResults& comp) const {
return (comp.executions == executions) &&
(comp.mostRecentResults == mostRecentResults) &&
(comp.pastResults == pastResults);
return (comp.mostRecentResults == mostRecentResults);
}

// not equals operator
Expand Down
4 changes: 0 additions & 4 deletions osquery/database/results_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@ TEST_F(ResultsTests, test_deserialize_historical_query_results) {
HistoricalQueryResults r;
auto s = deserializeHistoricalQueryResults(results.first, r);
EXPECT_EQ(results.second, r);
EXPECT_EQ(results.second.executions, r.executions);
EXPECT_EQ(results.second.mostRecentResults, r.mostRecentResults);
EXPECT_EQ(results.second.pastResults, r.pastResults);
EXPECT_TRUE(s.ok());
EXPECT_EQ(s.toString(), "OK");
}
Expand All @@ -102,9 +100,7 @@ TEST_F(ResultsTests, test_deserialize_historical_query_results_json) {
HistoricalQueryResults r;
auto s = deserializeHistoricalQueryResultsJSON(results.first, r);
EXPECT_EQ(results.second, r);
EXPECT_EQ(results.second.executions, r.executions);
EXPECT_EQ(results.second.mostRecentResults, r.mostRecentResults);
EXPECT_EQ(results.second.pastResults, r.pastResults);
EXPECT_TRUE(s.ok());
EXPECT_EQ(s.toString(), "OK");
}
Expand Down
7 changes: 4 additions & 3 deletions osquery/tables/system/launchd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,18 @@ Row parseLaunchdItem(const std::string& path, const pt::ptree& tree) {
r["name"] = bits[bits.size() - 1];

for (const auto& it : kLaunchdTopLevelStringKeys) {
std::string item;
try {
std::string item = tree.get<std::string>(it.first);
item = tree.get<std::string>(it.first);
if (it.first == "Program") {
boost::replace_all(item, " ", "\\ ");
}
r[it.second] = item;
}
catch (const pt::ptree_error& e) {
VLOG(1) << "Error parsing " << it.first << " from " << path << ": "
VLOG(3) << "Error parsing " << it.first << " from " << path << ": "
<< e.what();
}
r[it.second] = item;
}

for (const auto& it : kLaunchdTopLevelArrayKeys) {
Expand Down

0 comments on commit 66a2a6f

Please sign in to comment.