Skip to content

Commit

Permalink
GDV-31:[C++]Caching projectors and filters for re-use. (apache#83)
Browse files Browse the repository at this point in the history
Introducing a cache to hold the projectors and filters for re-use.
The cache is an LRU cache that can hold 100 entries.
  • Loading branch information
praveenbingo committed Aug 23, 2018
1 parent 49d9a24 commit fd55d9f
Show file tree
Hide file tree
Showing 25 changed files with 621 additions and 126 deletions.
1 change: 1 addition & 0 deletions cpp/src/gandiva/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,5 @@ add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_build
add_gandiva_unit_test(status_test.cc status.cc)
add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc status.cc)
add_gandiva_unit_test(lru_cache_test.cc)

52 changes: 52 additions & 0 deletions cpp/src/gandiva/codegen/cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GANDIVA_MODULE_CACHE_H
#define GANDIVA_MODULE_CACHE_H

#include <mutex>

#include "codegen/lru_cache.h"

namespace gandiva {

template <class KeyType, typename ValueType>
class Cache {
public:
Cache(size_t capacity = CACHE_SIZE) : cache_(capacity) {}
ValueType GetCachedModule(KeyType cache_key) {
boost::optional<ValueType> result;
result = cache_.get(cache_key);
if (result != boost::none) {
return result.value();
}
mtx_.lock();
result = cache_.get(cache_key);
mtx_.unlock();
return result != boost::none ? result.value() : nullptr;
}

void CacheModule(KeyType cache_key, ValueType module) {
mtx_.lock();
cache_.insert(cache_key, module);
mtx_.unlock();
}

private:
LruCache<KeyType, ValueType> cache_;
static const int CACHE_SIZE = 100;
std::mutex mtx_;
};
} // namespace gandiva
#endif // GANDIVA_MODULE_CACHE_H
17 changes: 17 additions & 0 deletions cpp/src/gandiva/codegen/configuration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,24 @@

#include "gandiva/configuration.h"

#include "boost/functional/hash.hpp"

namespace gandiva {

// Out-of-class definition of the static default configuration held by
// ConfigurationBuilder; its value is whatever InitDefaultConfig() returns.
const std::shared_ptr<Configuration> ConfigurationBuilder::default_configuration_ =
InitDefaultConfig();

std::size_t Configuration::Hash() const {
boost::hash<std::string> string_hash;
return string_hash(byte_code_file_path_);
}

// Two configurations are equal when they point at the same byte-code file path.
bool Configuration::operator==(const Configuration &other) const {
  const std::string &theirs = other.byte_code_file_path();
  const std::string &mine = byte_code_file_path();
  return theirs == mine;
}

// Negation of operator==.
bool Configuration::operator!=(const Configuration &other) const {
  const bool same = (*this == other);
  return !same;
}

} // namespace gandiva
3 changes: 3 additions & 0 deletions cpp/src/gandiva/codegen/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class Configuration {
public:
const std::string &byte_code_file_path() const { return byte_code_file_path_; }
friend class ConfigurationBuilder;
std::size_t Hash() const;
bool operator==(const Configuration &other) const;
bool operator!=(const Configuration &other) const;

private:
explicit Configuration(const std::string byte_code_file_path)
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/gandiva/codegen/filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include <vector>

#include "codegen/bitmap_accumulator.h"
#include "codegen/cache.h"
#include "codegen/expr_validator.h"
#include "codegen/filter_cache_key.h"
#include "codegen/llvm_generator.h"
#include "codegen/selection_vector_impl.h"
#include "gandiva/condition.h"
Expand All @@ -42,6 +44,13 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
Status::Invalid("condition cannot be null"));
GANDIVA_RETURN_FAILURE_IF_FALSE(configuration != nullptr,
Status::Invalid("configuration cannot be null"));
static Cache<FilterCacheKey, std::shared_ptr<Filter>> cache;
FilterCacheKey cacheKey(schema, configuration, *(condition.get()));
std::shared_ptr<Filter> cachedFilter = cache.GetCachedModule(cacheKey);
if (cachedFilter != nullptr) {
*filter = cachedFilter;
return Status::OK();
}
// Build LLVM generator, and generate code for the specified expression
std::unique_ptr<LLVMGenerator> llvm_gen;
Status status = LLVMGenerator::Make(configuration, &llvm_gen);
Expand All @@ -58,6 +67,7 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,

// Instantiate the filter with the completely built llvm generator
*filter = std::make_shared<Filter>(std::move(llvm_gen), schema, configuration);
cache.CacheModule(cacheKey, *filter);
return Status::OK();
}

Expand Down
66 changes: 66 additions & 0 deletions cpp/src/gandiva/codegen/filter_cache_key.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GANDIVA_FILTER_CACHE_KEY_H
#define GANDIVA_FILTER_CACHE_KEY_H

#include "boost/functional/hash.hpp"
#include "gandiva/arrow.h"
#include "gandiva/filter.h"

namespace gandiva {
class FilterCacheKey {
public:
FilterCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration,
Expression &expression)
: schema_(schema), configuration_(configuration) {
static const int kSeedValue = 4;
size_t result = kSeedValue;
expression_as_string_ = expression.ToString();
boost::hash_combine(result, expression_as_string_);
boost::hash_combine(result, configuration);
boost::hash_combine(result, schema_->ToString());
hash_code_ = result;
}

std::size_t Hash() const { return hash_code_; }

bool operator==(const FilterCacheKey &other) const {
// arrow schema does not overload equality operators.
if (!(schema_->Equals(*other.schema().get(), true))) {
return false;
}

if (configuration_ != other.configuration_) {
return false;
}

if (expression_as_string_ != other.expression_as_string_) {
return false;
}
return true;
}

bool operator!=(const FilterCacheKey &other) const { return !(*this == other); }

SchemaPtr schema() const { return schema_; }

private:
const SchemaPtr schema_;
const std::shared_ptr<Configuration> configuration_;
std::string expression_as_string_;
size_t hash_code_;
};
} // namespace gandiva
#endif // GANDIVA_FILTER_CACHE_KEY_H
120 changes: 120 additions & 0 deletions cpp/src/gandiva/codegen/lru_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef LRU_CACHE_H
#define LRU_CACHE_H

#include <list>
#include <unordered_map>
#include <utility>

#include <boost/optional.hpp>

// Adapted from the Boost LRU cache, which supports only an ordered map;
// this version is backed by std::unordered_map instead.
namespace gandiva {
// a cache which evicts the least recently used item when it is full
// Fixed-capacity cache that evicts the least recently used entry when full.
//
// Requirements on Key: a `std::size_t Hash() const` member (used by `hasher`)
// and `operator==` (used by the underlying std::unordered_map).
//
// The class does no locking of its own, and get() reorders the recency list,
// so callers sharing an instance between threads must serialize all access.
template <class Key, class Value>
class LruCache {
 public:
  using key_type = Key;
  using value_type = Value;
  // Keys ordered from most recently used (front) to least recently used (back).
  using list_type = std::list<key_type>;
  // Hash functor that forwards to the key's own Hash() member.
  struct hasher {
    template <typename I>
    std::size_t operator()(const I &i) const {
      return i.Hash();
    }
  };
  // Maps a key to its value plus the key's position in the recency list.
  using map_type =
      std::unordered_map<key_type, std::pair<value_type, typename list_type::iterator>,
                         hasher>;

  LruCache(size_t capacity) : cache_capacity_(capacity) {}

  ~LruCache() {}

  size_t size() const { return map_.size(); }

  size_t capacity() const { return cache_capacity_; }

  bool empty() const { return map_.empty(); }

  // Pure lookup: does not change the recency order.
  bool contains(const key_type &key) const { return map_.find(key) != map_.end(); }

  // Adds `key` -> `value` as the most recently used entry, evicting the least
  // recently used entry first if the cache is full. If `key` is already
  // present nothing changes: the value is not overwritten and the entry's
  // recency is not refreshed. A zero-capacity cache never stores anything.
  void insert(const key_type &key, const value_type &value) {
    if (cache_capacity_ == 0 || map_.find(key) != map_.end()) {
      return;
    }

    if (size() >= cache_capacity_) {
      // cache is full, evict the least recently used item
      evict();
    }

    lru_list_.push_front(key);
    // emplace instead of operator[] so Value need not be default-constructible
    map_.emplace(key, std::make_pair(value, lru_list_.begin()));
  }

  // Returns the value stored under `key` and marks it most recently used, or
  // boost::none when the key is not cached.
  boost::optional<value_type> get(const key_type &key) {
    typename map_type::iterator entry = map_.find(key);
    if (entry == map_.end()) {
      // value not in cache
      return boost::none;
    }

    typename list_type::iterator position = entry->second.second;
    if (position != lru_list_.begin()) {
      // Relink the node at the front. splice() keeps list iterators valid, so
      // the iterator stored in the map needs no update.
      lru_list_.splice(lru_list_.begin(), lru_list_, position);
    }
    return entry->second.first;
  }

  void clear() {
    map_.clear();
    lru_list_.clear();
  }

 private:
  // Drops the least recently used entry (back of the list); no-op when empty.
  void evict() {
    if (lru_list_.empty()) {
      return;
    }
    map_.erase(lru_list_.back());
    lru_list_.pop_back();
  }

 private:
  map_type map_;
  list_type lru_list_;
  size_t cache_capacity_;
};
} // namespace gandiva
#endif // LRU_CACHE_H
60 changes: 60 additions & 0 deletions cpp/src/gandiva/codegen/lru_cache_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (C) 2017-2018 Dremio Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "codegen/lru_cache.h"

#include <map>
#include <typeinfo>

#include <gtest/gtest.h>

namespace gandiva {

// Minimal cache key for the LRU cache tests: wraps an int that also serves as
// its own hash value.
class TestCacheKey {
 private:
  int tmp_;

 public:
  // Deliberately implicit: the tests pass plain ints where a key is expected.
  TestCacheKey(int tmp) : tmp_{tmp} {}

  std::size_t Hash() const { return static_cast<std::size_t>(tmp_); }

  bool operator==(const TestCacheKey &other) const {
    return other.tmp_ == tmp_;
  }
};

class TestLruCache : public ::testing::Test {
public:
TestLruCache() : cache_(2) {}

protected:
LruCache<TestCacheKey, std::string> cache_;
};

// Re-inserting an existing key does not refresh its recency, so key 1 remains
// the oldest entry and is the one evicted when key 3 arrives.
TEST_F(TestLruCache, TestEvict) {
  cache_.insert(TestCacheKey(1), "hello");
  cache_.insert(TestCacheKey(2), "hello");
  cache_.insert(TestCacheKey(1), "hello");  // already present: no effect
  cache_.insert(TestCacheKey(3), "hello");
  // should have evicted key 1
  // (unsigned literal avoids a signed/unsigned comparison against size_t)
  ASSERT_EQ(2u, cache_.size());
  ASSERT_EQ(cache_.get(1), boost::none);
  // keys 2 and 3 must still be cached
  ASSERT_EQ(cache_.get(2).value(), "hello");
  ASSERT_EQ(cache_.get(3).value(), "hello");
}

// A lookup refreshes an entry's recency: after touching key 1, key 2 becomes
// the least recently used entry and is evicted by the next insertion.
TEST_F(TestLruCache, TestLruBehavior) {
  cache_.insert(TestCacheKey(1), "hello");
  cache_.insert(TestCacheKey(2), "hello");
  cache_.get(TestCacheKey(1));  // touch key 1 so that key 2 is now the oldest
  cache_.insert(TestCacheKey(3), "hello");
  // should have evicted key 2.
  ASSERT_EQ(2u, cache_.size());
  ASSERT_EQ(cache_.get(2), boost::none);
  // key 1 survived because it was used more recently than key 2
  ASSERT_EQ(cache_.get(1).value(), "hello");
}
} // namespace gandiva
17 changes: 11 additions & 6 deletions cpp/src/gandiva/codegen/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,18 @@ class BooleanNode : public Node {

std::string ToString() override {
std::stringstream ss;
ss << children_.at(0)->ToString();
if (expr_type() == BooleanNode::AND) {
ss << " && ";
} else {
ss << " || ";
bool first = true;
for (auto &child : children_) {
if (!first) {
if (expr_type() == BooleanNode::AND) {
ss << " && ";
} else {
ss << " || ";
}
}
ss << child->ToString();
first = false;
}
ss << children_.at(1)->ToString();
return ss.str();
}

Expand Down
Loading

0 comments on commit fd55d9f

Please sign in to comment.