Skip to content

Commit

Permalink
interned QualifiedName (#2451)
Browse files Browse the repository at this point in the history
Intern `QualifiedName` strings and switch many associative data structures to unordered comparison between qualified names. Ordered comparisons of qualified names had a significant cost on large Datalog programs.

A global `QNInterner` object is responsible for interning of qualified names.

Also fix a performance issue in the `RecursiveClauses` analysis.
  • Loading branch information
quentin committed Dec 8, 2023
1 parent 1009dad commit c8edcc1
Show file tree
Hide file tree
Showing 68 changed files with 802 additions and 510 deletions.
17 changes: 0 additions & 17 deletions src/ast/Program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,23 +308,6 @@ void Program::print(std::ostream& os) const {
show(getDirectives(), "\n\n");
}

/// Compare this program with `node`, which is viewed as a `Program` through
/// `asAssert`.
///
/// The member collections are compared one after another with
/// `equal_targets`; the comparison stops at the first collection that
/// differs. Relations are compared last, entry by entry.
bool Program::equal(const Node& node) const {
    const auto& rhs = asAssert<Program>(node);

    // Two relation records match when their declarations, clauses and
    // directives all match (checked in that order).
    const auto sameRelationInfo = [](auto& lhsInfo, auto& rhsInfo) -> bool {
        if (!equal_targets(lhsInfo.decls, rhsInfo.decls)) {
            return false;
        }
        if (!equal_targets(lhsInfo.clauses, rhsInfo.clauses)) {
            return false;
        }
        return equal_targets(lhsInfo.directives, rhsInfo.directives);
    };

    if (!equal_targets(pragmas, rhs.pragmas)) {
        return false;
    }
    if (!equal_targets(components, rhs.components)) {
        return false;
    }
    if (!equal_targets(instantiations, rhs.instantiations)) {
        return false;
    }
    if (!equal_targets(functors, rhs.functors)) {
        return false;
    }
    if (!equal_targets(types, rhs.types)) {
        return false;
    }
    if (!equal_targets(lattices, rhs.lattices)) {
        return false;
    }
    return equal_targets_map(relations, rhs.relations, sameRelationInfo);
}

void Program::addComponent(Own<Component> component) {
assert(component && "NULL component");
components.push_back(std::move(component));
Expand Down
4 changes: 1 addition & 3 deletions src/ast/Program.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class Program : public Node {
VecOwn<Directive> directives;
};

using RelationInfoMap = std::map<QualifiedName, RelationInfo>;
using RelationInfoMap = OrderedQualifiedNameMap<RelationInfo>;

RelationInfoMap& getRelationInfo() {
return relations;
Expand Down Expand Up @@ -238,8 +238,6 @@ class Program : public Node {
void addInstantiation(Own<ComponentInit> instantiation);

private:
bool equal(const Node& node) const override;

Program* cloning() const override;

private:
Expand Down
127 changes: 108 additions & 19 deletions src/ast/QualifiedName.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,136 @@

#include "ast/QualifiedName.h"
#include "souffle/utility/StreamUtil.h"
#include "souffle/utility/StringUtil.h"

#include <algorithm>
#include <cassert>
#include <deque>
#include <map>
#include <ostream>
#include <sstream>
#include <unordered_map>
#include <utility>

namespace souffle::ast {

QualifiedName::QualifiedName() {}
QualifiedName::QualifiedName(std::string name) {
qualifiers.emplace_back(std::move(name));
/// Container of qualified names; provides interning by associating a unique
/// numerical index with each distinct qualified-name string.
///
/// Index 0 is reserved for the empty qualified name, which the constructor
/// registers before any other name can be interned.
struct QNInterner {
public:
    /// Create an interner that already contains the empty qualified name at
    /// index 0.
    explicit QNInterner() {
        qualifiedNames.emplace_back(QualifiedNameData{{}, ""});
        qualifiedNameToIndex.emplace("", 0);
    }

    /// Return the qualified name object for the given string.
    ///
    /// Each `.` character is treated as a separator. A string that was
    /// interned before yields the same index again; otherwise a new record is
    /// stored and the next free index is handed out.
    QualifiedName intern(std::string_view qn) {
        const auto found = qualifiedNameToIndex.find(qn);
        if (found != qualifiedNameToIndex.end()) {
            return QualifiedName{found->second};
        }

        // The index is stored in 32 bits; make sure the cast below cannot wrap.
        assert(qualifiedNames.size() < UINT32_MAX && "qualified name index overflow");
        const uint32_t index = static_cast<uint32_t>(qualifiedNames.size());

        QualifiedNameData record{splitString(qn, '.'), std::string{qn}};
        qualifiedNames.emplace_back(std::move(record));

        // The map key is a view of the string owned by the stored record, not
        // of the caller's buffer. std::deque::emplace_back does not invalidate
        // references to existing elements, so earlier keys remain valid.
        qualifiedNameToIndex.emplace(qualifiedNames.back().qualified, index);

        return QualifiedName{index};
    }

    /// Return the qualified name data object stored at the given index.
    ///
    /// Uses `std::deque::at`, so an index that was never handed out raises
    /// `std::out_of_range`.
    const QualifiedNameData& at(uint32_t index) const {
        return qualifiedNames.at(index);
    }

private:
    /// Store the qualified name data of interned qualified names.
    std::deque<QualifiedNameData> qualifiedNames;

    /// Mapping from a qualified name string representation to its index in
    /// `qualifiedNames`.
    std::unordered_map<std::string_view, uint32_t> qualifiedNameToIndex;
};

namespace {
/// The default qualified name interner instance.
///
/// File-local (anonymous namespace). The `QualifiedName` member functions in
/// this file reach it through `Interner.intern(...)` and `Interner.at(...)`.
/// NOTE(review): no locking is visible here or in `QNInterner`; confirm that
/// qualified names are never interned from several threads at once.
QNInterner Interner;
} // namespace

QualifiedName::QualifiedName() : index(0) {}
QualifiedName::QualifiedName(uint32_t idx) : index(idx) {}

/// Look up the interned record (segments and dotted string) of this name.
const QualifiedNameData& QualifiedName::data() const {
    const QualifiedNameData& record = Interner.at(index);
    return record;
}

/// Two qualified names are equal exactly when they carry the same interner
/// index.
bool QualifiedName::operator==(const QualifiedName& other) const {
    const bool sameIndex = (other.index == index);
    return sameIndex;
}

/// Negation of `operator==`: true when the interner indices differ.
bool QualifiedName::operator!=(const QualifiedName& other) const {
    return !(*this == other);
}
/// Build a qualified name from a C string by delegating to the `std::string`
/// constructor.
QualifiedName::QualifiedName(const char* name) : QualifiedName(std::string(name)) {}
/// Build a qualified name from an already split list of qualifiers, which is
/// moved into the `qualifiers` member.
QualifiedName::QualifiedName(std::vector<std::string> qualifiers) : qualifiers(std::move(qualifiers)) {}

void QualifiedName::append(std::string name) {
qualifiers.push_back(std::move(name));
void QualifiedName::append(const std::string& segment) {
assert(segment.find('.') == std::string::npos);
*this = Interner.intern(data().qualified + "." + segment);
}

void QualifiedName::prepend(std::string name) {
qualifiers.insert(qualifiers.begin(), std::move(name));
void QualifiedName::prepend(const std::string& segment) {
assert(segment.find('.') == std::string::npos);
*this = Interner.intern(segment + "." + data().qualified);
}

/** convert to a string separated by fullstop */
std::string QualifiedName::toString() const {
std::stringstream ss;
print(ss);
return ss.str();
const std::string& QualifiedName::toString() const {
return data().qualified;
}

bool QualifiedName::operator<(const QualifiedName& other) const {
return std::lexicographical_compare(
qualifiers.begin(), qualifiers.end(), other.qualifiers.begin(), other.qualifiers.end());
/// Intern `qname` in the file-level interner and return the resulting
/// qualified name.
QualifiedName QualifiedName::fromString(std::string_view qname) {
    QualifiedName interned = Interner.intern(qname);
    return interned;
}

/// Strict lexicographic "less than" between two qualified names.
///
/// The records are only fetched and compared when the interner indices
/// differ; a name is never less than itself.
bool QualifiedName::lexicalLess(const QualifiedName& other) const {
    return index != other.index && data().lexicalLess(other.data());
}

/// Write this qualified name to `out`.
///
/// NOTE(review): the body contains two writes, one joining `qualifiers` with
/// "." and one emitting `toString()`; confirm which of the two is intended to
/// remain.
void QualifiedName::print(std::ostream& out) const {
out << join(qualifiers, ".");
out << toString();
}

std::ostream& operator<<(std::ostream& out, const QualifiedName& id) {
id.print(out);
/// Stream the dotted string form of `qn` into `out` and return `out`.
std::ostream& operator<<(std::ostream& out, const QualifiedName& qn) {
    return out << qn.toString();
}

const std::vector<std::string>& QualifiedName::getQualifiers() const {
return data().segments;
}

uint32_t QualifiedName::getIndex() const {
return index;
}

/// Tell whether this is the empty qualified name, i.e. interner index 0.
bool QualifiedName::empty() const {
    const uint32_t emptyNameIndex = 0;
    return index == emptyNameIndex;
}

/// Strict lexicographic ordering on the segment lists of two records.
bool QualifiedNameData::lexicalLess(const QualifiedNameData& other) const {
    // std::vector's operator< compares element by element, lexicographically.
    return segments < other.segments;
}

/// Return a copy of `tail` with the segment `head` prepended.
QualifiedName operator+(const std::string& head, const QualifiedName& tail) {
    QualifiedName combined(tail);
    combined.prepend(head);
    return combined;
}

} // namespace souffle::ast
128 changes: 100 additions & 28 deletions src/ast/QualifiedName.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,70 +18,142 @@

#include <cstdint>
#include <iosfwd>
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace souffle::ast {

/// Record describing one qualified name: its individual segments and the
/// same name as a single dot-separated string.
struct QualifiedNameData {
/// type of one segment of a qualified name
using Segment = std::string;
/// the segments of the qualified name, in order
std::vector<Segment> segments;

/// the whole qualified name with segments glued with dot
std::string qualified;

/// strict lexicographic comparison of `segments` with `other.segments`
bool lexicalLess(const QualifiedNameData& other) const;
};

struct QNInterner;

/**
 * @class QualifiedName
 * @brief Qualified Name class defines fully/partially qualified names
 * to identify objects in components.
 *
 * NOTE(review): several members appear twice in this listing (for example
 * `append`, `prepend`, `empty`, `getQualifiers`, `toString`, `operator==`
 * and `operator!=`), once written against the `qualifiers` vector and once
 * as a plain declaration next to the `index` member; confirm which set is
 * current before relying on either.
 */
class QualifiedName {
private:
// QNInterner is a friend so that it can call the private index constructor
friend struct QNInterner;
/// Build a qualified name from an interner index
explicit QualifiedName(uint32_t);

public:
/** Build a QualifiedName from a dot-separated qualified name */
static QualifiedName fromString(std::string_view qualname);

/// The empty qualified name
QualifiedName();
QualifiedName(std::string name);
QualifiedName(const char* name);
QualifiedName(std::vector<std::string> qualifiers);

// copy and move operations are the compiler-generated ones
QualifiedName(const QualifiedName&) = default;
QualifiedName(QualifiedName&&) = default;
QualifiedName& operator=(const QualifiedName&) = default;
QualifiedName& operator=(QualifiedName&&) = default;

/** append qualifiers */
void append(std::string name);
/** get the data record (segments and dotted string) of this qualified name */
const QualifiedNameData& data() const;

/** prepend qualifiers */
void prepend(std::string name);
/** append one qualifier */
void append(const std::string& name);

/** prepend one qualifier */
void prepend(const std::string& name);

/** check for emptiness */
bool empty() const {
return qualifiers.empty();
}
bool empty() const;

/** get qualifiers */
const std::vector<std::string>& getQualifiers() const {
return qualifiers;
}
const std::vector<std::string>& getQualifiers() const;

/** convert to a string separated by fullstop */
std::string toString() const;
const std::string& toString() const;

/** equality comparison */
bool operator==(const QualifiedName& other) const {
return qualifiers == other.qualifiers;
}
bool operator==(const QualifiedName& other) const;

/** inequality comparison */
bool operator!=(const QualifiedName& other) const {
return !(*this == other);
}
bool operator!=(const QualifiedName& other) const;

bool operator<(const QualifiedName& other) const;
/// Lexicographic less comparison.
///
/// We don't offer `operator<` because it's a costly operation
/// that should only be used when ordering is required.
///
/// See type definitions of containers below.
bool lexicalLess(const QualifiedName& other) const;

/** print qualified name */
void print(std::ostream& out) const;

/** stream output of a qualified name */
friend std::ostream& operator<<(std::ostream& out, const QualifiedName& id);

/// Return the unique identifier of the interned qualified name.
uint32_t getIndex() const;

private:
/* array of name qualifiers */
std::vector<std::string> qualifiers;
/// index of this qualified name in the qualified-name interner
uint32_t index;
};

inline QualifiedName operator+(const std::string& name, const QualifiedName& id) {
QualifiedName res = id;
res.prepend(name);
return res;
/// Return the qualified name obtained by adding the segment `head` in front of the qualified name `tail`.
QualifiedName operator+(const std::string& head, const QualifiedName& tail);

/// Strict ordering functor that compares qualified names lexicographically
/// through `QualifiedName::lexicalLess`.
struct OrderedQualifiedNameLess {
    bool operator()(const QualifiedName& lhs, const QualifiedName& rhs) const {
        const bool lhsFirst = lhs.lexicalLess(rhs);
        return lhsFirst;
    }
};

/// Strict ordering functor that compares qualified names by their interner
/// index rather than by their text.
struct UnorderedQualifiedNameLess {
    bool operator()(const QualifiedName& lhs, const QualifiedName& rhs) const {
        return rhs.getIndex() > lhs.getIndex();
    }
};

/// Hash functor for qualified names: the hash value is the interner index.
struct QualifiedNameHash {
    std::size_t operator()(const QualifiedName& qn) const {
        const std::size_t hashValue = qn.getIndex();
        return hashValue;
    }
};

/// a map from qualified name to T where qualified name keys are ordered in
/// lexicographic order.
template <typename T>
using OrderedQualifiedNameMap = std::map<QualifiedName, T, OrderedQualifiedNameLess>;

/// a map from qualified name to T where qualified name keys are not ordered in
/// any deterministic order.
template <typename T>
using UnorderedQualifiedNameMap = std::unordered_map<QualifiedName, T, QualifiedNameHash>;

/// a multi-map from qualified name to T where qualified name keys are not ordered in
/// any deterministic order.
template <typename T>
using UnorderedQualifiedNameMultimap = std::unordered_multimap<QualifiedName, T, QualifiedNameHash>;

/// a set of qualified names kept in lexicographic order.
using OrderedQualifiedNameSet = std::set<QualifiedName, OrderedQualifiedNameLess>;

/// an unordered set of qualified names.
using UnorderedQualifiedNameSet = std::unordered_set<QualifiedName, QualifiedNameHash>;

/// Copy the qualified names of `cont` into a lexicographically ordered set.
template <typename Container>
OrderedQualifiedNameSet orderedQualifiedNameSet(const Container& cont) {
    OrderedQualifiedNameSet ordered;
    ordered.insert(cont.cbegin(), cont.cend());
    return ordered;
}

} // namespace souffle::ast

/// `std::hash` specialization for qualified names: the hash value is the
/// interner index of the name.
template <>
struct std::hash<souffle::ast::QualifiedName> {
    std::size_t operator()(const souffle::ast::QualifiedName& qn) const noexcept {
        const std::size_t hashValue = qn.getIndex();
        return hashValue;
    }
};
4 changes: 4 additions & 0 deletions src/ast/Relation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,8 @@ Relation* Relation::cloning() const {
return res;
}

/// Build a `RelationSet` holding every element of `cont`.
RelationSet orderedRelationSet(const UnorderedRelationSet& cont) {
    RelationSet ordered(cont.cbegin(), cont.cend());
    return ordered;
}

} // namespace souffle::ast
Loading

0 comments on commit c8edcc1

Please sign in to comment.