Skip to content

Commit

Permalink
make Find(...) and Holds(...) faster
Browse files Browse the repository at this point in the history
We (ab?)use kaguya's parameter serialization machinery. Rather than
take a `std::string`, we take a `KnownTagKey` and teach Lua how to
convert a Lua string into a `KnownTagKey`.

This avoids the need to do a defensive copy of the string when coming
from Lua.

It provides a modest boost:

```
real 1m8.859s
user 16m13.292s
sys 0m18.104s
```

Most keys are short enough to fit in the small-string optimization, so
this doesn't help us avoid mallocs. An exception is `addr:housenumber`,
which, at 16 bytes, exceeds g++'s limit of 15 bytes.

It should be possible to also apply a similar trick to the `Attribute(...)`
functions, to avoid defensive copies of strings that we've seen as keys
or values.
  • Loading branch information
cldellow committed Dec 4, 2023
1 parent b322166 commit 5c807a9
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ prefix = /usr/local

MANPREFIX := /usr/share/man
TM_VERSION ?= $(shell git describe --tags --abbrev=0)
CXXFLAGS ?= -O3 -Wall -Wno-unknown-pragmas -Wno-sign-compare -std=c++14 -pthread -fPIE -DTM_VERSION=$(TM_VERSION) $(CONFIG) -g
CXXFLAGS ?= -O3 -Wall -Wno-unknown-pragmas -Wno-sign-compare -std=c++14 -pthread -fPIE -DTM_VERSION=$(TM_VERSION) $(CONFIG)
LIB := -L$(PLATFORM_PATH)/lib -lz $(LUA_LIBS) -lboost_program_options -lsqlite3 -lboost_filesystem -lboost_system -lboost_iostreams -lprotobuf -lshp -pthread
INC := -I$(PLATFORM_PATH)/include -isystem ./include -I./src $(LUA_CFLAGS)

Expand Down
9 changes: 1 addition & 8 deletions include/osm_lua_processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,6 @@ class OsmLuaProcessing {
// Get the ID of the current object
std::string Id() const;

// Check if there's a value for a given key
bool Holds(const std::string& key) const;

// Get an OSM tag for a given key (or return empty string if none)
const std::string& Find(const std::string& key) const;

// ---- Spatial queries called from Lua

// Find intersecting shapefile layer
Expand Down Expand Up @@ -196,6 +190,7 @@ class OsmLuaProcessing {
inline AttributeStore &getAttributeStore() { return attributeStore; }

struct luaProcessingException :std::exception {};
const TagMap* currentTags;

private:
/// Internal: clear current cached state
Expand Down Expand Up @@ -255,8 +250,6 @@ class OsmLuaProcessing {
class LayerDefinition &layers;

std::vector<std::pair<OutputObject, AttributeSet>> outputs; // All output objects that have been created
//const boost::container::flat_map<std::string, std::string>* currentTags;
const TagMap* currentTags;

std::vector<OutputObject> finalizeOutputs();
};
Expand Down
13 changes: 11 additions & 2 deletions include/tag_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@
// stored in our tag map, and passing a reference to its location.

// Assumptions:
// 1. Not thread-safe.
// 2. Lifetime of map is less than lifetime of keys/values that are passed.
// 1. Not thread-safe
// This is OK because we have 1 instance of OsmLuaProcessing per thread.
// 2. Lifetime of map is less than lifetime of keys/values that are passed
// This is true since the strings are owned by the protobuf block reader
// 3. Max number of tag values will fit in a short
// OSM limit is 5,000 tags per object
class TagMap {
public:
TagMap();
Expand All @@ -26,6 +30,11 @@ class TagMap {
void addTag(const std::string& key, const std::string& value);
const std::string* getTag(const std::string& key) const;

// Return -1 if key not found, else return its keyLoc.
int64_t getTag(const char* key, size_t size) const;

const std::string* getValue(uint32_t keyLoc) const;

boost::container::flat_map<std::string, std::string> exportToBoostMap() const;

private:
Expand Down
64 changes: 49 additions & 15 deletions src/osm_lua_processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,59 @@

using namespace std;

const std::string EMPTY_STRING = "";
thread_local kaguya::State *g_luaState = nullptr;
thread_local OsmLuaProcessing* osmLuaProcessing = nullptr;

// A key in `currentTags`. If Lua code refers to an absent key,
// found will be false.
struct KnownTagKey {
bool found;
uint32_t index;
};

template<> struct kaguya::lua_type_traits<KnownTagKey> {
typedef KnownTagKey get_type;
typedef const KnownTagKey& push_type;

static bool strictCheckType(lua_State* l, int index)
{
return lua_type(l, index) == LUA_TSTRING;
}
static bool checkType(lua_State* l, int index)
{
return lua_isstring(l, index) != 0;
}
static get_type get(lua_State* l, int index)
{
KnownTagKey rv = { false, 0 };
size_t size = 0;
const char* buffer = lua_tolstring(l, index, &size);

int64_t tagLoc = osmLuaProcessing->currentTags->getTag(buffer, size);

if (tagLoc >= 0) {
rv.found = true;
rv.index = tagLoc;
}
// std::string key(buffer, size);
// std::cout << "for key " << key << ": rv.found=" << rv.found << ", rv.index=" << rv.index << std::endl;
return rv;
}
static int push(lua_State* l, push_type s)
{
throw std::runtime_error("Lua code doesn't know how to use KnownTagKey");
}
};

std::string rawId() { return osmLuaProcessing->Id(); }
bool rawHolds(const std::string& key) { return osmLuaProcessing->Holds(key); }
const std::string& rawFind(const std::string& key) { return osmLuaProcessing->Find(key); }
bool rawHolds(const KnownTagKey& key) { return key.found; }
const std::string& rawFind(const KnownTagKey& key) {
if (key.found)
return *(osmLuaProcessing->currentTags->getValue(key.index));

return EMPTY_STRING;
}
std::vector<std::string> rawFindIntersecting(const std::string &layerName) { return osmLuaProcessing->FindIntersecting(layerName); }
bool rawIntersects(const std::string& layerName) { return osmLuaProcessing->Intersects(layerName); }
std::vector<std::string> rawFindCovering(const std::string& layerName) { return osmLuaProcessing->FindCovering(layerName); }
Expand All @@ -34,7 +81,6 @@ std::vector<double> rawCentroid() { return osmLuaProcessing->Centroid(); }


bool supportsRemappingShapefiles = false;
const std::string EMPTY_STRING = "";

int lua_error_handler(int errCode, const char *errMessage)
{
Expand Down Expand Up @@ -151,18 +197,6 @@ string OsmLuaProcessing::Id() const {
return to_string(originalOsmID);
}

// Check if there's a value for a given key
bool OsmLuaProcessing::Holds(const string& key) const {
return currentTags->getTag(key) != nullptr;
}

// Get an OSM tag for a given key (or return empty string if none)
const string& OsmLuaProcessing::Find(const string& key) const {
auto it = currentTags->getTag(key);
if(it == nullptr) return EMPTY_STRING;
return *it;
}

// ---- Spatial queries called from Lua

vector<string> OsmLuaProcessing::FindIntersecting(const string &layerName) {
Expand Down
35 changes: 35 additions & 0 deletions src/tag_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ const std::size_t hashString(const std::string& str) {
return hash;
}

const std::size_t hashString(const char* str, size_t size) {
// This is a pretty crappy hash function in terms of bit
// avalanching and distribution of output values.
//
// But it's very good in terms of speed, which turns out
// to be the important measure.
std::size_t hash = size;
if (hash >= 4)
hash ^= *(uint32_t*)str;

return hash;
}

uint32_t TagMap::ensureString(
std::vector<std::vector<const std::string*>>& vector,
const std::string& value
Expand Down Expand Up @@ -77,6 +90,28 @@ const std::string* TagMap::getTag(const std::string& key) const {
return nullptr;
}

int64_t TagMap::getTag(const char* key, size_t size) const {
// Return -1 if key not found, else return its keyLoc.
std::size_t hash = hashString(key, size);

const uint16_t shard = hash % keys.size();
for (int i = 0; i < keys[shard].size(); i++) {
const std::string& candidate = *keys[shard][i];
if (candidate.size() != size)
continue;

if (memcmp(candidate.data(), key, size) == 0)
return shard << 16 | i;
}

return -1;
}

const std::string* TagMap::getValue(uint32_t keyLoc) const {
const uint32_t valueLoc = key2value[keyLoc >> 16][keyLoc & 0xFFFF];
return values[valueLoc >> 16][valueLoc & 0xFFFF];
}

boost::container::flat_map<std::string, std::string> TagMap::exportToBoostMap() const {
boost::container::flat_map<std::string, std::string> rv;

Expand Down
2 changes: 1 addition & 1 deletion src/tilemaker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ int main(int argc, char* argv[]) {
}

// ---- Write out data
return 0; // TODO
// return 0; // TODO

// If mapsplit, read list of tiles available
unsigned runs=1;
Expand Down

0 comments on commit 5c807a9

Please sign in to comment.