diff --git a/CMakeLists.txt b/CMakeLists.txt index ad353d84..803fb140 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,10 +81,6 @@ ADD_CUSTOM_COMMAND(OUTPUT vector_tile.pb.cc vector_tile.pb.h COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_BINARY_DIR} -I ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/vector_tile.proto) -ADD_CUSTOM_COMMAND(OUTPUT osmformat.pb.cc osmformat.pb.h - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --cpp_out ${CMAKE_BINARY_DIR} -I ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/osmformat.proto) - file(GLOB tilemaker_src_files src/attribute_store.cpp src/coordinates.cpp @@ -97,25 +93,30 @@ file(GLOB tilemaker_src_files src/mbtiles.cpp src/mmap_allocator.cpp src/node_stores.cpp + src/options_parser.cpp src/osm_lua_processing.cpp src/osm_mem_tiles.cpp src/osm_store.cpp src/output_object.cpp - src/pbf_blocks.cpp + src/pbf_processor.cpp + src/pbf_reader.cpp src/pmtiles.cpp - src/read_pbf.cpp + src/pooled_string.cpp src/read_shp.cpp + src/sharded_node_store.cpp + src/sharded_way_store.cpp src/shared_data.cpp src/shp_mem_tiles.cpp src/sorted_node_store.cpp src/sorted_way_store.cpp + src/tag_map.cpp src/tile_data.cpp src/tilemaker.cpp src/tile_worker.cpp src/way_stores.cpp src/write_geometry.cpp ) -add_executable(tilemaker vector_tile.pb.cc osmformat.pb.cc ${tilemaker_src_files}) +add_executable(tilemaker vector_tile.pb.cc ${tilemaker_src_files}) target_include_directories(tilemaker PRIVATE include) target_include_directories(tilemaker PRIVATE ${CMAKE_BINARY_DIR}) # for generated files target_link_libraries(tilemaker diff --git a/Makefile b/Makefile index 45b7c8af..1ac184f1 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,6 @@ INC := -I$(PLATFORM_PATH)/include -isystem ./include -I./src $(LUA_CFLAGS) all: tilemaker tilemaker: \ - include/osmformat.pb.o \ include/vector_tile.pb.o \ src/attribute_store.o \ src/coordinates_geom.o \ @@ -106,18 +105,23 @@ tilemaker: \ src/mbtiles.o \ src/mmap_allocator.o \ 
src/node_stores.o \ + src/options_parser.o \ src/osm_lua_processing.o \ src/osm_mem_tiles.o \ src/osm_store.o \ src/output_object.o \ - src/pbf_blocks.o \ + src/pbf_processor.o \ + src/pbf_reader.o \ src/pmtiles.o \ - src/read_pbf.o \ + src/pooled_string.o \ src/read_shp.o \ + src/sharded_node_store.o \ + src/sharded_way_store.o \ src/shared_data.o \ src/shp_mem_tiles.o \ src/sorted_node_store.o \ src/sorted_way_store.o \ + src/tag_map.o \ src/tile_data.o \ src/tilemaker.o \ src/tile_worker.o \ @@ -125,7 +129,50 @@ tilemaker: \ src/write_geometry.o $(CXX) $(CXXFLAGS) -o tilemaker $^ $(INC) $(LIB) $(LDFLAGS) -test: test_sorted_way_store +test: \ + test_append_vector \ + test_attribute_store \ + test_deque_map \ + test_pbf_reader \ + test_pooled_string \ + test_sorted_node_store \ + test_sorted_way_store + +test_append_vector: \ + src/mmap_allocator.o \ + test/append_vector.test.o + $(CXX) $(CXXFLAGS) -o test.append_vector $^ $(INC) $(LIB) $(LDFLAGS) && ./test.append_vector + +test_attribute_store: \ + src/mmap_allocator.o \ + src/attribute_store.o \ + src/pooled_string.o \ + test/attribute_store.test.o + $(CXX) $(CXXFLAGS) -o test.attribute_store $^ $(INC) $(LIB) $(LDFLAGS) && ./test.attribute_store + +test_deque_map: \ + test/deque_map.test.o + $(CXX) $(CXXFLAGS) -o test.deque_map $^ $(INC) $(LIB) $(LDFLAGS) && ./test.deque_map + +test_options_parser: \ + src/options_parser.o \ + test/options_parser.test.o + $(CXX) $(CXXFLAGS) -o test.options_parser $^ $(INC) $(LIB) $(LDFLAGS) && ./test.options_parser + +test_pooled_string: \ + src/mmap_allocator.o \ + src/pooled_string.o \ + test/pooled_string.test.o + $(CXX) $(CXXFLAGS) -o test.pooled_string $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pooled_string + +test_sorted_node_store: \ + src/external/streamvbyte_decode.o \ + src/external/streamvbyte_encode.o \ + src/external/streamvbyte_zigzag.o \ + src/mmap_allocator.o \ + src/sorted_node_store.o \ + test/sorted_node_store.test.o + $(CXX) $(CXXFLAGS) -o test.sorted_node_store 
$^ $(INC) $(LIB) $(LDFLAGS) && ./test.sorted_node_store test_sorted_way_store: \ src/external/streamvbyte_decode.o \ @@ -133,9 +180,14 @@ test_sorted_way_store: \ src/external/streamvbyte_zigzag.o \ src/mmap_allocator.o \ src/sorted_way_store.o \ - src/sorted_way_store.test.o + test/sorted_way_store.test.o $(CXX) $(CXXFLAGS) -o test.sorted_way_store $^ $(INC) $(LIB) $(LDFLAGS) && ./test.sorted_way_store +test_pbf_reader: \ + src/helpers.o \ + src/pbf_reader.o \ + test/pbf_reader.test.o + $(CXX) $(CXXFLAGS) -o test.pbf_reader $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pbf_reader %.o: %.cpp $(CXX) $(CXXFLAGS) -o $@ -c $< $(INC) @@ -153,6 +205,6 @@ install: install docs/man/tilemaker.1 ${DESTDIR}${MANPREFIX}/man1/ clean: - rm -f tilemaker src/*.o src/external/*.o include/*.o include/*.pb.h + rm -f tilemaker src/*.o src/external/*.o include/*.o include/*.pb.h test/*.o .PHONY: install diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index d41fba9b..d605d153 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -107,13 +107,16 @@ For example: ### Lua processing -Your Lua file needs to supply 5 things: +Your Lua file needs to supply a few things: 1. `node_keys`, a list of those OSM keys which indicate that a node should be processed -2. `init_function(name)` (optional), a function to initialize Lua logic -2. `node_function(node)`, a function to process an OSM node and add it to layers -3. `way_function(way)`, a function to process an OSM way and add it to layers -3. `exit_function` (optional), a function to finalize Lua logic (useful to show statistics) +2. `node_function()`, a function to process an OSM node and add it to layers +3. `way_function()`, a function to process an OSM way and add it to layers +4. (optional) `init_function(name)`, a function to initialize Lua logic +5. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics) +6. 
(optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation +7. (optional) `relation_function`, a function to process an OSM relation and add it to layers +8. (optional) `attribute_function`, a function to remap attributes from shapefiles `node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tag keys. If a node has one of those keys, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.) @@ -127,28 +130,30 @@ Note the order: you write to a layer first, then set attributes after. To do that, you use these methods: -* `node:Find(key)` or `way:Find(key)`: get the value for a tag, or the empty string if not present. For example, `way:Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. -* `node:Holds(key)` or `way:Holds(key)`: returns true if that key exists, false otherwise. -* `node:Layer("layer_name", false)` or `way:Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. -* `way:LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). -* `node:Attribute(key,value,minzoom)` or `node:Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. -* `node:AttributeNumeric(key,value,minzoom)`, `node:AttributeBoolean(key,value,minzoom)` (and `way:`...): for numeric/boolean columns. -* `node:Id()` or `way:Id()`: get the OSM ID of the current object. 
-* `node:ZOrder(number)` or `way:ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. -* `node:MinZoom(zoom)` or `way:MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `:MinZoom(5)` will have no effect if your layer only starts at z6). -* `way:Length()` and `way:Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. -* `way:Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). +* `Find(key)`: get the value for a tag, or the empty string if not present. For example, `Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. +* `Holds(key)`: returns true if that key exists, false otherwise. +* `Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. +* `LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). +* `Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. +* `AttributeNumeric(key,value,minzoom)`, `AttributeBoolean(key,value,minzoom)`: for numeric/boolean columns. +* `Id()`: get the OSM ID of the current object. 
+* `ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. +* `MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `:MinZoom(5)` will have no effect if your layer only starts at z6). +* `Length()` and `Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. +* `Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). The simplest possible function, to include roads/paths and nothing else, might look like this: - function way_function(way) - local highway = way:Find("highway") +```lua + function way_function() + local highway = Find("highway") if highway~="" then - way:Layer("roads", false) - way:Attribute("name", way:Find("name")) - way:Attribute("type", highway) + Layer("roads", false) + Attribute("name", Find("name")) + Attribute("type", highway) end end +``` Take a look at the supplied process.lua for a simple example, or the more complex OpenMapTiles-compatible script in `resources/`. You can specify another filename with the `--process` option. 
@@ -197,11 +202,11 @@ When processing OSM objects with your Lua script, you can perform simple spatial You can then find out whether a node is within one of these polygons using the `Intersects` method: - if node:Intersects("countries") then print("Looks like it's on land"); end + if Intersects("countries") then print("Looks like it's on land"); end Or you can find out what country(/ies) the node is within using `FindIntersecting`, which returns a table: - names = node:FindIntersecting("countries") + names = FindIntersecting("countries") print(table.concat(name,",")) To enable these functions, set `index` to true in your shapefile layer definition. `index_column` is not needed for `Intersects` but required for `FindIntersecting`. diff --git a/docs/RELATIONS.md b/docs/RELATIONS.md index 6e436b68..6fc3b557 100644 --- a/docs/RELATIONS.md +++ b/docs/RELATIONS.md @@ -22,26 +22,30 @@ This is a two-stage process: first, when reading relations, indicate that these To define which relations should be accepted, add a `relation_scan_function`: - function relation_scan_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - local network = relation:Find("network") - if network=="ncn" then relation:Accept() end +```lua + function relation_scan_function() + if Find("type")=="route" and Find("route")=="bicycle" then + local network = Find("network") + if network=="ncn" then Accept() end end end +``` -This function takes the relation as its sole argument. Examine the tags using `relation:Find(key)` as normal. (You can also use `relation:Holds(key)` and `relation:Id()`.) If you want to use this relation, call `relation:Accept()`. +Examine the tags using `Find(key)` as normal. (You can also use `Holds(key)` and `Id()`.) If you want to use this relation, call `Accept()`. #### Stage 2: accessing relations from ways -Now that you've accepted the relations, they will be available from `way_function`. 
They are accessed using an iterator (`way:NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `way:FindInRelation(key)`. For example: +Now that you've accepted the relations, they will be available from `way_function`. They are accessed using an iterator (`NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `FindInRelation(key)`. For example: +```lua while true do - local rel = way:NextRelation() + local rel = NextRelation() if not rel then break end - print ("Part of route "..way:FindInRelation("ref")) + print ("Part of route "..FindInRelation("ref")) end +``` -(Should you need to re-read the relations, you can reset the iterator with `way:RestartRelations()`.) +(Should you need to re-read the relations, you can reset the iterator with `RestartRelations()`.) 
### Writing relation geometries @@ -52,13 +56,15 @@ First, make sure that you have accepted the relations using `relation_scan_funct Then write a `relation_function`, which works in the same way as `way_function` would: - function relation_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - relation:Layer("bike_routes", false) - relation:Attribute("class", relation:Find("network")) - relation:Attribute("ref", relation:Find("ref")) +```lua + function relation_function() + if Find("type")=="route" and Find("route")=="bicycle" then + Layer("bike_routes", false) + Attribute("class", Find("network")) + Attribute("ref", Find("ref")) end end +``` ### Not supported diff --git a/include/append_vector.h b/include/append_vector.h new file mode 100644 index 00000000..07531217 --- /dev/null +++ b/include/append_vector.h @@ -0,0 +1,195 @@ +#ifndef _APPEND_VECTOR_H +#define _APPEND_VECTOR_H + +#include "mmap_allocator.h" +#include <vector> +#include <deque> + +// Tilemaker collects OutputObjects in a list that +// - spills to disk +// - only gets appended to +// +// Vector is great for linear access, but resizes cause expensive disk I/O to +// copy elements. +// +// Deque is great for growing without disk I/O, but it allocates in blocks of 512, +// which is inefficient for linear access. +// +// Instead, we author a limited vector-of-vectors class that allocates in bigger chunks, +// to get the best of both worlds.
+ +#define APPEND_VECTOR_SIZE 8192 +namespace AppendVectorNS { + template <class T> + class AppendVector { + public: + struct Iterator { + using iterator_category = std::random_access_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + + Iterator(AppendVector& appendVector, uint16_t vec, uint16_t offset): + appendVector(&appendVector), vec(vec), offset(offset) {} + + Iterator(): + appendVector(nullptr), vec(0), offset(0) {} + + + bool operator<(const Iterator& other) const { + if (vec < other.vec) + return true; + + if (vec > other.vec) + return false; + + return offset < other.offset; + } + + bool operator>=(const Iterator& other) const { + return !(*this < other); + } + + Iterator operator-(int delta) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute -= delta; + return Iterator(*appendVector, absolute / APPEND_VECTOR_SIZE, absolute % APPEND_VECTOR_SIZE); + } + + Iterator operator+(int delta) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute += delta; + return Iterator(*appendVector, absolute / APPEND_VECTOR_SIZE, absolute % APPEND_VECTOR_SIZE); + } + + bool operator==(const Iterator& other) const { + return appendVector == other.appendVector && vec == other.vec && offset == other.offset; + } + + bool operator!=(const Iterator& other) const { + return !(*this == other); + } + + std::ptrdiff_t operator-(const Iterator& other) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + int64_t otherAbsolute = other.vec * APPEND_VECTOR_SIZE + other.offset; + + return absolute - otherAbsolute; + } + + reference operator*() const { + auto& vector = appendVector->vecs[vec]; + auto& el = vector[offset]; + return el; + } + + pointer operator->() const { + auto& vector = appendVector->vecs[vec]; + auto& el = vector[offset]; + return &el; + } + + Iterator& operator+= (int delta) { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute += delta;
+ + vec = absolute / APPEND_VECTOR_SIZE; + offset = absolute % APPEND_VECTOR_SIZE; + return *this; + } + + Iterator& operator-= (int delta) { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute -= delta; + + vec = absolute / APPEND_VECTOR_SIZE; + offset = absolute % APPEND_VECTOR_SIZE; + return *this; + } + + // Prefix increment + Iterator& operator++() { + offset++; + if (offset == APPEND_VECTOR_SIZE) { + offset = 0; + vec++; + } + return *this; + } + + // Postfix increment + Iterator operator++(int) { Iterator tmp = *this; ++(*this); return tmp; } + + // Prefix decrement + Iterator& operator--() { + if (offset > 0) { + offset--; + } else { + vec--; + offset = APPEND_VECTOR_SIZE - 1; + } + + return *this; + } + + // Postfix decrement + Iterator operator--(int) { Iterator tmp = *this; --(*this); return tmp; } + + private: + mutable AppendVector* appendVector; + int32_t vec, offset; + }; + + AppendVector(): + count(0), + vecs(1) { + } + + void clear() { + count = 0; + vecs.clear(); + vecs.push_back(std::vector<T, mmap_allocator<T>>()); + vecs.back().reserve(APPEND_VECTOR_SIZE); + } + + size_t size() const { + return count; + } + + T& operator [](int idx) { + auto& vec = vecs[idx / APPEND_VECTOR_SIZE]; + auto& el = vec[idx % APPEND_VECTOR_SIZE]; + return el; + } + + Iterator begin() { + return Iterator(*this, 0, 0); + } + + Iterator end() { + return Iterator(*this, vecs.size() - 1, count % APPEND_VECTOR_SIZE); + } + + void push_back(const T& el) { + if (vecs.back().capacity() == 0) + vecs.back().reserve(APPEND_VECTOR_SIZE); + + vecs.back().push_back(el); + + if (vecs.back().size() == vecs.back().capacity()) { + vecs.push_back(std::vector<T, mmap_allocator<T>>()); + vecs.back().reserve(APPEND_VECTOR_SIZE); + } + + count++; + } + + size_t count; + std::deque<std::vector<T, mmap_allocator<T>>> vecs; + }; +} + +#undef APPEND_VECTOR_SIZE + +#endif diff --git a/include/attribute_store.h b/include/attribute_store.h index ad1aa4e1..6f11ba00 100644 --- a/include/attribute_store.h +++ b/include/attribute_store.h @@ -10,6 +10,8 @@
#include #include #include +#include "pooled_string.h" +#include "deque_map.h" /* AttributeStore - global dictionary for attributes */ @@ -39,26 +41,67 @@ class AttributeKeyStore { std::map keys2index; }; -enum class AttributePairType: char { False = 0, True = 1, Float = 2, String = 3 }; +enum class AttributePairType: char { Bool = 0, Float = 1, String = 2 }; // AttributePair is a key/value pair (with minzoom) +#pragma pack(push, 1) struct AttributePair { - std::string stringValue_; - float floatValue_; - short keyIndex; - char minzoom; - AttributePairType valueType; + short keyIndex : 9; + AttributePairType valueType : 3; + char minzoom : 4; + union { + float floatValue_; + PooledString stringValue_; + }; AttributePair(uint32_t keyIndex, bool value, char minzoom) - : keyIndex(keyIndex), valueType(value ? AttributePairType::True : AttributePairType::False), minzoom(minzoom) + : keyIndex(keyIndex), valueType(AttributePairType::Bool), minzoom(minzoom), floatValue_(value ? 1 : 0) { } - AttributePair(uint32_t keyIndex, const std::string& value, char minzoom) + AttributePair(uint32_t keyIndex, const PooledString& value, char minzoom) : keyIndex(keyIndex), valueType(AttributePairType::String), stringValue_(value), minzoom(minzoom) { } AttributePair(uint32_t keyIndex, float value, char minzoom) - : keyIndex(keyIndex), valueType(AttributePairType::Float), floatValue_(value), minzoom(minzoom) + : keyIndex(keyIndex), valueType(AttributePairType::Float), minzoom(minzoom), floatValue_(value) + { + } + + AttributePair(const AttributePair& other): + keyIndex(other.keyIndex), valueType(other.valueType), minzoom(other.minzoom) { + if (valueType == AttributePairType::Bool || valueType == AttributePairType::Float) { + floatValue_ = other.floatValue_; + return; + } + + stringValue_ = other.stringValue_; + } + + AttributePair& operator=(const AttributePair& other) { + keyIndex = other.keyIndex; + valueType = other.valueType; + minzoom = other.minzoom; + + if (valueType == 
AttributePairType::Bool || valueType == AttributePairType::Float) { + floatValue_ = other.floatValue_; + return *this; + } + + stringValue_ = other.stringValue_; + return *this; + } + + bool operator<(const AttributePair& other) const { + if (minzoom != other.minzoom) + return minzoom < other.minzoom; + if (keyIndex != other.keyIndex) + return keyIndex < other.keyIndex; + if (valueType != other.valueType) return valueType < other.valueType; + + if (hasStringValue()) return pooledString() < other.pooledString(); + if (hasBoolValue()) return boolValue() < other.boolValue(); + if (hasFloatValue()) return floatValue() < other.floatValue(); + throw std::runtime_error("Invalid type in attribute store"); } bool operator==(const AttributePair &other) const { @@ -66,7 +109,7 @@ struct AttributePair { if (valueType == AttributePairType::String) return stringValue_ == other.stringValue_; - if (valueType == AttributePairType::Float) + if (valueType == AttributePairType::Float || valueType == AttributePairType::Bool) return floatValue_ == other.floatValue_; return true; @@ -74,13 +117,16 @@ struct AttributePair { bool hasStringValue() const { return valueType == AttributePairType::String; } bool hasFloatValue() const { return valueType == AttributePairType::Float; } - bool hasBoolValue() const { return valueType == AttributePairType::True || valueType == AttributePairType::False; }; + bool hasBoolValue() const { return valueType == AttributePairType::Bool; } - const std::string& stringValue() const { return stringValue_; } + const PooledString& pooledString() const { return stringValue_; } + const std::string stringValue() const { return stringValue_.toString(); } float floatValue() const { return floatValue_; } - bool boolValue() const { return valueType == AttributePairType::True; } + bool boolValue() const { return floatValue_; } - static bool isHot(const AttributePair& pair, const std::string& keyName) { + void ensureStringIsOwned(); + + static bool isHot(const std::string& 
keyName, const std::string& value) { // Is this pair a candidate for the hot pool? // Hot pairs are pairs that we think are likely to be re-used, like @@ -89,25 +135,11 @@ struct AttributePair { // The trick is that we commit to putting them in the hot pool // before we know if we were right. - // All boolean pairs are eligible. - if (pair.hasBoolValue()) - return true; - - // Small integers are eligible. - if (pair.hasFloatValue()) { - float v = pair.floatValue(); - - if (ceil(v) == v && v >= 0 && v <= 25) - return true; - } - - // The remaining things should be strings, but just in case... - if (!pair.hasStringValue()) - return false; + // The rules for floats/booleans are managed in their addAttribute call. // Only strings that are IDish are eligible: only lowercase letters. bool ok = true; - for (const auto& c: pair.stringValue()) { + for (const auto& c: value) { if (c != '-' && c != '_' && (c < 'a' || c > 'z')) return false; } @@ -124,9 +156,10 @@ struct AttributePair { boost::hash_combine(rv, keyIndex); boost::hash_combine(rv, valueType); - if(hasStringValue()) - boost::hash_combine(rv, stringValue()); - else if(hasFloatValue()) + if(hasStringValue()) { + const char* data = pooledString().data(); + boost::hash_range(rv, data, data + pooledString().size()); + } else if(hasFloatValue()) boost::hash_combine(rv, floatValue()); else if(hasBoolValue()) boost::hash_combine(rv, boolValue()); @@ -137,6 +170,7 @@ struct AttributePair { return rv; } }; +#pragma pack(pop) // We shard the cold pools to reduce the odds of lock contention on @@ -149,46 +183,32 @@ struct AttributePair { #define SHARD_BITS 14 #define ATTRIBUTE_SHARDS (1 << SHARD_BITS) +class AttributeStore; class AttributePairStore { public: AttributePairStore(): finalized(false), - pairs(ATTRIBUTE_SHARDS), - pairsMaps(ATTRIBUTE_SHARDS), pairsMutex(ATTRIBUTE_SHARDS), - hotShardSize(0) + lookups(0), + lookupsUncached(0) { - // NB: the hot shard is stored in its own, pre-allocated vector. 
- // pairs[0] is _not_ the hot shard - hotShard.reserve(1 << 16); - for (size_t i = 0; i < 1 << 16; i++) - hotShard.push_back(AttributePair(0, false, 0)); + // The "hot" shard has a capacity of 64K, the others are unbounded. + pairs.push_back(DequeMap(1 << 16)); + // Reserve offset 0 as a sentinel + pairs[0].add(AttributePair(0, false, 0)); + for (size_t i = 1; i < ATTRIBUTE_SHARDS; i++) + pairs.push_back(DequeMap()); } void finalize() { finalized = true; } const AttributePair& getPair(uint32_t i) const; const AttributePair& getPairUnsafe(uint32_t i) const; - uint32_t addPair(const AttributePair& pair, bool isHot); - - struct key_value_less_ptr { - bool operator()(AttributePair const* lhs, AttributePair const* rhs) const { - if (lhs->minzoom != rhs->minzoom) - return lhs->minzoom < rhs->minzoom; - if (lhs->keyIndex != rhs->keyIndex) - return lhs->keyIndex < rhs->keyIndex; - if (lhs->valueType != rhs->valueType) return lhs->valueType < rhs->valueType; - - if (lhs->hasStringValue()) return lhs->stringValue() < rhs->stringValue(); - if (lhs->hasBoolValue()) return lhs->boolValue() < rhs->boolValue(); - if (lhs->hasFloatValue()) return lhs->floatValue() < rhs->floatValue(); - throw std::runtime_error("Invalid type in attribute store"); - } - }; + uint32_t addPair(AttributePair& pair, bool isHot); - std::vector> pairs; - std::vector> pairsMaps; private: + friend class AttributeStore; + std::vector> pairs; bool finalized; // We refer to all attribute pairs by index. // @@ -198,41 +218,39 @@ class AttributePairStore { // we suspect will be popular. It only ever has 64KB items, // so that we can reference it with a short. 
mutable std::vector pairsMutex; - std::atomic hotShardSize; - std::vector hotShard; + std::atomic lookupsUncached; + std::atomic lookups; }; // AttributeSet is a set of AttributePairs // = the complete attributes for one object struct AttributeSet { - struct less_ptr { - bool operator()(const AttributeSet* lhs, const AttributeSet* rhs) const { - if (lhs->useVector != rhs->useVector) - return lhs->useVector < rhs->useVector; - - if (lhs->useVector) { - if (lhs->intValues.size() != rhs->intValues.size()) - return lhs->intValues.size() < rhs->intValues.size(); - - for (int i = 0; i < lhs->intValues.size(); i++) { - if (lhs->intValues[i] != rhs->intValues[i]) { - return lhs->intValues[i] < rhs->intValues[i]; - } - } + bool operator<(const AttributeSet& other) const { + if (useVector != other.useVector) + return useVector < other.useVector; - return false; - } + if (useVector) { + if (intValues.size() != other.intValues.size()) + return intValues.size() < other.intValues.size(); - for (int i = 0; i < sizeof(lhs->shortValues)/sizeof(lhs->shortValues[0]); i++) { - if (lhs->shortValues[i] != rhs->shortValues[i]) { - return lhs->shortValues[i] < rhs->shortValues[i]; + for (int i = 0; i < intValues.size(); i++) { + if (intValues[i] != other.intValues[i]) { + return intValues[i] < other.intValues[i]; } } return false; } - }; + + for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++) { + if (shortValues[i] != other.shortValues[i]) { + return shortValues[i] < other.shortValues[i]; + } + } + + return false; + } size_t hash() const { // Values are in canonical form after finalizeSet is called, so @@ -253,6 +271,7 @@ struct AttributeSet { return idx; } + bool operator!=(const AttributeSet& other) const { return !(*this == other); } bool operator==(const AttributeSet &other) const { // Equivalent if, for every value in values, there is a value in other.values // whose pair is the same. 
@@ -380,6 +399,8 @@ struct AttributeSet { struct AttributeStore { AttributeIndex add(AttributeSet &attributes); std::vector getUnsafe(AttributeIndex index) const; + void reset(); // used for testing + size_t size() const; void reportSize() const; void finalize(); @@ -390,9 +411,9 @@ struct AttributeStore { AttributeStore(): finalized(false), sets(ATTRIBUTE_SHARDS), - setsMaps(ATTRIBUTE_SHARDS), setsMutex(ATTRIBUTE_SHARDS), - lookups(0) { + lookups(0), + lookupsUncached(0) { } AttributeKeyStore keyStore; @@ -400,11 +421,11 @@ struct AttributeStore { private: bool finalized; - std::vector> sets; - std::vector> setsMaps; + std::vector> sets; mutable std::vector setsMutex; mutable std::mutex mutex; + std::atomic lookupsUncached; std::atomic lookups; }; diff --git a/include/deque_map.h b/include/deque_map.h new file mode 100644 index 00000000..ea57f669 --- /dev/null +++ b/include/deque_map.h @@ -0,0 +1,132 @@ +#ifndef DEQUE_MAP_H +#define DEQUE_MAP_H + +#include +#include +#include +#include +#include + +// A class which looks deep within the soul of some instance of +// a class T and assigns it a number based on the order in which +// it joined (or reminds it of its number). +// +// Used to translate an 8-byte pointer into a 4-byte ID that can be +// used repeatedly. +template +class DequeMap { +public: + DequeMap(): maxSize(0) {} + DequeMap(uint32_t maxSize): maxSize(maxSize) {} + + bool full() const { + return maxSize != 0 && size() == maxSize; + } + + // If `entry` is already in the map, return its index. + // Otherwise, if maxSize is `0`, or greater than the number of entries in the map, + // add the item and return its index. + // Otherwise, return -1. + int32_t add(const T& entry) { + // Search to see if we've already got this entry. 
+ const auto offsets = boost::irange(0, keys.size()); + const auto it = std::lower_bound( + offsets.begin(), + offsets.end(), + entry, + [&](const auto &e, auto id) { + return objects.at(keys[e]) < id; + } + ); + + // We do, return its index. + if (it != offsets.end() && objects[keys[*it]] == entry) + return keys[*it]; + + if (maxSize > 0 && objects.size() >= maxSize) + return -1; + + // We don't, so store it... + const uint32_t newIndex = objects.size(); + objects.push_back(entry); + + // ...and add its index to our keys vector. + const uint32_t keysOffset = it == offsets.end() ? offsets.size() : *it; + + const uint32_t desiredSize = keys.size() + 1; + + // Amortize growth + if (keys.capacity() < desiredSize) + keys.reserve(keys.capacity() * 1.5); + + keys.resize(desiredSize); + + // Unless we're adding to the end, we need to shuffle existing keys down + // to make room for our new index. + if (keysOffset != newIndex) { + std::memmove(&keys[keysOffset + 1], &keys[keysOffset], sizeof(uint32_t) * (keys.size() - 1 - keysOffset)); + } + + keys[keysOffset] = newIndex; + return newIndex; + } + + void clear() { + objects.clear(); + keys.clear(); + } + + // Returns the index of `entry` if present, -1 otherwise. + int32_t find(const T& entry) const { + const auto offsets = boost::irange(0, keys.size()); + const auto it = std::lower_bound( + offsets.begin(), + offsets.end(), + entry, + [&](const auto &e, auto id) { + return objects.at(keys[e]) < id; + } + ); + + // We do, return its index. 
+ if (it != offsets.end() && objects[keys[*it]] == entry) + return keys[*it]; + + return -1; + } + + inline const T& operator[](uint32_t index) const { + return objects[index]; + } + + inline const T& at(uint32_t index) const { + return objects.at(index); + } + + size_t size() const { return objects.size(); } + + struct iterator { + const DequeMap& dm; + size_t offset; + iterator(const DequeMap& dm, size_t offset): dm(dm), offset(offset) {} + void operator++() { offset++; } + bool operator!=(iterator& other) { return offset != other.offset; } + const T& operator*() const { return dm.objects[dm.keys[offset]]; } + }; + + iterator begin() const { return iterator{*this, 0}; } + iterator end() const { return iterator{*this, keys.size()}; } + +private: + uint32_t maxSize; + + // Using a deque is necessary, as it provides pointer-stability for previously + // added objects when it grows the storage (as opposed to, e.g., vector). + std::deque objects; + + // Whereas `objects` is ordered by insertion-time, `keys` is sorted such that + // objects[key[0]] < objects[key[1]] < ... < objects[key[$]] + // operator< of T. 
+ std::vector keys; +}; +#endif diff --git a/include/helpers.h b/include/helpers.h index 7cb9c027..de490874 100644 --- a/include/helpers.h +++ b/include/helpers.h @@ -3,7 +3,8 @@ #define _HELPERS_H #include -#include "geom.h" +#include +#include // General helper routines @@ -27,12 +28,11 @@ inline std::vector split_string(std::string &inputStr, char sep) { return res; } +void decompress_string(std::string& output, const char* input, uint32_t inputSize, bool asGzip = false); double bboxElementFromStr(const std::string& number); std::vector parseBox(const std::string& bbox); -std::string decompress_string(const std::string& str, bool asGzip = false); - std::string compress_string(const std::string& str, int compressionlevel = Z_DEFAULT_COMPRESSION, bool asGzip = false); diff --git a/include/node_store.h b/include/node_store.h index cc84aba2..76fe18b3 100644 --- a/include/node_store.h +++ b/include/node_store.h @@ -23,6 +23,11 @@ class NodeStore // Accessors virtual size_t size() const = 0; virtual LatpLon at(NodeID i) const = 0; + + virtual bool contains(size_t shard, NodeID id) const = 0; + virtual NodeStore& shard(size_t shard) = 0; + virtual const NodeStore& shard(size_t shard) const = 0; + virtual size_t shards() const = 0; }; #endif diff --git a/include/node_stores.h b/include/node_stores.h index c5151bec..05d00f4e 100644 --- a/include/node_stores.h +++ b/include/node_stores.h @@ -5,6 +5,7 @@ #include #include "node_store.h" #include "sorted_node_store.h" +#include "sharded_node_store.h" #include "mmap_allocator.h" class BinarySearchNodeStore : public NodeStore @@ -19,10 +20,16 @@ class BinarySearchNodeStore : public NodeStore LatpLon at(NodeID i) const override; size_t size() const override; void insert(const std::vector& elements) override; - void clear() { + void clear() override { reopen(); } - void batchStart() {} + void batchStart() override {} + + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return 
*this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } + private: mutable std::mutex mutex; @@ -49,7 +56,14 @@ class CompactNodeStore : public NodeStore void insert(const std::vector& elements) override; void clear() override; void finalize(size_t numThreads) override {} - void batchStart() {} + void batchStart() override {} + + // CompactNodeStore has no metadata to know whether or not it contains + // a node, so it's not suitable for used in sharded scenarios. + bool contains(size_t shard, NodeID id) const override { return true; } + NodeStore& shard(size_t shard) override { return *this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } private: // @brief Insert a latp/lon pair. diff --git a/include/options_parser.h b/include/options_parser.h new file mode 100644 index 00000000..3ca73785 --- /dev/null +++ b/include/options_parser.h @@ -0,0 +1,58 @@ +#ifndef OPTIONS_PARSER_H +#define OPTIONS_PARSER_H + +#include +#include +#include + +namespace OptionsParser { + struct OptionException : std::exception { + OptionException(std::string message): message(message) {} + + /// Returns the explanatory string. + const char* what() const noexcept override { + return message.data(); + } + + private: + std::string message; + }; + + enum class OutputMode: char { File = 0, MBTiles = 1, PMTiles = 2 }; + + struct OsmOptions { + std::string storeFile; + bool fast = false; + bool compact = false; + bool skipIntegrity = false; + bool uncompressedNodes = false; + bool uncompressedWays = false; + bool materializeGeometries = false; + // lazyGeometries is the inverse of materializeGeometries. It can be passed + // to override an implicit materializeGeometries, as in the non-store case. 
+ bool lazyGeometries = false; + bool shardStores = false; + }; + + struct Options { + std::vector inputFiles; + std::string luaFile; + std::string jsonFile; + uint32_t threadNum = 0; + std::string outputFile; + std::string bbox; + + OsmOptions osm; + bool showHelp = false; + bool verbose = false; + bool mergeSqlite = false; + bool mapsplit = false; + OutputMode outputMode = OutputMode::File; + bool logTileTimings = false; + }; + + Options parse(const int argc, const char* argv[]); + void showHelp(); +}; + +#endif diff --git a/include/osm_lua_processing.h b/include/osm_lua_processing.h index b646bc2e..6a6a1d5d 100644 --- a/include/osm_lua_processing.h +++ b/include/osm_lua_processing.h @@ -13,9 +13,12 @@ #include "shp_mem_tiles.h" #include "osm_mem_tiles.h" #include "helpers.h" +#include #include +class TagMap; + // Lua extern "C" { #include "lua.h" @@ -31,6 +34,20 @@ extern bool verbose; class AttributeStore; class AttributeSet; +// A string, which might be in `currentTags` as a value. If Lua +// code refers to an absent value, it'll fallback to passing +// it as a std::string. +// +// The intent is that Attribute("name", Find("name")) is a common +// pattern, and we ought to avoid marshalling a string back and +// forth from C++ to Lua when possible. +struct PossiblyKnownTagValue { + bool found; + uint32_t index; + std::string fallback; +}; + + /** \brief OsmLuaProcessing - converts OSM objects into OutputObjects. 
@@ -71,34 +88,28 @@ class OsmLuaProcessing { // ---- Data loading methods - using tag_map_t = boost::container::flat_map; + using tag_map_t = boost::container::flat_map; // Scan non-MP relation - bool scanRelation(WayID id, const tag_map_t &tags); + bool scanRelation(WayID id, const TagMap& tags); /// \brief We are now processing a significant node - void setNode(NodeID id, LatpLon node, const tag_map_t &tags); + void setNode(NodeID id, LatpLon node, const TagMap& tags); /// \brief We are now processing a way - bool setWay(WayID wayId, LatpLonVec const &llVec, const tag_map_t &tags); + bool setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags); /** \brief We are now processing a relation * (note that we store relations as ways with artificial IDs, and that * we use decrementing positive IDs to give a bit more space for way IDs) */ - void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const tag_map_t &tags, bool isNativeMP, bool isInnerOuter); + void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const TagMap& tags, bool isNativeMP, bool isInnerOuter); // ---- Metadata queries called from Lua // Get the ID of the current object std::string Id() const; - // Check if there's a value for a given key - bool Holds(const std::string& key) const; - - // Get an OSM tag for a given key (or return empty string if none) - const std::string& Find(const std::string& key) const; - // ---- Spatial queries called from Lua // Find intersecting shapefile layer @@ -160,11 +171,8 @@ class OsmLuaProcessing { void LayerAsCentroid(const std::string &layerName); // Set attributes in a vector tile's Attributes table - void Attribute(const std::string &key, const std::string &val); - void AttributeWithMinZoom(const std::string &key, const std::string &val, const char minzoom); - void AttributeNumeric(const std::string &key, const float val); + void AttributeWithMinZoom(const std::string &key, const 
PossiblyKnownTagValue& val, const char minzoom); void AttributeNumericWithMinZoom(const std::string &key, const float val, const char minzoom); - void AttributeBoolean(const std::string &key, const bool val); void AttributeBooleanWithMinZoom(const std::string &key, const bool val, const char minzoom); void MinZoom(const double z); void ZOrder(const double z); @@ -199,6 +207,7 @@ class OsmLuaProcessing { inline AttributeStore &getAttributeStore() { return attributeStore; } struct luaProcessingException :std::exception {}; + const TagMap* currentTags; private: /// Internal: clear current cached state @@ -216,6 +225,8 @@ class OsmLuaProcessing { lastStoredGeometryId = 0; } + void removeAttributeIfNeeded(const std::string& key); + const inline Point getPoint() { return Point(lon/10000000.0,latp/10000000.0); } @@ -258,7 +269,7 @@ class OsmLuaProcessing { class LayerDefinition &layers; std::vector> outputs; // All output objects that have been created - const boost::container::flat_map* currentTags; + std::vector outputKeys; std::vector finalizeOutputs(); diff --git a/include/osm_mem_tiles.h b/include/osm_mem_tiles.h index a6266ea3..3c920b08 100644 --- a/include/osm_mem_tiles.h +++ b/include/osm_mem_tiles.h @@ -6,10 +6,15 @@ #include "osm_store.h" #include "geometry_cache.h" -#define OSM_THRESHOLD (1ull << 35) -#define USE_WAY_STORE (1ull << 35) -#define IS_WAY(x) (((x) >> 35) == (USE_WAY_STORE >> 35)) -#define OSM_ID(x) ((x) & 0b111111111111111111111111111111111) +// NB: Currently, USE_NODE_STORE and USE_WAY_STORE are equivalent. +// If we permit LayerAsCentroid to be generated from the OSM stores, +// this will have to change. 
+#define OSM_THRESHOLD (1ull << TILE_DATA_ID_SIZE) +#define USE_NODE_STORE (2ull << TILE_DATA_ID_SIZE) +#define IS_NODE(x) (((x) >> TILE_DATA_ID_SIZE) == (USE_NODE_STORE >> TILE_DATA_ID_SIZE)) +#define USE_WAY_STORE (1ull << TILE_DATA_ID_SIZE) +#define IS_WAY(x) (((x) >> TILE_DATA_ID_SIZE) == (USE_WAY_STORE >> TILE_DATA_ID_SIZE)) +#define OSM_ID(x) ((x) & 0b1111111111111111111111111111111111) class NodeStore; class WayStore; @@ -32,18 +37,21 @@ class OsmMemTiles : public TileDataSource { const WayStore& wayStore ); + std::string name() const override { return "osm"; } + Geometry buildWayGeometry( const OutputGeometryType geomType, const NodeID objectID, const TileBbox &bbox ) override; + LatpLon buildNodeGeometry(NodeID const objectID, const TileBbox &bbox) const override; void Clear(); private: - void populateLinestring(Linestring& ls, NodeID objectID); - Linestring& getOrBuildLinestring(NodeID objectID); + void populateLinestring(Linestring& ls, NodeID objectID) const; + Linestring& getOrBuildLinestring(NodeID objectID) const; void populateMultiPolygon(MultiPolygon& dst, NodeID objectID) override; const NodeStore& nodeStore; diff --git a/include/osm_store.h b/include/osm_store.h index 11158bb2..5bb74272 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -11,12 +11,21 @@ #include #include #include +#include extern bool verbose; class NodeStore; class WayStore; +// A comparator for data_view so it can be used in boost's flat_map +struct DataViewLessThan { + bool operator()(const protozero::data_view& a, const protozero::data_view& b) const { + return a < b; + } +}; + + // // Internal data structures. 
// @@ -72,37 +81,39 @@ class RelationScanStore { private: using tag_map_t = boost::container::flat_map; - std::map> relationsForWays; - std::map relationTags; - mutable std::mutex mutex; + std::vector>> relationsForWays; + std::vector> relationTags; + mutable std::vector mutex; public: + RelationScanStore(): relationsForWays(128), relationTags(128), mutex(128) {} void relation_contains_way(WayID relid, WayID wayid) { - std::lock_guard lock(mutex); - relationsForWays[wayid].emplace_back(relid); + const size_t shard = wayid % mutex.size(); + + std::lock_guard lock(mutex[shard]); + relationsForWays[shard][wayid].emplace_back(relid); } void store_relation_tags(WayID relid, const tag_map_t &tags) { - std::lock_guard lock(mutex); - relationTags[relid] = tags; + const size_t shard = relid % mutex.size(); + std::lock_guard lock(mutex[shard]); + relationTags[shard][relid] = tags; } bool way_in_any_relations(WayID wayid) { - return relationsForWays.find(wayid) != relationsForWays.end(); + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard].find(wayid) != relationsForWays[shard].end(); } std::vector relations_for_way(WayID wayid) { - return relationsForWays[wayid]; + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard][wayid]; } std::string get_relation_tag(WayID relid, const std::string &key) { - auto it = relationTags.find(relid); - if (it==relationTags.end()) return ""; + const size_t shard = relid % mutex.size(); + auto it = relationTags[shard].find(relid); + if (it==relationTags[shard].end()) return ""; auto jt = it->second.find(key); if (jt==it->second.end()) return ""; return jt->second; } - void clear() { - std::lock_guard lock(mutex); - relationsForWays.clear(); - relationTags.clear(); - } }; diff --git a/include/osmformat.proto b/include/osmformat.proto deleted file mode 100644 index 93060586..00000000 --- a/include/osmformat.proto +++ /dev/null @@ -1,226 +0,0 @@ -syntax = "proto2"; - -option java_package = 
"crosby.binary"; - -/* OSM Binary file format - -This is the master schema file of the OSM binary file format. This -file is designed to support limited random-access and future -extendability. - -A binary OSM file consists of a sequence of FileBlocks (please see -fileformat.proto). The first fileblock contains a serialized instance -of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that -contain the primitives. - -Each primitiveblock is designed to be independently parsable. It -contains a string table storing all strings in that block (keys and -values in tags, roles in relations, usernames, etc.) as well as -metadata containing the precision of coordinates or timestamps in that -block. - -A primitiveblock contains a sequence of primitive groups, each -containing primitives of the same type (nodes, densenodes, ways, -relations). Coordinates are stored in signed 64-bit integers. Lat&lon -are measured in units nanodegrees. The default of -granularity of 100 nanodegrees corresponds to about 1cm on the ground, -and a full lat or lon fits into 32 bits. - -Converting an integer to a lattitude or longitude uses the formula: -$OUT = IN * granularity / 10**9$. Many encoding schemes use delta -coding when representing nodes and relations. - -*/ - -/* Added */ - -message BlobHeader { - required string type = 1; - optional bytes indexdata = 2; - required int32 datasize = 3; -} -message Blob { - optional bytes raw = 1; // No compression - optional int32 raw_size = 2; // Only set when compressed, to the uncompressed size - optional bytes zlib_data = 3; - // optional bytes lzma_data = 4; // PROPOSED. - // optional bytes OBSOLETE_bzip2_data = 5; // Deprecated. -} - - -////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////// - -/* Contains the file header. 
*/ - -message HeaderBlock { - optional HeaderBBox bbox = 1; - /* Additional tags to aid in parsing this dataset */ - repeated string required_features = 4; - repeated string optional_features = 5; - - optional string writingprogram = 16; - optional string source = 17; // From the bbox field. -} - - -/** The bounding box field in the OSM header. BBOX, as used in the OSM -header. Units are always in nanodegrees -- they do not obey -granularity rules. */ - -message HeaderBBox { - required sint64 left = 1; - required sint64 right = 2; - required sint64 top = 3; - required sint64 bottom = 4; -} - - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - - -message PrimitiveBlock { - required StringTable stringtable = 1; - repeated PrimitiveGroup primitivegroup = 2; - - // Granularity, units of nanodegrees, used to store coordinates in this block - optional int32 granularity = 17 [default=100]; - // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. - optional int64 lat_offset = 19 [default=0]; - optional int64 lon_offset = 20 [default=0]; - -// Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. - optional int32 date_granularity = 18 [default=1000]; - - - // Proposed extension: - //optional BBox bbox = 19; -} - -// Group of OSMPrimitives. All primitives in a group must be the same type. -message PrimitiveGroup { - repeated Node nodes = 1; - optional DenseNodes dense = 2; - repeated Way ways = 3; - repeated Relation relations = 4; - repeated ChangeSet changesets = 5; -} - - -/** String table, contains the common strings in each block. - - Note that we reserve index '0' as a delimiter, so the entry at that - index in the table is ALWAYS blank and unused. - - */ -message StringTable { - repeated bytes s = 1; -} - -/* Optional metadata that may be included into each primitive. 
*/ -message Info { - optional int32 version = 1 [default = -1]; - optional int32 timestamp = 2; - optional int64 changeset = 3; - optional int32 uid = 4; - optional int32 user_sid = 5; // String IDs -} - -/** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ -message DenseInfo { - repeated int32 version = 1 [packed = true]; - repeated sint64 timestamp = 2 [packed = true]; // DELTA coded - repeated sint64 changeset = 3 [packed = true]; // DELTA coded - repeated sint32 uid = 4 [packed = true]; // DELTA coded - repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded -} - - -// TODO: REMOVE THIS? NOT in osmosis schema. -message ChangeSet { - required int64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; // String IDs. - repeated uint32 vals = 3 [packed = true]; // String IDs. - - optional Info info = 4; - - required int64 created_at = 8; - optional int64 closetime_delta = 9; - required bool open = 10; - optional HeaderBBox bbox = 11; -} - - -message Node { - required sint64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; // String IDs. - repeated uint32 vals = 3 [packed = true]; // String IDs. - - optional Info info = 4; // May be omitted in omitmeta - - required sint64 lat = 8; - required sint64 lon = 9; -} - -/* Used to densly represent a sequence of nodes that do not have any tags. - -We represent these nodes columnwise as five columns: ID's, lats, and -lons, all delta coded. When metadata is not omitted, - -We encode keys & vals for all nodes as a single array of integers -containing key-stringid and val-stringid, using a stringid of 0 as a -delimiter between nodes. 
- - ( ( )* '0' )* - */ - -message DenseNodes { - repeated sint64 id = 1 [packed = true]; // DELTA coded - - //repeated Info info = 4; - optional DenseInfo denseinfo = 5; - - repeated sint64 lat = 8 [packed = true]; // DELTA coded - repeated sint64 lon = 9 [packed = true]; // DELTA coded - - // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. - repeated int32 keys_vals = 10 [packed = true]; -} - - -message Way { - required int64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; - repeated uint32 vals = 3 [packed = true]; - - optional Info info = 4; - - repeated sint64 refs = 8 [packed = true]; // DELTA coded - repeated sint64 lats = 9 [packed = true]; - repeated sint64 lons = 10 [packed = true]; -} - -message Relation { - enum MemberType { - NODE = 0; - WAY = 1; - RELATION = 2; - } - required int64 id = 1; - - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; - repeated uint32 vals = 3 [packed = true]; - - optional Info info = 4; - - // Parallel arrays - repeated int32 roles_sid = 8 [packed = true]; - repeated sint64 memids = 9 [packed = true]; // DELTA encoded - repeated MemberType types = 10 [packed = true]; -} - diff --git a/include/output_object.h b/include/output_object.h index 3d2d862e..9afd5cba 100644 --- a/include/output_object.h +++ b/include/output_object.h @@ -12,7 +12,6 @@ #include "osm_store.h" // Protobuf -#include "osmformat.pb.h" #include "vector_tile.pb.h" enum OutputGeometryType : unsigned int { POINT_, LINESTRING_, MULTILINESTRING_, POLYGON_ }; @@ -22,9 +21,6 @@ std::ostream& operator<<(std::ostream& os, OutputGeometryType geomType); /** * \brief OutputObject - any object (node, linestring, polygon) to be outputted to tiles - - * Possible future improvements to save memory: - * - use a global dictionary for attribute key/values */ #pragma pack(push, 4) class OutputObject { diff --git a/include/pbf_blocks.h b/include/pbf_blocks.h deleted file mode 100644 
index 5cc28969..00000000 --- a/include/pbf_blocks.h +++ /dev/null @@ -1,48 +0,0 @@ -/*! \file */ -#ifndef _PBF_BLOCKS_H -#define _PBF_BLOCKS_H - -#include -#include -#include -#include - -// Protobuf -#include "osmformat.pb.h" -#include "vector_tile.pb.h" - -/* ------------------- - Protobuf handling - ------------------- */ - -// Read and parse a protobuf message -void readMessage(google::protobuf::Message *message, std::istream &input, unsigned int size); - -// Read an osm.pbf sequence of header length -> BlobHeader -> Blob -// and parse the unzipped contents into a message -BlobHeader readHeader(std::istream &input); -void readBlock(google::protobuf::Message *messagePtr, std::size_t datasize, std::istream &input); - -void writeBlock(google::protobuf::Message *messagePtr, std::ostream &output, std::string headerType); -/* ------------------- - Tag handling - ------------------- */ - -// Populate an array with the contents of a StringTable -void readStringTable(std::vector *strPtr, PrimitiveBlock *pbPtr); - -/// Populate a map with the reverse contents of a StringTable (i.e. 
string->num) -void readStringMap(std::map *mapPtr, PrimitiveBlock *pbPtr); - -/// Read the tags for a way into a hash -/// requires strings array to have been populated by readStringTable -std::map getTags(std::vector *strPtr, Way *wayPtr); - -/// Find the index of a string in the StringTable, adding it if it's not there -unsigned int findStringInTable(std::string *strPtr, std::map *mapPtr, PrimitiveBlock *pbPtr); - -/// Set a tag for a way to a new value -void setTag(Way *wayPtr, unsigned int keyIndex, unsigned int valueIndex); - -#endif //_PBF_BLOCKS_H - diff --git a/include/read_pbf.h b/include/pbf_processor.h similarity index 56% rename from include/read_pbf.h rename to include/pbf_processor.h index b934a563..691613c1 100644 --- a/include/read_pbf.h +++ b/include/pbf_processor.h @@ -8,10 +8,12 @@ #include #include #include "osm_store.h" +#include "pbf_reader.h" +#include // Protobuf -#include "osmformat.pb.h" #include "vector_tile.pb.h" +#include "tag_map.h" class OsmLuaProcessing; @@ -42,33 +44,34 @@ struct IndexedBlockMetadata: BlockMetadata { * * The output class is typically OsmMemTiles, which is derived from OsmLuaProcessing */ -class PbfReader +class PbfProcessor { public: enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8 }; - PbfReader(OSMStore &osmStore); + PbfProcessor(OSMStore &osmStore); using pbfreader_generate_output = std::function< std::shared_ptr () >; using pbfreader_generate_stream = std::function< std::shared_ptr () >; int ReadPbfFile( + uint shards, bool hasSortTypeThenID, const std::unordered_set& nodeKeys, unsigned int threadNum, const pbfreader_generate_stream& generate_stream, - const pbfreader_generate_output& generate_output + const pbfreader_generate_output& generate_output, + const NodeStore& nodeStore, + const WayStore& wayStore ); // Read tags into a map from a way/node/relation - using tag_map_t = boost::container::flat_map; template - void readTags(T &pbfObject, PrimitiveBlock const &pb, tag_map_t &tags) 
{ - tags.reserve(pbfObject.keys_size()); - auto keysPtr = pbfObject.mutable_keys(); - auto valsPtr = pbfObject.mutable_vals(); - for (uint n=0; n < pbfObject.keys_size(); n++) { - tags[pb.stringtable().s(keysPtr->Get(n))] = pb.stringtable().s(valsPtr->Get(n)); + void readTags(T &pbfObject, PbfReader::PrimitiveBlock const &pb, TagMap& tags) { + for (uint n=0; n < pbfObject.keys.size(); n++) { + auto keyIndex = pbfObject.keys[n]; + auto valueIndex = pbfObject.vals[n]; + tags.addTag(pb.stringTable[keyIndex], pb.stringTable[valueIndex]); } } @@ -79,29 +82,40 @@ class PbfReader const BlockMetadata& blockMetadata, const std::unordered_set& nodeKeys, bool locationsOnWays, - ReadPhase phase + ReadPhase phase, + uint shard, + uint effectiveShard ); - bool ReadNodes(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, const std::unordered_set &nodeKeyPositions); + bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const std::unordered_set& nodeKeyPositions); - bool ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, bool locationsOnWays); - bool ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb); + bool ReadWays( + OsmLuaProcessing& output, + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + bool locationsOnWays, + uint shard, + uint effectiveShards + ); + bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb); bool ReadRelations( OsmLuaProcessing& output, - PrimitiveGroup& pg, - const PrimitiveBlock& pb, - const BlockMetadata& blockMetadata + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + const BlockMetadata& blockMetadata, + uint shard, + uint effectiveShards ); - inline bool RelationIsType(Relation const &rel, int typeKey, int val) { - if (typeKey==-1 || val==-1) return false; - auto typeI = std::find(rel.keys().begin(), 
rel.keys().end(), typeKey); - if (typeI==rel.keys().end()) return false; - int typePos = typeI - rel.keys().begin(); - return rel.vals().Get(typePos) == val; + inline bool relationIsType(const PbfReader::Relation& rel, int typeKey, int val) { + if (typeKey == -1 || val == -1) return false; + auto typeI = std::find(rel.keys.begin(), rel.keys.end(), typeKey); + if (typeI == rel.keys.end()) return false; + int typePos = typeI - rel.keys.begin(); + return rel.vals[typePos] == val; } /// Find a string in the dictionary - static int findStringPosition(PrimitiveBlock const &pb, char const *str); + static int findStringPosition(const PbfReader::PrimitiveBlock& pb, const std::string& str); OSMStore &osmStore; std::mutex ioMutex; diff --git a/include/pbf_reader.h b/include/pbf_reader.h new file mode 100644 index 00000000..9af930c5 --- /dev/null +++ b/include/pbf_reader.h @@ -0,0 +1,296 @@ +#ifndef _PBF_READER_H +#define _PBF_READER_H + +#include +#include +#include +#include +#include +#include + +namespace PbfReader { + namespace Schema { + // See https://wiki.openstreetmap.org/wiki/PBF_Format#Definition_of_the_OSMHeader_fileblock + // for more background on the PBF schema. + enum class BlobHeader : protozero::pbf_tag_type { + required_string_type = 1, + optional_bytes_indexdata = 2, + required_int32_datasize = 3 + }; + + enum class Blob : protozero::pbf_tag_type { + optional_int32_raw_size = 2, // When compressed, the uncompressed size + oneof_data_bytes_raw = 1, // No compression + oneof_data_bytes_zlib_data = 3, + oneof_data_bytes_lzma_data = 4, + // Formerly used for bzip2 compressed data. Deprecated in 2010. + // bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. + oneof_data_bytes_lz4_data = 6, + oneof_data_bytes_zstd_data = 7, + }; + + enum class HeaderBBox : protozero::pbf_tag_type { + // These units are always in nanodegrees, they don't obey granularity rules. 
+ required_sint64_left = 1, + required_sint64_right = 2, + required_sint64_top = 3, + required_sint64_bottom = 4 + }; + + enum class HeaderBlock : protozero::pbf_tag_type { + optional_HeaderBBox_bbox = 1, + repeated_string_optional_features = 5 + }; + + enum class StringTable : protozero::pbf_tag_type { + repeated_bytes_s = 1 + }; + + enum class PrimitiveBlock : protozero::pbf_tag_type { + required_StringTable_stringtable = 1, + repeated_PrimitiveGroup_primitivegroup = 2, + optional_int32_granularity = 17, + optional_int32_date_granularity = 18, + optional_int64_lat_offset = 19, + optional_int64_lon_offset = 20 + }; + + enum class PrimitiveGroup : protozero::pbf_tag_type { + repeated_Node_nodes = 1, + optional_DenseNodes_dense = 2, + repeated_Way_ways = 3, + repeated_Relation_relations = 4, + repeated_ChangeSet_changesets = 5 + }; + + enum class DenseNodes : protozero::pbf_tag_type { + repeated_sint64_id = 1, + repeated_sint64_lat = 8, + repeated_sint64_lon = 9, + repeated_int32_keys_vals = 10 + }; + + enum class Way : protozero::pbf_tag_type { + required_int64_id = 1, + repeated_uint32_keys = 2, + repeated_uint32_vals = 3, + repeated_sint64_refs = 8, + repeated_sint64_lats = 9, + repeated_sint64_lons = 10 + }; + + enum class Relation : protozero::pbf_tag_type { + required_int64_id = 1, + repeated_uint32_keys = 2, + repeated_uint32_vals = 3, + repeated_int32_roles_sid = 8, + repeated_sint64_memids = 9, + repeated_MemberType_types = 10 + }; + } + + struct BlobHeader { + std::string type; + int32_t datasize; + }; + + struct HeaderBBox { + double minLon, maxLon, minLat, maxLat; + }; + + struct HeaderBlock { + bool hasBbox; + HeaderBBox bbox; + std::set optionalFeatures; + }; + + enum class PrimitiveGroupType: char { Node = 1, DenseNodes = 2, Way = 3, Relation = 4, ChangeSet = 5}; + + struct DenseNodes { + struct Node { + uint64_t id; + int32_t lon; + int32_t lat; + uint32_t tagStart; + uint32_t tagEnd; + }; + + struct Iterator { + int32_t offset; + Node node; + 
DenseNodes& nodes; + + bool operator!=(Iterator& other) const; + void operator++(); + Node& operator*(); + }; + + std::vector ids; + std::vector lons; + std::vector lats; + std::vector tagStart; + std::vector tagEnd; + std::vector keyValues; + Iterator begin(); + Iterator end(); + bool empty(); + void clear(); + void readDenseNodes(protozero::data_view data); + }; + + struct Way { + uint64_t id; + std::vector keys; + std::vector vals; + std::vector refs; + std::vector lats; + std::vector lons; + }; + + struct Relation { + enum MemberType: int { NODE = 0, WAY = 1, RELATION = 2 }; + uint64_t id; + std::vector keys; + std::vector vals; + std::vector memids; + std::vector roles_sid; + std::vector types; + }; + + class PrimitiveGroup; + struct Ways { + struct Iterator { + protozero::pbf_message message; + int offset; + Way& way; + + bool operator!=(Iterator& other) const; + void operator++(); + PbfReader::Way& operator*(); + + private: + void readWay(protozero::data_view data); + }; + + Ways(PrimitiveGroup* pg, Way& way): pg(pg), way(way) {} + Iterator begin(); + Iterator end(); + bool empty(); + + private: + friend PrimitiveGroup; + PrimitiveGroup* pg; + Way& way; + }; + + struct Relations { + struct Iterator { + protozero::pbf_message message; + int offset; + Relation& relation; + + bool operator!=(Iterator& other) const; + void operator++(); + PbfReader::Relation& operator*(); + + private: + void readRelation(protozero::data_view data); + }; + + + Relations(PrimitiveGroup* pg, Relation& relation): pg(pg), relation(relation) {} + Iterator begin(); + Iterator end(); + bool empty(); + + private: + friend PrimitiveGroup; + PrimitiveGroup* pg; + Relation& relation; + }; + + struct PrimitiveGroup { + PrimitiveGroup( + protozero::data_view data, + DenseNodes& nodes, + Way& way, + Relation& relation + ); + DenseNodes& nodes() const; + Ways& ways() const; + Relations& relations() const; + PrimitiveGroupType type() const; + + int32_t translateNodeKeyValue(int32_t i) const; + + 
// Only meant to be called by our iterator, not by client code. + void ensureData(); + protozero::data_view getDataView(); + private: + protozero::data_view data; + DenseNodes& denseNodes; + mutable Ways internalWays; + mutable Relations internalRelations; + PrimitiveGroupType internalType; + bool denseNodesInitialized; + + }; + + class PbfReader; + struct PrimitiveBlock { + struct PrimitiveGroups { + struct Iterator { + int offset; + std::vector* groups; + + Iterator(): offset(0), groups(nullptr) {} + Iterator(int offset, std::vector& groups): offset(offset), groups(&groups) {} + bool operator!=(Iterator& other) const; + void operator++(); + PrimitiveGroup& operator*(); + }; + + + PrimitiveGroups(): groups(nullptr) {} + PrimitiveGroups(std::vector& groups): groups(&groups) {} + Iterator begin(); + Iterator end(); + + private: + std::vector* groups; + }; + + std::vector stringTable; + PrimitiveGroups& groups(); + + private: + friend PbfReader; + std::vector internalGroups; + PrimitiveGroups groupsImpl; + }; + + // This is a little weird: we use a class only to get private storage + // for multiple PBF readers. Due to the way we plumb the input files + // elsewhere in the system, the readers don't own them, and are not + // responsible for closing them. 
+ class PbfReader { + public: + BlobHeader readBlobHeader(std::istream& input); + protozero::data_view readBlob(int32_t datasize, std::istream& input); + HeaderBlock readHeaderBlock(protozero::data_view data); + HeaderBBox readHeaderBBox(protozero::data_view data); + PrimitiveBlock& readPrimitiveBlock(protozero::data_view data); + void readStringTable(protozero::data_view data, std::vector& stringTable); + HeaderBlock readHeaderFromFile(std::istream& input); + + private: + std::string blobStorage; // the blob as stored in the PBF + std::string blobStorage2; // the blob after decompression, if needed + PrimitiveBlock pb; + DenseNodes denseNodes; + Way way; + Relation relation; + }; +} + +#endif diff --git a/include/pooled_string.h b/include/pooled_string.h new file mode 100644 index 00000000..56d44453 --- /dev/null +++ b/include/pooled_string.h @@ -0,0 +1,61 @@ +#ifndef _POOLED_STRING_H +#define _POOLED_STRING_H + +// std::string is quite general: +// - mutable +// - unlimited length +// - capacity can differ from size +// - can deallocate its dynamic memory +// +// Our use case, by contrast is immutable, bounded strings that live for the +// duration of the process. +// +// This gives us some room to have less memory overhead, especially on +// g++, whose implementation of std::string requires 32 bytes. +// +// Thus, we implement `PooledString`. It has a size of 16 bytes, and a small +// string optimization for strings <= 15 bytes. (We will separately teach +// AttributePair to encode Latin-character strings more efficiently, so that many +// strings of size 24 or less fit in 15 bytes.) +// +// If it needs to allocate memory, it does so from a shared pool. It is unable +// to free the memory once allocated. 
+ +// PooledString has one of three modes: +// - [126:127] = 00: small-string, length is in [120:125], lower 15 bytes are string +// - [126:127] = 10: pooled string, table is in bytes 1..3, offset in bytes 4..5, length in bytes 6..7 +// - [126:127] = 11: pointer to std::string, pointer is in bytes 8..15 +// +// Note that the pointer mode is not safe to be stored. It exists just to allow +// lookups in the AttributePair map before deciding to allocate a string. + +#include +#include + +namespace PooledStringNS { + class PooledString { + public: + // Create a short string or heap string, long-lived. + PooledString(const std::string& str); + + + // Create a std string - only valid so long as the string that is + // pointed to is valid. + PooledString(const std::string* str); + size_t size() const; + bool operator<(const PooledString& other) const; + bool operator==(const PooledString& other) const; + bool operator!=(const PooledString& other) const; + std::string toString() const; + const char* data() const; + void ensureStringIsOwned(); + + private: + // 0..3 is index into table, 4..5 is offset, 6..7 is length + uint8_t storage[16]; + }; +} + +using PooledString = PooledStringNS::PooledString; + +#endif diff --git a/include/protozero/basic_pbf_builder.hpp b/include/protozero/basic_pbf_builder.hpp new file mode 100644 index 00000000..0ede726f --- /dev/null +++ b/include/protozero/basic_pbf_builder.hpp @@ -0,0 +1,266 @@ +#ifndef PROTOZERO_BASIC_PBF_BUILDER_HPP +#define PROTOZERO_BASIC_PBF_BUILDER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file basic_pbf_builder.hpp + * + * @brief Contains the basic_pbf_builder template class. 
+ */ + +#include "basic_pbf_writer.hpp" +#include "types.hpp" + +#include + +namespace protozero { + +/** + * The basic_pbf_builder is used to write PBF formatted messages into a buffer. + * It is based on the basic_pbf_writer class and has all the same methods. The + * difference is that while the pbf_writer class takes an integer tag, + * this template class takes a tag of the template type T. The idea is that + * T will be an enumeration value and this helps reduce the possibility of + * programming errors. + * + * Almost all methods in this class can throw an std::bad_alloc exception if + * the underlying buffer class wants to resize. + * + * Read the tutorial to understand how this class is used. In most cases you + * want to use the pbf_builder class which uses a std::string as buffer type. + */ +template +class basic_pbf_builder : public basic_pbf_writer { + + static_assert(std::is_same::type>::value, + "T must be enum with underlying type protozero::pbf_tag_type"); + +public: + + /// The type of messages this class will build. + using enum_type = T; + + basic_pbf_builder() = default; + + /** + * Create a builder using the given string as a data store. The object + * stores a reference to that string and adds all data to it. The string + * doesn't have to be empty. The pbf_message object will just append data. + */ + explicit basic_pbf_builder(TBuffer& data) noexcept : + basic_pbf_writer{data} { + } + + /** + * Construct a pbf_builder for a submessage from the pbf_message or + * pbf_writer of the parent message. 
+ * + * @param parent_writer The parent pbf_message or pbf_writer + * @param tag Tag of the field that will be written + */ + template + basic_pbf_builder(basic_pbf_writer& parent_writer, P tag) noexcept : + basic_pbf_writer{parent_writer, pbf_tag_type(tag)} { + } + +/// @cond INTERNAL +#define PROTOZERO_WRITER_WRAP_ADD_SCALAR(name, type) \ + void add_##name(T tag, type value) { \ + basic_pbf_writer::add_##name(pbf_tag_type(tag), value); \ + } + + PROTOZERO_WRITER_WRAP_ADD_SCALAR(bool, bool) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(enum, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(int32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint32, uint32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(int64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint64, uint64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed32, uint32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed64, uint64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(float, float) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(double, double) + +#undef PROTOZERO_WRITER_WRAP_ADD_SCALAR +/// @endcond + + /** + * Add "bytes" field to data. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_bytes(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value, size); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_bytes(T tag, const data_view& value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data. 
+ * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_bytes(T tag, const std::string& value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag of the field + * @param value Pointer to zero-delimited value to be written + */ + void add_bytes(T tag, const char* value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data using vectored input. All the data in the + * 2nd and further arguments is "concatenated" with only a single copy + * into the final buffer. + * + * This will work with objects of any type supporting the data() and + * size() methods like std::string or protozero::data_view. + * + * Example: + * @code + * std::string data1 = "abc"; + * std::string data2 = "xyz"; + * builder.add_bytes_vectored(1, data1, data2); + * @endcode + * + * @tparam Ts List of types supporting data() and size() methods. + * @param tag Tag of the field + * @param values List of objects of types Ts with data to be appended. + */ + template + void add_bytes_vectored(T tag, Ts&&... values) { + basic_pbf_writer::add_bytes_vectored(pbf_tag_type(tag), std::forward(values)...); + } + + /** + * Add "string" field to data. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_string(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value, size); + } + + /** + * Add "string" field to data. + * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_string(T tag, const data_view& value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "string" field to data. 
+ * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_string(T tag, const std::string& value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "string" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + */ + void add_string(T tag, const char* value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Pointer to message to be written + * @param size Length of the message + */ + void add_message(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value, size); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(T tag, const data_view& value) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Value to be written. The value must be a complete message. 
+ */ + void add_message(T tag, const std::string& value) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value); + } + +/// @cond INTERNAL +#define PROTOZERO_WRITER_WRAP_ADD_PACKED(name) \ + template \ + void add_packed_##name(T tag, InputIterator first, InputIterator last) { \ + basic_pbf_writer::add_packed_##name(pbf_tag_type(tag), first, last); \ + } + + PROTOZERO_WRITER_WRAP_ADD_PACKED(bool) + PROTOZERO_WRITER_WRAP_ADD_PACKED(enum) + PROTOZERO_WRITER_WRAP_ADD_PACKED(int32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sint32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(uint32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(int64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sint64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(uint64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(float) + PROTOZERO_WRITER_WRAP_ADD_PACKED(double) + +#undef PROTOZERO_WRITER_WRAP_ADD_PACKED +/// @endcond + +}; // class basic_pbf_builder + +} // end namespace protozero + +#endif // PROTOZERO_BASIC_PBF_BUILDER_HPP diff --git a/include/protozero/basic_pbf_writer.hpp b/include/protozero/basic_pbf_writer.hpp new file mode 100644 index 00000000..f167c4d1 --- /dev/null +++ b/include/protozero/basic_pbf_writer.hpp @@ -0,0 +1,1054 @@ +#ifndef PROTOZERO_BASIC_PBF_WRITER_HPP +#define PROTOZERO_BASIC_PBF_WRITER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file basic_pbf_writer.hpp + * + * @brief Contains the basic_pbf_writer template class. 
+ */ + +#include "buffer_tmpl.hpp" +#include "config.hpp" +#include "data_view.hpp" +#include "types.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace protozero { + +namespace detail { + + template class packed_field_varint; + template class packed_field_svarint; + template class packed_field_fixed; + +} // end namespace detail + +/** + * The basic_pbf_writer is used to write PBF formatted messages into a buffer. + * + * This uses TBuffer as the type for the underlaying buffer. In typical uses + * this is std::string, but you can use a different type that must support + * the right interface. Please see the documentation for details. + * + * Almost all methods in this class can throw an std::bad_alloc exception if + * the underlying buffer class wants to resize. + */ +template +class basic_pbf_writer { + + // A pointer to a buffer holding the data already written to the PBF + // message. For default constructed writers or writers that have been + // rolled back, this is a nullptr. + TBuffer* m_data = nullptr; + + // A pointer to a parent writer object if this is a submessage. If this + // is a top-level writer, it is a nullptr. + basic_pbf_writer* m_parent_writer = nullptr; + + // This is usually 0. If there is an open submessage, this is set in the + // parent to the rollback position, ie. the last position before the + // submessage was started. This is the position where the header of the + // submessage starts. + std::size_t m_rollback_pos = 0; + + // This is usually 0. If there is an open submessage, this is set in the + // parent to the position where the data of the submessage is written to. 
+ std::size_t m_pos = 0; + + void add_varint(uint64_t value) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + add_varint_to_buffer(m_data, value); + } + + void add_field(pbf_tag_type tag, pbf_wire_type type) { + protozero_assert(((tag > 0 && tag < 19000) || (tag > 19999 && tag <= ((1U << 29U) - 1))) && "tag out of range"); + const uint32_t b = (tag << 3U) | uint32_t(type); + add_varint(b); + } + + void add_tagged_varint(pbf_tag_type tag, uint64_t value) { + add_field(tag, pbf_wire_type::varint); + add_varint(value); + } + + template + void add_fixed(T value) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&value); +#endif + buffer_customization::append(m_data, reinterpret_cast(&value), sizeof(T)); + } + + template + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag /*unused*/) { + if (first == last) { + return; + } + + basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_fixed(*first++); + } + } + + template + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag /*unused*/) { + if (first == last) { + return; + } + + const auto length = std::distance(first, last); + add_length_varint(tag, sizeof(T) * pbf_length_type(length)); + reserve(sizeof(T) * std::size_t(length)); + + while (first != last) { + add_fixed(*first++); + } + } + + template + void add_packed_varint(pbf_tag_type tag, It first, It last) { + if (first == last) { + return; + } + + basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_varint(uint64_t(*first++)); + } + } + + template + void add_packed_svarint(pbf_tag_type tag, It first, It last) { + if (first == last) { + return; + } + + 
basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_varint(encode_zigzag64(*first++)); + } + } + + // The number of bytes to reserve for the varint holding the length of + // a length-delimited field. The length has to fit into pbf_length_type, + // and a varint needs 8 bit for every 7 bit. + enum : int { + reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1 + }; + + // If m_rollpack_pos is set to this special value, it means that when + // the submessage is closed, nothing needs to be done, because the length + // of the submessage has already been written correctly. + enum : std::size_t { + size_is_known = std::numeric_limits::max() + }; + + void open_submessage(pbf_tag_type tag, std::size_t size) { + protozero_assert(m_pos == 0); + protozero_assert(m_data); + if (size == 0) { + m_rollback_pos = buffer_customization::size(m_data); + add_field(tag, pbf_wire_type::length_delimited); + buffer_customization::append_zeros(m_data, std::size_t(reserve_bytes)); + } else { + m_rollback_pos = size_is_known; + add_length_varint(tag, pbf_length_type(size)); + reserve(size); + } + m_pos = buffer_customization::size(m_data); + } + + void rollback_submessage() { + protozero_assert(m_pos != 0); + protozero_assert(m_rollback_pos != size_is_known); + protozero_assert(m_data); + buffer_customization::resize(m_data, m_rollback_pos); + m_pos = 0; + } + + void commit_submessage() { + protozero_assert(m_pos != 0); + protozero_assert(m_rollback_pos != size_is_known); + protozero_assert(m_data); + const auto length = pbf_length_type(buffer_customization::size(m_data) - m_pos); + + protozero_assert(buffer_customization::size(m_data) >= m_pos - reserve_bytes); + const auto n = add_varint_to_buffer(buffer_customization::at_pos(m_data, m_pos - reserve_bytes), length); + + buffer_customization::erase_range(m_data, m_pos - reserve_bytes + n, m_pos); + m_pos = 0; + } + + void close_submessage() { + protozero_assert(m_data); + if (m_pos == 0 || m_rollback_pos == size_is_known) 
{ + return; + } + if (buffer_customization::size(m_data) - m_pos == 0) { + rollback_submessage(); + } else { + commit_submessage(); + } + } + + void add_length_varint(pbf_tag_type tag, pbf_length_type length) { + add_field(tag, pbf_wire_type::length_delimited); + add_varint(length); + } + +public: + + /** + * Create a writer using the specified buffer as a data store. The + * basic_pbf_writer stores a pointer to that buffer and adds all data to + * it. The buffer doesn't have to be empty. The basic_pbf_writer will just + * append data. + */ + explicit basic_pbf_writer(TBuffer& buffer) noexcept : + m_data{&buffer} { + } + + /** + * Create a writer without a data store. In this form the writer can not + * be used! + */ + basic_pbf_writer() noexcept = default; + + /** + * Construct a basic_pbf_writer for a submessage from the basic_pbf_writer + * of the parent message. + * + * @param parent_writer The basic_pbf_writer + * @param tag Tag (field number) of the field that will be written + * @param size Optional size of the submessage in bytes (use 0 for unknown). + * Setting this allows some optimizations but is only possible in + * a few very specific cases. + */ + basic_pbf_writer(basic_pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size = 0) : + m_data{parent_writer.m_data}, + m_parent_writer{&parent_writer} { + m_parent_writer->open_submessage(tag, size); + } + + /// A basic_pbf_writer object can not be copied + basic_pbf_writer(const basic_pbf_writer&) = delete; + + /// A basic_pbf_writer object can not be copied + basic_pbf_writer& operator=(const basic_pbf_writer&) = delete; + + /** + * A basic_pbf_writer object can be moved. After this the other + * basic_pbf_writer will be invalid. 
+ */ + basic_pbf_writer(basic_pbf_writer&& other) noexcept : + m_data{other.m_data}, + m_parent_writer{other.m_parent_writer}, + m_rollback_pos{other.m_rollback_pos}, + m_pos{other.m_pos} { + other.m_data = nullptr; + other.m_parent_writer = nullptr; + other.m_rollback_pos = 0; + other.m_pos = 0; + } + + /** + * A basic_pbf_writer object can be moved. After this the other + * basic_pbf_writer will be invalid. + */ + basic_pbf_writer& operator=(basic_pbf_writer&& other) noexcept { + m_data = other.m_data; + m_parent_writer = other.m_parent_writer; + m_rollback_pos = other.m_rollback_pos; + m_pos = other.m_pos; + other.m_data = nullptr; + other.m_parent_writer = nullptr; + other.m_rollback_pos = 0; + other.m_pos = 0; + return *this; + } + + ~basic_pbf_writer() noexcept { + try { + if (m_parent_writer != nullptr) { + m_parent_writer->close_submessage(); + } + } catch (...) { + // This try/catch is used to make the destructor formally noexcept. + // close_submessage() is not noexcept, but will not throw the way + // it is called here, so we are good. But to be paranoid, call... + std::terminate(); + } + } + + /** + * Check if this writer is valid. A writer is invalid if it was default + * constructed, moved from, or if commit() has been called on it. + * Otherwise it is valid. + */ + bool valid() const noexcept { + return m_data != nullptr; + } + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(basic_pbf_writer& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_parent_writer, other.m_parent_writer); + swap(m_rollback_pos, other.m_rollback_pos); + swap(m_pos, other.m_pos); + } + + /** + * Reserve size bytes in the underlying message store in addition to + * whatever the message store already holds. So unlike + * the `std::string::reserve()` method this is not an absolute size, + * but additional memory that should be reserved. 
+ * + * @param size Number of bytes to reserve in underlying message store. + */ + void reserve(std::size_t size) { + protozero_assert(m_data); + buffer_customization::reserve_additional(m_data, size); + } + + /** + * Commit this submessage. This does the same as when the basic_pbf_writer + * goes out of scope and is destructed. + * + * @pre Must be a basic_pbf_writer of a submessage, ie one opened with the + * basic_pbf_writer constructor taking a parent message. + * @post The basic_pbf_writer is invalid and can't be used any more. + */ + void commit() { + protozero_assert(m_parent_writer && "you can't call commit() on a basic_pbf_writer without a parent"); + protozero_assert(m_pos == 0 && "you can't call commit() on a basic_pbf_writer that has an open nested submessage"); + m_parent_writer->close_submessage(); + m_parent_writer = nullptr; + m_data = nullptr; + } + + /** + * Cancel writing of this submessage. The complete submessage will be + * removed as if it was never created and no fields were added. + * + * @pre Must be a basic_pbf_writer of a submessage, ie one opened with the + * basic_pbf_writer constructor taking a parent message. + * @post The basic_pbf_writer is invalid and can't be used any more. + */ + void rollback() { + protozero_assert(m_parent_writer && "you can't call rollback() on a basic_pbf_writer without a parent"); + protozero_assert(m_pos == 0 && "you can't call rollback() on a basic_pbf_writer that has an open nested submessage"); + m_parent_writer->rollback_submessage(); + m_parent_writer = nullptr; + m_data = nullptr; + } + + ///@{ + /** + * @name Scalar field writer functions + */ + + /** + * Add "bool" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bool(pbf_tag_type tag, bool value) { + add_field(tag, pbf_wire_type::varint); + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + m_data->push_back(char(value)); + } + + /** + * Add "enum" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_enum(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "int32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_int32(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "sint32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sint32(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, encode_zigzag32(value)); + } + + /** + * Add "uint32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_uint32(pbf_tag_type tag, uint32_t value) { + add_tagged_varint(tag, value); + } + + /** + * Add "int64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_int64(pbf_tag_type tag, int64_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "sint64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sint64(pbf_tag_type tag, int64_t value) { + add_tagged_varint(tag, encode_zigzag64(value)); + } + + /** + * Add "uint64" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_uint64(pbf_tag_type tag, uint64_t value) { + add_tagged_varint(tag, value); + } + + /** + * Add "fixed32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_fixed32(pbf_tag_type tag, uint32_t value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "sfixed32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sfixed32(pbf_tag_type tag, int32_t value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "fixed64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_fixed64(pbf_tag_type tag, uint64_t value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "sfixed64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sfixed64(pbf_tag_type tag, int64_t value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "float" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_float(pbf_tag_type tag, float value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "double" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_double(pbf_tag_type tag, double value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "bytes" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_bytes(pbf_tag_type tag, const char* value, std::size_t size) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + protozero_assert(size <= std::numeric_limits::max()); + add_length_varint(tag, pbf_length_type(size)); + buffer_customization::append(m_data, value, size); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bytes(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bytes(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "bytes" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag (field number) of the field + * @param value Pointer to zero-delimited value to be written + */ + void add_bytes(pbf_tag_type tag, const char* value) { + add_bytes(tag, value, std::strlen(value)); + } + + /** + * Add "bytes" field to data using vectored input. All the data in the + * 2nd and further arguments is "concatenated" with only a single copy + * into the final buffer. + * + * This will work with objects of any type supporting the data() and + * size() methods like std::string or protozero::data_view. + * + * Example: + * @code + * std::string data1 = "abc"; + * std::string data2 = "xyz"; + * writer.add_bytes_vectored(1, data1, data2); + * @endcode + * + * @tparam Ts List of types supporting data() and size() methods. 
+ * @param tag Tag (field number) of the field + * @param values List of objects of types Ts with data to be appended. + */ + template + void add_bytes_vectored(pbf_tag_type tag, Ts&&... values) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + size_t sum_size = 0; + (void)std::initializer_list{sum_size += values.size()...}; + protozero_assert(sum_size <= std::numeric_limits::max()); + add_length_varint(tag, pbf_length_type(sum_size)); + buffer_customization::reserve_additional(m_data, sum_size); + (void)std::initializer_list{(buffer_customization::append(m_data, values.data(), values.size()), 0)...}; + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_string(pbf_tag_type tag, const char* value, std::size_t size) { + add_bytes(tag, value, size); + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_string(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_string(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "string" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + */ + void add_string(pbf_tag_type tag, const char* value) { + add_bytes(tag, value, std::strlen(value)); + } + + /** + * Add "message" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Pointer to message to be written + * @param size Length of the message + */ + void add_message(pbf_tag_type tag, const char* value, std::size_t size) { + add_bytes(tag, value, size); + } + + /** + * Add "message" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "message" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + ///@} + + ///@{ + /** + * @name Repeated packed field writer functions + */ + + /** + * Add "repeated packed bool" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to bool. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed enum" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed int32" field to data. 
+ * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed sint32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_svarint(tag, first, last); + } + + /** + * Add "repeated packed uint32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed int64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed sint64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_svarint(tag, first, last); + } + + /** + * Add "repeated packed uint64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add a "repeated packed" fixed-size field to data. The following + * fixed-size fields are available: + * + * uint32_t -> repeated packed fixed32 + * int32_t -> repeated packed sfixed32 + * uint64_t -> repeated packed fixed64 + * int64_t -> repeated packed sfixed64 + * double -> repeated packed double + * float -> repeated packed float + * + * @tparam ValueType One of the following types: (u)int32/64_t, double, float. + * @tparam InputIterator A type satisfying the InputIterator concept. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed(pbf_tag_type tag, InputIterator first, InputIterator last) { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, "Only some types are allowed"); + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed fixed32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed sfixed32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed fixed64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint64_t. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed sfixed64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed float" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to float. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed double" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to double. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + ///@} + + template friend class detail::packed_field_varint; + template friend class detail::packed_field_svarint; + template friend class detail::packed_field_fixed; + +}; // class basic_pbf_writer + +/** + * Swap two basic_pbf_writer objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +template +inline void swap(basic_pbf_writer& lhs, basic_pbf_writer& rhs) noexcept { + lhs.swap(rhs); +} + +namespace detail { + + template + class packed_field { + + basic_pbf_writer m_writer{}; + + public: + + packed_field(const packed_field&) = delete; + packed_field& operator=(const packed_field&) = delete; + + packed_field(packed_field&&) noexcept = default; + packed_field& operator=(packed_field&&) noexcept = default; + + packed_field() = default; + + packed_field(basic_pbf_writer& parent_writer, pbf_tag_type tag) : + m_writer{parent_writer, tag} { + } + + packed_field(basic_pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size) : + m_writer{parent_writer, tag, size} { + } + + ~packed_field() noexcept = default; + + bool valid() const noexcept { + return m_writer.valid(); + } + + void commit() { + m_writer.commit(); + } + + void rollback() { + m_writer.rollback(); + } + + basic_pbf_writer& writer() noexcept { + return m_writer; + } + + }; // class packed_field + + template + class packed_field_fixed : public packed_field { + + public: + + packed_field_fixed() : + packed_field{} { + } + + template + packed_field_fixed(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + template + packed_field_fixed(basic_pbf_writer& parent_writer, P 
tag, std::size_t size) : + packed_field{parent_writer, static_cast(tag), size * sizeof(T)} { + } + + void add_element(T value) { + this->writer().template add_fixed(value); + } + + }; // class packed_field_fixed + + template + class packed_field_varint : public packed_field { + + public: + + packed_field_varint() : + packed_field{} { + } + + template + packed_field_varint(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + void add_element(T value) { + this->writer().add_varint(uint64_t(value)); + } + + }; // class packed_field_varint + + template + class packed_field_svarint : public packed_field { + + public: + + packed_field_svarint() : + packed_field{} { + } + + template + packed_field_svarint(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + void add_element(T value) { + this->writer().add_varint(encode_zigzag64(value)); + } + + }; // class packed_field_svarint + +} // end namespace detail + +} // end namespace protozero + +#endif // PROTOZERO_BASIC_PBF_WRITER_HPP diff --git a/include/protozero/buffer_fixed.hpp b/include/protozero/buffer_fixed.hpp new file mode 100644 index 00000000..b2e6d1d2 --- /dev/null +++ b/include/protozero/buffer_fixed.hpp @@ -0,0 +1,222 @@ +#ifndef PROTOZERO_BUFFER_FIXED_HPP +#define PROTOZERO_BUFFER_FIXED_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file buffer_fixed.hpp + * + * @brief Contains the fixed_size_buffer_adaptor class. 
+ */ + +#include "buffer_tmpl.hpp" +#include "config.hpp" + +#include +#include +#include +#include + +namespace protozero { + +/** + * This class can be used instead of std::string if you want to create a + * vector tile in a fixed-size buffer. Any operation that needs more space + * than is available will fail with a std::length_error exception. + */ +class fixed_size_buffer_adaptor { + + char* m_data; + std::size_t m_capacity; + std::size_t m_size = 0; + +public: + + /// @cond usual container typedefs not documented + + using size_type = std::size_t; + + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + + using iterator = pointer; + using const_iterator = const_pointer; + + /// @endcond + + /** + * Constructor. + * + * @param data Pointer to some memory allocated for the buffer. + * @param capacity Number of bytes available. + */ + fixed_size_buffer_adaptor(char* data, std::size_t capacity) noexcept : + m_data(data), + m_capacity(capacity) { + } + + /** + * Constructor. + * + * @param container Some container class supporting the member functions + * data() and size(). + */ + template + explicit fixed_size_buffer_adaptor(T& container) : + m_data(container.data()), + m_capacity(container.size()) { + } + + /// Returns a pointer to the data in the buffer. + const char* data() const noexcept { + return m_data; + } + + /// Returns a pointer to the data in the buffer. + char* data() noexcept { + return m_data; + } + + /// The capacity this buffer was created with. + std::size_t capacity() const noexcept { + return m_capacity; + } + + /// The number of bytes used in the buffer. Always <= capacity(). + std::size_t size() const noexcept { + return m_size; + } + + /// Return iterator to beginning of data. + char* begin() noexcept { + return m_data; + } + + /// Return iterator to beginning of data. 
+ const char* begin() const noexcept { + return m_data; + } + + /// Return iterator to beginning of data. + const char* cbegin() const noexcept { + return m_data; + } + + /// Return iterator to end of data. + char* end() noexcept { + return m_data + m_size; + } + + /// Return iterator to end of data. + const char* end() const noexcept { + return m_data + m_size; + } + + /// Return iterator to end of data. + const char* cend() const noexcept { + return m_data + m_size; + } + +/// @cond INTERNAL + + // Do not rely on anything beyond this point + + void append(const char* data, std::size_t count) { + if (m_size + count > m_capacity) { + throw std::length_error{"fixed size data store exhausted"}; + } + std::copy_n(data, count, m_data + m_size); + m_size += count; + } + + void append_zeros(std::size_t count) { + if (m_size + count > m_capacity) { + throw std::length_error{"fixed size data store exhausted"}; + } + std::fill_n(m_data + m_size, count, '\0'); + m_size += count; + } + + void resize(std::size_t size) { + protozero_assert(size < m_size); + if (size > m_capacity) { + throw std::length_error{"fixed size data store exhausted"}; + } + m_size = size; + } + + void erase_range(std::size_t from, std::size_t to) { + protozero_assert(from <= m_size); + protozero_assert(to <= m_size); + protozero_assert(from < to); + std::copy(m_data + to, m_data + m_size, m_data + from); + m_size -= (to - from); + } + + char* at_pos(std::size_t pos) { + protozero_assert(pos <= m_size); + return m_data + pos; + } + + void push_back(char ch) { + if (m_size >= m_capacity) { + throw std::length_error{"fixed size data store exhausted"}; + } + m_data[m_size++] = ch; + } +/// @endcond + +}; // class fixed_size_buffer_adaptor + +/// @cond INTERNAL +template <> +struct buffer_customization { + + static std::size_t size(const fixed_size_buffer_adaptor* buffer) noexcept { + return buffer->size(); + } + + static void append(fixed_size_buffer_adaptor* buffer, const char* data, std::size_t count) { + 
buffer->append(data, count); + } + + static void append_zeros(fixed_size_buffer_adaptor* buffer, std::size_t count) { + buffer->append_zeros(count); + } + + static void resize(fixed_size_buffer_adaptor* buffer, std::size_t size) { + buffer->resize(size); + } + + static void reserve_additional(fixed_size_buffer_adaptor* /*buffer*/, std::size_t /*size*/) { + /* nothing to be done for fixed-size buffers */ + } + + static void erase_range(fixed_size_buffer_adaptor* buffer, std::size_t from, std::size_t to) { + buffer->erase_range(from, to); + } + + static char* at_pos(fixed_size_buffer_adaptor* buffer, std::size_t pos) { + return buffer->at_pos(pos); + } + + static void push_back(fixed_size_buffer_adaptor* buffer, char ch) { + buffer->push_back(ch); + } + +}; +/// @endcond + +} // namespace protozero + +#endif // PROTOZERO_BUFFER_FIXED_HPP diff --git a/include/protozero/buffer_string.hpp b/include/protozero/buffer_string.hpp new file mode 100644 index 00000000..02e8ad25 --- /dev/null +++ b/include/protozero/buffer_string.hpp @@ -0,0 +1,78 @@ +#ifndef PROTOZERO_BUFFER_STRING_HPP +#define PROTOZERO_BUFFER_STRING_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. 
+ +*****************************************************************************/ + +/** + * @file buffer_string.hpp + * + * @brief Contains the customization points for buffer implementation based + * on std::string + */ + +#include "buffer_tmpl.hpp" +#include "config.hpp" + +#include +#include +#include + +namespace protozero { + +// Implementation of buffer customizations points for std::string + +/// @cond INTERNAL +template <> +struct buffer_customization { + + static std::size_t size(const std::string* buffer) noexcept { + return buffer->size(); + } + + static void append(std::string* buffer, const char* data, std::size_t count) { + buffer->append(data, count); + } + + static void append_zeros(std::string* buffer, std::size_t count) { + buffer->append(count, '\0'); + } + + static void resize(std::string* buffer, std::size_t size) { + protozero_assert(size < buffer->size()); + buffer->resize(size); + } + + static void reserve_additional(std::string* buffer, std::size_t size) { + buffer->reserve(buffer->size() + size); + } + + static void erase_range(std::string* buffer, std::size_t from, std::size_t to) { + protozero_assert(from <= buffer->size()); + protozero_assert(to <= buffer->size()); + protozero_assert(from <= to); + buffer->erase(std::next(buffer->begin(), static_cast(from)), + std::next(buffer->begin(), static_cast(to))); + } + + static char* at_pos(std::string* buffer, std::size_t pos) { + protozero_assert(pos <= buffer->size()); + return (&*buffer->begin()) + pos; + } + + static void push_back(std::string* buffer, char ch) { + buffer->push_back(ch); + } + +}; +/// @endcond + +} // namespace protozero + +#endif // PROTOZERO_BUFFER_STRING_HPP diff --git a/include/protozero/buffer_tmpl.hpp b/include/protozero/buffer_tmpl.hpp new file mode 100644 index 00000000..ac223996 --- /dev/null +++ b/include/protozero/buffer_tmpl.hpp @@ -0,0 +1,113 @@ +#ifndef PROTOZERO_BUFFER_TMPL_HPP +#define PROTOZERO_BUFFER_TMPL_HPP + 
+/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file buffer_tmpl.hpp + * + * @brief Contains the customization points for buffer implementations. + */ + +#include +#include +#include + +namespace protozero { + +// Implementation of buffer customizations points for std::string + +/// @cond INTERNAL +template +struct buffer_customization { + + /** + * Get the number of bytes currently used in the buffer. + * + * @param buffer Pointer to the buffer. + * @returns number of bytes used in the buffer. + */ + static std::size_t size(const std::string* buffer); + + /** + * Append count bytes from data to the buffer. + * + * @param buffer Pointer to the buffer. + * @param data Pointer to the data. + * @param count Number of bytes to be added to the buffer. + */ + static void append(std::string* buffer, const char* data, std::size_t count); + + /** + * Append count zero bytes to the buffer. + * + * @param buffer Pointer to the buffer. + * @param count Number of bytes to be added to the buffer. + */ + static void append_zeros(std::string* buffer, std::size_t count); + + /** + * Shrink the buffer to the specified size. The new size will always be + * smaller than the current size. + * + * @param buffer Pointer to the buffer. + * @param size New size of the buffer. + * + * @pre size < current size of buffer + */ + static void resize(std::string* buffer, std::size_t size); + + /** + * Reserve an additional size bytes for use in the buffer. This is used for + * variable-sized buffers to tell the buffer implementation that soon more + * memory will be used. The implementation can ignore this. + * + * @param buffer Pointer to the buffer. + * @param size Number of bytes to reserve. 
+ */ + static void reserve_additional(std::string* buffer, std::size_t size); + + /** + * Delete data from the buffer. This must move back the data after the + * part being deleted and resize the buffer accordingly. + * + * @param buffer Pointer to the buffer. + * @param from Offset into the buffer where we want to erase from. + * @param to Offset into the buffer one past the last byte we want to erase. + * + * @pre from, to <= size of the buffer, from < to + */ + static void erase_range(std::string* buffer, std::size_t from, std::size_t to); + + /** + * Return a pointer to the memory at the specified position in the buffer. + * + * @param buffer Pointer to the buffer. + * @param pos The position in the buffer. + * @returns pointer to the memory in the buffer at the specified position. + * + * @pre pos <= size of the buffer + */ + static char* at_pos(std::string* buffer, std::size_t pos); + + /** + * Add a char to the buffer incrementing the number of chars in the buffer. + * + * @param buffer Pointer to the buffer. + * @param ch The character to add. + */ + static void push_back(std::string* buffer, char ch); + +}; +/// @endcond + +} // namespace protozero + +#endif // PROTOZERO_BUFFER_TMPL_HPP diff --git a/include/protozero/buffer_vector.hpp b/include/protozero/buffer_vector.hpp new file mode 100644 index 00000000..c163300c --- /dev/null +++ b/include/protozero/buffer_vector.hpp @@ -0,0 +1,78 @@ +#ifndef PROTOZERO_BUFFER_VECTOR_HPP +#define PROTOZERO_BUFFER_VECTOR_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. 
+ +*****************************************************************************/ + +/** + * @file buffer_vector.hpp + * + * @brief Contains the customization points for buffer implementation based + * on std::vector + */ + +#include "buffer_tmpl.hpp" +#include "config.hpp" + +#include +#include +#include + +namespace protozero { + +// Implementation of buffer customizations points for std::vector + +/// @cond INTERNAL +template <> +struct buffer_customization> { + + static std::size_t size(const std::vector* buffer) noexcept { + return buffer->size(); + } + + static void append(std::vector* buffer, const char* data, std::size_t count) { + buffer->insert(buffer->end(), data, data + count); + } + + static void append_zeros(std::vector* buffer, std::size_t count) { + buffer->insert(buffer->end(), count, '\0'); + } + + static void resize(std::vector* buffer, std::size_t size) { + protozero_assert(size < buffer->size()); + buffer->resize(size); + } + + static void reserve_additional(std::vector* buffer, std::size_t size) { + buffer->reserve(buffer->size() + size); + } + + static void erase_range(std::vector* buffer, std::size_t from, std::size_t to) { + protozero_assert(from <= buffer->size()); + protozero_assert(to <= buffer->size()); + protozero_assert(from <= to); + buffer->erase(std::next(buffer->begin(), static_cast(from)), + std::next(buffer->begin(), static_cast(to))); + } + + static char* at_pos(std::vector* buffer, std::size_t pos) { + protozero_assert(pos <= buffer->size()); + return (&*buffer->begin()) + pos; + } + + static void push_back(std::vector* buffer, char ch) { + buffer->push_back(ch); + } + +}; +/// @endcond + +} // namespace protozero + +#endif // PROTOZERO_BUFFER_VECTOR_HPP diff --git a/include/protozero/byteswap.hpp b/include/protozero/byteswap.hpp new file mode 100644 index 00000000..75cae691 --- /dev/null +++ b/include/protozero/byteswap.hpp @@ -0,0 +1,108 @@ +#ifndef PROTOZERO_BYTESWAP_HPP +#define PROTOZERO_BYTESWAP_HPP + 
+/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file byteswap.hpp + * + * @brief Contains functions to swap bytes in values (for different endianness). + */ + +#include "config.hpp" + +#include +#include + +namespace protozero { +namespace detail { + +inline uint32_t byteswap_impl(uint32_t value) noexcept { +#ifdef PROTOZERO_USE_BUILTIN_BSWAP + return __builtin_bswap32(value); +#else + return ((value & 0xff000000U) >> 24U) | + ((value & 0x00ff0000U) >> 8U) | + ((value & 0x0000ff00U) << 8U) | + ((value & 0x000000ffU) << 24U); +#endif +} + +inline uint64_t byteswap_impl(uint64_t value) noexcept { +#ifdef PROTOZERO_USE_BUILTIN_BSWAP + return __builtin_bswap64(value); +#else + return ((value & 0xff00000000000000ULL) >> 56U) | + ((value & 0x00ff000000000000ULL) >> 40U) | + ((value & 0x0000ff0000000000ULL) >> 24U) | + ((value & 0x000000ff00000000ULL) >> 8U) | + ((value & 0x00000000ff000000ULL) << 8U) | + ((value & 0x0000000000ff0000ULL) << 24U) | + ((value & 0x000000000000ff00ULL) << 40U) | + ((value & 0x00000000000000ffULL) << 56U); +#endif +} + +} // end namespace detail + +/// byteswap the data pointed to by ptr in-place. +inline void byteswap_inplace(uint32_t* ptr) noexcept { + *ptr = detail::byteswap_impl(*ptr); +} + +/// byteswap the data pointed to by ptr in-place. +inline void byteswap_inplace(uint64_t* ptr) noexcept { + *ptr = detail::byteswap_impl(*ptr); +} + +/// byteswap the data pointed to by ptr in-place. +inline void byteswap_inplace(int32_t* ptr) noexcept { + auto* bptr = reinterpret_cast(ptr); + *bptr = detail::byteswap_impl(*bptr); +} + +/// byteswap the data pointed to by ptr in-place. 
+inline void byteswap_inplace(int64_t* ptr) noexcept { + auto* bptr = reinterpret_cast(ptr); + *bptr = detail::byteswap_impl(*bptr); +} + +/// byteswap the data pointed to by ptr in-place. +inline void byteswap_inplace(float* ptr) noexcept { + static_assert(sizeof(float) == 4, "Expecting four byte float"); + + uint32_t tmp = 0; + std::memcpy(&tmp, ptr, 4); + tmp = detail::byteswap_impl(tmp); // uint32 overload + std::memcpy(ptr, &tmp, 4); +} + +/// byteswap the data pointed to by ptr in-place. +inline void byteswap_inplace(double* ptr) noexcept { + static_assert(sizeof(double) == 8, "Expecting eight byte double"); + + uint64_t tmp = 0; + std::memcpy(&tmp, ptr, 8); + tmp = detail::byteswap_impl(tmp); // uint64 overload + std::memcpy(ptr, &tmp, 8); +} + +namespace detail { + + // Added for backwards compatibility with any code that might use this + // function (even if it shouldn't have). Will be removed in a later + // version of protozero. + using ::protozero::byteswap_inplace; + +} // end namespace detail + +} // end namespace protozero + +#endif // PROTOZERO_BYTESWAP_HPP diff --git a/include/protozero/config.hpp b/include/protozero/config.hpp new file mode 100644 index 00000000..6fc77490 --- /dev/null +++ b/include/protozero/config.hpp @@ -0,0 +1,48 @@ +#ifndef PROTOZERO_CONFIG_HPP +#define PROTOZERO_CONFIG_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +#include + +/** + * @file config.hpp + * + * @brief Contains macro checks for different configurations. + */ + +#define PROTOZERO_LITTLE_ENDIAN 1234 +#define PROTOZERO_BIG_ENDIAN 4321 + +// Find out which byte order the machine has. 
+#if defined(__BYTE_ORDER) +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN +# endif +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define PROTOZERO_BYTE_ORDER PROTOZERO_BIG_ENDIAN +# endif +#else +// This probably isn't a very good default, but might do until we figure +// out something better. +# define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN +#endif + +// Check whether __builtin_bswap is available +#if defined(__GNUC__) || defined(__clang__) +# define PROTOZERO_USE_BUILTIN_BSWAP +#endif + +// Wrapper for assert() used for testing +#ifndef protozero_assert +# define protozero_assert(x) assert(x) +#endif + +#endif // PROTOZERO_CONFIG_HPP diff --git a/include/protozero/data_view.hpp b/include/protozero/data_view.hpp new file mode 100644 index 00000000..3ec87af3 --- /dev/null +++ b/include/protozero/data_view.hpp @@ -0,0 +1,236 @@ +#ifndef PROTOZERO_DATA_VIEW_HPP +#define PROTOZERO_DATA_VIEW_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file data_view.hpp + * + * @brief Contains the implementation of the data_view class. + */ + +#include "config.hpp" + +#include +#include +#include +#include +#include + +namespace protozero { + +#ifdef PROTOZERO_USE_VIEW +using data_view = PROTOZERO_USE_VIEW; +#else + +/** + * Holds a pointer to some data and a length. + * + * This class is supposed to be compatible with the std::string_view + * that will be available in C++17. + */ +class data_view { + + const char* m_data = nullptr; + std::size_t m_size = 0; + +public: + + /** + * Default constructor. Construct an empty data_view. + */ + constexpr data_view() noexcept = default; + + /** + * Create data_view from pointer and size. 
+ * + * @param ptr Pointer to the data. + * @param length Length of the data. + */ + constexpr data_view(const char* ptr, std::size_t length) noexcept + : m_data{ptr}, + m_size{length} { + } + + /** + * Create data_view from string. + * + * @param str String with the data. + */ + data_view(const std::string& str) noexcept // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + : m_data{str.data()}, + m_size{str.size()} { + } + + /** + * Create data_view from zero-terminated string. + * + * @param ptr Pointer to the data. + */ + data_view(const char* ptr) noexcept // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + : m_data{ptr}, + m_size{std::strlen(ptr)} { + } + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(data_view& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_size, other.m_size); + } + + /// Return pointer to data. + constexpr const char* data() const noexcept { + return m_data; + } + + /// Return length of data in bytes. + constexpr std::size_t size() const noexcept { + return m_size; + } + + /// Returns true if size is 0. + constexpr bool empty() const noexcept { + return m_size == 0; + } + +#ifndef PROTOZERO_STRICT_API + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + * + * @deprecated to_string() is not available in C++17 string_view so it + * should not be used to make conversion to that class easier + * in the future. + */ + std::string to_string() const { + protozero_assert(m_data); + return {m_data, m_size}; + } +#endif + + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + */ + explicit operator std::string() const { + protozero_assert(m_data); + return {m_data, m_size}; + } + + /** + * Compares the contents of this object with the given other object. 
+ * + * @returns 0 if they are the same, <0 if this object is smaller than + * the other or >0 if it is larger. If both objects have the + * same size returns <0 if this object is lexicographically + * before the other, >0 otherwise. + * + * @pre Must not be default constructed data_view. + */ + int compare(data_view other) const noexcept { + assert(m_data && other.m_data); + const int cmp = std::memcmp(data(), other.data(), + std::min(size(), other.size())); + if (cmp == 0) { + if (size() == other.size()) { + return 0; + } + return size() < other.size() ? -1 : 1; + } + return cmp; + } + +}; // class data_view + +/** + * Swap two data_view objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline void swap(data_view& lhs, data_view& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * Two data_view instances are equal if they have the same size and the + * same content. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline constexpr bool operator==(const data_view lhs, const data_view rhs) noexcept { + return lhs.size() == rhs.size() && + std::equal(lhs.data(), lhs.data() + lhs.size(), rhs.data()); +} + +/** + * Two data_view instances are not equal if they have different sizes or the + * content differs. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline constexpr bool operator!=(const data_view lhs, const data_view rhs) noexcept { + return !(lhs == rhs); +} + +/** + * Returns true if lhs.compare(rhs) < 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator<(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) < 0; +} + +/** + * Returns true if lhs.compare(rhs) <= 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator<=(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) <= 0; +} + +/** + * Returns true if lhs.compare(rhs) > 0. + * + * @param lhs First object. 
+ * @param rhs Second object. + */ +inline bool operator>(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) > 0; +} + +/** + * Returns true if lhs.compare(rhs) >= 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator>=(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) >= 0; +} + +#endif + +} // end namespace protozero + +#endif // PROTOZERO_DATA_VIEW_HPP diff --git a/include/protozero/exception.hpp b/include/protozero/exception.hpp new file mode 100644 index 00000000..a3cd0f15 --- /dev/null +++ b/include/protozero/exception.hpp @@ -0,0 +1,101 @@ +#ifndef PROTOZERO_EXCEPTION_HPP +#define PROTOZERO_EXCEPTION_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file exception.hpp + * + * @brief Contains the exceptions used in the protozero library. + */ + +#include + +/** + * @brief All parts of the protozero header-only library are in this namespace. + */ +namespace protozero { + +/** + * All exceptions explicitly thrown by the functions of the protozero library + * derive from this exception. + */ +struct exception : std::exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "pbf exception"; + } +}; + +/** + * This exception is thrown when parsing a varint that's larger than allowed. + * This should never happen unless the data is corrupted. + */ +struct varint_too_long_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "varint too long exception"; + } +}; + +/** + * This exception is thrown when the wire type of a pbf field is unknown.
+ * This should never happen unless the data is corrupted. + */ +struct unknown_pbf_wire_type_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "unknown pbf field type exception"; + } +}; + +/** + * This exception is thrown when we are trying to read a field and there + * are not enough bytes left in the buffer to read it. Almost all functions + * of the pbf_reader class can throw this exception. + * + * This should never happen unless the data is corrupted or you have + * initialized the pbf_reader object with incomplete data. + */ +struct end_of_buffer_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "end of buffer exception"; + } +}; + +/** + * This exception is thrown when a tag has an invalid value. Tags must be + * unsigned integers between 1 and 2^29-1. Tags between 19000 and 19999 are + * not allowed. See + * https://developers.google.com/protocol-buffers/docs/proto#assigning-tags + */ +struct invalid_tag_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "invalid tag exception"; + } +}; + +/** + * This exception is thrown when a length field of a packed repeated field is + * invalid. For fixed size types the length must be a multiple of the size of + * the type. + */ +struct invalid_length_exception : exception { + /// Returns the explanatory string. 
+ const char* what() const noexcept override { + return "invalid length exception"; + } +}; + +} // end namespace protozero + +#endif // PROTOZERO_EXCEPTION_HPP diff --git a/include/protozero/iterators.hpp b/include/protozero/iterators.hpp new file mode 100644 index 00000000..ee8ef8ec --- /dev/null +++ b/include/protozero/iterators.hpp @@ -0,0 +1,481 @@ +#ifndef PROTOZERO_ITERATORS_HPP +#define PROTOZERO_ITERATORS_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file iterators.hpp + * + * @brief Contains the iterators for access to packed repeated fields. + */ + +#include "config.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include + +namespace protozero { + +/** + * A range of iterators based on std::pair. Created from beginning and + * end iterators. Used as a return type from some pbf_reader methods + * that is easy to use with range-based for loops. + */ +template > +class iterator_range : +#ifdef PROTOZERO_STRICT_API + protected +#else + public +#endif + P { + +public: + + /// The type of the iterators in this range. + using iterator = T; + + /// The value type of the underlying iterator. + using value_type = typename std::iterator_traits::value_type; + + /** + * Default constructor. Create empty iterator_range. + */ + constexpr iterator_range() : + P{iterator{}, iterator{}} { + } + + /** + * Create iterator range from two iterators. + * + * @param first_iterator Iterator to beginning of range. + * @param last_iterator Iterator to end of range. 
+ */ + constexpr iterator_range(iterator&& first_iterator, iterator&& last_iterator) : + P{std::forward(first_iterator), + std::forward(last_iterator)} { + } + + /// Return iterator to beginning of range. + constexpr iterator begin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator end() const noexcept { + return this->second; + } + + /// Return iterator to beginning of range. + constexpr iterator cbegin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator cend() const noexcept { + return this->second; + } + + /** + * Return true if this range is empty. + * + * Complexity: Constant. + */ + constexpr bool empty() const noexcept { + return begin() == end(); + } + + /** + * Get the size of the range, ie the number of elements it contains. + * + * Complexity: Constant or linear depending on the underlying iterator. + */ + std::size_t size() const noexcept { + return static_cast(std::distance(begin(), end())); + } + + /** + * Get element at the beginning of the range. + * + * @pre Range must not be empty. + */ + value_type front() const { + protozero_assert(!empty()); + return *(this->first); + } + + /** + * Advance beginning of range by one. + * + * @pre Range must not be empty. + */ + void drop_front() { + protozero_assert(!empty()); + ++this->first; + } + + /** + * Swap the contents of this range with the other. + * + * @param other Other range to swap data with. + */ + void swap(iterator_range& other) noexcept { + using std::swap; + swap(this->first, other.first); + swap(this->second, other.second); + } + +}; // struct iterator_range + +/** + * Swap two iterator_ranges. + * + * @param lhs First range. + * @param rhs Second range.
+ */ +template +inline void swap(iterator_range& lhs, iterator_range& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * A forward iterator used for accessing packed repeated fields of fixed + * length (fixed32, sfixed32, float, double). + */ +template +class const_fixed_iterator { + + /// Pointer to current iterator position + const char* m_data = nullptr; + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_fixed_iterator() noexcept = default; + + explicit const_fixed_iterator(const char* data) noexcept : + m_data{data} { + } + + const_fixed_iterator(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator(const_fixed_iterator&&) noexcept = default; + + const_fixed_iterator& operator=(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator& operator=(const_fixed_iterator&&) noexcept = default; + + ~const_fixed_iterator() noexcept = default; + + value_type operator*() const noexcept { + value_type result; + std::memcpy(&result, m_data, sizeof(value_type)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&result); +#endif + return result; + } + + const_fixed_iterator& operator++() noexcept { + m_data += sizeof(value_type); + return *this; + } + + const_fixed_iterator operator++(int) noexcept { + const const_fixed_iterator tmp{*this}; + ++(*this); + return tmp; + } + + const_fixed_iterator& operator--() noexcept { + m_data -= sizeof(value_type); + return *this; + } + + const_fixed_iterator operator--(int) noexcept { + const const_fixed_iterator tmp{*this}; + --(*this); + return tmp; + } + + friend bool operator==(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return lhs.m_data == rhs.m_data; + } + + friend bool operator!=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + 
return !(lhs == rhs); + } + + friend bool operator<(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return lhs.m_data < rhs.m_data; + } + + friend bool operator>(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return rhs < lhs; + } + + friend bool operator<=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return !(lhs > rhs); + } + + friend bool operator>=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return !(lhs < rhs); + } + + const_fixed_iterator& operator+=(difference_type val) noexcept { + m_data += (sizeof(value_type) * val); + return *this; + } + + friend const_fixed_iterator operator+(const_fixed_iterator lhs, difference_type rhs) noexcept { + const_fixed_iterator tmp{lhs}; + tmp.m_data += (sizeof(value_type) * rhs); + return tmp; + } + + friend const_fixed_iterator operator+(difference_type lhs, const_fixed_iterator rhs) noexcept { + const_fixed_iterator tmp{rhs}; + tmp.m_data += (sizeof(value_type) * lhs); + return tmp; + } + + const_fixed_iterator& operator-=(difference_type val) noexcept { + m_data -= (sizeof(value_type) * val); + return *this; + } + + friend const_fixed_iterator operator-(const_fixed_iterator lhs, difference_type rhs) noexcept { + const_fixed_iterator tmp{lhs}; + tmp.m_data -= (sizeof(value_type) * rhs); + return tmp; + } + + friend difference_type operator-(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return static_cast(lhs.m_data - rhs.m_data) / static_cast(sizeof(T)); + } + + value_type operator[](difference_type n) const noexcept { + return *(*this + n); + } + + /// @endcond + +}; // class const_fixed_iterator + +/** + * A forward iterator used for accessing packed repeated varint fields + * (int32, uint32, int64, uint64, bool, enum). 
+ */ +template +class const_varint_iterator { + +protected: + + /// Pointer to current iterator position + const char* m_data = nullptr; // NOLINT(misc-non-private-member-variables-in-classes, cppcoreguidelines-non-private-member-variables-in-classes,-warnings-as-errors) + + /// Pointer to end iterator position + const char* m_end = nullptr; // NOLINT(misc-non-private-member-variables-in-classes, cppcoreguidelines-non-private-member-variables-in-classes,-warnings-as-errors) + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + static difference_type distance(const_varint_iterator begin, const_varint_iterator end) noexcept { + // The "distance" between default initialized const_varint_iterator's + // is always 0. + if (!begin.m_data) { + return 0; + } + // We know that each varint contains exactly one byte with the most + // significant bit not set. We can use this to quickly figure out + // how many varints there are without actually decoding the varints. 
+ return std::count_if(begin.m_data, end.m_data, [](char c) noexcept { + return (static_cast(c) & 0x80U) == 0; + }); + } + + const_varint_iterator() noexcept = default; + + const_varint_iterator(const char* data, const char* end) noexcept : + m_data{data}, + m_end{end} { + } + + const_varint_iterator(const const_varint_iterator&) noexcept = default; + const_varint_iterator(const_varint_iterator&&) noexcept = default; + + const_varint_iterator& operator=(const const_varint_iterator&) noexcept = default; + const_varint_iterator& operator=(const_varint_iterator&&) noexcept = default; + + ~const_varint_iterator() noexcept = default; + + value_type operator*() const { + protozero_assert(m_data); + const char* d = m_data; // will be thrown away + return static_cast(decode_varint(&d, m_end)); + } + + const_varint_iterator& operator++() { + protozero_assert(m_data); + skip_varint(&m_data, m_end); + return *this; + } + + const_varint_iterator operator++(int) { + protozero_assert(m_data); + const const_varint_iterator tmp{*this}; + ++(*this); + return tmp; + } + + bool operator==(const const_varint_iterator& rhs) const noexcept { + return m_data == rhs.m_data && m_end == rhs.m_end; + } + + bool operator!=(const const_varint_iterator& rhs) const noexcept { + return !(*this == rhs); + } + + /// @endcond + +}; // class const_varint_iterator + +/** + * A forward iterator used for accessing packed repeated svarint fields + * (sint32, sint64). 
+ */ +template +class const_svarint_iterator : public const_varint_iterator { + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_svarint_iterator() noexcept : + const_varint_iterator{} { + } + + const_svarint_iterator(const char* data, const char* end) noexcept : + const_varint_iterator{data, end} { + } + + const_svarint_iterator(const const_svarint_iterator&) = default; + const_svarint_iterator(const_svarint_iterator&&) noexcept = default; + + const_svarint_iterator& operator=(const const_svarint_iterator&) = default; + const_svarint_iterator& operator=(const_svarint_iterator&&) noexcept = default; + + ~const_svarint_iterator() = default; + + value_type operator*() const { + protozero_assert(this->m_data); + const char* d = this->m_data; // will be thrown away + return static_cast(decode_zigzag64(decode_varint(&d, this->m_end))); + } + + const_svarint_iterator& operator++() { + protozero_assert(this->m_data); + skip_varint(&this->m_data, this->m_end); + return *this; + } + + const_svarint_iterator operator++(int) { + protozero_assert(this->m_data); + const const_svarint_iterator tmp{*this}; + ++(*this); + return tmp; + } + + /// @endcond + +}; // class const_svarint_iterator + +} // end namespace protozero + +namespace std { + + // Specialize std::distance for all the protozero iterators. Because + // functions can't be partially specialized, we have to do this for + // every value_type we are using. 
+ + /// @cond individual overloads do not need to be documented + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_svarint_iterator::difference_type + distance>(protozero::const_svarint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_svarint_iterator last) { + return protozero::const_svarint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_svarint_iterator::difference_type + distance>(protozero::const_svarint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_svarint_iterator last) { + return protozero::const_svarint_iterator::distance(first, last); + } + + /// @endcond + +} // end namespace std + +#endif // 
PROTOZERO_ITERATORS_HPP diff --git a/include/protozero/pbf_builder.hpp b/include/protozero/pbf_builder.hpp new file mode 100644 index 00000000..71a2dec2 --- /dev/null +++ b/include/protozero/pbf_builder.hpp @@ -0,0 +1,32 @@ +#ifndef PROTOZERO_PBF_BUILDER_HPP +#define PROTOZERO_PBF_BUILDER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_builder.hpp + * + * @brief Contains the pbf_builder template class. + */ + +#include "basic_pbf_builder.hpp" +#include "pbf_writer.hpp" + +#include + +namespace protozero { + +/// Specialization of basic_pbf_builder using std::string as buffer type. +template +using pbf_builder = basic_pbf_builder; + +} // end namespace protozero + +#endif // PROTOZERO_PBF_BUILDER_HPP diff --git a/include/protozero/pbf_message.hpp b/include/protozero/pbf_message.hpp new file mode 100644 index 00000000..d7fd8b5d --- /dev/null +++ b/include/protozero/pbf_message.hpp @@ -0,0 +1,184 @@ +#ifndef PROTOZERO_PBF_MESSAGE_HPP +#define PROTOZERO_PBF_MESSAGE_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_message.hpp + * + * @brief Contains the pbf_message template class. + */ + +#include "pbf_reader.hpp" +#include "types.hpp" + +#include + +namespace protozero { + +/** + * This class represents a protobuf message. Either a top-level message or + * a nested sub-message. 
Top-level messages can be created from any buffer + * with a pointer and length: + * + * @code + * enum class Message : protozero::pbf_tag_type { + * ... + * }; + * + * std::string buffer; + * // fill buffer... + * pbf_message message{buffer.data(), buffer.size()}; + * @endcode + * + * Sub-messages are created using get_message(): + * + * @code + * enum class SubMessage : protozero::pbf_tag_type { + * ... + * }; + * + * pbf_message message{...}; + * message.next(); + * pbf_message submessage = message.get_message(); + * @endcode + * + * All methods of the pbf_message class except get_bytes() and get_string() + * provide the strong exception guarantee, ie they either succeed or do not + * change the pbf_message object they are called on. Use the get_data() method + * instead of get_bytes() or get_string(), if you need this guarantee. + * + * This template class is based on the pbf_reader class and has all the same + * methods. The difference is that wherever the pbf_reader class takes an + * integer tag, this template class takes a tag of the template type T. + * + * Read the tutorial to understand how this class is used. + */ +template +class pbf_message : public pbf_reader { + + static_assert(std::is_same::type>::value, + "T must be enum with underlying type protozero::pbf_tag_type"); + +public: + + /// The type of messages this class will read. + using enum_type = T; + + /** + * Construct a pbf_message. All arguments are forwarded to the pbf_reader + * parent class. + */ + template + pbf_message(Args&&... args) noexcept : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + pbf_reader{std::forward(args)...} { + } + + /** + * Set next field in the message as the current field. This is usually + * called in a while loop: + * + * @code + * pbf_message<...> message(...); + * while (message.next()) { + * // handle field + * } + * @endcode + * + * @returns `true` if there is a next field, `false` if not. + * @pre There must be no current field.
+ * @post If it returns `true` there is a current field now. + */ + bool next() { + return pbf_reader::next(); + } + + /** + * Set next field with given tag in the message as the current field. + * Fields with other tags are skipped. This is usually called in a while + * loop for repeated fields: + * + * @code + * pbf_message message{...}; + * while (message.next(Example1::repeated_fixed64_r)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_message message{...}; + * if (message.next(Example1::required_uint32_x)) { + * // handle field + * } + * @endcode + * + * Note that this will not check the wire type. The two-argument version + * of this function will also check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(T next_tag) { + return pbf_reader::next(pbf_tag_type(next_tag)); + } + + /** + * Set next field with given tag and wire type in the message as the + * current field. Fields with other tags are skipped. This is usually + * called in a while loop for repeated fields: + * + * @code + * pbf_message message{...}; + * while (message.next(Example1::repeated_fixed64_r, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_message message{...}; + * if (message.next(Example1::required_uint32_x, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * Note that this will also check the wire type. The one-argument version + * of this function will not check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. 
+ */ + bool next(T next_tag, pbf_wire_type type) { + return pbf_reader::next(pbf_tag_type(next_tag), type); + } + + /** + * The tag of the current field. The tag is the enum value for the field + * number from the description in the .proto file. + * + * Call next() before calling this function to set the current field. + * + * @returns tag of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + T tag() const noexcept { + return T(pbf_reader::tag()); + } + +}; // class pbf_message + +} // end namespace protozero + +#endif // PROTOZERO_PBF_MESSAGE_HPP diff --git a/include/protozero/pbf_reader.hpp b/include/protozero/pbf_reader.hpp new file mode 100644 index 00000000..92bfdee5 --- /dev/null +++ b/include/protozero/pbf_reader.hpp @@ -0,0 +1,977 @@ +#ifndef PROTOZERO_PBF_READER_HPP +#define PROTOZERO_PBF_READER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_reader.hpp + * + * @brief Contains the pbf_reader class. + */ + +#include "config.hpp" +#include "data_view.hpp" +#include "exception.hpp" +#include "iterators.hpp" +#include "types.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include +#include + +namespace protozero { + +/** + * This class represents a protobuf message. Either a top-level message or + * a nested sub-message. Top-level messages can be created from any buffer + * with a pointer and length: + * + * @code + * std::string buffer; + * // fill buffer... 
+ * pbf_reader message{buffer.data(), buffer.size()}; + * @endcode + * + * Sub-messages are created using get_message(): + * + * @code + * pbf_reader message{...}; + * message.next(); + * pbf_reader submessage = message.get_message(); + * @endcode + * + * All methods of the pbf_reader class except get_bytes() and get_string() + * provide the strong exception guarantee, ie they either succeed or do not + * change the pbf_reader object they are called on. Use the get_view() method + * instead of get_bytes() or get_string(), if you need this guarantee. + */ +class pbf_reader { + + // A pointer to the next unread data. + const char* m_data = nullptr; + + // A pointer to one past the end of data. + const char* m_end = nullptr; + + // The wire type of the current field. + pbf_wire_type m_wire_type = pbf_wire_type::unknown; + + // The tag of the current field. + pbf_tag_type m_tag = 0; + + template + T get_fixed() { + T result; + const char* data = m_data; + skip_bytes(sizeof(T)); + std::memcpy(&result, data, sizeof(T)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&result); +#endif + return result; + } + + template + iterator_range> packed_fixed() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + const auto len = get_len_and_skip(); + if (len % sizeof(T) != 0) { + throw invalid_length_exception{}; + } + return {const_fixed_iterator(m_data - len), + const_fixed_iterator(m_data)}; + } + + template + T get_varint() { + const auto val = static_cast(decode_varint(&m_data, m_end)); + return val; + } + + template + T get_svarint() { + protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint"); + return static_cast(decode_zigzag64(decode_varint(&m_data, m_end))); + } + + pbf_length_type get_length() { + return get_varint(); + } + + void skip_bytes(pbf_length_type len) { + if (m_end - m_data < static_cast(len)) { + throw end_of_buffer_exception{}; + } + 
m_data += len; + +#ifndef NDEBUG + // In debug builds reset the tag to zero so that we can detect (some) + // wrong code. + m_tag = 0; +#endif + } + + pbf_length_type get_len_and_skip() { + const auto len = get_length(); + skip_bytes(len); + return len; + } + + template + iterator_range get_packed() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + const auto len = get_len_and_skip(); + return {T{m_data - len, m_data}, + T{m_data, m_data}}; + } + +public: + + /** + * Construct a pbf_reader message from a data_view. The pointer from the + * data_view will be stored inside the pbf_reader object, no data is + * copied. So you must make sure the view stays valid as long as the + * pbf_reader object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + explicit pbf_reader(const data_view& view) noexcept + : m_data{view.data()}, + m_end{view.data() + view.size()} { + } + + /** + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader + * object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + pbf_reader(const char* data, std::size_t size) noexcept + : m_data{data}, + m_end{data + size} { + } + +#ifndef PROTOZERO_STRICT_API + /** + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader + * object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + * @deprecated Use one of the other constructors. 
+ */ + explicit pbf_reader(const std::pair& data) noexcept + : m_data{data.first}, + m_end{data.first + data.second} { + } +#endif + + /** + * Construct a pbf_reader message from a std::string. A pointer to the + * string internals will be stored inside the pbf_reader object, no data + * is copied. So you must make sure the string is unchanged as long as the + * pbf_reader object is used. + * + * The string must contain a complete protobuf message. + * + * @post There is no current field. + */ + explicit pbf_reader(const std::string& data) noexcept + : m_data{data.data()}, + m_end{data.data() + data.size()} { + } + + /** + * pbf_reader can be default constructed and behaves like it has an empty + * buffer. + */ + pbf_reader() noexcept = default; + + /// pbf_reader messages can be copied trivially. + pbf_reader(const pbf_reader&) noexcept = default; + + /// pbf_reader messages can be moved trivially. + pbf_reader(pbf_reader&&) noexcept = default; + + /// pbf_reader messages can be copied trivially. + pbf_reader& operator=(const pbf_reader& other) noexcept = default; + + /// pbf_reader messages can be moved trivially. + pbf_reader& operator=(pbf_reader&& other) noexcept = default; + + ~pbf_reader() = default; + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(pbf_reader& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_end, other.m_end); + swap(m_wire_type, other.m_wire_type); + swap(m_tag, other.m_tag); + } + + /** + * In a boolean context the pbf_reader class evaluates to `true` if there + * are still fields available and to `false` if the last field has been + * read. + */ + operator bool() const noexcept { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + return m_data != m_end; + } + + /** + * Get a view of the not yet read data. 
+ */ + data_view data() const noexcept { + return {m_data, static_cast(m_end - m_data)}; + } + + /** + * Return the length in bytes of the current message. If you have + * already called next() and/or any of the get_*() functions, this will + * return the remaining length. + * + * This can, for instance, be used to estimate the space needed for a + * buffer. Of course you have to know reasonably well what data to expect + * and how it is encoded for this number to have any meaning. + */ + std::size_t length() const noexcept { + return std::size_t(m_end - m_data); + } + + /** + * Set next field in the message as the current field. This is usually + * called in a while loop: + * + * @code + * pbf_reader message(...); + * while (message.next()) { + * // handle field + * } + * @endcode + * + * @returns `true` if there is a next field, `false` if not. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now. + */ + bool next() { + if (m_data == m_end) { + return false; + } + + const auto value = get_varint(); + m_tag = pbf_tag_type(value >> 3U); + + // tags 0 and 19000 to 19999 are not allowed as per + // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags + if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) { + throw invalid_tag_exception{}; + } + + m_wire_type = pbf_wire_type(value & 0x07U); + switch (m_wire_type) { + case pbf_wire_type::varint: + case pbf_wire_type::fixed64: + case pbf_wire_type::length_delimited: + case pbf_wire_type::fixed32: + break; + default: + throw unknown_pbf_wire_type_exception{}; + } + + return true; + } + + /** + * Set next field with given tag in the message as the current field. + * Fields with other tags are skipped. 
This is usually called in a while + * loop for repeated fields: + * + * @code + * pbf_reader message{...}; + * while (message.next(17)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_reader message{...}; + * if (message.next(17)) { + * // handle field + * } + * @endcode + * + * Note that this will not check the wire type. The two-argument version + * of this function will also check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(pbf_tag_type next_tag) { + while (next()) { + if (m_tag == next_tag) { + return true; + } + skip(); + } + return false; + } + + /** + * Set next field with given tag and wire type in the message as the + * current field. Fields with other tags are skipped. This is usually + * called in a while loop for repeated fields: + * + * @code + * pbf_reader message{...}; + * while (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_reader message{...}; + * if (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * Note that this will also check the wire type. The one-argument version + * of this function will not check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(pbf_tag_type next_tag, pbf_wire_type type) { + while (next()) { + if (m_tag == next_tag && m_wire_type == type) { + return true; + } + skip(); + } + return false; + } + + /** + * The tag of the current field. The tag is the field number from the + * description in the .proto file. 
+ * + * Call next() before calling this function to set the current field. + * + * @returns tag of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + pbf_tag_type tag() const noexcept { + return m_tag; + } + + /** + * Get the wire type of the current field. The wire types are: + * + * * 0 - varint + * * 1 - 64 bit + * * 2 - length-delimited + * * 5 - 32 bit + * + * All other types are illegal. + * + * Call next() before calling this function to set the current field. + * + * @returns wire type of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + pbf_wire_type wire_type() const noexcept { + return m_wire_type; + } + + /** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * Use it like this: + * + * @code + * pbf_reader message{...}; + * while (message.next()) { + * switch (message.tag_and_type()) { + * case tag_and_type(17, pbf_wire_type::length_delimited): + * .... + * break; + * case tag_and_type(21, pbf_wire_type::varint): + * .... + * break; + * default: + * message.skip(); + * } + * } + * @endcode + */ + uint32_t tag_and_type() const noexcept { + return protozero::tag_and_type(tag(), wire_type()); + } + + /** + * Check the wire type of the current field. + * + * @returns `true` if the current field has the given wire type. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + bool has_wire_type(pbf_wire_type type) const noexcept { + return wire_type() == type; + } + + /** + * Consume the current field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @post The current field was consumed and there is no current field now. 
+ */ + void skip() { + protozero_assert(tag() != 0 && "call next() before calling skip()"); + switch (wire_type()) { + case pbf_wire_type::varint: + skip_varint(&m_data, m_end); + break; + case pbf_wire_type::fixed64: + skip_bytes(8); + break; + case pbf_wire_type::length_delimited: + skip_bytes(get_length()); + break; + case pbf_wire_type::fixed32: + skip_bytes(4); + break; + default: + break; + } + } + + ///@{ + /** + * @name Scalar field accessor functions + */ + + /** + * Consume and return value of current "bool" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bool". + * @post The current field was consumed and there is no current field now. + */ + bool get_bool() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + const bool result = m_data[0] != 0; + skip_varint(&m_data, m_end); + return result; + } + + /** + * Consume and return value of current "enum" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "enum". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_enum() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "int32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "int32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_int32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "sint32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). 
+ * @pre The current field must be of type "sint32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_sint32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_svarint(); + } + + /** + * Consume and return value of current "uint32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "uint32". + * @post The current field was consumed and there is no current field now. + */ + uint32_t get_uint32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "int64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "int64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_int64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "sint64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sint64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_sint64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_svarint(); + } + + /** + * Consume and return value of current "uint64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "uint64". + * @post The current field was consumed and there is no current field now. + */ + uint64_t get_uint64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "fixed32" field. 
+ * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "fixed32". + * @post The current field was consumed and there is no current field now. + */ + uint32_t get_fixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "sfixed32" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sfixed32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_sfixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "fixed64" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "fixed64". + * @post The current field was consumed and there is no current field now. + */ + uint64_t get_fixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "sfixed64" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sfixed64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_sfixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "float" field. 
+ * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "float". + * @post The current field was consumed and there is no current field now. + */ + float get_float() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "double" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "double". + * @post The current field was consumed and there is no current field now. + */ + double get_double() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "bytes", "string", or "message" + * field. + * + * @returns A data_view object. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes", "string", or "message". + * @post The current field was consumed and there is no current field now. + */ + data_view get_view() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return {m_data - len, len}; + } + +#ifndef PROTOZERO_STRICT_API + /** + * Consume and return value of current "bytes" or "string" field. + * + * @returns A pair with a pointer to the data and the length of the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes" or "string". + * @post The current field was consumed and there is no current field now. 
+ */ + std::pair get_data() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return {m_data - len, len}; + } +#endif + + /** + * Consume and return value of current "bytes" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes". + * @post The current field was consumed and there is no current field now. + */ + std::string get_bytes() { + return std::string(get_view()); + } + + /** + * Consume and return value of current "string" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "string". + * @post The current field was consumed and there is no current field now. + */ + std::string get_string() { + return std::string(get_view()); + } + + /** + * Consume and return value of current "message" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "message". + * @post The current field was consumed and there is no current field now. + */ + pbf_reader get_message() { + return pbf_reader{get_view()}; + } + + ///@} + + /// Forward iterator for iterating over bool (int32 varint) values. + using const_bool_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over enum (int32 varint) values. + using const_enum_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over int32 (varint) values. + using const_int32_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over sint32 (varint) values. + using const_sint32_iterator = const_svarint_iterator; + + /// Forward iterator for iterating over uint32 (varint) values. 
+ using const_uint32_iterator = const_varint_iterator; + + /// Forward iterator for iterating over int64 (varint) values. + using const_int64_iterator = const_varint_iterator< int64_t>; + + /// Forward iterator for iterating over sint64 (varint) values. + using const_sint64_iterator = const_svarint_iterator; + + /// Forward iterator for iterating over uint64 (varint) values. + using const_uint64_iterator = const_varint_iterator; + + /// Forward iterator for iterating over fixed32 values. + using const_fixed32_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over sfixed32 values. + using const_sfixed32_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over fixed64 values. + using const_fixed64_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over sfixed64 values. + using const_sfixed64_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over float values. + using const_float_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over double values. + using const_double_iterator = const_fixed_iterator; + + ///@{ + /** + * @name Repeated packed field accessor functions + */ + + /** + * Consume current "repeated packed bool" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed bool". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_bool() { + return get_packed(); + } + + /** + * Consume current "repeated packed enum" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed enum". 
+ * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_enum() { + return get_packed(); + } + + /** + * Consume current "repeated packed int32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed int32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_int32() { + return get_packed(); + } + + /** + * Consume current "repeated packed sint32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sint32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sint32() { + return get_packed(); + } + + /** + * Consume current "repeated packed uint32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed uint32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_uint32() { + return get_packed(); + } + + /** + * Consume current "repeated packed int64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed int64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_int64() { + return get_packed(); + } + + /** + * Consume current "repeated packed sint64" field. 
+ * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sint64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sint64() { + return get_packed(); + } + + /** + * Consume current "repeated packed uint64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed uint64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_uint64() { + return get_packed(); + } + + /** + * Consume current "repeated packed fixed32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed fixed32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_fixed32() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed sfixed32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sfixed32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sfixed32() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed fixed64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). 
+ * @pre The current field must be of type "repeated packed fixed64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_fixed64() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed sfixed64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sfixed64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sfixed64() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed float" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed float". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_float() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed double" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed double". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_double() { + return packed_fixed(); + } + + ///@} + +}; // class pbf_reader + +/** + * Swap two pbf_reader objects. + * + * @param lhs First object. + * @param rhs Second object. 
+ */ +inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept { + lhs.swap(rhs); +} + +} // end namespace protozero + +#endif // PROTOZERO_PBF_READER_HPP diff --git a/include/protozero/pbf_writer.hpp b/include/protozero/pbf_writer.hpp new file mode 100644 index 00000000..9a07bd5b --- /dev/null +++ b/include/protozero/pbf_writer.hpp @@ -0,0 +1,76 @@ +#ifndef PROTOZERO_PBF_WRITER_HPP +#define PROTOZERO_PBF_WRITER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_writer.hpp + * + * @brief Contains the pbf_writer class. + */ + +#include "basic_pbf_writer.hpp" +#include "buffer_string.hpp" + +#include +#include + +namespace protozero { + +/** + * Specialization of basic_pbf_writer using std::string as buffer type. + */ +using pbf_writer = basic_pbf_writer; + +/// Class for generating packed repeated bool fields. +using packed_field_bool = detail::packed_field_varint; + +/// Class for generating packed repeated enum fields. +using packed_field_enum = detail::packed_field_varint; + +/// Class for generating packed repeated int32 fields. +using packed_field_int32 = detail::packed_field_varint; + +/// Class for generating packed repeated sint32 fields. +using packed_field_sint32 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint32 fields. +using packed_field_uint32 = detail::packed_field_varint; + +/// Class for generating packed repeated int64 fields. +using packed_field_int64 = detail::packed_field_varint; + +/// Class for generating packed repeated sint64 fields. +using packed_field_sint64 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint64 fields. 
+using packed_field_uint64 = detail::packed_field_varint; + +/// Class for generating packed repeated fixed32 fields. +using packed_field_fixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed32 fields. +using packed_field_sfixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated fixed64 fields. +using packed_field_fixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed64 fields. +using packed_field_sfixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated float fields. +using packed_field_float = detail::packed_field_fixed; + +/// Class for generating packed repeated double fields. +using packed_field_double = detail::packed_field_fixed; + +} // end namespace protozero + +#endif // PROTOZERO_PBF_WRITER_HPP diff --git a/include/protozero/types.hpp b/include/protozero/types.hpp new file mode 100644 index 00000000..3aefddfb --- /dev/null +++ b/include/protozero/types.hpp @@ -0,0 +1,66 @@ +#ifndef PROTOZERO_TYPES_HPP +#define PROTOZERO_TYPES_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file types.hpp + * + * @brief Contains the declaration of low-level types used in the pbf format. + */ + +#include "config.hpp" + +#include +#include +#include +#include +#include +#include + +namespace protozero { + +/** + * The type used for field tags (field numbers). + */ +using pbf_tag_type = uint32_t; + +/** + * The type used to encode type information. 
+ * See the table on + * https://developers.google.com/protocol-buffers/docs/encoding + */ +enum class pbf_wire_type : uint32_t { + varint = 0, // int32/64, uint32/64, sint32/64, bool, enum + fixed64 = 1, // fixed64, sfixed64, double + length_delimited = 2, // string, bytes, nested messages, packed repeated fields + fixed32 = 5, // fixed32, sfixed32, float + unknown = 99 // used for default setting in this library +}; + +/** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * See pbf_reader.tag_and_type() for an example how to use this. + */ +template +constexpr inline uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept { + return (static_cast(static_cast(tag)) << 3U) | static_cast(wire_type); +} + +/** + * The type used for length values, such as the length of a field. + */ +using pbf_length_type = uint32_t; + +} // end namespace protozero + +#endif // PROTOZERO_TYPES_HPP diff --git a/include/protozero/varint.hpp b/include/protozero/varint.hpp new file mode 100644 index 00000000..b4648a44 --- /dev/null +++ b/include/protozero/varint.hpp @@ -0,0 +1,245 @@ +#ifndef PROTOZERO_VARINT_HPP +#define PROTOZERO_VARINT_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file varint.hpp + * + * @brief Contains low-level varint and zigzag encoding and decoding functions. + */ + +#include "buffer_tmpl.hpp" +#include "exception.hpp" + +#include + +namespace protozero { + +/** + * The maximum length of a 64 bit varint. 
+ */ +constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1; + +namespace detail { + + // from https://github.com/facebook/folly/blob/master/folly/Varint.h + inline uint64_t decode_varint_impl(const char** data, const char* end) { + const auto* begin = reinterpret_cast(*data); + const auto* iend = reinterpret_cast(end); + const int8_t* p = begin; + uint64_t val = 0; + + if (iend - begin >= max_varint_length) { // fast path + do { + int64_t b = *p++; + val = ((uint64_t(b) & 0x7fU) ); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 7U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 14U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 21U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 28U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 35U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 42U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 49U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 56U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x01U) << 63U); if (b >= 0) { break; } + throw varint_too_long_exception{}; + } while (false); + } else { + unsigned int shift = 0; + while (p != iend && *p < 0) { + val |= (uint64_t(*p++) & 0x7fU) << shift; + shift += 7; + } + if (p == iend) { + throw end_of_buffer_exception{}; + } + val |= uint64_t(*p++) << shift; + } + + *data = reinterpret_cast(p); + return val; + } + +} // end namespace detail + +/** + * Decode a 64 bit varint. + * + * Strong exception guarantee: if there is an exception the data pointer will + * not be changed. + * + * @param[in,out] data Pointer to pointer to the input data. After the function + * returns this will point to the next data to be read. + * @param[in] end Pointer one past the end of the input data. 
+ * @returns The decoded integer + * @throws varint_too_long_exception if the varint is longer then the maximum + * length that would fit in a 64 bit int. Usually this means your data + * is corrupted or you are trying to read something as a varint that + * isn't. + * @throws end_of_buffer_exception if the *end* of the buffer was reached + * before the end of the varint. + */ +inline uint64_t decode_varint(const char** data, const char* end) { + // If this is a one-byte varint, decode it here. + if (end != *data && ((static_cast(**data) & 0x80U) == 0)) { + const auto val = static_cast(**data); + ++(*data); + return val; + } + // If this varint is more than one byte, defer to complete implementation. + return detail::decode_varint_impl(data, end); +} + +/** + * Skip over a varint. + * + * Strong exception guarantee: if there is an exception the data pointer will + * not be changed. + * + * @param[in,out] data Pointer to pointer to the input data. After the function + * returns this will point to the next data to be read. + * @param[in] end Pointer one past the end of the input data. + * @throws end_of_buffer_exception if the *end* of the buffer was reached + * before the end of the varint. + */ +inline void skip_varint(const char** data, const char* end) { + const auto* begin = reinterpret_cast(*data); + const auto* iend = reinterpret_cast(end); + const int8_t* p = begin; + + while (p != iend && *p < 0) { + ++p; + } + + if (p - begin >= max_varint_length) { + throw varint_too_long_exception{}; + } + + if (p == iend) { + throw end_of_buffer_exception{}; + } + + ++p; + + *data = reinterpret_cast(p); +} + +/** + * Varint encode a 64 bit integer. + * + * @tparam T An output iterator type. + * @param data Output iterator the varint encoded value will be written to + * byte by byte. + * @param value The integer that will be encoded. + * @returns the number of bytes written + * @throws Any exception thrown by increment or dereference operator on data. 
+ * @deprecated Use add_varint_to_buffer() instead. + */ +template +inline int write_varint(T data, uint64_t value) { + int n = 1; + + while (value >= 0x80U) { + *data++ = char((value & 0x7fU) | 0x80U); + value >>= 7U; + ++n; + } + *data = char(value); + + return n; +} + +/** + * Varint encode a 64 bit integer. + * + * @tparam TBuffer A buffer type. + * @param buffer Output buffer the varint will be written to. + * @param value The integer that will be encoded. + * @returns the number of bytes written + * @throws Any exception thrown by calling the buffer_push_back() function. + */ +template +inline void add_varint_to_buffer(TBuffer* buffer, uint64_t value) { + while (value >= 0x80U) { + buffer_customization::push_back(buffer, char((value & 0x7fU) | 0x80U)); + value >>= 7U; + } + buffer_customization::push_back(buffer, char(value)); +} + +/** + * Varint encode a 64 bit integer. + * + * @param data Where to add the varint. There must be enough space available! + * @param value The integer that will be encoded. + * @returns the number of bytes written + */ +inline int add_varint_to_buffer(char* data, uint64_t value) noexcept { + int n = 1; + + while (value >= 0x80U) { + *data++ = char((value & 0x7fU) | 0x80U); + value >>= 7U; + ++n; + } + *data = char(value); + + return n; +} + +/** + * Get the length of the varint the specified value would produce. + * + * @param value The integer to be encoded. + * @returns the number of bytes the varint would have if we created it. + */ +inline int length_of_varint(uint64_t value) noexcept { + int n = 1; + + while (value >= 0x80U) { + value >>= 7U; + ++n; + } + + return n; +} + +/** + * ZigZag encodes a 32 bit integer. + */ +inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept { + return (static_cast(value) << 1U) ^ static_cast(-static_cast(static_cast(value) >> 31U)); +} + +/** + * ZigZag encodes a 64 bit integer. 
+ */ +inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept { + return (static_cast(value) << 1U) ^ static_cast(-static_cast(static_cast(value) >> 63U)); +} + +/** + * Decodes a 32 bit ZigZag-encoded integer. + */ +inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept { + return static_cast((value >> 1U) ^ static_cast(-static_cast(value & 1U))); +} + +/** + * Decodes a 64 bit ZigZag-encoded integer. + */ +inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept { + return static_cast((value >> 1U) ^ static_cast(-static_cast(value & 1U))); +} + +} // end namespace protozero + +#endif // PROTOZERO_VARINT_HPP diff --git a/include/protozero/version.hpp b/include/protozero/version.hpp new file mode 100644 index 00000000..fc9b9287 --- /dev/null +++ b/include/protozero/version.hpp @@ -0,0 +1,34 @@ +#ifndef PROTOZERO_VERSION_HPP +#define PROTOZERO_VERSION_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file version.hpp + * + * @brief Contains macros defining the protozero version. 
+ */ + +/// The major version number +#define PROTOZERO_VERSION_MAJOR 1 + +/// The minor version number +#define PROTOZERO_VERSION_MINOR 7 + +/// The patch number +#define PROTOZERO_VERSION_PATCH 1 + +/// The complete version number +#define PROTOZERO_VERSION_CODE (PROTOZERO_VERSION_MAJOR * 10000 + PROTOZERO_VERSION_MINOR * 100 + PROTOZERO_VERSION_PATCH) + +/// Version number as string +#define PROTOZERO_VERSION_STRING "1.7.1" + +#endif // PROTOZERO_VERSION_HPP diff --git a/include/sharded_node_store.h b/include/sharded_node_store.h new file mode 100644 index 00000000..836c34ef --- /dev/null +++ b/include/sharded_node_store.h @@ -0,0 +1,32 @@ +#ifndef _SHARDED_NODE_STORE +#define _SHARDED_NODE_STORE + +#include +#include +#include "node_store.h" + +class ShardedNodeStore : public NodeStore { +public: + ShardedNodeStore(std::function()> createNodeStore); + ~ShardedNodeStore(); + void reopen() override; + void finalize(size_t threadNum) override; + LatpLon at(NodeID i) const override; + size_t size() const override; + void batchStart() override; + void insert(const std::vector& elements) override; + void clear() override { + reopen(); + } + + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return *stores[shard]; } + const NodeStore& shard(size_t shard) const override { return *stores[shard]; } + size_t shards() const override; + +private: + std::function()> createNodeStore; + std::vector> stores; +}; + +#endif diff --git a/include/sharded_way_store.h b/include/sharded_way_store.h new file mode 100644 index 00000000..40a3d331 --- /dev/null +++ b/include/sharded_way_store.h @@ -0,0 +1,35 @@ +#ifndef _SHARDED_WAY_STORE +#define _SHARDED_WAY_STORE + +#include +#include +#include "way_store.h" + +class NodeStore; + +class ShardedWayStore : public WayStore { +public: + ShardedWayStore(std::function()> createWayStore, const NodeStore& nodeStore); + ~ShardedWayStore(); + void reopen() override; + void batchStart() override; 
+ std::vector at(WayID wayid) const override; + bool requiresNodes() const override; + void insertLatpLons(std::vector &newWays) override; + void insertNodes(const std::vector>>& newWays) override; + void clear() override; + std::size_t size() const override; + void finalize(unsigned int threadNum) override; + + bool contains(size_t shard, WayID id) const override; + WayStore& shard(size_t shard) override; + const WayStore& shard(size_t shard) const override; + size_t shards() const override; + +private: + std::function()> createWayStore; + const NodeStore& nodeStore; + std::vector> stores; +}; + +#endif diff --git a/include/shared_data.h b/include/shared_data.h index 23ba9a06..45c6e34b 100644 --- a/include/shared_data.h +++ b/include/shared_data.h @@ -7,6 +7,7 @@ #include "rapidjson/document.h" +#include "options_parser.h" #include "osm_store.h" #include "output_object.h" #include "mbtiles.h" @@ -61,10 +62,6 @@ class LayerDefinition { std::string serialiseToJSON() const; }; -const int OUTPUT_FILE = 0; -const int OUTPUT_MBTILES = 1; -const int OUTPUT_PMTILES = 2; - ///\brief Config read from JSON to control behavior of program class Config { @@ -91,7 +88,7 @@ class SharedData { public: const class LayerDefinition &layers; - int outputMode; + OptionsParser::OutputMode outputMode; bool mergeSqlite; MBTiles mbtiles; PMTiles pmtiles; diff --git a/include/shp_mem_tiles.h b/include/shp_mem_tiles.h index 267a0090..508921ff 100644 --- a/include/shp_mem_tiles.h +++ b/include/shp_mem_tiles.h @@ -11,6 +11,8 @@ class ShpMemTiles : public TileDataSource public: ShpMemTiles(size_t threadNum, uint baseZoom); + std::string name() const override { return "shp"; } + void CreateNamedLayerIndex(const std::string& layerName); // Used in shape file loading diff --git a/include/sorted_node_store.h b/include/sorted_node_store.h index 5c156ad3..61fdfad3 100644 --- a/include/sorted_node_store.h +++ b/include/sorted_node_store.h @@ -3,6 +3,7 @@ #include "node_store.h" #include 
"mmap_allocator.h" +#include #include #include #include @@ -65,10 +66,15 @@ class SortedNodeStore : public NodeStore size_t size() const override; void batchStart() override; void insert(const std::vector& elements) override; - void clear() { + void clear() override { reopen(); } + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return *this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } + private: // When true, store chunks compressed. Only store compressed if the // chunk is sufficiently large. @@ -82,6 +88,15 @@ class SortedNodeStore : public NodeStore // multiple threads. They'll get folded into the index during finalize() std::map> orphanage; std::vector> workerBuffers; + + std::atomic totalGroups; + std::atomic totalNodes; + std::atomic totalGroupSpace; + std::atomic totalAllocatedSpace; + std::atomic totalChunks; + std::atomic chunkSizeFreqs[257]; + std::atomic groupSizeFreqs[257]; + void collectOrphans(const std::vector& orphans); void publishGroup(const std::vector& nodes); }; diff --git a/include/sorted_way_store.h b/include/sorted_way_store.h index 145e467b..b99ba7de 100644 --- a/include/sorted_way_store.h +++ b/include/sorted_way_store.h @@ -1,6 +1,7 @@ #ifndef _SORTED_WAY_STORE_H #define _SORTED_WAY_STORE_H +#include #include #include #include @@ -89,10 +90,15 @@ class SortedWayStore: public WayStore { std::vector at(WayID wayid) const override; bool requiresNodes() const override { return true; } void insertLatpLons(std::vector &newWays) override; - const void insertNodes(const std::vector>>& newWays) override; + void insertNodes(const std::vector>>& newWays) override; void clear() override; std::size_t size() const override; void finalize(unsigned int threadNum) override; + + bool contains(size_t shard, WayID id) const override; + WayStore& shard(size_t shard) override { return *this; } + const WayStore& shard(size_t shard) 
const override { return *this; } + size_t shards() const override { return 1; } static uint16_t encodeWay( const std::vector& way, @@ -113,6 +119,13 @@ class SortedWayStore: public WayStore { // multiple threads. They'll get folded into the index during finalize() std::map>>> orphanage; std::vector>>> workerBuffers; + + std::atomic totalWays; + std::atomic totalNodes; + std::atomic totalGroups; + std::atomic totalGroupSpace; + std::atomic totalChunks; + void collectOrphans(const std::vector>>& orphans); void publishGroup(const std::vector>>& ways); }; diff --git a/include/tag_map.h b/include/tag_map.h new file mode 100644 index 00000000..f951d9e9 --- /dev/null +++ b/include/tag_map.h @@ -0,0 +1,56 @@ +#ifndef _TAG_MAP_H +#define _TAG_MAP_H + +#include +#include +#include +#include + +// We track tags in a special structure, which enables some tricks when +// doing Lua interop. +// +// The alternative is a std::map - but often, our map is quite small. +// It's preferable to have a small set of vectors and do linear search. +// +// Further, we can avoid passing std::string from Lua -> C++ in some cases +// by first checking to see if the string we would have passed is already +// stored in our tag map, and passing a reference to its location. + +// Assumptions: +// 1. Not thread-safe +// This is OK because we have 1 instance of OsmLuaProcessing per thread. +// 2. Lifetime of map is less than lifetime of keys/values that are passed +// This is true since the strings are owned by the protobuf block reader +// 3. Max number of tag values will fit in a short +// OSM limit is 5,000 tags per object +class TagMap { +public: + TagMap(); + void reset(); + + void addTag(const protozero::data_view& key, const protozero::data_view& value); + + // Return -1 if key not found, else return its keyLoc. + int64_t getKey(const char* key, size_t size) const; + + // Return -1 if value not found, else return its keyLoc. 
+ int64_t getValue(const char* key, size_t size) const; + + const protozero::data_view* getValueFromKey(uint32_t keyLoc) const; + const protozero::data_view* getValue(uint32_t valueLoc) const; + + boost::container::flat_map exportToBoostMap() const; + +private: + uint32_t ensureString( + std::vector>& vector, + const protozero::data_view& value + ); + + + std::vector> keys; + std::vector> key2value; + std::vector> values; +}; + +#endif _TAG_MAP_H diff --git a/include/tile_data.h b/include/tile_data.h index 814b53ce..b78463e2 100644 --- a/include/tile_data.h +++ b/include/tile_data.h @@ -8,7 +8,11 @@ #include #include #include "output_object.h" +#include "append_vector.h" #include "clip_cache.h" +#include "mmap_allocator.h" + +#define TILE_DATA_ID_SIZE 34 typedef std::vector SourceList; @@ -45,16 +49,40 @@ struct OutputObjectXYID { }; template void finalizeObjects( + const std::string& name, const size_t& threadNum, const unsigned int& baseZoom, - typename std::vector>::iterator begin, - typename std::vector>::iterator end + typename std::vector>::iterator begin, + typename std::vector>::iterator end, + typename std::vector>& lowZoom ) { - for (typename std::vector>::iterator it = begin; it != end; it++) { + size_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; +#ifdef CLOCK_MONOTONIC + timespec startTs, endTs; + clock_gettime(CLOCK_MONOTONIC, &startTs); +#endif + + int i = -1; + for (auto it = begin; it != end; it++) { + i++; + if (it->size() > 0 || i % 10 == 0 || i == 4095) { + std::cout << "\r" << name << ": finalizing z6 tile " << (i + 1) << "/" << CLUSTER_ZOOM_AREA; + +#ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &endTs); + uint64_t elapsedNs = 1e9 * (endTs.tv_sec - startTs.tv_sec) + endTs.tv_nsec - startTs.tv_nsec; + std::cout << " (" << std::to_string((uint32_t)(elapsedNs / 1e6)) << " ms)"; +#endif + std::cout << std::flush; + } if (it->size() == 0) continue; - it->shrink_to_fit(); + // We track a separate copy of low zoom objects to avoid scanning large + // lists of objects that may be on slow disk storage. + for (auto objectIt = it->begin(); objectIt != it->end(); objectIt++) + if (objectIt->oo.minZoom < CLUSTER_ZOOM) + lowZoom[i].push_back(*objectIt); // If the user is doing a a small extract, there are few populated // entries in `object`. @@ -102,17 +130,18 @@ template void finalizeObjects( }, threadNum ); - } + + std::cout << std::endl; } template void collectTilesWithObjectsAtZoomTemplate( const unsigned int& baseZoom, - const typename std::vector>::iterator objects, + const typename std::vector>::iterator objects, const size_t size, - const unsigned int zoom, - TileCoordinatesSet& output + std::vector& zooms ) { + size_t maxZoom = zooms.size() - 1; uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; int64_t lastX = -1; int64_t lastY = -1; @@ -126,13 +155,18 @@ template void collectTilesWithObjectsAtZoomTemplate( TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + TileCoordinate x = baseX / (1 << (baseZoom - maxZoom)); + TileCoordinate y = baseY / (1 << (baseZoom - maxZoom)); if (lastX != x || lastY != y) { - output.set(x, y); lastX = x; lastY = y; + + for (int zoom = maxZoom; zoom >= 0; zoom--) { + zooms[zoom].set(x, y); + x /= 2; + y /= 2; + } } } } @@ -148,107 +182,124 @@ inline OutputObjectID outputObjectWithId(const OutputObjectXYI return OutputObjectID({ input.oo, input.id }); } +template void collectLowZoomObjectsForTile( + const unsigned int& baseZoom, + typename std::vector> objects, + unsigned int zoom, + const TileCoordinates& dstIndex, + std::vector& output +) { + if (zoom >= CLUSTER_ZOOM) + throw std::runtime_error("collectLowZoomObjectsForTile should not be called for high zooms"); + + uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; + + for (size_t i = 0; i < objects.size(); i++) { + const size_t z6x = i / CLUSTER_ZOOM_WIDTH; + const size_t z6y = i % CLUSTER_ZOOM_WIDTH; + + for (size_t j = 0; j < objects[i].size(); j++) { + // Compute the x, y at the base zoom level + TileCoordinate baseX = z6x * z6OffsetDivisor + objects[i][j].x; + TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; + + // Translate the x, y at the requested zoom level + TileCoordinate x = baseX / (1 << (baseZoom - zoom)); + TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + + if (dstIndex.x == x && dstIndex.y == y) { + if (objects[i][j].oo.minZoom <= zoom) { + output.push_back(outputObjectWithId(objects[i][j])); + } + } + } + } +} + template void collectObjectsForTileTemplate( const unsigned int& baseZoom, - typename std::vector>::iterator objects, + typename std::vector>::iterator objects, size_t iStart, size_t iEnd, unsigned int zoom, const TileCoordinates& dstIndex, std::vector& output ) { + if (zoom < CLUSTER_ZOOM) + throw std::runtime_error("collectObjectsForTileTemplate should not be called for low zooms"); + uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? (1 << (baseZoom - CLUSTER_ZOOM)) : 1; for (size_t i = iStart; i < iEnd; i++) { - const size_t z6x = i / CLUSTER_ZOOM_WIDTH; - const size_t z6y = i % CLUSTER_ZOOM_WIDTH; + // If z >= 6, we can compute the exact bounds within the objects array. + // Translate to the base zoom, then do a binary search to find + // the starting point. + TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); + TileCoordinate z6y = dstIndex.y / (1 << (zoom - CLUSTER_ZOOM)); + + TileCoordinate baseX = dstIndex.x * (1 << (baseZoom - zoom)); + TileCoordinate baseY = dstIndex.y * (1 << (baseZoom - zoom)); + + Z6Offset needleX = baseX - z6x * z6OffsetDivisor; + Z6Offset needleY = baseY - z6y * z6OffsetDivisor; + + // Kind of gross that we have to do this. 
Might be better if we split + // into two arrays, one of x/y and one of OOs. Would have better locality for + // searching, too. + OutputObject dummyOo(POINT_, 0, 0, 0, 0); + const size_t bz = baseZoom; + + const OO targetXY = {dummyOo, needleX, needleY }; + auto iter = std::lower_bound( + objects[i].begin(), + objects[i].end(), + targetXY, + [bz](const OO& a, const OO& b) { + // Cluster by parent zoom, so that a subsequent search + // can find a contiguous range of entries for any tile + // at zoom 6 or higher. + const size_t aX = a.x; + const size_t aY = a.y; + const size_t bX = b.x; + const size_t bY = b.y; + for (size_t z = CLUSTER_ZOOM; z <= bz; z++) { + const auto aXz = aX / (1 << (bz - z)); + const auto aYz = aY / (1 << (bz - z)); + const auto bXz = bX / (1 << (bz - z)); + const auto bYz = bY / (1 << (bz - z)); - if (zoom >= CLUSTER_ZOOM) { - // If z >= 6, we can compute the exact bounds within the objects array. - // Translate to the base zoom, then do a binary search to find - // the starting point. - TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); - TileCoordinate z6y = dstIndex.y / (1 << (zoom - CLUSTER_ZOOM)); - - TileCoordinate baseX = dstIndex.x * (1 << (baseZoom - zoom)); - TileCoordinate baseY = dstIndex.y * (1 << (baseZoom - zoom)); - - Z6Offset needleX = baseX - z6x * z6OffsetDivisor; - Z6Offset needleY = baseY - z6y * z6OffsetDivisor; - - // Kind of gross that we have to do this. Might be better if we split - // into two arrays, one of x/y and one of OOs. Would have better locality for - // searching, too. - OutputObject dummyOo(POINT_, 0, 0, 0, 0); - const size_t bz = baseZoom; - - const OO targetXY = {dummyOo, needleX, needleY }; - auto iter = std::lower_bound( - objects[i].begin(), - objects[i].end(), - targetXY, - [bz](const OO& a, const OO& b) { - // Cluster by parent zoom, so that a subsequent search - // can find a contiguous range of entries for any tile - // at zoom 6 or higher. 
- const size_t aX = a.x; - const size_t aY = a.y; - const size_t bX = b.x; - const size_t bY = b.y; - for (size_t z = CLUSTER_ZOOM; z <= bz; z++) { - const auto aXz = aX / (1 << (bz - z)); - const auto aYz = aY / (1 << (bz - z)); - const auto bXz = bX / (1 << (bz - z)); - const auto bYz = bY / (1 << (bz - z)); - - if (aXz != bXz) - return aXz < bXz; - - if (aYz != bYz) - return aYz < bYz; - } - return false; - } - ); - for (; iter != objects[i].end(); iter++) { - // Compute the x, y at the base zoom level - TileCoordinate baseX = z6x * z6OffsetDivisor + iter->x; - TileCoordinate baseY = z6y * z6OffsetDivisor + iter->y; - - // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); - - if (dstIndex.x == x && dstIndex.y == y) { - if (iter->oo.minZoom <= zoom) { - output.push_back(outputObjectWithId(*iter)); - } - } else { - // Short-circuit when we're confident we'd no longer see relevant matches. - // We've ordered the entries in `objects` such that all objects that - // share the same tile at any zoom are in contiguous runs. - // - // Thus, as soon as we fail to find a match, we can stop looking. 
- break; - } + if (aXz != bXz) + return aXz < bXz; + if (aYz != bYz) + return aYz < bYz; + } + return false; } - } else { - for (size_t j = 0; j < objects[i].size(); j++) { - // Compute the x, y at the base zoom level - TileCoordinate baseX = z6x * z6OffsetDivisor + objects[i][j].x; - TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; - - // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); - - if (dstIndex.x == x && dstIndex.y == y) { - if (objects[i][j].oo.minZoom <= zoom) { - output.push_back(outputObjectWithId(objects[i][j])); - } + ); + + for (; iter != objects[i].end(); iter++) { + // Compute the x, y at the base zoom level + TileCoordinate baseX = z6x * z6OffsetDivisor + iter->x; + TileCoordinate baseY = z6y * z6OffsetDivisor + iter->y; + + // Translate the x, y at the requested zoom level + TileCoordinate x = baseX / (1 << (baseZoom - zoom)); + TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + + if (dstIndex.x == x && dstIndex.y == y) { + if (iter->oo.minZoom <= zoom) { + output.push_back(outputObjectWithId(*iter)); } + } else { + // Short-circuit when we're confident we'd no longer see relevant matches. + // We've ordered the entries in `objects` such that all objects that + // share the same tile at any zoom are in contiguous runs. + // + // Thus, as soon as we fail to find a match, we can stop looking. + break; } + } } } @@ -275,6 +326,7 @@ class TileDataSource { std::vector> availableMultiLinestringStoreLeases; std::vector> availableMultiPolygonStoreLeases; + virtual std::string name() const = 0; protected: size_t numShards; @@ -292,8 +344,10 @@ class TileDataSource { // // If config.include_ids is true, objectsWithIds will be populated. // Otherwise, objects. 
- std::vector> objects; - std::vector> objectsWithIds; + std::vector> objects; + std::vector> lowZoomObjects; + std::vector> objectsWithIds; + std::vector> lowZoomObjectsWithIds; // rtree index of large objects using oo_rtree_param_type = boost::geometry::index::quadratic<128>; @@ -310,12 +364,14 @@ class TileDataSource { ClipCache multiPolygonClipCache; ClipCache multiLinestringClipCache; + std::deque>> pendingSmallIndexObjects; + public: TileDataSource(size_t threadNum, unsigned int baseZoom, bool includeID); - void collectTilesWithObjectsAtZoom(uint zoom, TileCoordinatesSet& output); + void collectTilesWithObjectsAtZoom(std::vector& zooms); - void collectTilesWithLargeObjectsAtZoom(uint zoom, TileCoordinatesSet& output); + void collectTilesWithLargeObjectsAtZoom(std::vector& zooms); void collectObjectsForTile(uint zoom, TileCoordinates dstIndex, std::vector& output); void finalize(size_t threadNum); @@ -337,6 +393,8 @@ class TileDataSource { ); void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id); + void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id, bool needsLock); + void addObjectToSmallIndexUnsafe(const TileCoordinates& index, const OutputObject& oo, uint64_t id); void addObjectToLargeIndex(const Box& envelope, const OutputObject& oo, uint64_t id) { std::lock_guard lock(mutex); @@ -355,7 +413,7 @@ class TileDataSource { ); virtual Geometry buildWayGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox); - LatpLon buildNodeGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) const; + virtual LatpLon buildNodeGeometry(NodeID const objectID, const TileBbox &bbox) const; void open() { // Put something at index 0 of all stores so that 0 can be used @@ -373,18 +431,18 @@ class TileDataSource { NodeID storePoint(Point const &input); inline size_t getShard(NodeID id) const { - // Note: we only allocate 35 bits for 
the IDs. This allows us to - // use bit 36 for TileDataSource-specific handling (e.g., + // Note: we only allocate 34 bits for the IDs. This allows us to + // use bits 35 and 36 for TileDataSource-specific handling (e.g., // OsmMemTiles may want to generate points/ways on the fly by // referring to the WayStore). - return id >> (35 - shardBits); + return id >> (TILE_DATA_ID_SIZE - shardBits); } virtual void populateMultiPolygon(MultiPolygon& dst, NodeID objectID); inline size_t getId(NodeID id) const { - return id & (~(~0ull << (35 - shardBits))); + return id & (~(~0ull << (TILE_DATA_ID_SIZE - shardBits))); } const Point& retrievePoint(NodeID id) const { @@ -426,9 +484,9 @@ class TileDataSource { } }; -TileCoordinatesSet getTilesAtZoom( +void populateTilesAtZoom( const std::vector& sources, - unsigned int zoom + std::vector& zooms ); #endif //_TILE_DATA_H diff --git a/include/way_store.h b/include/way_store.h index 8650cbea..36862344 100644 --- a/include/way_store.h +++ b/include/way_store.h @@ -17,10 +17,15 @@ class WayStore { virtual std::vector at(WayID wayid) const = 0; virtual bool requiresNodes() const = 0; virtual void insertLatpLons(std::vector& newWays) = 0; - virtual const void insertNodes(const std::vector>>& newWays) = 0; + virtual void insertNodes(const std::vector>>& newWays) = 0; virtual void clear() = 0; virtual std::size_t size() const = 0; virtual void finalize(unsigned int threadNum) = 0; + + virtual bool contains(size_t shard, WayID id) const = 0; + virtual WayStore& shard(size_t shard) = 0; + virtual const WayStore& shard(size_t shard) const = 0; + virtual size_t shards() const = 0; }; #endif diff --git a/include/way_stores.h b/include/way_stores.h index dfb5f74c..0f94e845 100644 --- a/include/way_stores.h +++ b/include/way_stores.h @@ -5,6 +5,7 @@ #include #include "way_store.h" #include "sorted_way_store.h" +#include "sharded_way_store.h" class BinarySearchWayStore: public WayStore { @@ -16,11 +17,16 @@ class BinarySearchWayStore: public 
WayStore { std::vector at(WayID wayid) const override; bool requiresNodes() const override { return false; } void insertLatpLons(std::vector &newWays) override; - const void insertNodes(const std::vector>>& newWays) override; + void insertNodes(const std::vector>>& newWays) override; void clear() override; std::size_t size() const override; void finalize(unsigned int threadNum) override; + bool contains(size_t shard, WayID id) const override; + WayStore& shard(size_t shard) override { return *this; } + const WayStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } + private: mutable std::mutex mutex; std::unique_ptr mLatpLonLists; diff --git a/include/write_geometry.h b/include/write_geometry.h index 8d1d014b..985b7b66 100644 --- a/include/write_geometry.h +++ b/include/write_geometry.h @@ -9,7 +9,6 @@ #include "coordinates_geom.h" // Protobuf -#include "osmformat.pb.h" #include "vector_tile.pb.h" typedef std::vector > XYString; diff --git a/resources/process-coastline.lua b/resources/process-coastline.lua index 5e2aca8e..b49eeee5 100644 --- a/resources/process-coastline.lua +++ b/resources/process-coastline.lua @@ -10,10 +10,10 @@ function exit_function() end node_keys = {} -function node_function(node) +function node_function() end -function way_function(way) +function way_function() end -- Remap coastlines diff --git a/resources/process-debug.lua b/resources/process-debug.lua index ea594c19..e1c8e62f 100644 --- a/resources/process-debug.lua +++ b/resources/process-debug.lua @@ -45,36 +45,36 @@ aerodromeValues = Set { "international", "public", "regional", "military", "priv -- Process node tags node_keys = { "amenity", "shop", "sport", "tourism", "place", "office", "natural", "addr:housenumber", "aeroway" } -function node_function(node) +function node_function() -- Write 'aerodrome_label' - local aeroway = node:Find("aeroway") + local aeroway = Find("aeroway") if aeroway == "aerodrome" then - 
node:Layer("aerodrome_label", false) - SetNameAttributes(node) - node:Attribute("iata", node:Find("iata")) - SetEleAttributes(node) - node:Attribute("icao", node:Find("icao")) + Layer("aerodrome_label", false) + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome_value = node:Find("aerodrome") + local aerodrome_value = Find("aerodrome") local class if aerodromeValues[aerodrome_value] then class = aerodrome_value else class = "other" end - node:Attribute("class", class) + Attribute("class", class) end -- Write 'housenumber' - local housenumber = node:Find("addr:housenumber") + local housenumber = Find("addr:housenumber") if housenumber~="" then - node:Layer("housenumber", false) - node:Attribute("housenumber", housenumber) + Layer("housenumber", false) + Attribute("housenumber", housenumber) end -- Write 'place' -- note that OpenMapTiles has a rank for countries (1-3), states (1-6) and cities (1-10+); -- we could potentially approximate it for cities based on the population tag - local place = node:Find("place") + local place = Find("place") if place ~= "" then local rank = nil local mz = 13 - local pop = tonumber(node:Find("population")) or 0 + local pop = tonumber(Find("population")) or 0 if place == "continent" then mz=2 elseif place == "country" then mz=3; rank=1 @@ -90,31 +90,31 @@ function node_function(node) elseif place == "locality" then mz=13 end - node:Layer("place", false) - node:Attribute("class", place) - node:MinZoom(mz) - if rank then node:AttributeNumeric("rank", rank) end - SetNameAttributes(node) + Layer("place", false) + Attribute("class", place) + MinZoom(mz) + if rank then AttributeNumeric("rank", rank) end + SetNameAttributes() return end -- Write 'poi' - local rank, class, subclass = GetPOIRank(node) + local rank, class, subclass = GetPOIRank() if rank then WritePOI(node,class,subclass,rank) end -- Write 'mountain_peak' and 'water_name' - local natural = 
node:Find("natural") + local natural = Find("natural") if natural == "peak" or natural == "volcano" then - node:Layer("mountain_peak", false) - SetEleAttributes(node) - node:AttributeNumeric("rank", 1) - node:Attribute("class", natural) - SetNameAttributes(node) + Layer("mountain_peak", false) + SetEleAttributes() + AttributeNumeric("rank", 1) + Attribute("class", natural) + SetNameAttributes() return end if natural == "bay" then - node:Layer("water_name", false) - SetNameAttributes(node) + Layer("water_name", false) + SetNameAttributes() return end end @@ -196,33 +196,33 @@ waterClasses = Set { "river", "riverbank", "stream", "canal", "drain", "ditch waterwayClasses = Set { "stream", "river", "canal", "drain", "ditch" } -function way_function(way) - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local water = way:Find("water") - local building = way:Find("building") - local natural = way:Find("natural") - local historic = way:Find("historic") - local landuse = way:Find("landuse") - local leisure = way:Find("leisure") - local amenity = way:Find("amenity") - local aeroway = way:Find("aeroway") - local railway = way:Find("railway") - local sport = way:Find("sport") - local shop = way:Find("shop") - local tourism = way:Find("tourism") - local man_made = way:Find("man_made") - local isClosed = way:IsClosed() - local housenumber = way:Find("addr:housenumber") +function way_function() + local highway = Find("highway") + local waterway = Find("waterway") + local water = Find("water") + local building = Find("building") + local natural = Find("natural") + local historic = Find("historic") + local landuse = Find("landuse") + local leisure = Find("leisure") + local amenity = Find("amenity") + local aeroway = Find("aeroway") + local railway = Find("railway") + local sport = Find("sport") + local shop = Find("shop") + local tourism = Find("tourism") + local man_made = Find("man_made") + local isClosed = IsClosed() + local housenumber = 
Find("addr:housenumber") local write_name = false - local construction = way:Find("construction") + local construction = Find("construction") -- Miscellaneous preprocessing - if way:Find("disused") == "yes" then return end + if Find("disused") == "yes" then return end if highway == "proposed" then return end if aerowayBuildings[aeroway] then building="yes"; aeroway="" end if landuse == "field" then landuse = "farmland" end - if landuse == "meadow" and way:Find("meadow")=="agricultural" then landuse="farmland" end + if landuse == "meadow" and Find("meadow")=="agricultural" then landuse="farmland" end -- Roads ('transportation' and 'transportation_name', plus 'transportation_name_detail') if highway~="" then @@ -235,33 +235,33 @@ function way_function(way) if trackValues[highway] then h = "track"; layer="transportation_detail" end if pathValues[highway] then h = "path" ; layer="transportation_detail" end if h=="service" then layer="transportation_detail" end - way:Layer(layer, false) - way:Attribute("class", h) - SetBrunnelAttributes(way) + Layer(layer, false) + Attribute("class", h) + SetBrunnelAttributes() -- Construction if highway == "construction" then if constructionValues[construction] then - way:Attribute("class", construction .. "_construction") + Attribute("class", construction .. 
"_construction") else - way:Attribute("class", "minor_construction") + Attribute("class", "minor_construction") end end -- Service - local service = way:Find("service") - if highway == "service" and service ~="" then way:Attribute("service", service) end + local service = Find("service") + if highway == "service" and service ~="" then Attribute("service", service) end -- Links (ramp) if linkValues[highway] then splitHighway = split(highway, "_") highway = splitHighway[1] - way:AttributeNumeric("ramp",1) + AttributeNumeric("ramp",1) end - local oneway = way:Find("oneway") + local oneway = Find("oneway") if oneway == "yes" or oneway == "1" then - way:AttributeNumeric("oneway",1) + AttributeNumeric("oneway",1) end if oneway == "-1" then -- **** TODO @@ -269,115 +269,115 @@ function way_function(way) -- Write names if layer == "motorway" or layer == "trunk" then - way:Layer("transportation_name", false) + Layer("transportation_name", false) elseif h == "minor" or h == "track" or h == "path" or h == "service" then - way:Layer("transportation_name_detail", false) + Layer("transportation_name_detail", false) else - way:Layer("transportation_name_mid", false) + Layer("transportation_name_mid", false) end - SetNameAttributes(way) - way:Attribute("class",h) - way:Attribute("network","road") -- **** needs fixing - if h~=highway then way:Attribute("subclass",highway) end - local ref = way:Find("ref") + SetNameAttributes() + Attribute("class",h) + Attribute("network","road") -- **** needs fixing + if h~=highway then Attribute("subclass",highway) end + local ref = Find("ref") if ref~="" then - way:Attribute("ref",ref) - way:AttributeNumeric("ref_length",ref:len()) + Attribute("ref",ref) + AttributeNumeric("ref_length",ref:len()) end end -- Railways ('transportation' and 'transportation_name', plus 'transportation_name_detail') if railway~="" then - way:Layer("transportation", false) - way:Attribute("class", railway) + Layer("transportation", false) + Attribute("class", railway) 
- way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(14) - way:Attribute("class", "rail") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(14) + Attribute("class", "rail") end -- 'Aeroway' if aeroway~="" then - way:Layer("aeroway", isClosed) - way:Attribute("class",aeroway) - way:Attribute("ref",way:Find("ref")) + Layer("aeroway", isClosed) + Attribute("class",aeroway) + Attribute("ref",Find("ref")) write_name = true end -- 'aerodrome_label' if aeroway=="aerodrome" then - way:LayerAsCentroid("aerodrome_label") - SetNameAttributes(way) - way:Attribute("iata", way:Find("iata")) - SetEleAttributes(way) - way:Attribute("icao", way:Find("icao")) + LayerAsCentroid("aerodrome_label") + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome = way:Find(aeroway) + local aerodrome = Find(aeroway) local class if aerodromeValues[aerodrome] then class = aerodrome else class = "other" end - way:Attribute("class", class) + Attribute("class", class) end -- Set 'waterway' and associated if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("waterway", false) + if waterway == "river" and Holds("name") then + Layer("waterway", false) else - way:Layer("waterway_detail", false) + Layer("waterway_detail", false) end - if way:Find("intermittent")=="yes" then way:AttributeNumeric("intermittent", 1) else way:AttributeNumeric("intermittent", 0) end - way:Attribute("class", waterway) - SetNameAttributes(way) - SetBrunnelAttributes(way) - elseif waterway == "boatyard" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial") - elseif waterway == "dam" then way:Layer("building",isClosed) - elseif waterway == "fuel" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial") + if Find("intermittent")=="yes" then AttributeNumeric("intermittent", 1) else AttributeNumeric("intermittent", 0) 
end + Attribute("class", waterway) + SetNameAttributes() + SetBrunnelAttributes() + elseif waterway == "boatyard" then Layer("landuse", isClosed); Attribute("class", "industrial") + elseif waterway == "dam" then Layer("building",isClosed) + elseif waterway == "fuel" then Layer("landuse", isClosed); Attribute("class", "industrial") end -- Set names on rivers if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("water_name", false) + if waterway == "river" and Holds("name") then + Layer("water_name", false) else - way:Layer("water_name_detail", false) - way:MinZoom(14) + Layer("water_name_detail", false) + MinZoom(14) end - way:Attribute("class", waterway) - SetNameAttributes(way) + Attribute("class", waterway) + SetNameAttributes() end -- Set 'building' and associated if building~="" then - way:Layer("building", true) - SetMinZoomByArea(way) + Layer("building", true) + SetMinZoomByArea() end -- Set 'housenumber' if housenumber~="" then - way:LayerAsCentroid("housenumber", false) - way:Attribute("housenumber", housenumber) + LayerAsCentroid("housenumber", false) + Attribute("housenumber", housenumber) end -- Set 'water' if natural=="water" or natural=="bay" or leisure=="swimming_pool" or landuse=="reservoir" or landuse=="basin" or waterClasses[waterway] then - if way:Find("covered")=="yes" or not isClosed then return end + if Find("covered")=="yes" or not isClosed then return end local class="lake"; if natural=="bay" then class="ocean" elseif waterway~="" then class="river" end - way:Layer("water",true) --- SetMinZoomByArea(way) - way:Attribute("class",class) + Layer("water",true) +-- SetMinZoomByArea() + Attribute("class",class) - if way:Find("intermittent")=="yes" then way:Attribute("intermittent",1) end + if Find("intermittent")=="yes" then Attribute("intermittent",1) end -- we only want to show the names of actual lakes not every man-made basin that probably doesn't even have a name other than "basin" 
-- examples for which we don't want to show a name: -- https://www.openstreetmap.org/way/25958687 -- https://www.openstreetmap.org/way/27201902 -- https://www.openstreetmap.org/way/25309134 -- https://www.openstreetmap.org/way/24579306 - if way:Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then - way:LayerAsCentroid("water_name_detail") - SetNameAttributes(way) --- SetMinZoomByArea(way) - way:Attribute("class", class) + if Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then + LayerAsCentroid("water_name_detail") + SetNameAttributes() +-- SetMinZoomByArea() + Attribute("class", class) end return -- in case we get any landuse processing @@ -388,11 +388,11 @@ function way_function(way) if l=="" then l=natural end if l=="" then l=leisure end if landcoverKeys[l] then - way:Layer("landcover", true) - SetMinZoomByArea(way) - way:Attribute("class", landcoverKeys[l]) - if l=="wetland" then way:Attribute("subclass", way:Find("wetland")) - else way:Attribute("subclass", l) end + Layer("landcover", true) + SetMinZoomByArea() + Attribute("class", landcoverKeys[l]) + if l=="wetland" then Attribute("subclass", Find("wetland")) + else Attribute("subclass", l) end write_name = true -- Set 'landuse' @@ -400,26 +400,26 @@ function way_function(way) if l=="" then l=amenity end if l=="" then l=tourism end if landuseKeys[l] then - way:Layer("landuse", true) - way:Attribute("class", l) + Layer("landuse", true) + Attribute("class", l) write_name = true end end -- Parks - if boundary=="national_park" then way:Layer("park",true); way:Attribute("class",boundary); SetNameAttributes(way) - elseif leisure=="nature_reserve" then way:Layer("park",true); way:Attribute("class",leisure ); SetNameAttributes(way) end + if boundary=="national_park" then Layer("park",true); Attribute("class",boundary); SetNameAttributes() + elseif leisure=="nature_reserve" then Layer("park",true); Attribute("class",leisure ); SetNameAttributes() end 
-- POIs ('poi' and 'poi_detail') - local rank, class, subclass = GetPOIRank(way) + local rank, class, subclass = GetPOIRank() if rank then WritePOI(way,class,subclass,rank); return end -- Catch-all - if (building~="" or write_name) and way:Holds("name") then - way:LayerAsCentroid("poi_detail") - SetNameAttributes(way) + if (building~="" or write_name) and Holds("name") then + LayerAsCentroid("poi_detail") + SetNameAttributes() if write_name then rank=6 else rank=25 end - way:AttributeNumeric("rank", rank) + AttributeNumeric("rank", rank) end end @@ -435,65 +435,67 @@ end function WritePOI(obj,class,subclass,rank) local layer = "poi" if rank>4 then layer="poi_detail" end - obj:LayerAsCentroid(layer) + LayerAsCentroid(layer) SetNameAttributes(obj) - obj:AttributeNumeric("rank", rank) - obj:Attribute("class", class) - obj:Attribute("subclass", subclass) + AttributeNumeric("rank", rank) + Attribute("class", class) + Attribute("subclass", subclass) end -- Set name attributes on any object function SetNameAttributes(obj) - local name = obj:Find("name"), main_written = name, iname + local name = Find("name") + local main_written = name + local iname -- if we have a preferred language, then write that (if available), and additionally write the base name tag - if preferred_language and obj:Holds("name:"..preferred_language) then - iname = obj:Find("name:"..preferred_language) + if preferred_language and Holds("name:"..preferred_language) then + iname = Find("name:"..preferred_language) print("Found "..preferred_language..": "..iname) - obj:Attribute(preferred_language_attribute, iname) + Attribute(preferred_language_attribute, iname) if iname~=name and default_language_attribute then - obj:Attribute(default_language_attribute, name) + Attribute(default_language_attribute, name) else main_written = iname end else - obj:Attribute(preferred_language_attribute, name) + Attribute(preferred_language_attribute, name) end -- then set any additional languages for i,lang in 
ipairs(additional_languages) do - iname = obj:Find("name:"..lang) + iname = Find("name:"..lang) if iname=="" then iname=name end - if iname~=main_written then obj:Attribute("name:"..lang, iname) end + if iname~=main_written then Attribute("name:"..lang, iname) end end end -- Set ele and ele_ft on any object function SetEleAttributes(obj) - local ele = obj:Find("ele") + local ele = Find("ele") if ele ~= "" then local meter = math.floor(tonumber(ele) or 0) local feet = math.floor(meter * 3.2808399) - obj:AttributeNumeric("ele", meter) - obj:AttributeNumeric("ele_ft", feet) + AttributeNumeric("ele", meter) + AttributeNumeric("ele_ft", feet) end end function SetBrunnelAttributes(obj) - if obj:Find("bridge") == "yes" then obj:Attribute("brunnel", "bridge") - elseif obj:Find("tunnel") == "yes" then obj:Attribute("brunnel", "tunnel") - elseif obj:Find("ford") == "yes" then obj:Attribute("brunnel", "ford") + if Find("bridge") == "yes" then Attribute("brunnel", "bridge") + elseif Find("tunnel") == "yes" then Attribute("brunnel", "tunnel") + elseif Find("ford") == "yes" then Attribute("brunnel", "ford") end end -- Set minimum zoom level by area -function SetMinZoomByArea(way) - local area=way:Area() - if area>ZRES5^2 then way:MinZoom(6) - elseif area>ZRES6^2 then way:MinZoom(7) - elseif area>ZRES7^2 then way:MinZoom(8) - elseif area>ZRES8^2 then way:MinZoom(9) - elseif area>ZRES9^2 then way:MinZoom(10) - elseif area>ZRES10^2 then way:MinZoom(11) - elseif area>ZRES11^2 then way:MinZoom(12) - elseif area>ZRES12^2 then way:MinZoom(13) - else way:MinZoom(14) end +function SetMinZoomByArea() + local area=Area() + if area>ZRES5^2 then MinZoom(6) + elseif area>ZRES6^2 then MinZoom(7) + elseif area>ZRES7^2 then MinZoom(8) + elseif area>ZRES8^2 then MinZoom(9) + elseif area>ZRES9^2 then MinZoom(10) + elseif area>ZRES10^2 then MinZoom(11) + elseif area>ZRES11^2 then MinZoom(12) + elseif area>ZRES12^2 then MinZoom(13) + else MinZoom(14) end end -- Calculate POIs (typically rank 1-4 go 
to 'poi' z12-14, rank 5+ to 'poi_detail' z14) @@ -503,8 +505,8 @@ function GetPOIRank(obj) -- Can we find the tag? for k,list in pairs(poiTags) do - if list[obj:Find(k)] then - v = obj:Find(k) -- k/v are the OSM tag pair + if list[Find(k)] then + v = Find(k) -- k/v are the OSM tag pair class = poiClasses[v] or v rank = poiClassRanks[class] or 25 return rank, class, v @@ -512,7 +514,7 @@ function GetPOIRank(obj) end -- Catch-all for shops - local shop = obj:Find("shop") + local shop = Find("shop") if shop~="" then return poiClassRanks['shop'], "shop", shop end -- Nothing found diff --git a/resources/process-example.lua b/resources/process-example.lua index 41b461df..b4b1f108 100644 --- a/resources/process-example.lua +++ b/resources/process-example.lua @@ -14,33 +14,33 @@ end -- Assign nodes to a layer, and set attributes, based on OSM tags function node_function(node) - local amenity = node:Find("amenity") - local shop = node:Find("shop") + local amenity = Find("amenity") + local shop = Find("shop") if amenity~="" or shop~="" then - node:Layer("poi", false) - if amenity~="" then node:Attribute("class",amenity) - else node:Attribute("class",shop) end - node:Attribute("name", node:Find("name")) + Layer("poi", false) + if amenity~="" then Attribute("class",amenity) + else Attribute("class",shop) end + Attribute("name", Find("name")) end end -- Similarly for ways -function way_function(way) - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local building = way:Find("building") +function way_function() + local highway = Find("highway") + local waterway = Find("waterway") + local building = Find("building") if highway~="" then - way:Layer("transportation", false) - way:Attribute("class", highway) --- way:Attribute("id",way:Id()) --- way:AttributeNumeric("area",37) + Layer("transportation", false) + Attribute("class", highway) +-- Attribute("id",Id()) +-- AttributeNumeric("area",37) end if waterway~="" then - way:Layer("waterway", false) - 
way:Attribute("class", waterway) + Layer("waterway", false) + Attribute("class", waterway) end if building~="" then - way:Layer("building", true) + Layer("building", true) end end diff --git a/resources/process-openmaptiles.lua b/resources/process-openmaptiles.lua index c7f74745..6ede9d26 100644 --- a/resources/process-openmaptiles.lua +++ b/resources/process-openmaptiles.lua @@ -118,36 +118,36 @@ function calcRank(place, population, capital_al) end -function node_function(node) +function node_function() -- Write 'aerodrome_label' - local aeroway = node:Find("aeroway") + local aeroway = Find("aeroway") if aeroway == "aerodrome" then - node:Layer("aerodrome_label", false) - SetNameAttributes(node) - node:Attribute("iata", node:Find("iata")) - SetEleAttributes(node) - node:Attribute("icao", node:Find("icao")) + Layer("aerodrome_label", false) + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome_value = node:Find("aerodrome") + local aerodrome_value = Find("aerodrome") local class if aerodromeValues[aerodrome_value] then class = aerodrome_value else class = "other" end - node:Attribute("class", class) + Attribute("class", class) end -- Write 'housenumber' - local housenumber = node:Find("addr:housenumber") + local housenumber = Find("addr:housenumber") if housenumber~="" then - node:Layer("housenumber", false) - node:Attribute("housenumber", housenumber) + Layer("housenumber", false) + Attribute("housenumber", housenumber) end -- Write 'place' -- note that OpenMapTiles has a rank for countries (1-3), states (1-6) and cities (1-10+); -- we could potentially approximate it for cities based on the population tag - local place = node:Find("place") + local place = Find("place") if place ~= "" then local mz = 13 - local pop = tonumber(node:Find("population")) or 0 - local capital = capitalLevel(node:Find("capital")) + local pop = tonumber(Find("population")) or 0 + local capital = 
capitalLevel(Find("capital")) local rank = calcRank(place, pop, capital) if place == "continent" then mz=0 @@ -167,33 +167,33 @@ function node_function(node) elseif place == "locality" then mz=13 end - node:Layer("place", false) - node:Attribute("class", place) - node:MinZoom(mz) - if rank then node:AttributeNumeric("rank", rank) end - if capital then node:AttributeNumeric("capital", capital) end - if place=="country" then node:Attribute("iso_a2", node:Find("ISO3166-1:alpha2")) end - SetNameAttributes(node) + Layer("place", false) + Attribute("class", place) + MinZoom(mz) + if rank then AttributeNumeric("rank", rank) end + if capital then AttributeNumeric("capital", capital) end + if place=="country" then Attribute("iso_a2", Find("ISO3166-1:alpha2")) end + SetNameAttributes() return end -- Write 'poi' - local rank, class, subclass = GetPOIRank(node) - if rank then WritePOI(node,class,subclass,rank) end + local rank, class, subclass = GetPOIRank() + if rank then WritePOI(class,subclass,rank) end -- Write 'mountain_peak' and 'water_name' - local natural = node:Find("natural") + local natural = Find("natural") if natural == "peak" or natural == "volcano" then - node:Layer("mountain_peak", false) - SetEleAttributes(node) - node:AttributeNumeric("rank", 1) - node:Attribute("class", natural) - SetNameAttributes(node) + Layer("mountain_peak", false) + SetEleAttributes() + AttributeNumeric("rank", 1) + Attribute("class", natural) + SetNameAttributes() return end if natural == "bay" then - node:Layer("water_name", false) - SetNameAttributes(node) + Layer("water_name", false) + SetNameAttributes() return end end @@ -279,81 +279,81 @@ waterwayClasses = Set { "stream", "river", "canal", "drain", "ditch" } -- Scan relations for use in ways -function relation_scan_function(relation) - if relation:Find("type")=="boundary" and relation:Find("boundary")=="administrative" then - relation:Accept() +function relation_scan_function() + if Find("type")=="boundary" and 
Find("boundary")=="administrative" then + Accept() end end -function write_to_transportation_layer(way, minzoom, highway_class) - way:Layer("transportation", false) - way:MinZoom(minzoom) - SetZOrder(way) - way:Attribute("class", highway_class) - SetBrunnelAttributes(way) - if ramp then way:AttributeNumeric("ramp",1) end +function write_to_transportation_layer(minzoom, highway_class) + Layer("transportation", false) + MinZoom(minzoom) + SetZOrder() + Attribute("class", highway_class) + SetBrunnelAttributes() + if ramp then AttributeNumeric("ramp",1) end -- Service - if highway == "service" and service ~="" then way:Attribute("service", service) end + if highway == "service" and service ~="" then Attribute("service", service) end - local oneway = way:Find("oneway") + local oneway = Find("oneway") if oneway == "yes" or oneway == "1" then - way:AttributeNumeric("oneway",1) + AttributeNumeric("oneway",1) end if oneway == "-1" then -- **** TODO end - local surface = way:Find("surface") - local surfaceMinzoom = 12 + local surface = Find("surface") + local surfaceMinzoom = 12 if pavedValues[surface] then - way:Attribute("surface", "paved", surfaceMinzoom) + Attribute("surface", "paved", surfaceMinzoom) elseif unpavedValues[surface] then - way:Attribute("surface", "unpaved", surfaceMinzoom) - end - local accessMinzoom = 9 - if way:Holds("access") then way:Attribute("access", way:Find("access"), accessMinzoom) end - if way:Holds("bicycle") then way:Attribute("bicycle", way:Find("bicycle"), accessMinzoom) end - if way:Holds("foot") then way:Attribute("foot", way:Find("foot"), accessMinzoom) end - if way:Holds("horse") then way:Attribute("horse", way:Find("horse"), accessMinzoom) end - way:AttributeBoolean("toll", way:Find("toll") == "yes", accessMinzoom) - way:AttributeNumeric("layer", tonumber(way:Find("layer")) or 0, accessMinzoom) - way:AttributeBoolean("expressway", way:Find("expressway"), 7) - way:Attribute("mtb_scale", way:Find("mtb:scale"), 10) + Attribute("surface", 
"unpaved", surfaceMinzoom) + end + local accessMinzoom = 9 + if Holds("access") then Attribute("access", Find("access"), accessMinzoom) end + if Holds("bicycle") then Attribute("bicycle", Find("bicycle"), accessMinzoom) end + if Holds("foot") then Attribute("foot", Find("foot"), accessMinzoom) end + if Holds("horse") then Attribute("horse", Find("horse"), accessMinzoom) end + AttributeBoolean("toll", Find("toll") == "yes", accessMinzoom) + AttributeNumeric("layer", tonumber(Find("layer")) or 0, accessMinzoom) + AttributeBoolean("expressway", Find("expressway"), 7) + Attribute("mtb_scale", Find("mtb:scale"), 10) end -- Process way tags -function way_function(way) - local route = way:Find("route") - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local water = way:Find("water") - local building = way:Find("building") - local natural = way:Find("natural") - local historic = way:Find("historic") - local landuse = way:Find("landuse") - local leisure = way:Find("leisure") - local amenity = way:Find("amenity") - local aeroway = way:Find("aeroway") - local railway = way:Find("railway") - local service = way:Find("service") - local sport = way:Find("sport") - local shop = way:Find("shop") - local tourism = way:Find("tourism") - local man_made = way:Find("man_made") - local boundary = way:Find("boundary") - local isClosed = way:IsClosed() - local housenumber = way:Find("addr:housenumber") +function way_function() + local route = Find("route") + local highway = Find("highway") + local waterway = Find("waterway") + local water = Find("water") + local building = Find("building") + local natural = Find("natural") + local historic = Find("historic") + local landuse = Find("landuse") + local leisure = Find("leisure") + local amenity = Find("amenity") + local aeroway = Find("aeroway") + local railway = Find("railway") + local service = Find("service") + local sport = Find("sport") + local shop = Find("shop") + local tourism = Find("tourism") + local 
man_made = Find("man_made") + local boundary = Find("boundary") + local isClosed = IsClosed() + local housenumber = Find("addr:housenumber") local write_name = false - local construction = way:Find("construction") + local construction = Find("construction") -- Miscellaneous preprocessing - if way:Find("disused") == "yes" then return end - if boundary~="" and way:Find("protection_title")=="National Forest" and way:Find("operator")=="United States Forest Service" then return end + if Find("disused") == "yes" then return end + if boundary~="" and Find("protection_title")=="National Forest" and Find("operator")=="United States Forest Service" then return end if highway == "proposed" then return end if aerowayBuildings[aeroway] then building="yes"; aeroway="" end if landuse == "field" then landuse = "farmland" end - if landuse == "meadow" and way:Find("meadow")=="agricultural" then landuse="farmland" end + if landuse == "meadow" and Find("meadow")=="agricultural" then landuse="farmland" end -- Boundaries within relations -- note that we process administrative boundaries as properties on ways, rather than as single relation geometries, @@ -361,21 +361,21 @@ function way_function(way) local admin_level = 11 local isBoundary = false while true do - local rel = way:NextRelation() + local rel = NextRelation() if not rel then break end isBoundary = true - admin_level = math.min(admin_level, tonumber(way:FindInRelation("admin_level")) or 11) + admin_level = math.min(admin_level, tonumber(FindInRelation("admin_level")) or 11) end -- Boundaries in ways if boundary=="administrative" then - admin_level = math.min(admin_level, tonumber(way:Find("admin_level")) or 11) + admin_level = math.min(admin_level, tonumber(Find("admin_level")) or 11) isBoundary = true end -- Administrative boundaries -- https://openmaptiles.org/schema/#boundary - if isBoundary and not (way:Find("maritime")=="yes") then + if isBoundary and not (Find("maritime")=="yes") then local mz = 0 if admin_level>=3 and 
admin_level<5 then mz=4 elseif admin_level>=5 and admin_level<7 then mz=8 @@ -383,22 +383,22 @@ function way_function(way) elseif admin_level>=8 then mz=12 end - way:Layer("boundary",false) - way:AttributeNumeric("admin_level", admin_level) - way:MinZoom(mz) + Layer("boundary",false) + AttributeNumeric("admin_level", admin_level) + MinZoom(mz) -- disputed status (0 or 1). some styles need to have the 0 to show it. - local disputed = way:Find("disputed") + local disputed = Find("disputed") if disputed=="yes" then - way:AttributeNumeric("disputed", 1) + AttributeNumeric("disputed", 1) else - way:AttributeNumeric("disputed", 0) + AttributeNumeric("disputed", 0) end end -- Roads ('transportation' and 'transportation_name', plus 'transportation_name_detail') if highway~="" then - local access = way:Find("access") - local surface = way:Find("surface") + local access = Find("access") + local surface = Find("surface") local h = highway local minzoom = 99 @@ -439,158 +439,158 @@ function way_function(way) -- Write to layer if minzoom <= 14 then - write_to_transportation_layer(way, minzoom, h) + write_to_transportation_layer(minzoom, h) -- Write names if minzoom < 8 then minzoom = 8 end if highway == "motorway" or highway == "trunk" then - way:Layer("transportation_name", false) - way:MinZoom(minzoom) + Layer("transportation_name", false) + MinZoom(minzoom) elseif h == "minor" or h == "track" or h == "path" or h == "service" then - way:Layer("transportation_name_detail", false) - way:MinZoom(minzoom) + Layer("transportation_name_detail", false) + MinZoom(minzoom) else - way:Layer("transportation_name_mid", false) - way:MinZoom(minzoom) + Layer("transportation_name_mid", false) + MinZoom(minzoom) end - SetNameAttributes(way) - way:Attribute("class",h) - way:Attribute("network","road") -- **** could also be us-interstate, us-highway, us-state - if h~=highway then way:Attribute("subclass",highway) end - local ref = way:Find("ref") + SetNameAttributes() + Attribute("class",h) + 
Attribute("network","road") -- **** could also be us-interstate, us-highway, us-state + if h~=highway then Attribute("subclass",highway) end + local ref = Find("ref") if ref~="" then - way:Attribute("ref",ref) - way:AttributeNumeric("ref_length",ref:len()) + Attribute("ref",ref) + AttributeNumeric("ref_length",ref:len()) end end end -- Railways ('transportation' and 'transportation_name', plus 'transportation_name_detail') if railway~="" then - way:Layer("transportation", false) - way:Attribute("class", railway) - SetZOrder(way) - SetBrunnelAttributes(way) + Layer("transportation", false) + Attribute("class", railway) + SetZOrder() + SetBrunnelAttributes() if service~="" then - way:Attribute("service", service) - way:MinZoom(12) + Attribute("service", service) + MinZoom(12) else - way:MinZoom(9) + MinZoom(9) end - way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(14) - way:Attribute("class", "rail") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(14) + Attribute("class", "rail") end -- Pier if man_made=="pier" then - way:Layer("transportation", isClosed) - SetZOrder(way) - way:Attribute("class", "pier") - SetMinZoomByArea(way) + Layer("transportation", isClosed) + SetZOrder() + Attribute("class", "pier") + SetMinZoomByArea() end -- 'Ferry' if route=="ferry" then - way:Layer("transportation", false) - way:Attribute("class", "ferry") - SetZOrder(way) - way:MinZoom(9) - SetBrunnelAttributes(way) + Layer("transportation", false) + Attribute("class", "ferry") + SetZOrder() + MinZoom(9) + SetBrunnelAttributes() - way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(12) - way:Attribute("class", "ferry") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(12) + Attribute("class", "ferry") end -- 'Aeroway' if aeroway~="" then - way:Layer("aeroway", isClosed) - way:Attribute("class",aeroway) - way:Attribute("ref",way:Find("ref")) + Layer("aeroway", isClosed) + 
Attribute("class",aeroway) + Attribute("ref",Find("ref")) write_name = true end -- 'aerodrome_label' if aeroway=="aerodrome" then - way:LayerAsCentroid("aerodrome_label") - SetNameAttributes(way) - way:Attribute("iata", way:Find("iata")) - SetEleAttributes(way) - way:Attribute("icao", way:Find("icao")) + LayerAsCentroid("aerodrome_label") + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome = way:Find(aeroway) + local aerodrome = Find(aeroway) local class if aerodromeValues[aerodrome] then class = aerodrome else class = "other" end - way:Attribute("class", class) + Attribute("class", class) end -- Set 'waterway' and associated if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("waterway", false) + if waterway == "river" and Holds("name") then + Layer("waterway", false) else - way:Layer("waterway_detail", false) + Layer("waterway_detail", false) end - if way:Find("intermittent")=="yes" then way:AttributeNumeric("intermittent", 1) else way:AttributeNumeric("intermittent", 0) end - way:Attribute("class", waterway) - SetNameAttributes(way) - SetBrunnelAttributes(way) - elseif waterway == "boatyard" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial"); way:MinZoom(12) - elseif waterway == "dam" then way:Layer("building",isClosed) - elseif waterway == "fuel" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial"); way:MinZoom(14) + if Find("intermittent")=="yes" then AttributeNumeric("intermittent", 1) else AttributeNumeric("intermittent", 0) end + Attribute("class", waterway) + SetNameAttributes() + SetBrunnelAttributes() + elseif waterway == "boatyard" then Layer("landuse", isClosed); Attribute("class", "industrial"); MinZoom(12) + elseif waterway == "dam" then Layer("building",isClosed) + elseif waterway == "fuel" then Layer("landuse", isClosed); Attribute("class", "industrial"); MinZoom(14) 
end -- Set names on rivers if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("water_name", false) + if waterway == "river" and Holds("name") then + Layer("water_name", false) else - way:Layer("water_name_detail", false) - way:MinZoom(14) + Layer("water_name_detail", false) + MinZoom(14) end - way:Attribute("class", waterway) - SetNameAttributes(way) + Attribute("class", waterway) + SetNameAttributes() end -- Set 'building' and associated if building~="" then - way:Layer("building", true) - SetBuildingHeightAttributes(way) - SetMinZoomByArea(way) + Layer("building", true) + SetBuildingHeightAttributes() + SetMinZoomByArea() end -- Set 'housenumber' if housenumber~="" then - way:LayerAsCentroid("housenumber", false) - way:Attribute("housenumber", housenumber) + LayerAsCentroid("housenumber", false) + Attribute("housenumber", housenumber) end -- Set 'water' if natural=="water" or leisure=="swimming_pool" or landuse=="reservoir" or landuse=="basin" or waterClasses[waterway] then - if way:Find("covered")=="yes" or not isClosed then return end + if Find("covered")=="yes" or not isClosed then return end local class="lake"; if waterway~="" then class="river" end - if class=="lake" and way:Find("wikidata")=="Q192770" then return end - way:Layer("water",true) - SetMinZoomByArea(way) - way:Attribute("class",class) + if class=="lake" and Find("wikidata")=="Q192770" then return end + Layer("water",true) + SetMinZoomByArea() + Attribute("class",class) - if way:Find("intermittent")=="yes" then way:Attribute("intermittent",1) end + if Find("intermittent")=="yes" then Attribute("intermittent",1) end -- we only want to show the names of actual lakes not every man-made basin that probably doesn't even have a name other than "basin" -- examples for which we don't want to show a name: -- https://www.openstreetmap.org/way/25958687 -- https://www.openstreetmap.org/way/27201902 -- https://www.openstreetmap.org/way/25309134 
-- https://www.openstreetmap.org/way/24579306 - if way:Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then - way:LayerAsCentroid("water_name_detail") - SetNameAttributes(way) - SetMinZoomByArea(way) - way:Attribute("class", class) + if Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then + LayerAsCentroid("water_name_detail") + SetNameAttributes() + SetMinZoomByArea() + Attribute("class", class) end return -- in case we get any landuse processing @@ -601,11 +601,11 @@ function way_function(way) if l=="" then l=natural end if l=="" then l=leisure end if landcoverKeys[l] then - way:Layer("landcover", true) - SetMinZoomByArea(way) - way:Attribute("class", landcoverKeys[l]) - if l=="wetland" then way:Attribute("subclass", way:Find("wetland")) - else way:Attribute("subclass", l) end + Layer("landcover", true) + SetMinZoomByArea() + Attribute("class", landcoverKeys[l]) + if l=="wetland" then Attribute("subclass", Find("wetland")) + else Attribute("subclass", l) end write_name = true -- Set 'landuse' @@ -613,31 +613,31 @@ function way_function(way) if l=="" then l=amenity end if l=="" then l=tourism end if landuseKeys[l] then - way:Layer("landuse", true) - way:Attribute("class", l) + Layer("landuse", true) + Attribute("class", l) if l=="residential" then - if way:Area()4 then layer="poi_detail" end - obj:LayerAsCentroid(layer) - SetNameAttributes(obj) - obj:AttributeNumeric("rank", rank) - obj:Attribute("class", class) - obj:Attribute("subclass", subclass) + LayerAsCentroid(layer) + SetNameAttributes() + AttributeNumeric("rank", rank) + Attribute("class", class) + Attribute("subclass", subclass) -- layer defaults to 0 - obj:AttributeNumeric("layer", tonumber(obj:Find("layer")) or 0) + AttributeNumeric("layer", tonumber(Find("layer")) or 0) -- indoor defaults to false - obj:AttributeBoolean("indoor", (obj:Find("indoor") == "yes")) + AttributeBoolean("indoor", (Find("indoor") == "yes")) -- level has no 
default - local level = tonumber(obj:Find("level")) + local level = tonumber(Find("level")) if level then - obj:AttributeNumeric("level", level) + AttributeNumeric("level", level) end end -- Set name attributes on any object -function SetNameAttributes(obj) - local name = obj:Find("name"), iname +function SetNameAttributes() + local name = Find("name"), iname local main_written = name -- if we have a preferred language, then write that (if available), and additionally write the base name tag - if preferred_language and obj:Holds("name:"..preferred_language) then - iname = obj:Find("name:"..preferred_language) - obj:Attribute(preferred_language_attribute, iname) + if preferred_language and Holds("name:"..preferred_language) then + iname = Find("name:"..preferred_language) + Attribute(preferred_language_attribute, iname) if iname~=name and default_language_attribute then - obj:Attribute(default_language_attribute, name) + Attribute(default_language_attribute, name) else main_written = iname end else - obj:Attribute(preferred_language_attribute, name) + Attribute(preferred_language_attribute, name) end -- then set any additional languages for i,lang in ipairs(additional_languages) do - iname = obj:Find("name:"..lang) + iname = Find("name:"..lang) if iname=="" then iname=name end - if iname~=main_written then obj:Attribute("name:"..lang, iname) end + if iname~=main_written then Attribute("name:"..lang, iname) end end end -- Set ele and ele_ft on any object -function SetEleAttributes(obj) - local ele = obj:Find("ele") +function SetEleAttributes() + local ele = Find("ele") if ele ~= "" then local meter = math.floor(tonumber(ele) or 0) local feet = math.floor(meter * 3.2808399) - obj:AttributeNumeric("ele", meter) - obj:AttributeNumeric("ele_ft", feet) + AttributeNumeric("ele", meter) + AttributeNumeric("ele_ft", feet) end end -function SetBrunnelAttributes(obj) - if obj:Find("bridge") == "yes" then obj:Attribute("brunnel", "bridge") - elseif obj:Find("tunnel") == "yes" 
then obj:Attribute("brunnel", "tunnel") - elseif obj:Find("ford") == "yes" then obj:Attribute("brunnel", "ford") +function SetBrunnelAttributes() + if Find("bridge") == "yes" then Attribute("brunnel", "bridge") + elseif Find("tunnel") == "yes" then Attribute("brunnel", "tunnel") + elseif Find("ford") == "yes" then Attribute("brunnel", "ford") end end -- Set minimum zoom level by area -function SetMinZoomByArea(way) - local area=way:Area() - if area>ZRES5^2 then way:MinZoom(6) - elseif area>ZRES6^2 then way:MinZoom(7) - elseif area>ZRES7^2 then way:MinZoom(8) - elseif area>ZRES8^2 then way:MinZoom(9) - elseif area>ZRES9^2 then way:MinZoom(10) - elseif area>ZRES10^2 then way:MinZoom(11) - elseif area>ZRES11^2 then way:MinZoom(12) - elseif area>ZRES12^2 then way:MinZoom(13) - else way:MinZoom(14) end +function SetMinZoomByArea() + local area=Area() + if area>ZRES5^2 then MinZoom(6) + elseif area>ZRES6^2 then MinZoom(7) + elseif area>ZRES7^2 then MinZoom(8) + elseif area>ZRES8^2 then MinZoom(9) + elseif area>ZRES9^2 then MinZoom(10) + elseif area>ZRES10^2 then MinZoom(11) + elseif area>ZRES11^2 then MinZoom(12) + elseif area>ZRES12^2 then MinZoom(13) + else MinZoom(14) end end -- Calculate POIs (typically rank 1-4 go to 'poi' z12-14, rank 5+ to 'poi_detail' z14) -- returns rank, class, subclass -function GetPOIRank(obj) +function GetPOIRank() local k,list,v,class,rank -- Can we find the tag? 
for k,list in pairs(poiTags) do - if list[obj:Find(k)] then - v = obj:Find(k) -- k/v are the OSM tag pair + if list[Find(k)] then + v = Find(k) -- k/v are the OSM tag pair class = poiClasses[v] or k rank = poiClassRanks[class] or 25 subclassKey = poiSubClasses[v] if subclassKey then class = v - v = obj:Find(subclassKey) + v = Find(subclassKey) end return rank, class, v end end -- Catch-all for shops - local shop = obj:Find("shop") + local shop = Find("shop") if shop~="" then return poiClassRanks['shop'], "shop", shop end -- Nothing found return nil,nil,nil end -function SetBuildingHeightAttributes(way) - local height = tonumber(way:Find("height"), 10) - local minHeight = tonumber(way:Find("min_height"), 10) - local levels = tonumber(way:Find("building:levels"), 10) - local minLevel = tonumber(way:Find("building:min_level"), 10) +function SetBuildingHeightAttributes() + local height = tonumber(Find("height"), 10) + local minHeight = tonumber(Find("min_height"), 10) + local levels = tonumber(Find("building:levels"), 10) + local minLevel = tonumber(Find("building:min_level"), 10) local renderHeight = BUILDING_FLOOR_HEIGHT if height or levels then @@ -779,17 +779,17 @@ function SetBuildingHeightAttributes(way) renderHeight = renderHeight + renderMinHeight end - way:AttributeNumeric("render_height", renderHeight) - way:AttributeNumeric("render_min_height", renderMinHeight) + AttributeNumeric("render_height", renderHeight) + AttributeNumeric("render_min_height", renderMinHeight) end -- Implement z_order as calculated by Imposm -- See https://imposm.org/docs/imposm3/latest/mapping.html#wayzorder for details. 
-function SetZOrder(way) - local highway = way:Find("highway") - local layer = tonumber(way:Find("layer")) - local bridge = way:Find("bridge") - local tunnel = way:Find("tunnel") +function SetZOrder() + local highway = Find("highway") + local layer = tonumber(Find("layer")) + local bridge = Find("bridge") + local tunnel = Find("tunnel") local zOrder = 0 if bridge ~= "" and bridge ~= "no" then zOrder = zOrder + 10 @@ -820,7 +820,7 @@ function SetZOrder(way) hwClass = 3 end zOrder = zOrder + hwClass - way:ZOrder(zOrder) + ZOrder(zOrder) end -- ========================================================== diff --git a/src/attribute_store.cpp b/src/attribute_store.cpp index f4f9f299..363d167b 100644 --- a/src/attribute_store.cpp +++ b/src/attribute_store.cpp @@ -55,19 +55,38 @@ const std::string& AttributeKeyStore::getKeyUnsafe(uint16_t index) const { return keys[index]; } +// AttributePair +void AttributePair::ensureStringIsOwned() { + // Before we store an AttributePair in our long-term storage, we need + // to make sure it's not pointing to a non-long-lived std::string. 
+ if (valueType == AttributePairType::Bool || valueType == AttributePairType::Float) + return; + + stringValue_.ensureStringIsOwned(); +} + // AttributePairStore -thread_local boost::container::flat_map tlsHotShardMap; -thread_local uint16_t tlsHotShardSize = 0; +thread_local DequeMap tlsHotShard(1 << 16); const AttributePair& AttributePairStore::getPair(uint32_t i) const { uint32_t shard = i >> (32 - SHARD_BITS); uint32_t offset = i & (~(~0u << (32 - SHARD_BITS))); - if (shard == 0) - return hotShard[offset]; + if (shard == 0) { + if (offset < tlsHotShard.size()) + return tlsHotShard[offset]; + + { + std::lock_guard lock(pairsMutex[0]); + tlsHotShard = pairs[0]; + } + + return tlsHotShard[offset]; + } std::lock_guard lock(pairsMutex[shard]); - return pairs[shard].at(offset); + return pairs[shard][offset]; }; + const AttributePair& AttributePairStore::getPairUnsafe(uint32_t i) const { // NB: This is unsafe if called before the PBF has been fully read. // If called during the output phase, it's safe. @@ -75,44 +94,43 @@ const AttributePair& AttributePairStore::getPairUnsafe(uint32_t i) const { uint32_t shard = i >> (32 - SHARD_BITS); uint32_t offset = i & (~(~0u << (32 - SHARD_BITS))); - if (shard == 0) - return hotShard[offset]; - - return pairs[shard].at(offset); + return pairs[shard][offset]; }; -uint32_t AttributePairStore::addPair(const AttributePair& pair, bool isHot) { +// Remember recently queried/added pairs so that we can return them in the +// future without taking a lock. +thread_local uint64_t tlsPairLookups = 0; +thread_local uint64_t tlsPairLookupsUncached = 0; + +thread_local std::vector cachedAttributePairPointers(64); +thread_local std::vector cachedAttributePairIndexes(64); +uint32_t AttributePairStore::addPair(AttributePair& pair, bool isHot) { if (isHot) { { // First, check our thread-local map. 
- const auto& it = tlsHotShardMap.find(&pair); - if (it != tlsHotShardMap.end()) - return it->second; + const auto& index = tlsHotShard.find(pair); + if (index != -1) + return index; } + // Not found, ensure our local map is up-to-date for future calls, // and fall through to the main map. - // - // Note that we can read `hotShard` without a lock - while (tlsHotShardSize < hotShardSize.load()) { - tlsHotShardSize++; - tlsHotShardMap[&hotShard[tlsHotShardSize]] = tlsHotShardSize; + if (!tlsHotShard.full()) { + std::lock_guard lock(pairsMutex[0]); + tlsHotShard = pairs[0]; } // This might be a popular pair, worth re-using. // Have we already assigned it a hot ID? std::lock_guard lock(pairsMutex[0]); - const auto& it = pairsMaps[0].find(&pair); - if (it != pairsMaps[0].end()) - return it->second; + const auto& index = pairs[0].find(pair); + if (index != -1) + return index; - if (hotShardSize.load() < 1 << 16) { - hotShardSize++; - uint32_t offset = hotShardSize.load(); - - hotShard[offset] = pair; - const AttributePair* ptr = &hotShard[offset]; + if (!pairs[0].full()) { + pair.ensureStringIsOwned(); + uint32_t offset = pairs[0].add(pair); uint32_t rv = (0 << (32 - SHARD_BITS)) + offset; - pairsMaps[0][ptr] = rv; return rv; } } @@ -121,6 +139,23 @@ uint32_t AttributePairStore::addPair(const AttributePair& pair, bool isHot) { // Throw it on the pile with the rest of the pairs. size_t hash = pair.hash(); + const size_t candidateIndex = hash % cachedAttributePairPointers.size(); + // Before taking a lock, see if we've seen this attribute pair recently. + + tlsPairLookups++; + if (tlsPairLookups % 1024 == 0) { + lookups += 1024; + } + + + { + const AttributePair* candidate = cachedAttributePairPointers[candidateIndex]; + + if (candidate != nullptr && *candidate == pair) + return cachedAttributePairIndexes[candidateIndex]; + } + + size_t shard = hash % ATTRIBUTE_SHARDS; // Shard 0 is for hot pairs -- pick another shard if it gets selected. 
if (shard == 0) shard = (hash >> 8) % ATTRIBUTE_SHARDS; @@ -129,20 +164,27 @@ uint32_t AttributePairStore::addPair(const AttributePair& pair, bool isHot) { if (shard == 0) shard = 1; std::lock_guard lock(pairsMutex[shard]); - const auto& it = pairsMaps[shard].find(&pair); - if (it != pairsMaps[shard].end()) - return it->second; - uint32_t offset = pairs[shard].size(); + tlsPairLookupsUncached++; + if (tlsPairLookupsUncached % 1024 == 0) + lookupsUncached += 1024; + + const auto& index = pairs[shard].find(pair); + if (index != -1) { + const uint32_t rv = (shard << (32 - SHARD_BITS)) + index; + cachedAttributePairPointers[candidateIndex] = &pairs[shard][index]; + cachedAttributePairIndexes[candidateIndex] = rv; + + return rv; + } + + pair.ensureStringIsOwned(); + uint32_t offset = pairs[shard].add(pair); if (offset >= (1 << (32 - SHARD_BITS))) throw std::out_of_range("pair shard overflow"); - pairs[shard].push_back(pair); - const AttributePair* ptr = &pairs[shard][offset]; uint32_t rv = (shard << (32 - SHARD_BITS)) + offset; - - pairsMaps[shard][ptr] = rv; return rv; }; @@ -199,21 +241,19 @@ void AttributeSet::removePairWithKey(const AttributePairStore& pairStore, uint32 } void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, const std::string& v, char minzoom) { - AttributePair kv(keyStore.key2index(key),v,minzoom); - bool isHot = AttributePair::isHot(kv, key); - attributeSet.removePairWithKey(pairStore, kv.keyIndex); + PooledString ps(&v); + AttributePair kv(keyStore.key2index(key), ps, minzoom); + bool isHot = AttributePair::isHot(key, v); attributeSet.addPair(pairStore.addPair(kv, isHot)); } void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, bool v, char minzoom) { AttributePair kv(keyStore.key2index(key),v,minzoom); - bool isHot = AttributePair::isHot(kv, key); - attributeSet.removePairWithKey(pairStore, kv.keyIndex); + bool isHot = true; // All bools are eligible to be hot pairs 
attributeSet.addPair(pairStore.addPair(kv, isHot)); } void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, float v, char minzoom) { AttributePair kv(keyStore.key2index(key),v,minzoom); - bool isHot = AttributePair::isHot(kv, key); - attributeSet.removePairWithKey(pairStore, kv.keyIndex); + bool isHot = v >= 0 && v <= 25 && ceil(v) == v; // Whole numbers in 0..25 are eligible to be hot pairs attributeSet.addPair(pairStore.addPair(kv, isHot)); } @@ -254,33 +294,54 @@ void AttributeSet::finalize() { } +// Remember recently queried/added sets so that we can return them in the +// future without taking a lock. +thread_local std::vector cachedAttributeSetPointers(64); +thread_local std::vector cachedAttributeSetIndexes(64); + +thread_local uint64_t tlsSetLookups = 0; +thread_local uint64_t tlsSetLookupsUncached = 0; AttributeIndex AttributeStore::add(AttributeSet &attributes) { // TODO: there's probably a way to use C++ types to distinguish a finalized // and non-finalized AttributeSet, which would make this safer. attributes.finalize(); size_t hash = attributes.hash(); + + const size_t candidateIndex = hash % cachedAttributeSetPointers.size(); + // Before taking a lock, see if we've seen this attribute set recently. + + tlsSetLookups++; + if (tlsSetLookups % 1024 == 0) { + lookups += 1024; + } + + + { + const AttributeSet* candidate = cachedAttributeSetPointers[candidateIndex]; + + if (candidate != nullptr && *candidate == attributes) + return cachedAttributeSetIndexes[candidateIndex]; + } + size_t shard = hash % ATTRIBUTE_SHARDS; // We can't use the top 2 bits (see OutputObject's bitfields) shard = shard >> 2; std::lock_guard lock(setsMutex[shard]); - lookups++; - - // Do we already have it? 
- const auto& existing = setsMaps[shard].find(&attributes); - if (existing != setsMaps[shard].end()) return existing->second; + tlsSetLookupsUncached++; + if (tlsSetLookupsUncached % 1024 == 0) + lookupsUncached += 1024; - // No, so add and return the index - uint32_t offset = sets[shard].size(); + const uint32_t offset = sets[shard].add(attributes); if (offset >= (1 << (32 - SHARD_BITS))) throw std::out_of_range("set shard overflow"); - sets[shard].push_back(attributes); - const AttributeSet* ptr = &sets[shard][offset]; uint32_t rv = (shard << (32 - SHARD_BITS)) + offset; - setsMaps[shard][ptr] = rv; + + cachedAttributeSetPointers[candidateIndex] = &sets[shard][offset]; + cachedAttributeSetIndexes[candidateIndex] = rv; return rv; } @@ -307,16 +368,21 @@ std::vector AttributeStore::getUnsafe(AttributeIndex index } } -void AttributeStore::reportSize() const { +size_t AttributeStore::size() const { size_t numAttributeSets = 0; for (int i = 0; i < ATTRIBUTE_SHARDS; i++) numAttributeSets += sets[i].size(); - std::cout << "Attributes: " << numAttributeSets << " sets from " << lookups.load() << " objects" << std::endl; + + return numAttributeSets; +} + +void AttributeStore::reportSize() const { + std::cout << "Attributes: " << size() << " sets from " << lookups.load() << " objects (" << lookupsUncached.load() << " uncached), " << pairStore.lookups.load() << " pairs (" << pairStore.lookupsUncached.load() << " uncached)" << std::endl; // Print detailed histogram of frequencies of attributes. if (false) { for (int i = 0; i < ATTRIBUTE_SHARDS; i++) { - std::cout << "pairsMaps[" << i << "] has " << pairStore.pairsMaps[i].size() << " entries" << std::endl; + std::cout << "pairs[" << i << "] has " << pairStore.pairs[i].size() << " entries" << std::endl; } std::map tagCountDist; @@ -368,6 +434,20 @@ void AttributeStore::reportSize() const { } } +void AttributeStore::reset() { + // This is only used for tests. 
+ tlsKeys2Index.clear(); + tlsKeys2IndexSize = 0; + + tlsHotShard.clear(); + + for (int i = 0; i < cachedAttributeSetPointers.size(); i++) + cachedAttributeSetPointers[i] = nullptr; + + for (int i = 0; i < cachedAttributePairPointers.size(); i++) + cachedAttributePairPointers[i] = nullptr; +} + void AttributeStore::finalize() { finalized = true; keyStore.finalize(); diff --git a/src/helpers.cpp b/src/helpers.cpp index 444ddcf0..df210b95 100644 --- a/src/helpers.cpp +++ b/src/helpers.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include "helpers.h" @@ -11,7 +13,6 @@ #define MOD_GZIP_ZLIB_CFACTOR 9 #define MOD_GZIP_ZLIB_BSIZE 8096 -namespace geom = boost::geometry; using namespace std; // Bounding box string parsing @@ -89,7 +90,9 @@ std::string compress_string(const std::string& str, } // Decompress an STL string using zlib and return the original data. -std::string decompress_string(const std::string& str, bool asGzip) { +// The output buffer is passed in; callers are meant to re-use the buffer such +// that eventually no allocations are needed when decompressing. 
+void decompress_string(std::string& output, const char* input, uint32_t inputSize, bool asGzip) { z_stream zs; // z_stream is zlib's control structure memset(&zs, 0, sizeof(zs)); @@ -101,27 +104,27 @@ std::string decompress_string(const std::string& str, bool asGzip) { throw(std::runtime_error("inflateInit failed while decompressing.")); } - zs.next_in = (Bytef*)str.data(); - zs.avail_in = str.size(); + zs.next_in = (Bytef*)input; + zs.avail_in = inputSize; int ret; - char outbuffer[32768]; - std::string outstring; + + int actualOutputSize = 0; // get the decompressed bytes blockwise using repeated calls to inflate do { - zs.next_out = reinterpret_cast(outbuffer); - zs.avail_out = sizeof(outbuffer); + if (output.size() < actualOutputSize + 32768) + output.resize(actualOutputSize + 32768); - ret = inflate(&zs, 0); + zs.next_out = reinterpret_cast(&output[actualOutputSize]); + zs.avail_out = output.size() - actualOutputSize; - if (outstring.size() < zs.total_out) { - outstring.append(outbuffer, - zs.total_out - outstring.size()); - } + ret = inflate(&zs, 0); + actualOutputSize = zs.total_out; } while (ret == Z_OK); + output.resize(actualOutputSize); inflateEnd(&zs); if (ret != Z_STREAM_END) { // an error occurred that was not EOF @@ -130,8 +133,6 @@ std::string decompress_string(const std::string& str, bool asGzip) { << zs.msg; throw(std::runtime_error(oss.str())); } - - return outstring; } // Parse a Boost error diff --git a/src/mmap_allocator.cpp b/src/mmap_allocator.cpp index dc71f687..2b5e26fd 100644 --- a/src/mmap_allocator.cpp +++ b/src/mmap_allocator.cpp @@ -79,10 +79,10 @@ thread_local mmap_shm_ptr mmap_shm_thread_region_ptr; std::mutex mmap_allocator_mutex; mmap_file::mmap_file(std::string const &filename, std::size_t offset) - : mapping(filename.c_str(), boost::interprocess::read_write) + : filename(filename) + , mapping(filename.c_str(), boost::interprocess::read_write) , region(mapping, boost::interprocess::read_write) , 
buffer(boost::interprocess::create_only, reinterpret_cast(region.get_address()) + offset, region.get_size() - offset) - , filename(filename) { } mmap_file::~mmap_file() diff --git a/src/node_stores.cpp b/src/node_stores.cpp index 8c84b811..06e2fc5e 100644 --- a/src/node_stores.cpp +++ b/src/node_stores.cpp @@ -14,6 +14,17 @@ void BinarySearchNodeStore::reopen() } } +bool BinarySearchNodeStore::contains(size_t shard, NodeID i) const { + auto internalShard = mLatpLons[shardPart(i)]; + auto id = idPart(i); + + auto iter = std::lower_bound(internalShard->begin(), internalShard->end(), id, [](auto const &e, auto i) { + return e.first < i; + }); + + return !(iter == internalShard->end() || iter->first != id); +} + LatpLon BinarySearchNodeStore::at(NodeID i) const { auto shard = mLatpLons[shardPart(i)]; auto id = idPart(i); diff --git a/src/options_parser.cpp b/src/options_parser.cpp new file mode 100644 index 00000000..529e5f4a --- /dev/null +++ b/src/options_parser.cpp @@ -0,0 +1,114 @@ +#include "options_parser.h" + +#include +#include +#include +#include +#include "helpers.h" + +#ifndef TM_VERSION +#define TM_VERSION (version not set) +#endif +#define STR1(x) #x +#define STR(x) STR1(x) + +using namespace std; +namespace po = boost::program_options; + +po::options_description getParser(OptionsParser::Options& options) { + po::options_description desc("tilemaker " STR(TM_VERSION) "\nConvert OpenStreetMap .pbf files into vector tiles\n\nAvailable options"); + desc.add_options() + ("help", "show help message") + ("input", po::value< vector >(&options.inputFiles), "source .osm.pbf file") + ("output", po::value< string >(&options.outputFile), "target directory or .mbtiles/.pmtiles file") + ("bbox", po::value< string >(&options.bbox), "bounding box to use if input file does not have a bbox header set, example: minlon,minlat,maxlon,maxlat") + ("merge" ,po::bool_switch(&options.mergeSqlite), "merge with existing .mbtiles (overwrites otherwise)") + ("config", po::value< string 
>(&options.jsonFile)->default_value("config.json"), "config JSON file") + ("process",po::value< string >(&options.luaFile)->default_value("process.lua"), "tag-processing Lua file") + ("verbose",po::bool_switch(&options.verbose), "verbose error output") + ("skip-integrity",po::bool_switch(&options.osm.skipIntegrity), "don't enforce way/node integrity") + ("log-tile-timings", po::bool_switch(&options.logTileTimings), "log how long each tile takes"); + po::options_description performance("Performance options"); + performance.add_options() + ("store", po::value< string >(&options.osm.storeFile), "temporary storage for node/ways/relations data") + ("fast", po::bool_switch(&options.osm.fast), "prefer speed at the expense of memory") + ("compact",po::bool_switch(&options.osm.compact), "use faster data structure for node lookups\nNOTE: This requires the input to be renumbered (osmium renumber)") + ("no-compress-nodes", po::bool_switch(&options.osm.uncompressedNodes), "store nodes uncompressed") + ("no-compress-ways", po::bool_switch(&options.osm.uncompressedWays), "store ways uncompressed") + ("lazy-geometries", po::bool_switch(&options.osm.lazyGeometries), "generate geometries from the OSM stores; uses less memory") + ("materialize-geometries", po::bool_switch(&options.osm.materializeGeometries), "materialize geometries; uses more memory") + ("shard-stores", po::bool_switch(&options.osm.shardStores), "use an alternate reading/writing strategy for low-memory machines") + ("threads",po::value(&options.threadNum)->default_value(0), "number of threads (automatically detected if 0)") + ; + + desc.add(performance); + return desc; +} + +void OptionsParser::showHelp() { + Options options; + auto parser = getParser(options); + std::cout << parser << std::endl; +} + +OptionsParser::Options OptionsParser::parse(const int argc, const char* argv[]) { + Options options; + + po::options_description desc = getParser(options); + po::positional_options_description p; + p.add("input", 
1).add("output", 1); + + po::variables_map vm; + try { + po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm); + } catch (const po::unknown_option& ex) { + throw OptionException{"Unknown option: " + ex.get_option_name()}; + } + po::notify(vm); + + if (options.osm.storeFile.empty()) { + options.osm.materializeGeometries = true; + } else { + if (!options.osm.fast) { + options.osm.shardStores = true; + } + } + + // You can pass --lazy-geometries to override the default of materialized geometries for + // the non-store case. + if (options.osm.lazyGeometries) + options.osm.materializeGeometries = false; + + + if (vm.count("help")) { + options.showHelp = true; + return options; + } + if (vm.count("output") == 0) { + throw OptionException{ "You must specify an output file or directory. Run with --help to find out more." }; + } + + if (vm.count("input") == 0) { + throw OptionException{ "No source .osm.pbf file supplied" }; + } + + if (ends_with(options.outputFile, ".mbtiles") || ends_with(options.outputFile, ".sqlite")) { + options.outputMode = OutputMode::MBTiles; + } else if (ends_with(options.outputFile, ".pmtiles")) { + options.outputMode = OutputMode::PMTiles; + } + + if (options.threadNum == 0) { + options.threadNum = max(thread::hardware_concurrency(), 1u); + } + + // ---- Check config + if (!boost::filesystem::exists(options.jsonFile)) { + throw OptionException{ "Couldn't open .json config: " + options.jsonFile }; + } + if (!boost::filesystem::exists(options.luaFile)) { + throw OptionException{"Couldn't open .lua script: " + options.luaFile }; + } + + return options; +} diff --git a/src/osm_lua_processing.cpp b/src/osm_lua_processing.cpp index a1bc2536..31d184ed 100644 --- a/src/osm_lua_processing.cpp +++ b/src/osm_lua_processing.cpp @@ -3,15 +3,132 @@ #include "osm_lua_processing.h" #include "attribute_store.h" #include "helpers.h" +#include "tag_map.h" #include "coordinates_geom.h" #include "osm_mem_tiles.h" using namespace std; 
+const std::string EMPTY_STRING = ""; thread_local kaguya::State *g_luaState = nullptr; +thread_local OsmLuaProcessing* osmLuaProcessing = nullptr; + +// A key in `currentTags`. If Lua code refers to an absent key, +// found will be false. +struct KnownTagKey { + bool found; + uint32_t index; +}; + +template<> struct kaguya::lua_type_traits { + typedef KnownTagKey get_type; + typedef const KnownTagKey& push_type; + + static bool strictCheckType(lua_State* l, int index) + { + return lua_type(l, index) == LUA_TSTRING; + } + static bool checkType(lua_State* l, int index) + { + return lua_isstring(l, index) != 0; + } + static get_type get(lua_State* l, int index) + { + KnownTagKey rv = { false, 0 }; + size_t size = 0; + const char* buffer = lua_tolstring(l, index, &size); + + int64_t tagLoc = osmLuaProcessing->currentTags->getKey(buffer, size); + + if (tagLoc >= 0) { + rv.found = true; + rv.index = tagLoc; + } +// std::string key(buffer, size); +// std::cout << "for key " << key << ": rv.found=" << rv.found << ", rv.index=" << rv.index << std::endl; + return rv; + } + static int push(lua_State* l, push_type s) + { + throw std::runtime_error("Lua code doesn't know how to use KnownTagKey"); + } +}; + +template<> struct kaguya::lua_type_traits { + typedef PossiblyKnownTagValue get_type; + typedef const PossiblyKnownTagValue& push_type; + + static bool strictCheckType(lua_State* l, int index) + { + return lua_type(l, index) == LUA_TSTRING; + } + static bool checkType(lua_State* l, int index) + { + return lua_isstring(l, index) != 0; + } + static get_type get(lua_State* l, int index) + { + PossiblyKnownTagValue rv = { false, 0 }; + size_t size = 0; + const char* buffer = lua_tolstring(l, index, &size); + + // For long strings where we might need to do a malloc, see if we + // can instead pass a pointer to a value from this object's tag + // map. + // + // 15 is the threshold where gcc no longer applies the small string + // optimization. 
+ if (size > 15) { + int64_t tagLoc = osmLuaProcessing->currentTags->getValue(buffer, size); + + if (tagLoc >= 0) { + rv.found = true; + rv.index = tagLoc; + return rv; + } + } + + rv.fallback = std::string(buffer, size); + return rv; + } + static int push(lua_State* l, push_type s) + { + throw std::runtime_error("Lua code doesn't know how to use PossiblyKnownTagValue"); + } +}; + +std::string rawId() { return osmLuaProcessing->Id(); } +bool rawHolds(const KnownTagKey& key) { return key.found; } +const std::string rawFind(const KnownTagKey& key) { + if (key.found) { + auto value = *(osmLuaProcessing->currentTags->getValueFromKey(key.index)); + return std::string(value.data(), value.size()); + } + + return EMPTY_STRING; +} +std::vector rawFindIntersecting(const std::string &layerName) { return osmLuaProcessing->FindIntersecting(layerName); } +bool rawIntersects(const std::string& layerName) { return osmLuaProcessing->Intersects(layerName); } +std::vector rawFindCovering(const std::string& layerName) { return osmLuaProcessing->FindCovering(layerName); } +bool rawCoveredBy(const std::string& layerName) { return osmLuaProcessing->CoveredBy(layerName); } +bool rawIsClosed() { return osmLuaProcessing->IsClosed(); } +double rawArea() { return osmLuaProcessing->Area(); } +double rawLength() { return osmLuaProcessing->Length(); } +std::vector Centroid() { return osmLuaProcessing->Centroid(); } +void rawLayer(const std::string& layerName, bool area) { return osmLuaProcessing->Layer(layerName, area); } +void rawLayerAsCentroid(const std::string &layerName) { return osmLuaProcessing->LayerAsCentroid(layerName); } +void rawMinZoom(const double z) { return osmLuaProcessing->MinZoom(z); } +void rawZOrder(const double z) { return osmLuaProcessing->ZOrder(z); } +kaguya::optional rawNextRelation() { return osmLuaProcessing->NextRelation(); } +void rawRestartRelations() { return osmLuaProcessing->RestartRelations(); } +std::string rawFindInRelation(const std::string& key) { return 
osmLuaProcessing->FindInRelation(key); } +void rawAccept() { return osmLuaProcessing->Accept(); } +double rawAreaIntersecting(const std::string& layerName) { return osmLuaProcessing->AreaIntersecting(layerName); } +std::vector rawCentroid() { return osmLuaProcessing->Centroid(); } + + bool supportsRemappingShapefiles = false; -const std::string EMPTY_STRING = ""; int lua_error_handler(int errCode, const char *errMessage) { @@ -45,31 +162,41 @@ OsmLuaProcessing::OsmLuaProcessing( g_luaState = &luaState; luaState.setErrorHandler(lua_error_handler); luaState.dofile(luaFile.c_str()); - luaState["OSM"].setClass(kaguya::UserdataMetatable() - .addFunction("Id", &OsmLuaProcessing::Id) - .addFunction("Holds", &OsmLuaProcessing::Holds) - .addFunction("Find", &OsmLuaProcessing::Find) - .addFunction("FindIntersecting", &OsmLuaProcessing::FindIntersecting) - .addFunction("Intersects", &OsmLuaProcessing::Intersects) - .addFunction("FindCovering", &OsmLuaProcessing::FindCovering) - .addFunction("CoveredBy", &OsmLuaProcessing::CoveredBy) - .addFunction("IsClosed", &OsmLuaProcessing::IsClosed) - .addFunction("Area", &OsmLuaProcessing::Area) - .addFunction("AreaIntersecting", &OsmLuaProcessing::AreaIntersecting) - .addFunction("Length", &OsmLuaProcessing::Length) - .addFunction("Centroid", &OsmLuaProcessing::Centroid) - .addFunction("Layer", &OsmLuaProcessing::Layer) - .addFunction("LayerAsCentroid", &OsmLuaProcessing::LayerAsCentroid) - .addOverloadedFunctions("Attribute", &OsmLuaProcessing::Attribute, &OsmLuaProcessing::AttributeWithMinZoom) - .addOverloadedFunctions("AttributeNumeric", &OsmLuaProcessing::AttributeNumeric, &OsmLuaProcessing::AttributeNumericWithMinZoom) - .addOverloadedFunctions("AttributeBoolean", &OsmLuaProcessing::AttributeBoolean, &OsmLuaProcessing::AttributeBooleanWithMinZoom) - .addFunction("MinZoom", &OsmLuaProcessing::MinZoom) - .addFunction("ZOrder", &OsmLuaProcessing::ZOrder) - .addFunction("Accept", &OsmLuaProcessing::Accept) - 
.addFunction("NextRelation", &OsmLuaProcessing::NextRelation) - .addFunction("RestartRelations", &OsmLuaProcessing::RestartRelations) - .addFunction("FindInRelation", &OsmLuaProcessing::FindInRelation) + + osmLuaProcessing = this; + luaState["Id"] = &rawId; + luaState["Holds"] = &rawHolds; + luaState["Find"] = &rawFind; + luaState["FindIntersecting"] = &rawFindIntersecting; + luaState["Intersects"] = &rawIntersects; + luaState["FindCovering"] = &rawFindCovering; + luaState["CoveredBy"] = &rawCoveredBy; + luaState["IsClosed"] = &rawIsClosed; + luaState["Area"] = &rawArea; + luaState["AreaIntersecting"] = &rawAreaIntersecting; + luaState["Length"] = &rawLength; + luaState["Centroid"] = &rawCentroid; + luaState["Layer"] = &rawLayer; + luaState["LayerAsCentroid"] = &rawLayerAsCentroid; + luaState["Attribute"] = kaguya::overload( + [](const std::string &key, const PossiblyKnownTagValue& val) { osmLuaProcessing->AttributeWithMinZoom(key, val, 0); }, + [](const std::string &key, const PossiblyKnownTagValue& val, const char minzoom) { osmLuaProcessing->AttributeWithMinZoom(key, val, minzoom); } + ); + luaState["AttributeNumeric"] = kaguya::overload( + [](const std::string &key, const float val) { osmLuaProcessing->AttributeNumericWithMinZoom(key, val, 0); }, + [](const std::string &key, const float val, const char minzoom) { osmLuaProcessing->AttributeNumericWithMinZoom(key, val, minzoom); } ); + luaState["AttributeBoolean"] = kaguya::overload( + [](const std::string &key, const bool val) { osmLuaProcessing->AttributeBooleanWithMinZoom(key, val, 0); }, + [](const std::string &key, const bool val, const char minzoom) { osmLuaProcessing->AttributeBooleanWithMinZoom(key, val, minzoom); } + ); + + luaState["MinZoom"] = &rawMinZoom; + luaState["ZOrder"] = &rawZOrder; + luaState["Accept"] = &rawAccept; + luaState["NextRelation"] = &rawNextRelation; + luaState["RestartRelations"] = &rawRestartRelations; + luaState["FindInRelation"] = &rawFindInRelation; 
supportsRemappingShapefiles = !!luaState["attribute_function"]; supportsReadingRelations = !!luaState["relation_scan_function"]; supportsWritingRelations = !!luaState["relation_function"]; @@ -121,18 +248,6 @@ string OsmLuaProcessing::Id() const { return to_string(originalOsmID); } -// Check if there's a value for a given key -bool OsmLuaProcessing::Holds(const string& key) const { - return currentTags->find(key) != currentTags->end(); -} - -// Get an OSM tag for a given key (or return empty string if none) -const string& OsmLuaProcessing::Find(const string& key) const { - auto it = currentTags->find(key); - if(it == currentTags->end()) return EMPTY_STRING; - return it->second; -} - // ---- Spatial queries called from Lua vector OsmLuaProcessing::FindIntersecting(const string &layerName) { @@ -327,6 +442,7 @@ const MultiPolygon &OsmLuaProcessing::multiPolygonCached() { // Add object to specified layer from Lua void OsmLuaProcessing::Layer(const string &layerName, bool area) { + outputKeys.clear(); if (layers.layerMap.count(layerName) == 0) { throw out_of_range("ERROR: Layer(): a layer named as \"" + layerName + "\" doesn't exist."); } @@ -350,7 +466,9 @@ void OsmLuaProcessing::Layer(const string &layerName, bool area) { if(CorrectGeometry(p) == CorrectGeometryResult::Invalid) return; - NodeID id = osmMemTiles.storePoint(p); + NodeID id = USE_NODE_STORE | originalOsmID; + if (materializeGeometries) + id = osmMemTiles.storePoint(p); OutputObject oo(geomType, layers.layerMap[layerName], id, 0, layerMinZoom); outputs.push_back(std::make_pair(std::move(oo), attributes)); return; @@ -441,6 +559,7 @@ void OsmLuaProcessing::Layer(const string &layerName, bool area) { } void OsmLuaProcessing::LayerAsCentroid(const string &layerName) { + outputKeys.clear(); if (layers.layerMap.count(layerName) == 0) { throw out_of_range("ERROR: LayerAsCentroid(): a layer named as \"" + layerName + "\" doesn't exist."); } @@ -466,7 +585,21 @@ void OsmLuaProcessing::LayerAsCentroid(const 
string &layerName) { return; } - NodeID id = osmMemTiles.storePoint(geomp); + NodeID id = 0; + // We don't do lazy centroids for relations - calculating their centroid + // can be quite expensive, and there's not as many of them as there are + // ways. + if (materializeGeometries || isRelation) { + id = osmMemTiles.storePoint(geomp); + } else if (!isRelation && !isWay) { + // Sometimes people call LayerAsCentroid(...) on a node, because they're + // writing a generic handler that doesn't know if it's a node or a way, + // e.g. POIs. + id = USE_NODE_STORE | originalOsmID; + } else { + id = USE_WAY_STORE | originalOsmID; + wayEmitted = true; + } OutputObject oo(POINT_, layers.layerMap[layerName], id, 0, layerMinZoom); outputs.push_back(std::make_pair(std::move(oo), attributes)); } @@ -475,8 +608,7 @@ Point OsmLuaProcessing::calculateCentroid() { Point centroid; if (isRelation) { Geometry tmp; - tmp = osmStore.wayListMultiPolygon( - outerWayVecPtr->cbegin(), outerWayVecPtr->cend(), innerWayVecPtr->begin(), innerWayVecPtr->cend()); + tmp = multiPolygonCached(); geom::centroid(tmp, centroid); return Point(centroid.x()*10000000.0, centroid.y()*10000000.0); } else if (isWay) { @@ -499,25 +631,47 @@ void OsmLuaProcessing::Accept() { relationAccepted = true; } +void OsmLuaProcessing::removeAttributeIfNeeded(const string& key) { + // Does it exist? 
+ for (int i = 0; i < outputKeys.size(); i++) { + if (outputKeys[i] == key) { + AttributeSet& set = outputs.back().second; + set.removePairWithKey(attributeStore.pairStore, attributeStore.keyStore.key2index(key)); + return; + } + } + + outputKeys.push_back(key); +} + // Set attributes in a vector tile's Attributes table -void OsmLuaProcessing::Attribute(const string &key, const string &val) { AttributeWithMinZoom(key,val,0); } -void OsmLuaProcessing::AttributeWithMinZoom(const string &key, const string &val, const char minzoom) { - if (val.size()==0) { return; } // don't set empty strings +void OsmLuaProcessing::AttributeWithMinZoom(const string &key, const PossiblyKnownTagValue& val, const char minzoom) { + std::string str; + + if (val.found) { + auto existingValue = currentTags->getValue(val.index); + str = std::string(existingValue->data(), existingValue->size()); + } else { + str = val.fallback; + } + + if (str.size()==0) { return; } // don't set empty strings if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } - attributeStore.addAttribute(outputs.back().second, key, val, minzoom); + removeAttributeIfNeeded(key); + attributeStore.addAttribute(outputs.back().second, key, str, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 0); } -void OsmLuaProcessing::AttributeNumeric(const string &key, const float val) { AttributeNumericWithMinZoom(key,val,0); } void OsmLuaProcessing::AttributeNumericWithMinZoom(const string &key, const float val, const char minzoom) { if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } + removeAttributeIfNeeded(key); attributeStore.addAttribute(outputs.back().second, key, val, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 1); } -void OsmLuaProcessing::AttributeBoolean(const string &key, const bool val) { AttributeBooleanWithMinZoom(key,val,0); } void OsmLuaProcessing::AttributeBooleanWithMinZoom(const string &key, const bool 
val, const char minzoom) { if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } + removeAttributeIfNeeded(key); attributeStore.addAttribute(outputs.back().second, key, val, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 2); } @@ -556,25 +710,27 @@ void OsmLuaProcessing::setVectorLayerMetadata(const uint_least8_t layer, const s // Scan relation (but don't write geometry) // return true if we want it, false if we don't -bool OsmLuaProcessing::scanRelation(WayID id, const tag_map_t &tags) { +bool OsmLuaProcessing::scanRelation(WayID id, const TagMap& tags) { reset(); originalOsmID = id; isWay = false; isRelation = true; currentTags = &tags; try { - luaState["relation_scan_function"](this); + luaState["relation_scan_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on scanning relation " << originalOsmID << std::endl; exit(1); } if (!relationAccepted) return false; - osmStore.store_relation_tags(id, tags); + // If we're persisting, we need to make a real map that owns its + // own keys and values. 
+ osmStore.store_relation_tags(id, tags.exportToBoostMap()); return true; } -void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { +void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) { reset(); originalOsmID = id; @@ -586,7 +742,7 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { //Start Lua processing for node try { - luaState["node_function"](this); + luaState["node_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on node " << originalOsmID << std::endl; exit(1); @@ -602,7 +758,7 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { } // We are now processing a way -bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_map_t &tags) { +bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags) { reset(); wayEmitted = false; originalOsmID = wayId; @@ -630,17 +786,14 @@ bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_ma currentTags = &tags; - bool ok = true; - if (ok) { - //Start Lua processing for way - try { - kaguya::LuaFunction way_function = luaState["way_function"]; - kaguya::LuaRef ret = way_function(this); - assert(!ret); - } catch(luaProcessingException &e) { - std::cerr << "Lua error on way " << originalOsmID << std::endl; - exit(1); - } + //Start Lua processing for way + try { + kaguya::LuaFunction way_function = luaState["way_function"]; + kaguya::LuaRef ret = way_function(); + assert(!ret); + } catch(luaProcessingException &e) { + std::cerr << "Lua error on way " << originalOsmID << std::endl; + exit(1); } if (!this->empty()) { @@ -652,7 +805,7 @@ bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_ma } // We are now processing a relation -void OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const tag_map_t &tags, +void 
OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const TagMap& tags, bool isNativeMP, // only OSM type=multipolygon bool isInnerOuter) { // any OSM relation with "inner" and "outer" roles (e.g. type=multipolygon|boundary) reset(); @@ -669,7 +822,7 @@ void OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec // Start Lua processing for relation if (!isNativeMP && !supportsWritingRelations) return; try { - luaState[isNativeMP ? "way_function" : "relation_function"](this); + luaState[isNativeMP ? "way_function" : "relation_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on relation " << originalOsmID << std::endl; exit(1); diff --git a/src/osm_mem_tiles.cpp b/src/osm_mem_tiles.cpp index f5527d0e..7dc03f45 100644 --- a/src/osm_mem_tiles.cpp +++ b/src/osm_mem_tiles.cpp @@ -18,6 +18,30 @@ OsmMemTiles::OsmMemTiles( { } +LatpLon OsmMemTiles::buildNodeGeometry( + NodeID const objectID, + const TileBbox &bbox +) const { + if (objectID < OSM_THRESHOLD) { + return TileDataSource::buildNodeGeometry(objectID, bbox); + } + + if (IS_NODE(objectID)) + return nodeStore.at(OSM_ID(objectID)); + + + if (IS_WAY(objectID)) { + Linestring& ls = getOrBuildLinestring(objectID); + Point centroid; + Polygon p; + geom::assign_points(p, ls); + geom::centroid(p, centroid); + return LatpLon{(int32_t)(centroid.y()*10000000.0), (int32_t)(centroid.x()*10000000.0)}; + } + + throw std::runtime_error("OsmMemTiles::buildNodeGeometry: unsupported objectID"); +} + Geometry OsmMemTiles::buildWayGeometry( const OutputGeometryType geomType, const NodeID objectID, @@ -58,7 +82,7 @@ Geometry OsmMemTiles::buildWayGeometry( throw std::runtime_error("buildWayGeometry: unexpected objectID: " + std::to_string(objectID)); } -void OsmMemTiles::populateLinestring(Linestring& ls, NodeID objectID) { +void OsmMemTiles::populateLinestring(Linestring& ls, NodeID objectID) const { std::vector nodes = 
wayStore.at(OSM_ID(objectID)); for (const LatpLon& node : nodes) { @@ -66,7 +90,7 @@ void OsmMemTiles::populateLinestring(Linestring& ls, NodeID objectID) { } } -Linestring& OsmMemTiles::getOrBuildLinestring(NodeID objectID) { +Linestring& OsmMemTiles::getOrBuildLinestring(NodeID objectID) const { // Note: this function returns a reference, not a shared_ptr. // // This is safe, because this function is the only thing that can diff --git a/src/output_object.cpp b/src/output_object.cpp index b68fb27f..7f9f0edb 100644 --- a/src/output_object.cpp +++ b/src/output_object.cpp @@ -87,9 +87,12 @@ void OutputObject::writeAttributes( int OutputObject::findValue(const vector* valueList, const AttributePair& value) const { for (size_t i=0; isize(); i++) { const vector_tile::Tile_Value& v = valueList->at(i); - if (v.has_string_value() && value.hasStringValue() && v.string_value()==value.stringValue()) { return i; } - if (v.has_float_value() && value.hasFloatValue() && v.float_value() ==value.floatValue() ) { return i; } - if (v.has_bool_value() && value.hasBoolValue() && v.bool_value() ==value.boolValue() ) { return i; } + if (v.has_string_value() && value.hasStringValue()) { + const size_t valueSize = value.pooledString().size(); + if (valueSize == v.string_value().size() && memcmp(v.string_value().data(), value.pooledString().data(), valueSize) == 0) + return i; + } else if (v.has_float_value() && value.hasFloatValue() && v.float_value() ==value.floatValue() ) { return i; } + else if (v.has_bool_value() && value.hasBoolValue() && v.bool_value() ==value.boolValue() ) { return i; } } return -1; } diff --git a/src/pbf_blocks.cpp b/src/pbf_blocks.cpp deleted file mode 100644 index e33ffca0..00000000 --- a/src/pbf_blocks.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include "pbf_blocks.h" -#include "helpers.h" -#include -using namespace std; - -/* ------------------- - Protobuf handling - ------------------- */ - -// Read and parse a protobuf message -void 
readMessage(google::protobuf::Message *message, istream &input, unsigned int size) { - vector buffer(size); - input.read(&buffer.front(), size); - message->ParseFromArray(&buffer.front(), size); -} - -// Read an osm.pbf sequence of header length -> BlobHeader -> Blob -// and parse the unzipped contents into a message -BlobHeader readHeader(istream &input) { - BlobHeader bh; - - unsigned int size; - input.read((char*)&size, sizeof(size)); - if (input.eof()) { return bh; } - endian_swap(size); - - // get BlobHeader and parse - readMessage(&bh, input, size); - return bh; -} - -void readBlock(google::protobuf::Message *messagePtr, std::size_t datasize, istream &input) { - if (input.eof()) { return ; } - - // get Blob and parse - Blob blob; - readMessage(&blob, input, datasize); - - // Unzip the gzipped content - string contents = decompress_string(blob.zlib_data(), false); - messagePtr->ParseFromString(contents); -} - -void writeBlock(google::protobuf::Message *messagePtr, ostream &output, string headerType) { - // encode the message - string serialised; - messagePtr->SerializeToString(&serialised); - // create a blob and store it - Blob blob; - blob.set_raw_size(serialised.length()); - blob.set_zlib_data(compress_string(serialised)); - // encode the blob - string blob_encoded; - blob.SerializeToString(&blob_encoded); - - // create the BlobHeader - BlobHeader bh; - bh.set_type(headerType); - bh.set_datasize(blob_encoded.length()); - // encode it - string header_encoded; - bh.SerializeToString(&header_encoded); - - // write out - unsigned int bhLength=header_encoded.length(); - endian_swap(bhLength); - output.write(reinterpret_cast(&bhLength), 4); - output.write(header_encoded.c_str(), header_encoded.length() ); - output.write(blob_encoded.c_str(), blob_encoded.length() ); -} - -/* ------------------- - Tag handling - ------------------- */ - -// Populate an array with the contents of a StringTable -void readStringTable(vector *strPtr, PrimitiveBlock *pbPtr) { - 
strPtr->resize(pbPtr->stringtable().s_size()); - for (int i=0; istringtable().s_size(); i++) { - (*strPtr)[i] = pbPtr->stringtable().s(i); // dereference strPtr to get strings - } -} - -// Populate a map with the reverse contents of a StringTable (i.e. string->num) -void readStringMap(map *mapPtr, PrimitiveBlock *pbPtr) { - for (int i=0; istringtable().s_size(); i++) { - mapPtr->insert(pair (pbPtr->stringtable().s(i), i)); - } -} - -// Read the tags for a way into a hash -// requires strings array to have been populated by readStringTable -map getTags(vector *strPtr, Way *wayPtr) { - map tags; - for (int n=0; nkeys_size(); n++) { - tags[(*strPtr)[wayPtr->keys(n)]] = (*strPtr)[wayPtr->vals(n)]; - } - return tags; -} - -// Find the index of a string in the StringTable, adding it if it's not there -unsigned int findStringInTable(string *strPtr, map *mapPtr, PrimitiveBlock *pbPtr) { - if (mapPtr->find(*strPtr) == mapPtr->end()) { - pbPtr->mutable_stringtable()->add_s(*strPtr); - unsigned int ix = pbPtr->stringtable().s_size()-1; - mapPtr->insert(pair (*strPtr, ix)); - } - return mapPtr->at(*strPtr); -} - -// Set a tag for a way to a new value -void setTag(Way *wayPtr, unsigned int keyIndex, unsigned int valueIndex) { - for (int i=0; ikeys_size(); i++) { - if (wayPtr->keys(i)==keyIndex) { - wayPtr->mutable_vals()->Set(i,valueIndex); - return; - } - } - wayPtr->mutable_keys()->Add(keyIndex); - wayPtr->mutable_vals()->Add(valueIndex); -} diff --git a/src/pbf_processor.cpp b/src/pbf_processor.cpp new file mode 100644 index 00000000..78ddfaaf --- /dev/null +++ b/src/pbf_processor.cpp @@ -0,0 +1,663 @@ +#include +#include "tag_map.h" +#include "pbf_processor.h" +#include "pbf_reader.h" + +#include +#include +#include + +#include "node_store.h" +#include "way_store.h" +#include "osm_lua_processing.h" +#include "mmap_allocator.h" + +using namespace std; + +const std::string OptionSortTypeThenID = "Sort.Type_then_ID"; +const std::string OptionLocationsOnWays = 
"LocationsOnWays"; +std::atomic blocksProcessed(0), blocksToProcess(0); + +// Thread-local so that we can re-use buffers during parsing. +thread_local PbfReader::PbfReader reader; + +PbfProcessor::PbfProcessor(OSMStore &osmStore) + : osmStore(osmStore) +{ } + +bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const unordered_set& nodeKeyPositions) +{ + // ---- Read nodes + TagMap tags; + std::vector nodes; + + for (auto& node : pg.nodes()) { + NodeID nodeId = node.id; + LatpLon latplon = { int(lat2latp(double(node.lat)/10000000.0)*10000000.0), node.lon }; + + bool significant = false; + for (int i = node.tagStart; i < node.tagEnd; i += 2) { + auto keyIndex = pg.translateNodeKeyValue(i); + + if (nodeKeyPositions.find(keyIndex) != nodeKeyPositions.end()) { + significant = true; + } + } + + nodes.push_back(std::make_pair(static_cast(nodeId), latplon)); + + if (significant) { + // For tagged nodes, call Lua, then save the OutputObject + tags.reset(); + + for (int n = node.tagStart; n < node.tagEnd; n += 2) { + auto keyIndex = pg.translateNodeKeyValue(n); + auto valueIndex = pg.translateNodeKeyValue(n + 1); + + const protozero::data_view& key = pb.stringTable[keyIndex]; + const protozero::data_view& value = pb.stringTable[valueIndex]; + tags.addTag(key, value); + } + output.setNode(static_cast(nodeId), latplon, tags); + } + } + + if (nodes.size() > 0) { + osmStore.nodes.insert(nodes); + } + + return !pg.nodes().empty(); +} + +bool PbfProcessor::ReadWays( + OsmLuaProcessing &output, + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + bool locationsOnWays, + uint shard, + uint effectiveShards +) { + // ---- Read ways + if (pg.ways().empty()) + return false; + + TagMap tags; + + const bool wayStoreRequiresNodes = osmStore.ways.requiresNodes(); + + std::vector llWays; + std::vector>> nodeWays; + LatpLonVec llVec; + std::vector nodeVec; + + for (PbfReader::Way pbfWay : pg.ways()) { 
+ llVec.clear(); + nodeVec.clear(); + + WayID wayId = static_cast(pbfWay.id); + if (wayId >= pow(2,42)) throw std::runtime_error("Way ID negative or too large: "+std::to_string(wayId)); + + // Assemble nodelist + if (locationsOnWays) { + llVec.reserve(pbfWay.lats.size()); + for (int k=0; k 1 && !osmStore.nodes.contains(shard, nodeId)) { + skipToNext = true; + break; + } + + try { + llVec.push_back(osmStore.nodes.at(static_cast(nodeId))); + nodeVec.push_back(nodeId); + } catch (std::out_of_range &err) { + if (osmStore.integrity_enforced()) throw err; + } + } + + if (skipToNext) + continue; + } + if (llVec.empty()) continue; + + try { + tags.reset(); + readTags(pbfWay, pb, tags); + bool emitted = output.setWay(static_cast(pbfWay.id), llVec, tags); + + // If we need it for later, store the way's coordinates in the global way store + if (emitted || osmStore.way_is_used(wayId)) { + if (wayStoreRequiresNodes) + nodeWays.push_back(std::make_pair(wayId, nodeVec)); + else + llWays.push_back(std::make_pair(wayId, WayStore::latplon_vector_t(llVec.begin(), llVec.end()))); + } + + } catch (std::out_of_range &err) { + // Way is missing a node? 
+ cerr << endl << err.what() << endl; + } + + } + + if (wayStoreRequiresNodes) { + osmStore.ways.shard(shard).insertNodes(nodeWays); + } else { + osmStore.ways.shard(shard).insertLatpLons(llWays); + } + + return true; +} + +bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb) { + // Scan relations to see which ways we need to save + if (pg.relations().empty()) + return false; + + int typeKey = findStringPosition(pb, "type"); + int mpKey = findStringPosition(pb, "multipolygon"); + + TagMap tags; + for (PbfReader::Relation pbfRelation : pg.relations()) { + bool isMultiPolygon = relationIsType(pbfRelation, typeKey, mpKey); + bool isAccepted = false; + WayID relid = static_cast(pbfRelation.id); + if (!isMultiPolygon) { + if (output.canReadRelations()) { + tags.reset(); + readTags(pbfRelation, pb, tags); + isAccepted = output.scanRelation(relid, tags); + } + if (!isAccepted) continue; + } + for (int n=0; n < pbfRelation.memids.size(); n++) { + uint64_t lastID = pbfRelation.memids[n]; + if (pbfRelation.types[n] != PbfReader::Relation::MemberType::WAY) { continue; } + if (lastID >= pow(2,42)) throw std::runtime_error("Way ID in relation "+std::to_string(relid)+" negative or too large: "+std::to_string(lastID)); + osmStore.mark_way_used(static_cast(lastID)); + if (isAccepted) { osmStore.relation_contains_way(relid, lastID); } + } + } + return true; +} + +bool PbfProcessor::ReadRelations( + OsmLuaProcessing& output, + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + const BlockMetadata& blockMetadata, + uint shard, + uint effectiveShards +) { + // ---- Read relations + if (pg.relations().empty()) + return false; + + TagMap tags; + + std::vector relations; + + int typeKey = findStringPosition(pb, "type"); + int mpKey = findStringPosition(pb, "multipolygon"); + int boundaryKey = findStringPosition(pb, "boundary"); + int innerKey= findStringPosition(pb, "inner"); + int outerKey= 
findStringPosition(pb, "outer"); + if (typeKey >-1 && mpKey>-1) { + int j = -1; + for (PbfReader::Relation pbfRelation : pg.relations()) { + j++; + if (j % blockMetadata.chunks != blockMetadata.chunk) + continue; + + bool isMultiPolygon = relationIsType(pbfRelation, typeKey, mpKey); + bool isBoundary = relationIsType(pbfRelation, typeKey, boundaryKey); + if (!isMultiPolygon && !isBoundary && !output.canWriteRelations()) continue; + + // Read relation members + WayVec outerWayVec, innerWayVec; + bool isInnerOuter = isBoundary || isMultiPolygon; + bool skipToNext = false; + bool firstWay = true; + for (int n = 0; n < pbfRelation.memids.size(); n++) { + uint64_t lastID = pbfRelation.memids[n]; + if (pbfRelation.types[n] != PbfReader::Relation::MemberType::WAY) { continue; } + int32_t role = pbfRelation.roles_sid[n]; + if (role==innerKey || role==outerKey) isInnerOuter=true; + WayID wayId = static_cast(lastID); + + if (firstWay && effectiveShards > 1 && !osmStore.ways.contains(shard, wayId)) { + skipToNext = true; + break; + } + if (firstWay) + firstWay = false; + (role == innerKey ? innerWayVec : outerWayVec).push_back(wayId); + } + + if (skipToNext) + continue; + + try { + tags.reset(); + readTags(pbfRelation, pb, tags); + output.setRelation(pbfRelation.id, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter); + + } catch (std::out_of_range &err) { + // Relation is missing a member? + cerr << endl << err.what() << endl; + } + } + } + + osmStore.relations_insert_front(relations); + return true; +} + +// Returns true when block was completely handled, thus could be omited by another phases. 
+bool PbfProcessor::ReadBlock( + std::istream& infile, + OsmLuaProcessing& output, + const BlockMetadata& blockMetadata, + const unordered_set& nodeKeys, + bool locationsOnWays, + ReadPhase phase, + uint shard, + uint effectiveShards +) +{ + infile.seekg(blockMetadata.offset); + + protozero::data_view blob = reader.readBlob(blockMetadata.length, infile); + PbfReader::PrimitiveBlock& pb = reader.readPrimitiveBlock(blob); + if (infile.eof()) { + return true; + } + + // Keep count of groups read during this phase. + std::size_t read_groups = 0; + + // Read the string table, and pre-calculate the positions of valid node keys + unordered_set nodeKeyPositions; + for (auto it : nodeKeys) { + //nodeKeyPositions.insert(findStringPosition(pb, it)); + auto rv = findStringPosition(pb, it); + nodeKeyPositions.insert(rv); + } + + int primitiveGroupSize = 0; + for (auto& pg : pb.groups()) { + primitiveGroupSize++; + + auto output_progress = [&]() + { + if (ioMutex.try_lock()) { + std::ostringstream str; + str << "\r"; + void_mmap_allocator::reportStoreSize(str); + if (effectiveShards > 1) + str << std::to_string(shard + 1) << "/" << std::to_string(effectiveShards) << " "; + + // TODO: revive showing the # of ways/relations? 
+ str << "Block " << blocksProcessed.load() << "/" << blocksToProcess.load() << " "; + std::cout << str.str(); + std::cout.flush(); + ioMutex.unlock(); + } + }; + + if(phase == ReadPhase::Nodes) { + bool done = ReadNodes(output, pg, pb, nodeKeyPositions); + if(done) { + output_progress(); + ++read_groups; + continue; + } + } + + if(phase == ReadPhase::RelationScan) { + osmStore.ensureUsedWaysInited(); + bool done = ScanRelations(output, pg, pb); + if(done) { + if (ioMutex.try_lock()) { + std::cout << "\r(Scanning for ways used in relations: " << (100*blocksProcessed.load()/blocksToProcess.load()) << "%) "; + std::cout.flush(); + ioMutex.unlock(); + } + continue; + } + } + + if(phase == ReadPhase::Ways) { + bool done = ReadWays(output, pg, pb, locationsOnWays, shard, effectiveShards); + if(done) { + output_progress(); + ++read_groups; + continue; + } + } + + if(phase == ReadPhase::Relations) { + bool done = ReadRelations(output, pg, pb, blockMetadata, shard, effectiveShards); + if(done) { + output_progress(); + ++read_groups; + continue; + } + } + } + + // Possible cases of a block contents: + // - single group + // - multiple groups of the same type + // - multiple groups of the different type + // + // In later case block would not be handled during this phase, and should be + // read again in remaining phases. Thus we return false to indicate that the + // block was not handled completelly. + if(read_groups != primitiveGroupSize) { + return false; + } + + // We can only delete blocks if we're confident we've processed everything, + // which is not possible in the case of subdivided blocks. 
+ return (shard + 1 == effectiveShards) && blockMetadata.chunks == 1; +} + +bool blockHasPrimitiveGroupSatisfying( + std::istream& infile, + const BlockMetadata block, + std::function test +) { + // We may have previously read to EOF, so clear the internal error state + infile.clear(); + infile.seekg(block.offset); + protozero::data_view blob = reader.readBlob(block.length, infile); + PbfReader::PrimitiveBlock pb = reader.readPrimitiveBlock(blob); + + if (infile.eof()) { + throw std::runtime_error("blockHasPrimitiveGroupSatisfying got unexpected eof"); + } + + for (auto& pg : pb.groups()) { + if (test(pg)) + return false; + } + + return true; +} + +int PbfProcessor::ReadPbfFile( + uint shards, + bool hasSortTypeThenID, + unordered_set const& nodeKeys, + unsigned int threadNum, + const pbfreader_generate_stream& generate_stream, + const pbfreader_generate_output& generate_output, + const NodeStore& nodeStore, + const WayStore& wayStore +) +{ + auto infile = generate_stream(); + + // ---- Read PBF + osmStore.clear(); + + PbfReader::HeaderBlock block = reader.readHeaderFromFile(*infile); + bool locationsOnWays = block.optionalFeatures.find(OptionLocationsOnWays) != block.optionalFeatures.end(); + if (locationsOnWays) { + std::cout << ".osm.pbf file has locations on ways" << std::endl; + } + + std::map blocks; + + // Track the filesize - note that we can't rely on tellg(), as + // its meant to be an opaque token useful only for seeking. + size_t filesize = 0; + while (true) { + PbfReader::BlobHeader bh = reader.readBlobHeader(*infile); + filesize += bh.datasize; + if (infile->eof()) { + break; + } + + blocks[blocks.size()] = { (long int)infile->tellg(), bh.datasize, true, true, true, 0, 1 }; + infile->seekg(bh.datasize, std::ios_base::cur); + } + + if (hasSortTypeThenID) { + // The PBF's blocks are sorted by type, then ID. 
We can do a binary search + // to learn where the blocks transition between object types, which + // enables a more efficient partitioning of work for reading. + std::vector indexes; + for (int i = 0; i < blocks.size(); i++) + indexes.push_back(i); + + const auto& waysStart = std::lower_bound( + indexes.begin(), + indexes.end(), + 0, + [&blocks, &infile](const auto &i, const auto &ignored) { + return blockHasPrimitiveGroupSatisfying( + *infile, + blocks[i], + [](const PbfReader::PrimitiveGroup& pg) { + for(auto w : pg.ways()) return true; + for(auto r : pg.relations()) return true; + return false; + } + ); + } + ); + + const auto& relationsStart = std::lower_bound( + indexes.begin(), + indexes.end(), + 0, + [&blocks, &infile](const auto &i, const auto &ignored) { + return blockHasPrimitiveGroupSatisfying( + *infile, + blocks[i], + [](const PbfReader::PrimitiveGroup& pg) { + for (auto r : pg.relations()) return true; + return false; + } + ); + } + ); + + for (auto it = indexes.begin(); it != indexes.end(); it++) { + blocks[*it].hasNodes = it <= waysStart; + blocks[*it].hasWays = it >= waysStart && it <= relationsStart; + blocks[*it].hasRelations = it >= relationsStart; + } + } + + + // PBFs generated by Osmium have 8,000 entities per block, + // and each block is about 64KB. + // + // PBFs generated by osmconvert (e.g., BBBike PBFs) have as + // many entities as fit in 31MB. Each block is about 16MB. + // + // Osmium PBFs seem to be processed about 3x faster than osmconvert + // PBFs, so try to hint to the user when they could speed up their + // pipeline. 
+ if (filesize / blocks.size() > 1000000) { + std::cout << "warning: PBF has very large blocks, which may slow processing" << std::endl; + std::cout << " to fix: osmium cat -f pbf your-file.osm.pbf -o optimized.osm.pbf" << std::endl; + } + + + std::vector all_phases = { ReadPhase::Nodes, ReadPhase::RelationScan, ReadPhase::Ways, ReadPhase::Relations }; + for(auto phase: all_phases) { + uint effectiveShards = 1; + + // On memory-constrained machines, we might read ways/relations + // multiple times in order to keep the working set of nodes limited. + if (phase == ReadPhase::Ways || phase == ReadPhase::Relations) + effectiveShards = shards; + + for (int shard = 0; shard < effectiveShards; shard++) { + // If we're in ReadPhase::Ways, only do a pass if there is at least one + // entry in the pass's shard. + if (phase == ReadPhase::Ways && nodeStore.shard(shard).size() == 0) + continue; + + // Ditto, but for relations + if (phase == ReadPhase::Relations && wayStore.shard(shard).size() == 0) + continue; + +#ifdef CLOCK_MONOTONIC + timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); +#endif + + // Launch the pool with threadNum threads + boost::asio::thread_pool pool(threadNum); + std::mutex block_mutex; + + // If we're in ReadPhase::Relations and there aren't many blocks left + // to read, increase parallelism by letting each thread only process + // a portion of the block. 
+ if (phase == ReadPhase::Relations && blocks.size() < threadNum * 2) { + std::cout << "only " << blocks.size() << " relation blocks; subdividing for better parallelism" << std::endl; + std::map moreBlocks; + for (const auto& block : blocks) { + BlockMetadata newBlock = block.second; + newBlock.chunks = threadNum; + for (size_t i = 0; i < threadNum; i++) { + newBlock.chunk = i; + moreBlocks[moreBlocks.size()] = newBlock; + } + } + blocks = moreBlocks; + } + + std::deque> blockRanges; + std::map filteredBlocks; + for (const auto& entry : blocks) { + if ((phase == ReadPhase::Nodes && entry.second.hasNodes) || + (phase == ReadPhase::RelationScan && entry.second.hasRelations) || + (phase == ReadPhase::Ways && entry.second.hasWays) || + (phase == ReadPhase::Relations && entry.second.hasRelations)) + filteredBlocks[entry.first] = entry.second; + } + + blocksToProcess = filteredBlocks.size(); + blocksProcessed = 0; + + // Relations have very non-uniform processing times, so prefer + // to process them as granularly as possible. + size_t batchSize = 1; + + // When creating NodeStore/WayStore, we try to give each worker + // large batches of contiguous blocks, so that they might benefit from + // long runs of sorted indexes, and locality of nearby IDs. 
+ if (phase == ReadPhase::Nodes || phase == ReadPhase::Ways) + batchSize = (filteredBlocks.size() / (threadNum * 8)) + 1; + + size_t consumed = 0; + auto it = filteredBlocks.begin(); + while(it != filteredBlocks.end()) { + std::vector blockRange; + blockRange.reserve(batchSize); + size_t max = consumed + batchSize; + for (; consumed < max && it != filteredBlocks.end(); consumed++) { + IndexedBlockMetadata ibm; + memcpy(&ibm, &it->second, sizeof(BlockMetadata)); + ibm.index = it->first; + blockRange.push_back(ibm); + it++; + } + blockRanges.push_back(blockRange); + } + + { + for(const std::vector& blockRange: blockRanges) { + boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys]() { + if (phase == ReadPhase::Nodes) + osmStore.nodes.batchStart(); + if (phase == ReadPhase::Ways) + osmStore.ways.batchStart(); + + for (const IndexedBlockMetadata& indexedBlockMetadata: blockRange) { + auto infile = generate_stream(); + auto output = generate_output(); + + if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, locationsOnWays, phase, shard, effectiveShards)) { + const std::lock_guard lock(block_mutex); + blocks.erase(indexedBlockMetadata.index); + } + blocksProcessed++; + } + }); + } + } + + pool.join(); + +#ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &end); + uint64_t elapsedNs = 1e9 * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; + std::cout << "(" << std::to_string((uint32_t)(elapsedNs / 1e6)) << " ms)" << std::endl; +#endif + } + + if(phase == ReadPhase::Nodes) { + osmStore.nodes.finalize(threadNum); + } + if(phase == ReadPhase::Ways) { + osmStore.ways.finalize(threadNum); + } + } + return 0; +} + +// Find a string in the dictionary +int PbfProcessor::findStringPosition(const PbfReader::PrimitiveBlock& pb, const std::string& str) { + for (int i = 0; i < pb.stringTable.size(); i++) { + if(str.size() == pb.stringTable[i].size() && memcmp(str.data(), pb.stringTable[i].data(), str.size()) == 0) + return i; + } + return 
-1; +} + + +// ************************************************* + +int ReadPbfBoundingBox(const std::string &inputFile, double &minLon, double &maxLon, + double &minLat, double &maxLat, bool &hasClippingBox) +{ + fstream infile(inputFile, ios::in | ios::binary); + if (!infile) { cerr << "Couldn't open .pbf file " << inputFile << endl; return -1; } + auto header = reader.readHeaderFromFile(infile); + if (header.hasBbox) { + hasClippingBox = true; + minLon = header.bbox.minLon; + maxLon = header.bbox.maxLon; + minLat = header.bbox.minLat; + maxLat = header.bbox.maxLat; + } + infile.close(); + return 0; +} + +bool PbfHasOptionalFeature(const std::string& inputFile, const std::string& feature) { + std::ifstream infile(inputFile, std::ifstream::in); + auto header = reader.readHeaderFromFile(infile); + infile.close(); + return header.optionalFeatures.find(feature) != header.optionalFeatures.end(); +} diff --git a/src/pbf_reader.cpp b/src/pbf_reader.cpp new file mode 100644 index 00000000..ed400a49 --- /dev/null +++ b/src/pbf_reader.cpp @@ -0,0 +1,590 @@ +#include +#include +#include +#include "pbf_reader.h" +#include "helpers.h" + +// Where pbf_processor.cpp has higher-level routines that populate our structures, +// pbf_reader.cpp has low-level tools that interact with the protobuf. +// +// The lifetime of an object is only until someone calls a readXyz function at +// the same or higher level. +// - e.g. readPrimitiveGroup invalidates the result of a prior readPrimitiveGroup call, +// but not the result of a prior readBlob call +// +// This allows us to re-use buffers to minimize heap churn and allocation cost. +// +// If you want to persist the data beyond that, you must make a copy in memory +// that you own. 
+ +PbfReader::BlobHeader PbfReader::PbfReader::readBlobHeader(std::istream& input) { + // See https://wiki.openstreetmap.org/wiki/PBF_Format#File_format + unsigned int size; + input.read((char*)&size, sizeof(size)); + if (input.eof()) { + return {"eof", -1}; + } + + endian_swap(size); + std::vector data; + data.resize(size); + input.read(&data[0], size); + + if (input.eof()) + throw std::runtime_error("readBlobHeader: unexpected eof"); + + protozero::pbf_message message{&data[0], data.size()}; + + std::string type; + int32_t datasize = -1; + + while (message.next()) { + switch (message.tag()) { + case Schema::BlobHeader::required_string_type: + type = message.get_string(); + break; + case Schema::BlobHeader::required_int32_datasize: + datasize = message.get_int32(); + break; + default: + // ignore data for unknown tags to allow for future extensions + // std::cout << "BlobHeader: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } + + if (type.empty()) + throw std::runtime_error("BlobHeader type is missing"); + + if (datasize == -1) + throw std::runtime_error("BlobHeader datasize is missing"); + + return { type, datasize }; +} + +protozero::data_view PbfReader::PbfReader::readBlob(int32_t datasize, std::istream& input) { + blobStorage.resize(datasize); + input.read(&blobStorage[0], datasize); + if (input.eof()) + throw std::runtime_error("readBlob: unexpected eof"); + + int32_t rawSize = -1; + protozero::data_view view; + protozero::pbf_message message{&blobStorage[0], blobStorage.size()}; + while (message.next()) { + switch (message.tag()) { + case Schema::Blob::optional_int32_raw_size: + rawSize = message.get_int32(); + break; + case Schema::Blob::oneof_data_bytes_raw: + view = message.get_view(); + break; + case Schema::Blob::oneof_data_bytes_zlib_data: + view = message.get_view(); + break; + default: + throw std::runtime_error("Blob: unknown tag: " + std::to_string(static_cast(message.tag()))); + } + } + + if 
(rawSize == -1) + // Data is not compressed, can return it directly. + return view; + + blobStorage2.resize(rawSize); + decompress_string(blobStorage2, view.data(), view.size(), false); + return { &blobStorage2[0], blobStorage2.size() }; +} + +PbfReader::HeaderBBox PbfReader::PbfReader::readHeaderBBox(protozero::data_view data) { + HeaderBBox box{0, 0, 0, 0}; + + protozero::pbf_message message{data}; + while (message.next()) { + switch (message.tag()) { + case Schema::HeaderBBox::required_sint64_left: + box.minLon = message.get_sint64() / 1000000000.0; + break; + case Schema::HeaderBBox::required_sint64_right: + box.maxLon = message.get_sint64() / 1000000000.0; + break; + case Schema::HeaderBBox::required_sint64_bottom: + box.minLat = message.get_sint64() / 1000000000.0; + break; + case Schema::HeaderBBox::required_sint64_top: + box.maxLat = message.get_sint64() / 1000000000.0; + break; + default: + throw std::runtime_error("HeaderBBox: unknown tag: " + std::to_string(static_cast(message.tag()))); + } + } + + return box; +} + +PbfReader::HeaderBlock PbfReader::PbfReader::readHeaderBlock(protozero::data_view data) { + HeaderBlock block{false}; + + protozero::pbf_message message{data}; + while (message.next()) { + switch (message.tag()) { + case Schema::HeaderBlock::optional_HeaderBBox_bbox: + block.hasBbox = true; + block.bbox = PbfReader::readHeaderBBox(message.get_view()); + break; + case Schema::HeaderBlock::repeated_string_optional_features: { + const auto feature = message.get_string(); + block.optionalFeatures.insert(feature); + break; + } + default: + // ignore data for unknown tags to allow for future extensions + //std::cout << "HeaderBlock: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } + + return block; +} + +void PbfReader::PbfReader::readStringTable(protozero::data_view data, std::vector& stringTable) { + protozero::pbf_message message{data}; + while (message.next()) { + switch (message.tag()) { + 
case Schema::StringTable::repeated_bytes_s: + stringTable.push_back(message.get_view()); + break; + default: + throw std::runtime_error("StringTable: unknown tag: " + std::to_string(static_cast(message.tag()))); + } + } +} + +PbfReader::PrimitiveBlock& PbfReader::PbfReader::readPrimitiveBlock(protozero::data_view data) { + pb.stringTable.clear(); + pb.internalGroups.clear(); + + protozero::pbf_message message{data}; + while (message.next()) { + switch (message.tag()) { + case Schema::PrimitiveBlock::required_StringTable_stringtable: + // Most of our use cases require the string table, so we eagerly + // initialize it. + PbfReader::readStringTable(message.get_view(), pb.stringTable); + break; + case Schema::PrimitiveBlock::repeated_PrimitiveGroup_primitivegroup: { + pb.internalGroups.push_back(PrimitiveGroup( + message.get_view(), + denseNodes, + way, + relation + )); + break; + } + default: + // ignore data for unknown tags to allow for future extensions + //std::cout << "HeaderBlock: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } + + pb.groupsImpl = PrimitiveBlock::PrimitiveGroups(pb.internalGroups); + + return pb; +} + +void PbfReader::DenseNodes::readDenseNodes(protozero::data_view data) { + protozero::pbf_message message{data}; + + uint64_t id = 0; + int32_t lon = 0, lat = 0; + + while (message.next()) { + switch (message.tag()) { + case Schema::DenseNodes::repeated_sint64_id: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + id += i; + ids.push_back(id); + } + break; + } case Schema::DenseNodes::repeated_sint64_lat: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + lat += i; + lats.push_back(lat); + } + break; + } + case Schema::DenseNodes::repeated_sint64_lon: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + lon += i; + lons.push_back(lon); + } + break; + } + case Schema::DenseNodes::repeated_int32_keys_vals: { + auto pi = message.get_packed_int32(); + 
for (auto kv : pi) { + keyValues.push_back(kv); + } + break; + } + + default: + // ignore data for unknown tags to allow for future extensions + //std::cout << "HeaderBlock: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } + + for (uint32_t cur = 0, prev = 0; cur < keyValues.size(); cur++) { + if (keyValues[cur] == 0) { + tagStart.push_back(prev); + tagEnd.push_back(cur); + prev = cur + 1; + } + } + + while(tagStart.size() < ids.size()) { + tagStart.push_back(0); + tagEnd.push_back(0); + } +} + +PbfReader::PrimitiveGroup::PrimitiveGroup( + protozero::data_view data, + DenseNodes& denseNodes, + Way& way, + Relation& relation +): + data(data), + denseNodes(denseNodes), + internalWays({this, way}), + internalRelations({this, relation}), + denseNodesInitialized(false) { +} + +int32_t PbfReader::PrimitiveGroup::translateNodeKeyValue(int32_t i) const { + return denseNodes.keyValues.at(i); +} + +protozero::data_view PbfReader::PrimitiveGroup::getDataView() { + return data; +} + +void PbfReader::PrimitiveGroup::ensureData() { + // Reset our thread locals. 
+ denseNodes.clear(); + internalWays.pg = this; + internalRelations.pg = this; + + protozero::pbf_message message{data}; + if (message.next()) { + switch (message.tag()) { + case Schema::PrimitiveGroup::repeated_Node_nodes: + throw std::runtime_error("PrimitiveGroup: non-dense Nodes are not supported"); + break; + case Schema::PrimitiveGroup::optional_DenseNodes_dense: + internalType = PrimitiveGroupType::DenseNodes; + denseNodes.readDenseNodes(message.get_view()); + break; + case Schema::PrimitiveGroup::repeated_Way_ways: + internalType = PrimitiveGroupType::Way; + break; + case Schema::PrimitiveGroup::repeated_Relation_relations: + internalType = PrimitiveGroupType::Relation; + break; + case Schema::PrimitiveGroup::repeated_ChangeSet_changesets: + internalType = PrimitiveGroupType::ChangeSet; + break; + default: + throw std::runtime_error("PrimitiveGroup: unknown tag: " + std::to_string(static_cast(message.tag()))); + } + } +} + +PbfReader::DenseNodes& PbfReader::PrimitiveGroup::nodes() const { return denseNodes; }; +PbfReader::PrimitiveBlock::PrimitiveGroups& PbfReader::PrimitiveBlock::groups() { return groupsImpl; }; + +void PbfReader::DenseNodes::clear() { + ids.clear(); + lons.clear(); + lats.clear(); + tagStart.clear(); + tagEnd.clear(); + keyValues.clear(); +} + +bool PbfReader::DenseNodes::Iterator::operator!=(Iterator& other) const { + return offset != other.offset; +} + +void PbfReader::DenseNodes::Iterator::operator++() { + offset++; + + if (offset < nodes.ids.size()) { + node.id = nodes.ids[offset]; + node.lon = nodes.lons[offset]; + node.lat = nodes.lats[offset]; + node.tagStart = nodes.tagStart[offset]; + node.tagEnd = nodes.tagEnd[offset]; + } +} + +PbfReader::DenseNodes::Node& PbfReader::DenseNodes::Iterator::operator*() { + return node; +} + +bool PbfReader::DenseNodes::empty() { + return ids.empty(); +} + +PbfReader::DenseNodes::Iterator PbfReader::DenseNodes::begin() { + auto it = Iterator {-1, Node{}, *this}; + ++it; + return it; +} + 
+PbfReader::DenseNodes::Iterator PbfReader::DenseNodes::end() { + return Iterator {static_cast(ids.size()), Node{}, *this}; +} + +bool PbfReader::PrimitiveBlock::PrimitiveGroups::Iterator::operator!=(Iterator& other) const { + return offset != other.offset; +} +void PbfReader::PrimitiveBlock::PrimitiveGroups::Iterator::operator++() { + offset++; + + if (offset < groups->size()) { + (*groups)[offset].ensureData(); + } +} +PbfReader::PrimitiveGroup& PbfReader::PrimitiveBlock::PrimitiveGroups::Iterator::operator*() { + return (*groups)[offset]; +} +PbfReader::PrimitiveBlock::PrimitiveGroups::Iterator PbfReader::PrimitiveBlock::PrimitiveGroups::begin() { + auto it = PrimitiveBlock::PrimitiveGroups::Iterator {-1, *groups }; + ++it; + return it; +} +PbfReader::PrimitiveBlock::PrimitiveGroups::Iterator PbfReader::PrimitiveBlock::PrimitiveGroups::end() { + return PrimitiveBlock::PrimitiveGroups::Iterator {static_cast(groups->size()), *groups }; +} + +PbfReader::PrimitiveGroupType PbfReader::PrimitiveGroup::type() const { + return internalType; +} + +void PbfReader::Ways::Iterator::readWay(protozero::data_view data) { + protozero::pbf_message message{data}; + + way.id = 0; + way.keys.clear(); + way.vals.clear(); + way.refs.clear(); + way.lats.clear(); + way.lons.clear(); + + uint64_t ref = 0; + uint32_t lat = 0, lon = 0; + + while (message.next()) { + switch (message.tag()) { + case Schema::Way::required_int64_id: + way.id = message.get_int64(); + break; + case Schema::Way::repeated_uint32_keys: { + auto pi = message.get_packed_uint32(); + for (auto i : pi) { + way.keys.push_back(i); + } + break; + } + case Schema::Way::repeated_uint32_vals: { + auto pi = message.get_packed_uint32(); + for (auto i : pi) { + way.vals.push_back(i); + } + break; + } + case Schema::Way::repeated_sint64_refs: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + ref += i; + way.refs.push_back(ref); + } + break; + } + case Schema::Way::repeated_sint64_lats: { + auto pi = 
message.get_packed_sint64(); + for (auto i : pi) { + lat += i; + way.lats.push_back(lat); + } + break; + } + case Schema::Way::repeated_sint64_lons: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + lon += i; + way.lons.push_back(lon); + } + break; + } + + default: + // ignore data for unknown tags to allow for future extensions + //std::cout << "Way: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } +} + +PbfReader::Ways& PbfReader::PrimitiveGroup::ways() const { + return internalWays; +} +bool PbfReader::Ways::Iterator::operator!=(Ways::Iterator& other) const { + return offset != other.offset; +} +void PbfReader::Ways::Iterator::operator++() { + if (message.next()) { + readWay(message.get_view()); + offset++; + } else { + offset = -1; + } +} +PbfReader::Way& PbfReader::Ways::Iterator::operator*() { + return way; +} +bool PbfReader::Ways::empty() { + return pg->type() != PrimitiveGroupType::Way; +} +PbfReader::Ways::Iterator PbfReader::Ways::begin() { + if (pg->type() != PrimitiveGroupType::Way) + return Ways::Iterator{protozero::pbf_message{nullptr, 0ul}, -1, way}; + + protozero::pbf_message message{pg->getDataView()}; + if (message.next()) { + protozero::pbf_message message{pg->getDataView()}; + auto it = Ways::Iterator{message, -1, way}; + ++it; + return it; + } + + return Ways::Iterator{message, -1, way}; +} +PbfReader::Ways::Iterator PbfReader::Ways::end() { + return Ways::Iterator{protozero::pbf_message{nullptr, 0ul}, -1, way}; +} + +void PbfReader::Relations::Iterator::readRelation(protozero::data_view data) { + protozero::pbf_message message{data}; + + relation.id = 0; + relation.keys.clear(); + relation.vals.clear(); + relation.memids.clear(); + relation.roles_sid.clear(); + relation.types.clear(); + + uint64_t memid = 0; + + while (message.next()) { + switch (message.tag()) { + case Schema::Relation::required_int64_id: + relation.id = message.get_int64(); + break; + case 
Schema::Relation::repeated_uint32_keys: { + auto pi = message.get_packed_uint32(); + for (auto i : pi) { + relation.keys.push_back(i); + } + break; + } + case Schema::Relation::repeated_uint32_vals: { + auto pi = message.get_packed_uint32(); + for (auto i : pi) { + relation.vals.push_back(i); + } + break; + } + case Schema::Relation::repeated_int32_roles_sid: { + auto pi = message.get_packed_int32(); + for (auto i : pi) { + relation.roles_sid.push_back(i); + } + break; + } + case Schema::Relation::repeated_sint64_memids: { + auto pi = message.get_packed_sint64(); + for (auto i : pi) { + memid += i; + relation.memids.push_back(memid); + } + break; + } + case Schema::Relation::repeated_MemberType_types: { + auto pi = message.get_packed_int32(); + for (auto i : pi) { + relation.types.push_back(i); + } + break; + } + + default: + // ignore data for unknown tags to allow for future extensions + //std::cout << "Way: unknown tag: " << std::to_string(static_cast(message.tag())) << std::endl; + message.skip(); + } + } +} + +PbfReader::Relations& PbfReader::PrimitiveGroup::relations() const { + return internalRelations; +} +bool PbfReader::Relations::Iterator::operator!=(Relations::Iterator& other) const { + return offset != other.offset; +} +void PbfReader::Relations::Iterator::operator++() { + if (message.next()) { + readRelation(message.get_view()); + offset++; + } else { + offset = -1; + } +} +PbfReader::Relation& PbfReader::Relations::Iterator::operator*() { + return relation; +} +bool PbfReader::Relations::empty() { + return pg->type() != PrimitiveGroupType::Relation; +} +PbfReader::Relations::Iterator PbfReader::Relations::begin() { + if (pg->type() != PrimitiveGroupType::Relation) + return Relations::Iterator{protozero::pbf_message{nullptr, 0ul}, -1, relation}; + + protozero::pbf_message message{pg->getDataView()}; + if (message.next()) { + protozero::pbf_message message{pg->getDataView()}; + auto it = Relations::Iterator{message, -1, relation}; + ++it; + return it; 
+ } + + return Relations::Iterator{message, -1, relation}; +} +PbfReader::Relations::Iterator PbfReader::Relations::end() { + return Relations::Iterator{protozero::pbf_message{nullptr, 0ul}, -1, relation}; +} + +PbfReader::HeaderBlock PbfReader::PbfReader::readHeaderFromFile(std::istream& input) { + BlobHeader bh = readBlobHeader(input); + protozero::data_view blob = readBlob(bh.datasize, input); + HeaderBlock header = readHeaderBlock(blob); + + return header; +} + diff --git a/src/pooled_string.cpp b/src/pooled_string.cpp new file mode 100644 index 00000000..500408d4 --- /dev/null +++ b/src/pooled_string.cpp @@ -0,0 +1,170 @@ +#include "pooled_string.h" +#include +#include + +namespace PooledStringNS { + std::vector tables; + std::mutex mutex; + + const uint8_t ShortString = 0b00; + const uint8_t HeapString = 0b10; + const uint8_t StdString = 0b11; + + // Each thread has its own string table, we only take a lock + // to push a new table onto the vector. + thread_local int64_t tableIndex = -1; + thread_local int64_t spaceLeft = -1; +} + +PooledString::PooledString(const std::string& str) { + if (str.size() >= 65536) + throw std::runtime_error("cannot store string longer than 64K"); + + if (str.size() <= 15) { + storage[0] = str.size(); + memcpy(storage + 1, str.data(), str.size()); + memset(storage + 1 + str.size(), 0, 16 - 1 - str.size()); + } else { + memset(storage + 8, 0, 8); + storage[0] = 1 << 7; + + if (spaceLeft < 0 || spaceLeft < str.size()) { + std::lock_guard lock(mutex); + spaceLeft = 65536; + char* buffer = (char*)malloc(spaceLeft); + if (buffer == 0) + throw std::runtime_error("PooledString could not malloc"); + tables.push_back(buffer); + tableIndex = tables.size() - 1; + } + + storage[1] = tableIndex >> 16; + storage[2] = tableIndex >> 8; + storage[3] = tableIndex; + + uint16_t offset = 65536 - spaceLeft; + storage[4] = offset >> 8; + storage[5] = offset; + + uint16_t length = str.size(); + storage[6] = length >> 8; + storage[7] = length; + + 
memcpy(tables[tableIndex] + offset, str.data(), str.size()); + + spaceLeft -= str.size(); + } +} + +PooledString::PooledString(const std::string* str) { + storage[0] = StdString << 6; + + *(const std::string**)((void*)(storage + 8)) = str; +} + +bool PooledStringNS::PooledString::operator==(const PooledString& other) const { + // NOTE: We have surprising equality semantics! + // + // If one of the strings is a StdString, it's value equality. + // + // Else, for short strings, you are equal if the strings are equal. + // + // For large strings, you are equal if you use the same heap memory locations. + // This implies that someone outside of PooledString is managing pooling! In our + // case, it is the responsibility of AttributePairStore. + uint8_t kind = storage[0] >> 6; + uint8_t otherKind = other.storage[0] >> 6; + + if (kind == StdString || otherKind == StdString) { + size_t mySize = size(); + if (mySize != other.size()) + return false; + + return memcmp(data(), other.data(), mySize) == 0; + } + + return memcmp(storage, other.storage, 16) == 0; +} + +bool PooledStringNS::PooledString::operator!=(const PooledString& other) const { + return !(*this == other); +} + +const char* PooledStringNS::PooledString::data() const { + uint8_t kind = storage[0] >> 6; + + if (kind == ShortString) + return (char *)(storage + 1); + + if (kind == StdString) { + const std::string* str = *(const std::string**)((void*)(storage + 8)); + return str->data(); + } + + uint32_t tableIndex = (storage[1] << 16) + (storage[2] << 8) + storage[3]; + uint16_t offset = (storage[4] << 8) + storage[5]; + + const char* data = tables[tableIndex] + offset; + return data; +} + +size_t PooledStringNS::PooledString::size() const { + uint8_t kind = storage[0] >> 6; + // If the uppermost bit is set, we're in heap. 
+ if (kind == HeapString) { + uint16_t length = (storage[6] << 8) + storage[7]; + return length; + } + + if (kind == ShortString) + // Otherwise it's stored in the lower 7 bits of the highest byte. + return storage[0] & 0b01111111; + + const std::string* str = *(const std::string**)((void*)(storage + 8)); + return str->size(); +} + +std::string PooledStringNS::PooledString::toString() const { + std::string rv; + uint8_t kind = storage[0] >> 6; + if (kind == HeapString) { + // heap + rv.reserve(size()); + + uint32_t tableIndex = (storage[1] << 16) + (storage[2] << 8) + storage[3]; + uint16_t offset = (storage[4] << 8) + storage[5]; + + char* data = tables[tableIndex] + offset; + rv.append(data, size()); + return rv; + } + + if (kind == ShortString) { + for (int i = 0; i < storage[0]; i++) + rv += storage[i + 1]; + return rv; + } + + const std::string* str = *(const std::string**)((void*)(storage + 8)); + return *str; +} + +void PooledStringNS::PooledString::ensureStringIsOwned() { + uint8_t kind = storage[0] >> 6; + + if (kind != StdString) + return; + + *this = PooledString(toString()); +} + +bool PooledStringNS::PooledString::operator<(const PooledString& other) const { + size_t mySize = size(); + size_t otherSize = other.size(); + + if (mySize != otherSize) + return mySize < otherSize; + + return memcmp(data(), other.data(), mySize) < 0; +} + diff --git a/src/read_pbf.cpp b/src/read_pbf.cpp deleted file mode 100644 index 605618fa..00000000 --- a/src/read_pbf.cpp +++ /dev/null @@ -1,592 +0,0 @@ -#include -#include "read_pbf.h" -#include "pbf_blocks.h" - -#include -#include -#include -#include - -#include "node_store.h" -#include "way_store.h" -#include "osm_lua_processing.h" -#include "mmap_allocator.h" - -using namespace std; - -const std::string OptionSortTypeThenID = "Sort.Type_then_ID"; -const std::string OptionLocationsOnWays = "LocationsOnWays"; -std::atomic blocksProcessed(0), blocksToProcess(0); - -PbfReader::PbfReader(OSMStore &osmStore) - : 
osmStore(osmStore) -{ } - -bool PbfReader::ReadNodes(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, const unordered_set &nodeKeyPositions) -{ - // ---- Read nodes - - if (pg.has_dense()) { - int64_t nodeId = 0; - int lon = 0; - int lat = 0; - int kvPos = 0; - DenseNodes dense = pg.dense(); - - std::vector nodes; - for (int j=0; j0) { - while (dense.keys_vals(kvPos)>0) { - if (nodeKeyPositions.find(dense.keys_vals(kvPos)) != nodeKeyPositions.end()) { - significant = true; - } - kvPos+=2; - } - kvPos++; - } - - nodes.push_back(std::make_pair(static_cast(nodeId), node)); - - if (significant) { - // For tagged nodes, call Lua, then save the OutputObject - boost::container::flat_map tags; - tags.reserve(kvPos / 2); - - for (uint n=kvStart; n(nodeId), node, tags); - } - - } - - osmStore.nodes.insert(nodes); - return true; - } - return false; -} - -bool PbfReader::ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, bool locationsOnWays) { - // ---- Read ways - - if (pg.ways_size() > 0) { - Way pbfWay; - - const bool wayStoreRequiresNodes = osmStore.ways.requiresNodes(); - - std::vector llWays; - std::vector>> nodeWays; - - for (int j=0; j(pbfWay.id()); - if (wayId >= pow(2,42)) throw std::runtime_error("Way ID negative or too large: "+std::to_string(wayId)); - - // Assemble nodelist - LatpLonVec llVec; - std::vector nodeVec; - if (locationsOnWays) { - int lat=0, lon=0; - llVec.reserve(pbfWay.lats_size()); - for (int k=0; k(nodeId))); - nodeVec.push_back(nodeId); - } catch (std::out_of_range &err) { - if (osmStore.integrity_enforced()) throw err; - } - } - } - if (llVec.empty()) continue; - - try { - tag_map_t tags; - readTags(pbfWay, pb, tags); - bool emitted = output.setWay(static_cast(pbfWay.id()), llVec, tags); - - // If we need it for later, store the way's coordinates in the global way store - if (emitted || osmStore.way_is_used(wayId)) { - if (wayStoreRequiresNodes) - nodeWays.push_back(std::make_pair(wayId, 
nodeVec)); - else - llWays.push_back(std::make_pair(wayId, WayStore::latplon_vector_t(llVec.begin(), llVec.end()))); - } - - } catch (std::out_of_range &err) { - // Way is missing a node? - cerr << endl << err.what() << endl; - } - - } - - if (wayStoreRequiresNodes) { - osmStore.ways.insertNodes(nodeWays); - } else { - osmStore.ways.insertLatpLons(llWays); - } - - return true; - } - return false; -} - -bool PbfReader::ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb) { - // Scan relations to see which ways we need to save - if (pg.relations_size()==0) return false; - - int typeKey = findStringPosition(pb, "type"); - int mpKey = findStringPosition(pb, "multipolygon"); - - for (int j=0; j(pbfRelation.id()); - if (!isMultiPolygon) { - if (output.canReadRelations()) { - tag_map_t tags; - readTags(pbfRelation, pb, tags); - isAccepted = output.scanRelation(relid, tags); - } - if (!isAccepted) continue; - } - int64_t lastID = 0; - for (int n=0; n < pbfRelation.memids_size(); n++) { - lastID += pbfRelation.memids(n); - if (pbfRelation.types(n) != Relation_MemberType_WAY) { continue; } - if (lastID >= pow(2,42)) throw std::runtime_error("Way ID in relation "+std::to_string(relid)+" negative or too large: "+std::to_string(lastID)); - osmStore.mark_way_used(static_cast(lastID)); - if (isAccepted) { osmStore.relation_contains_way(relid, lastID); } - } - } - return true; -} - -bool PbfReader::ReadRelations( - OsmLuaProcessing& output, - PrimitiveGroup& pg, - const PrimitiveBlock& pb, - const BlockMetadata& blockMetadata -) { - // ---- Read relations - - if (pg.relations_size() > 0) { - std::vector relations; - - int typeKey = findStringPosition(pb, "type"); - int mpKey = findStringPosition(pb, "multipolygon"); - int boundaryKey = findStringPosition(pb, "boundary"); - int innerKey= findStringPosition(pb, "inner"); - int outerKey= findStringPosition(pb, "outer"); - if (typeKey >-1 && mpKey>-1) { - for (int j=0; j(lastID); - (role == innerKey ? 
innerWayVec : outerWayVec).push_back(wayId); - } - - try { - tag_map_t tags; - readTags(pbfRelation, pb, tags); - output.setRelation(pbfRelation.id(), outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter); - - } catch (std::out_of_range &err) { - // Relation is missing a member? - cerr << endl << err.what() << endl; - } - } - } - - osmStore.relations_insert_front(relations); - return true; - } - return false; -} - -// Returns true when block was completely handled, thus could be omited by another phases. -bool PbfReader::ReadBlock( - std::istream& infile, - OsmLuaProcessing& output, - const BlockMetadata& blockMetadata, - const unordered_set& nodeKeys, - bool locationsOnWays, - ReadPhase phase -) -{ - infile.seekg(blockMetadata.offset); - - PrimitiveBlock pb; - readBlock(&pb, blockMetadata.length, infile); - if (infile.eof()) { - return true; - } - - // Keep count of groups read during this phase. - std::size_t read_groups = 0; - - // Read the string table, and pre-calculate the positions of valid node keys - unordered_set nodeKeyPositions; - for (auto it : nodeKeys) { - nodeKeyPositions.insert(findStringPosition(pb, it.c_str())); - } - - for (int i=0; i test -) { - PrimitiveBlock pb; - - // We may have previously read to EOF, so clear the internal error state - infile.clear(); - infile.seekg(block.offset); - readBlock(&pb, block.length, infile); - if (infile.eof()) { - throw std::runtime_error("blockHasPrimitiveGroupSatisfying got unexpected eof"); - } - - for (int i=0; i const& nodeKeys, - unsigned int threadNum, - const pbfreader_generate_stream& generate_stream, - const pbfreader_generate_output& generate_output -) -{ - auto infile = generate_stream(); - - // ---- Read PBF - osmStore.clear(); - - HeaderBlock block; - readBlock(&block, readHeader(*infile).datasize(), *infile); - bool locationsOnWays = false; - for (std::string option : block.optional_features()) { - if (option == OptionLocationsOnWays) { - std::cout << ".osm.pbf file has locations on 
ways" << std::endl; - locationsOnWays = true; - } - } - - std::map blocks; - - // Track the filesize - note that we can't rely on tellg(), as - // its meant to be an opaque token useful only for seeking. - size_t filesize = 0; - while (true) { - BlobHeader bh = readHeader(*infile); - filesize += bh.datasize(); - if (infile->eof()) { - break; - } - - blocks[blocks.size()] = { (long int)infile->tellg(), bh.datasize(), true, true, true, 0, 1 }; - infile->seekg(bh.datasize(), std::ios_base::cur); - } - - if (hasSortTypeThenID) { - // The PBF's blocks are sorted by type, then ID. We can do a binary search - // to learn where the blocks transition between object types, which - // enables a more efficient partitioning of work for reading. - std::vector indexes; - for (int i = 0; i < blocks.size(); i++) - indexes.push_back(i); - - const auto& waysStart = std::lower_bound( - indexes.begin(), - indexes.end(), - 0, - [&blocks, &infile](const auto &i, const auto &ignored) { - return blockHasPrimitiveGroupSatisfying( - *infile, - blocks[i], - [](const PrimitiveGroup&pg) { return pg.ways_size() > 0 || pg.relations_size() > 0; } - ); - } - ); - - const auto& relationsStart = std::lower_bound( - indexes.begin(), - indexes.end(), - 0, - [&blocks, &infile](const auto &i, const auto &ignored) { - return blockHasPrimitiveGroupSatisfying( - *infile, - blocks[i], - [](const PrimitiveGroup&pg) { return pg.relations_size() > 0; } - ); - } - ); - - for (auto it = indexes.begin(); it != indexes.end(); it++) { - blocks[*it].hasNodes = it <= waysStart; - blocks[*it].hasWays = it >= waysStart && it <= relationsStart; - blocks[*it].hasRelations = it >= relationsStart; - } - } - - - // PBFs generated by Osmium have 8,000 entities per block, - // and each block is about 64KB. - // - // PBFs generated by osmconvert (e.g., BBBike PBFs) have as - // many entities as fit in 31MB. Each block is about 16MB. 
- // - // Osmium PBFs seem to be processed about 3x faster than osmconvert - // PBFs, so try to hint to the user when they could speed up their - // pipeline. - if (filesize / blocks.size() > 1000000) { - std::cout << "warning: PBF has very large blocks, which may slow processing" << std::endl; - std::cout << " to fix: osmium cat -f pbf your-file.osm.pbf -o optimized.osm.pbf" << std::endl; - } - - - std::vector all_phases = { ReadPhase::Nodes, ReadPhase::RelationScan, ReadPhase::Ways, ReadPhase::Relations }; - for(auto phase: all_phases) { - // Launch the pool with threadNum threads - boost::asio::thread_pool pool(threadNum); - std::mutex block_mutex; - - // If we're in ReadPhase::Relations and there aren't many blocks left - // to read, increase parallelism by letting each thread only process - // a portion of the block. - if (phase == ReadPhase::Relations && blocks.size() < threadNum * 2) { - std::cout << "only " << blocks.size() << " relation blocks; subdividing for better parallelism" << std::endl; - std::map moreBlocks; - for (const auto& block : blocks) { - BlockMetadata newBlock = block.second; - newBlock.chunks = threadNum; - for (size_t i = 0; i < threadNum; i++) { - newBlock.chunk = i; - moreBlocks[moreBlocks.size()] = newBlock; - } - } - blocks = moreBlocks; - } - - std::deque> blockRanges; - std::map filteredBlocks; - for (const auto& entry : blocks) { - if ((phase == ReadPhase::Nodes && entry.second.hasNodes) || - (phase == ReadPhase::RelationScan && entry.second.hasRelations) || - (phase == ReadPhase::Ways && entry.second.hasWays) || - (phase == ReadPhase::Relations && entry.second.hasRelations)) - filteredBlocks[entry.first] = entry.second; - } - - blocksToProcess = filteredBlocks.size(); - blocksProcessed = 0; - - // When processing blocks, we try to give each worker large batches - // of contiguous blocks, so that they might benefit from long runs - // of sorted indexes, and locality of nearby IDs. 
- const size_t batchSize = (filteredBlocks.size() / (threadNum * 8)) + 1; - - size_t consumed = 0; - auto it = filteredBlocks.begin(); - while(it != filteredBlocks.end()) { - std::vector blockRange; - blockRange.reserve(batchSize); - size_t max = consumed + batchSize; - for (; consumed < max && it != filteredBlocks.end(); consumed++) { - IndexedBlockMetadata ibm; - memcpy(&ibm, &it->second, sizeof(BlockMetadata)); - ibm.index = it->first; - blockRange.push_back(ibm); - it++; - } - blockRanges.push_back(blockRange); - } - - { - for(const std::vector& blockRange: blockRanges) { - boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys]() { - if (phase == ReadPhase::Nodes) - osmStore.nodes.batchStart(); - if (phase == ReadPhase::Ways) - osmStore.ways.batchStart(); - - for (const IndexedBlockMetadata& indexedBlockMetadata: blockRange) { - auto infile = generate_stream(); - auto output = generate_output(); - - if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, locationsOnWays, phase)) { - const std::lock_guard lock(block_mutex); - blocks.erase(indexedBlockMetadata.index); - blocksProcessed++; - } - } - }); - } - } - - pool.join(); - - if(phase == ReadPhase::Nodes) { - osmStore.nodes.finalize(threadNum); - } - if(phase == ReadPhase::Ways) { - osmStore.ways.finalize(threadNum); - } - } - return 0; -} - -// Find a string in the dictionary -int PbfReader::findStringPosition(PrimitiveBlock const &pb, char const *str) { - for (int i=0; i()> createNodeStore): + createNodeStore(createNodeStore) { + for (int i = 0; i < shards(); i++) + stores.push_back(createNodeStore()); +} + +ShardedNodeStore::~ShardedNodeStore() { +} + +void ShardedNodeStore::reopen() { + for (auto& store : stores) + store->reopen(); +} + +void ShardedNodeStore::finalize(size_t threadNum) { + for (auto& store : stores) + store->finalize(threadNum); +} + +LatpLon ShardedNodeStore::at(NodeID id) const { + for (int i = 0; i < shards(); i++) { + size_t index = (lastNodeShard + i) % 
shards(); + + if (stores[index]->contains(0, id)) { + lastNodeShard = index; + return stores[index]->at(id); + } + } + + // Superfluous return to silence a compiler warning + return stores[shards() - 1]->at(id); +} + +size_t ShardedNodeStore::size() const { + size_t rv = 0; + for (auto& store : stores) + rv += store->size(); + + return rv; +} + +void ShardedNodeStore::batchStart() { + for (auto& store : stores) + store->batchStart(); +} + +size_t pickStore(const LatpLon& el) { + // Assign the element to a shard. This is a pretty naive division + // of the globe, tuned to have max ~10GB of nodes/ways per shard. + + const size_t z5x = lon2tilex(el.lon / 10000000, 5); + const size_t z5y = latp2tiley(el.latp / 10000000, 5); + + const size_t z4x = z5x / 2; + const size_t z4y = z5y / 2; + + const size_t z3x = z4x / 2; + const size_t z3y = z4y / 2; + + if (z3x == 5 && z3y == 2) return 5; // Western Russia + if (z3x == 4 && z3y == 3) return 5; // North Africa + if (z3x == 5 && z3y == 3) return 5; // India + + if ((z5x == 16 && z5y == 10) || (z5x == 16 && z5y == 11)) return 4; // some of Central Europe + if ((z5x == 17 && z5y == 10) || (z5x == 17 && z5y == 11)) return 1; // some more of Central Europe + + if (z3x == 4 && z3y == 2) return 3; // rest of Central Europe + + const size_t z2x = z3x / 2; + const size_t z2y = z3y / 2; + + if (z2x == 3 && z2y == 1) return 3; // Asia, Russia + if (z2x == 1 && z2y == 1) return 2; // North Atlantic Ocean and bordering countries + if (z2x == 0 && z2y == 1) return 1; // North America + +// std::cout << "z2x=" << std::to_string(z2x) << ", z2y=" << std::to_string(z2y) << std::endl; + return 0; // Artic, Antartcica, Oceania, South Africa, South America +} + +void ShardedNodeStore::insert(const std::vector& elements) { + std::vector> perStore(shards()); + + for (const auto& el : elements) { + perStore[pickStore(el.second)].push_back(el); + } + + for (int i = 0; i < shards(); i++) { + if (!perStore[i].empty()) + 
stores[i]->insert(perStore[i]); + } +} + +bool ShardedNodeStore::contains(size_t shard, NodeID id) const { + return stores[shard]->contains(0, id); +} + +size_t ShardedNodeStore::shards() const { + return 6; +} diff --git a/src/sharded_way_store.cpp b/src/sharded_way_store.cpp new file mode 100644 index 00000000..d9741082 --- /dev/null +++ b/src/sharded_way_store.cpp @@ -0,0 +1,81 @@ +#include "sharded_way_store.h" +#include "node_store.h" + +thread_local size_t lastWayShard = 0; + +ShardedWayStore::ShardedWayStore(std::function()> createWayStore, const NodeStore& nodeStore): + createWayStore(createWayStore), + nodeStore(nodeStore) { + for (int i = 0; i < shards(); i++) + stores.push_back(createWayStore()); +} + +ShardedWayStore::~ShardedWayStore() { +} + +void ShardedWayStore::reopen() { + for (auto& store : stores) + store->reopen(); +} + +void ShardedWayStore::batchStart() { + for (auto& store : stores) + store->batchStart(); +} + +std::vector ShardedWayStore::at(WayID wayid) const { + for (int i = 0; i < shards(); i++) { + size_t index = (lastWayShard + i) % shards(); + if (stores[index]->contains(0, wayid)) { + lastWayShard = index; + return stores[index]->at(wayid); + } + } + + // Superfluous return to silence a compiler warning + return stores[shards() - 1]->at(wayid); +} + +bool ShardedWayStore::requiresNodes() const { + return stores[0]->requiresNodes(); +} + +void ShardedWayStore::insertLatpLons(std::vector &newWays) { + throw std::runtime_error("ShardedWayStore::insertLatpLons: don't call this directly"); +} + +void ShardedWayStore::insertNodes(const std::vector>>& newWays) { + throw std::runtime_error("ShardedWayStore::insertNodes: don't call this directly"); +} + +void ShardedWayStore::clear() { + for (auto& store : stores) + store->clear(); +} + +std::size_t ShardedWayStore::size() const { + size_t rv = 0; + for (auto& store : stores) + rv += store->size(); + return rv; +} + +void ShardedWayStore::finalize(unsigned int threadNum) { + for (auto& store 
: stores) + store->finalize(threadNum); +} + +bool ShardedWayStore::contains(size_t shard, WayID id) const { + return stores[shard]->contains(0, id); +} + +WayStore& ShardedWayStore::shard(size_t shard) { + return *stores[shard].get(); +} + +const WayStore& ShardedWayStore::shard(size_t shard) const { + return *stores[shard].get(); +} + +size_t ShardedWayStore::shards() const { return nodeStore.shards(); } + diff --git a/src/shared_data.cpp b/src/shared_data.cpp index 78cfe11d..da9787d8 100644 --- a/src/shared_data.cpp +++ b/src/shared_data.cpp @@ -10,7 +10,7 @@ using namespace rapidjson; SharedData::SharedData(Config &configIn, const class LayerDefinition &layers) : layers(layers), config(configIn) { - outputMode=OUTPUT_FILE; + outputMode=OptionsParser::OutputMode::File; mergeSqlite=false; } diff --git a/src/sorted_node_store.cpp b/src/sorted_node_store.cpp index 76aa81b8..82dccb55 100644 --- a/src/sorted_node_store.cpp +++ b/src/sorted_node_store.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include "sorted_node_store.h" @@ -16,40 +15,51 @@ namespace SortedNodeStoreTypes { const uint16_t ChunkAlignment = 16; const uint32_t ChunkCompressed = 1 << 31; - std::atomic totalGroups; - std::atomic totalNodes; - std::atomic totalGroupSpace; - std::atomic totalAllocatedSpace; - std::atomic totalChunks; - std::atomic chunkSizeFreqs[257]; - std::atomic groupSizeFreqs[257]; - - - // When SortedNodeStore first starts, it's not confident that it has seen an - // entire segment, so it's in "collecting orphans" mode. Once it crosses a - // threshold of 64K elements, it ceases to be in this mode. - // - // Orphans are rounded up across multiple threads, and dealt with in - // the finalize step. 
- thread_local bool collectingOrphans = true; - thread_local uint64_t groupStart = -1; - thread_local std::vector* localNodes = nullptr; - - thread_local int64_t cachedChunk = -1; - thread_local std::vector cacheChunkLons; - thread_local std::vector cacheChunkLatps; - - thread_local uint32_t arenaSpace = 0; - thread_local char* arenaPtr = nullptr; + struct ThreadStorage { + ThreadStorage(): + collectingOrphans(true), + groupStart(-1), + localNodes(nullptr), + cachedChunk(-1), + arenaSpace(0), + arenaPtr(nullptr) {} + // When SortedNodeStore first starts, it's not confident that it has seen an + // entire segment, so it's in "collecting orphans" mode. Once it crosses a + // threshold of 64K elements, it ceases to be in this mode. + // + // Orphans are rounded up across multiple threads, and dealt with in + // the finalize step. + bool collectingOrphans = true; + uint64_t groupStart = -1; + std::vector* localNodes = nullptr; + + int64_t cachedChunk = -1; + std::vector cacheChunkLons; + std::vector cacheChunkLatps; + + uint32_t arenaSpace = 0; + char* arenaPtr = nullptr; + }; + + thread_local std::deque> threadStorage; + + ThreadStorage& s(const SortedNodeStore* who) { + for (auto& entry : threadStorage) + if (entry.first == who) + return entry.second; + + threadStorage.push_back(std::make_pair(who, ThreadStorage())); + + auto& rv = threadStorage.back(); + return rv.second; + } } using namespace SortedNodeStoreTypes; SortedNodeStore::SortedNodeStore(bool compressNodes): compressNodes(compressNodes) { - // Each group can store 64K nodes. If we allocate 256K slots - // for groups, we support 2^34 = 17B nodes, or about twice - // the number used by OSM as of November 2023. 
- groups.resize(256 * 1024); + s(this); // allocate our ThreadStorage before multi-threading + reopen(); } void SortedNodeStore::reopen() @@ -61,11 +71,16 @@ void SortedNodeStore::reopen() totalNodes = 0; totalGroups = 0; totalGroupSpace = 0; + totalAllocatedSpace = 0; totalChunks = 0; memset(chunkSizeFreqs, 0, sizeof(chunkSizeFreqs)); memset(groupSizeFreqs, 0, sizeof(groupSizeFreqs)); orphanage.clear(); workerBuffers.clear(); + + // Each group can store 64K nodes. If we allocate 256K slots + // for groups, we support 2^34 = 17B nodes, or about twice + // the number used by OSM as of November 2023. groups.clear(); groups.resize(256 * 1024); } @@ -73,6 +88,48 @@ void SortedNodeStore::reopen() SortedNodeStore::~SortedNodeStore() { for (const auto entry: allocatedMemory) void_mmap_allocator::deallocate(entry.first, entry.second); + + s(this) = ThreadStorage(); +} + +bool SortedNodeStore::contains(size_t shard, NodeID id) const { + const size_t groupIndex = id / (GroupSize * ChunkSize); + const size_t chunk = (id % (GroupSize * ChunkSize)) / ChunkSize; + const uint64_t chunkMaskByte = chunk / 8; + const uint64_t chunkMaskBit = chunk % 8; + + const uint64_t nodeMaskByte = (id % ChunkSize) / 8; + const uint64_t nodeMaskBit = id % 8; + + GroupInfo* groupPtr = groups[groupIndex]; + + if (groupPtr == nullptr) + return false; + + size_t chunkOffset = 0; + { + chunkOffset = popcnt(groupPtr->chunkMask, chunkMaskByte); + uint8_t maskByte = groupPtr->chunkMask[chunkMaskByte]; + maskByte = maskByte & ((1 << chunkMaskBit) - 1); + chunkOffset += popcnt(&maskByte, 1); + + if (!(groupPtr->chunkMask[chunkMaskByte] & (1 << chunkMaskBit))) + return false; + } + + uint16_t scaledOffset = groupPtr->chunkOffsets[chunkOffset]; + ChunkInfoBase* basePtr = (ChunkInfoBase*)(((char *)(groupPtr->chunkOffsets + popcnt(groupPtr->chunkMask, 32))) + (scaledOffset * ChunkAlignment)); + + size_t nodeOffset = 0; + nodeOffset = popcnt(basePtr->nodeMask, nodeMaskByte); + uint8_t maskByte = 
basePtr->nodeMask[nodeMaskByte]; + maskByte = maskByte & ((1 << nodeMaskBit) - 1); + nodeOffset += popcnt(&maskByte, 1); + if (!(basePtr->nodeMask[nodeMaskByte] & (1 << nodeMaskBit))) + return false; + + + return true; } LatpLon SortedNodeStore::at(const NodeID id) const { @@ -109,29 +166,30 @@ LatpLon SortedNodeStore::at(const NodeID id) const { size_t latpSize = (ptr->flags >> 10) & ((1 << 10) - 1); // TODO: we don't actually need the lonSize to decompress the data. // May as well store it as a sanity check for now. - size_t lonSize = ptr->flags & ((1 << 10) - 1); + // size_t lonSize = ptr->flags & ((1 << 10) - 1); size_t n = popcnt(ptr->nodeMask, 32) - 1; const size_t neededChunk = groupIndex * ChunkSize + chunk; // Really naive caching strategy - just cache the last-used chunk. // Probably good enough? - if (cachedChunk != neededChunk) { - cachedChunk = neededChunk; - cacheChunkLons.reserve(256); - cacheChunkLatps.reserve(256); + ThreadStorage& tls = s(this); + if (tls.cachedChunk != neededChunk) { + tls.cachedChunk = neededChunk; + tls.cacheChunkLons.reserve(256); + tls.cacheChunkLatps.reserve(256); uint8_t* latpData = ptr->data; uint8_t* lonData = ptr->data + latpSize; uint32_t recovdata[256] = {0}; streamvbyte_decode(latpData, recovdata, n); - cacheChunkLatps[0] = ptr->firstLatp; - zigzag_delta_decode(recovdata, &cacheChunkLatps[1], n, cacheChunkLatps[0]); + tls.cacheChunkLatps[0] = ptr->firstLatp; + zigzag_delta_decode(recovdata, &tls.cacheChunkLatps[1], n, tls.cacheChunkLatps[0]); streamvbyte_decode(lonData, recovdata, n); - cacheChunkLons[0] = ptr->firstLon; - zigzag_delta_decode(recovdata, &cacheChunkLons[1], n, cacheChunkLons[0]); + tls.cacheChunkLons[0] = ptr->firstLon; + zigzag_delta_decode(recovdata, &tls.cacheChunkLons[1], n, tls.cacheChunkLons[0]); } size_t nodeOffset = 0; @@ -142,7 +200,7 @@ LatpLon SortedNodeStore::at(const NodeID id) const { if (!(ptr->nodeMask[nodeMaskByte] & (1 << nodeMaskBit))) throw std::out_of_range("SortedNodeStore: node " 
+ std::to_string(id) + " missing, no node"); - return { cacheChunkLatps[nodeOffset], cacheChunkLons[nodeOffset] }; + return { tls.cacheChunkLatps[nodeOffset], tls.cacheChunkLons[nodeOffset] }; } UncompressedChunkInfo* ptr = (UncompressedChunkInfo*)basePtr; @@ -184,58 +242,60 @@ size_t SortedNodeStore::size() const { } void SortedNodeStore::insert(const std::vector& elements) { - if (localNodes == nullptr) { + ThreadStorage& tls = s(this); + if (tls.localNodes == nullptr) { std::lock_guard lock(orphanageMutex); if (workerBuffers.size() == 0) workerBuffers.reserve(256); else if (workerBuffers.size() == workerBuffers.capacity()) throw std::runtime_error("SortedNodeStore doesn't support more than 256 cores"); workerBuffers.push_back(std::vector()); - localNodes = &workerBuffers.back(); + tls.localNodes = &workerBuffers.back(); } - if (groupStart == -1) { + if (tls.groupStart == -1) { // Mark where the first full group starts, so we know when to transition // out of collecting orphans. - groupStart = elements[0].first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + tls.groupStart = elements[0].first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); } int i = 0; - while (collectingOrphans && i < elements.size()) { + while (tls.collectingOrphans && i < elements.size()) { const element_t& el = elements[i]; - if (el.first >= groupStart + (GroupSize * ChunkSize)) { - collectingOrphans = false; + if (el.first >= tls.groupStart + (GroupSize * ChunkSize)) { + tls.collectingOrphans = false; // Calculate new groupStart, rounding to previous boundary. 
- groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); - collectOrphans(*localNodes); - localNodes->clear(); + tls.groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + collectOrphans(*tls.localNodes); + tls.localNodes->clear(); } - localNodes->push_back(el); + tls.localNodes->push_back(el); i++; } while(i < elements.size()) { const element_t& el = elements[i]; - if (el.first >= groupStart + (GroupSize * ChunkSize)) { - publishGroup(*localNodes); - localNodes->clear(); - groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + if (el.first >= tls.groupStart + (GroupSize * ChunkSize)) { + publishGroup(*tls.localNodes); + tls.localNodes->clear(); + tls.groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); } - localNodes->push_back(el); + tls.localNodes->push_back(el); i++; } } void SortedNodeStore::batchStart() { - collectingOrphans = true; - groupStart = -1; - if (localNodes == nullptr || localNodes->size() == 0) + ThreadStorage& tls = s(this); + tls.collectingOrphans = true; + tls.groupStart = -1; + if (tls.localNodes == nullptr || tls.localNodes->size() == 0) return; - collectOrphans(*localNodes); - localNodes->clear(); + collectOrphans(*tls.localNodes); + tls.localNodes->clear(); } void SortedNodeStore::finalize(size_t threadNum) { @@ -264,7 +324,7 @@ void SortedNodeStore::finalize(size_t threadNum) { orphanage.clear(); - std::cout << "SortedNodeStore: " << totalGroups << " groups, " << totalChunks << " chunks, " << totalNodes.load() << " nodes, " << totalGroupSpace.load() << " bytes (" << (1000ull * (totalAllocatedSpace.load() - totalGroupSpace.load()) / totalAllocatedSpace.load()) / 10.0 << "% wasted)" << std::endl; + std::cout << "SortedNodeStore: " << totalGroups << " groups, " << totalChunks << " chunks, " << totalNodes.load() << " nodes, " << totalGroupSpace.load() << " bytes (" << (1000ull * (totalAllocatedSpace.load() - totalGroupSpace.load()) / 
(totalAllocatedSpace.load() + 1)) / 10.0 << "% wasted)" << std::endl; /* for (int i = 0; i < 257; i++) std::cout << "chunkSizeFreqs[ " << i << " ]= " << chunkSizeFreqs[i].load() << std::endl; @@ -410,22 +470,23 @@ void SortedNodeStore::publishGroup(const std::vector& nodes) { GroupInfo* groupInfo = nullptr; - if (arenaSpace < groupSpace) { + ThreadStorage& tls = s(this); + if (tls.arenaSpace < groupSpace) { // A full group takes ~330KB. Nodes are read _fast_, and there ends // up being contention calling the allocator when reading the // planet on a machine with 48 cores -- so allocate in large chunks. - arenaSpace = 4 * 1024 * 1024; - totalAllocatedSpace += arenaSpace; - arenaPtr = (char*)void_mmap_allocator::allocate(arenaSpace); - if (arenaPtr == nullptr) + tls.arenaSpace = 4 * 1024 * 1024; + totalAllocatedSpace += tls.arenaSpace; + tls.arenaPtr = (char*)void_mmap_allocator::allocate(tls.arenaSpace); + if (tls.arenaPtr == nullptr) throw std::runtime_error("SortedNodeStore: failed to allocate arena"); std::lock_guard lock(orphanageMutex); - allocatedMemory.push_back(std::make_pair((void*)arenaPtr, arenaSpace)); + allocatedMemory.push_back(std::make_pair((void*)tls.arenaPtr, tls.arenaSpace)); } - arenaSpace -= groupSpace; - groupInfo = (GroupInfo*)arenaPtr; - arenaPtr += groupSpace; + tls.arenaSpace -= groupSpace; + groupInfo = (GroupInfo*)tls.arenaPtr; + tls.arenaPtr += groupSpace; if (groups[groupIndex] != nullptr) throw std::runtime_error("SortedNodeStore: group already present"); diff --git a/src/sorted_way_store.cpp b/src/sorted_way_store.cpp index 8fdaa806..302deab9 100644 --- a/src/sorted_way_store.cpp +++ b/src/sorted_way_store.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -19,40 +18,56 @@ namespace SortedWayStoreTypes { const uint16_t ClosedWay = 1 << 14; const uint16_t UniformUpperBits = 1 << 13; - thread_local bool collectingOrphans = true; - thread_local uint64_t groupStart = -1; - thread_local std::vector>>* localWays = NULL; + struct 
ThreadStorage { + ThreadStorage(): + collectingOrphans(true), + groupStart(-1), + localWays(nullptr) {} - thread_local std::vector encodedWay; + bool collectingOrphans; + uint64_t groupStart; + std::vector>>* localWays; + std::vector encodedWay; + }; + + thread_local std::deque> threadStorage; + + inline ThreadStorage& s(const SortedWayStore* who) { + for (auto& entry : threadStorage) + if (entry.first == who) + return entry.second; + + threadStorage.push_back(std::make_pair(who, ThreadStorage())); + + auto& rv = threadStorage.back(); + return rv.second; + } // C++ doesn't support variable length arrays declared on stack. // g++ and clang support it, but msvc doesn't. Rather than pay the // cost of a vector for every decode, we use a thread_local with room for at // least 2,000 nodes. + // + // Note: these are scratch buffers, so they remain as true thread-locals, + // and aren't part of ThreadStorage. thread_local uint64_t highBytes[2000]; thread_local uint32_t uint32Buffer[2000]; thread_local int32_t int32Buffer[2000]; thread_local uint8_t uint8Buffer[8192]; - - std::atomic totalWays; - std::atomic totalNodes; - std::atomic totalGroups; - std::atomic totalGroupSpace; - std::atomic totalChunks; } using namespace SortedWayStoreTypes; SortedWayStore::SortedWayStore(bool compressWays, const NodeStore& nodeStore): compressWays(compressWays), nodeStore(nodeStore) { - // Each group can store 64K ways. If we allocate 32K slots, - // we support 2^31 = 2B ways, or about twice the number used - // by OSM as of December 2023. - groups.resize(32 * 1024); + s(this); // allocate our ThreadStorage before multi-threading + reopen(); } SortedWayStore::~SortedWayStore() { for (const auto entry: allocatedMemory) void_mmap_allocator::deallocate(entry.first, entry.second); + + s(this) = ThreadStorage(); } void SortedWayStore::reopen() { @@ -67,11 +82,64 @@ void SortedWayStore::reopen() { totalChunks = 0; orphanage.clear(); workerBuffers.clear(); + + // Each group can store 64K ways. 
If we allocate 32K slots, + // we support 2^31 = 2B ways, or about twice the number used + // by OSM as of December 2023. groups.clear(); - groups.resize(256 * 1024); + groups.resize(32 * 1024); } +bool SortedWayStore::contains(size_t shard, WayID id) const { + const size_t groupIndex = id / (GroupSize * ChunkSize); + const size_t chunk = (id % (GroupSize * ChunkSize)) / ChunkSize; + const uint64_t chunkMaskByte = chunk / 8; + const uint64_t chunkMaskBit = chunk % 8; + + const uint64_t wayMaskByte = (id % ChunkSize) / 8; + const uint64_t wayMaskBit = id % 8; + + GroupInfo* groupPtr = groups[groupIndex]; + + if (groupPtr == nullptr) + return false; + + size_t chunkOffset = 0; + { + chunkOffset = popcnt(groupPtr->chunkMask, chunkMaskByte); + uint8_t maskByte = groupPtr->chunkMask[chunkMaskByte]; + maskByte = maskByte & ((1 << chunkMaskBit) - 1); + chunkOffset += popcnt(&maskByte, 1); + + if (!(groupPtr->chunkMask[chunkMaskByte] & (1 << chunkMaskBit))) + return false; + } + + ChunkInfo* chunkPtr = (ChunkInfo*)((char*)groupPtr + groupPtr->chunkOffsets[chunkOffset]); + + { + size_t wayOffset = 0; + wayOffset = popcnt(chunkPtr->smallWayMask, wayMaskByte); + uint8_t maskByte = chunkPtr->smallWayMask[wayMaskByte]; + maskByte = maskByte & ((1 << wayMaskBit) - 1); + wayOffset += popcnt(&maskByte, 1); + if (chunkPtr->smallWayMask[wayMaskByte] & (1 << wayMaskBit)) + return true; + } + + size_t wayOffset = 0; + wayOffset += popcnt(chunkPtr->smallWayMask, 32); + wayOffset += popcnt(chunkPtr->bigWayMask, wayMaskByte); + uint8_t maskByte = chunkPtr->bigWayMask[wayMaskByte]; + maskByte = maskByte & ((1 << wayMaskBit) - 1); + wayOffset += popcnt(&maskByte, 1); + if (!(chunkPtr->bigWayMask[wayMaskByte] & (1 << wayMaskBit))) + return false; + + return true; +} + std::vector SortedWayStore::at(WayID id) const { const size_t groupIndex = id / (GroupSize * ChunkSize); const size_t chunk = (id % (GroupSize * ChunkSize)) / ChunkSize; @@ -140,52 +208,53 @@ void 
SortedWayStore::insertLatpLons(std::vector &newWays throw std::runtime_error("SortedWayStore does not support insertLatpLons"); } -const void SortedWayStore::insertNodes(const std::vector>>& newWays) { - // read_pbf can call with an empty array if the only ways it read were unable to +void SortedWayStore::insertNodes(const std::vector>>& newWays) { + // pbf_processor can call with an empty array if the only ways it read were unable to // be processed due to missing nodes, so be robust against empty way vector. if (newWays.empty()) return; - if (localWays == nullptr) { + ThreadStorage& tls = s(this); + if (tls.localWays == nullptr) { std::lock_guard lock(orphanageMutex); if (workerBuffers.size() == 0) workerBuffers.reserve(256); else if (workerBuffers.size() == workerBuffers.capacity()) throw std::runtime_error("SortedWayStore doesn't support more than 256 cores"); workerBuffers.push_back(std::vector>>()); - localWays = &workerBuffers.back(); + tls.localWays = &workerBuffers.back(); } - if (groupStart == -1) { + if (tls.groupStart == -1) { // Mark where the first full group starts, so we know when to transition // out of collecting orphans. - groupStart = newWays[0].first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + tls.groupStart = newWays[0].first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); } int i = 0; - while (collectingOrphans && i < newWays.size()) { + while (tls.collectingOrphans && i < newWays.size()) { const auto& el = newWays[i]; - if (el.first >= groupStart + (GroupSize * ChunkSize)) { - collectingOrphans = false; + if (el.first >= tls.groupStart + (GroupSize * ChunkSize)) { + tls.collectingOrphans = false; // Calculate new groupStart, rounding to previous boundary. 
- groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); - collectOrphans(*localWays); - localWays->clear(); + tls.groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + collectOrphans(*tls.localWays); + tls.localWays->clear(); } - localWays->push_back(el); + tls.localWays->push_back(el); i++; } while(i < newWays.size()) { const auto& el = newWays[i]; - if (el.first >= groupStart + (GroupSize * ChunkSize)) { - publishGroup(*localWays); - localWays->clear(); - groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); + if (el.first >= tls.groupStart + (GroupSize * ChunkSize)) { + publishGroup(*tls.localWays); + tls.localWays->clear(); + tls.groupStart = el.first / (GroupSize * ChunkSize) * (GroupSize * ChunkSize); } - localWays->push_back(el); + tls.localWays->push_back(el); i++; } } @@ -229,13 +298,14 @@ void SortedWayStore::finalize(unsigned int threadNum) { } void SortedWayStore::batchStart() { - collectingOrphans = true; - groupStart = -1; - if (localWays == nullptr || localWays->size() == 0) + ThreadStorage& tls = s(this); + tls.collectingOrphans = true; + tls.groupStart = -1; + if (tls.localWays == nullptr || tls.localWays->size() == 0) return; - collectOrphans(*localWays); - localWays->clear(); + collectOrphans(*tls.localWays); + tls.localWays->clear(); } void SortedWayStore::collectOrphans(const std::vector>>& orphans) { @@ -244,6 +314,7 @@ void SortedWayStore::collectOrphans(const std::vector>>& vec = orphanage[groupIndex]; const size_t i = vec.size(); + vec.resize(i + orphans.size()); std::copy(orphans.begin(), orphans.end(), vec.begin() + i); } @@ -284,7 +355,6 @@ std::vector SortedWayStore::decodeWay(uint16_t flags, const uint8_t* inp for (int i = 0; i < length; i++) rv.push_back(highBytes[i] | lowIntData[i]); } else { - uint16_t compressedLength = *(uint16_t*)input; input += 2; uint32_t firstInt = *(uint32_t*)(input); @@ -408,6 +478,7 @@ void populateMask(uint8_t* mask, const std::vector& ids) { 
} void SortedWayStore::publishGroup(const std::vector>>& ways) { + ThreadStorage& tls = s(this); totalWays += ways.size(); if (ways.size() == 0) { throw std::runtime_error("SortedWayStore: group is empty"); @@ -451,12 +522,12 @@ void SortedWayStore::publishGroup(const std::vectorwayIds.push_back(id % ChunkSize); - uint16_t flags = encodeWay(way.second, encodedWay, compressWays && way.second.size() >= 4); + uint16_t flags = encodeWay(way.second, tls.encodedWay, compressWays && way.second.size() >= 4); lastChunk->wayFlags.push_back(flags); std::vector encoded; - encoded.resize(encodedWay.size()); - memcpy(encoded.data(), encodedWay.data(), encodedWay.size()); + encoded.resize(tls.encodedWay.size()); + memcpy(encoded.data(), tls.encodedWay.data(), tls.encodedWay.size()); lastChunk->encodedWays.push_back(std::move(encoded)); } diff --git a/src/tag_map.cpp b/src/tag_map.cpp new file mode 100644 index 00000000..8fc02a96 --- /dev/null +++ b/src/tag_map.cpp @@ -0,0 +1,135 @@ +#include "tag_map.h" +#include +#include + +TagMap::TagMap() { + keys.resize(16); + key2value.resize(16); + values.resize(16); +} + +void TagMap::reset() { + for (int i = 0; i < 16; i++) { + keys[i].clear(); + key2value[i].clear(); + values[i].clear(); + } +} + +const std::size_t hashString(const std::string& str) { + // This is a pretty crappy hash function in terms of bit + // avalanching and distribution of output values. + // + // But it's very good in terms of speed, which turns out + // to be the important measure. + std::size_t hash = str.size(); + if (hash >= 4) + hash ^= *(uint32_t*)str.data(); + + return hash; +} + +const std::size_t hashString(const char* str, size_t size) { + // This is a pretty crappy hash function in terms of bit + // avalanching and distribution of output values. + // + // But it's very good in terms of speed, which turns out + // to be the important measure. 
+ std::size_t hash = size; + if (hash >= 4) + hash ^= *(uint32_t*)str; + + return hash; +} + +uint32_t TagMap::ensureString( + std::vector>& vector, + const protozero::data_view& value +) { + std::size_t hash = hashString(value.data(), value.size()); + + const uint16_t shard = hash % vector.size(); + for (int i = 0; i < vector[shard].size(); i++) + if (*(vector[shard][i]) == value) + return shard << 16 | i; + + vector[shard].push_back(&value); + return shard << 16 | (vector[shard].size() - 1); +} + + +void TagMap::addTag(const protozero::data_view& key, const protozero::data_view& value) { + uint32_t valueLoc = ensureString(values, value); +// std::cout << "valueLoc = " << valueLoc << std::endl; + uint32_t keyLoc = ensureString(keys, key); +// std::cout << "keyLoc = " << keyLoc << std::endl; + + + const uint16_t shard = keyLoc >> 16; + const uint16_t pos = keyLoc; +// std::cout << "shard=" << shard << ", pos=" << pos << std::endl; + if (key2value[shard].size() <= pos) { +// std::cout << "growing shard" << std::endl; + key2value[shard].resize(pos + 1); + } + + key2value[shard][pos] = valueLoc; +} + +int64_t TagMap::getKey(const char* key, size_t size) const { + // Return -1 if key not found, else return its keyLoc. + std::size_t hash = hashString(key, size); + + const uint16_t shard = hash % keys.size(); + for (int i = 0; i < keys[shard].size(); i++) { + const protozero::data_view& candidate = *keys[shard][i]; + if (candidate.size() != size) + continue; + + if (memcmp(candidate.data(), key, size) == 0) + return shard << 16 | i; + } + + return -1; +} + +int64_t TagMap::getValue(const char* value, size_t size) const { + // Return -1 if value not found, else return its valueLoc. 
+ std::size_t hash = hashString(value, size); + + const uint16_t shard = hash % values.size(); + for (int i = 0; i < values[shard].size(); i++) { + const protozero::data_view& candidate = *values[shard][i]; + if (candidate.size() != size) + continue; + + if (memcmp(candidate.data(), value, size) == 0) + return shard << 16 | i; + } + + return -1; +} + +const protozero::data_view* TagMap::getValueFromKey(uint32_t keyLoc) const { + const uint32_t valueLoc = key2value[keyLoc >> 16][keyLoc & 0xFFFF]; + return values[valueLoc >> 16][valueLoc & 0xFFFF]; +} + +const protozero::data_view* TagMap::getValue(uint32_t valueLoc) const { + return values[valueLoc >> 16][valueLoc & 0xFFFF]; +} + +boost::container::flat_map TagMap::exportToBoostMap() const { + boost::container::flat_map rv; + + for (int i = 0; i < keys.size(); i++) { + for (int j = 0; j < keys[i].size(); j++) { + uint32_t valueLoc = key2value[i][j]; + auto key = *keys[i][j]; + auto value = *values[valueLoc >> 16][valueLoc & 0xFFFF]; + rv[std::string(key.data(), key.size())] = std::string(value.data(), value.size()); + } + } + + return rv; +} diff --git a/src/tile_data.cpp b/src/tile_data.cpp index 696ed333..407f534a 100644 --- a/src/tile_data.cpp +++ b/src/tile_data.cpp @@ -47,12 +47,14 @@ TileDataSource::TileDataSource(size_t threadNum, unsigned int baseZoom, bool inc z6OffsetDivisor(baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1), objectsMutex(threadNum * 4), objects(CLUSTER_ZOOM_AREA), + lowZoomObjects(CLUSTER_ZOOM_AREA), objectsWithIds(CLUSTER_ZOOM_AREA), + lowZoomObjectsWithIds(CLUSTER_ZOOM_AREA), baseZoom(baseZoom), pointStores(threadNum), linestringStores(threadNum), - multipolygonStores(threadNum), multilinestringStores(threadNum), + multipolygonStores(threadNum), multiPolygonClipCache(ClipCache(threadNum, baseZoom)), multiLinestringClipCache(ClipCache(threadNum, baseZoom)) { @@ -71,9 +73,21 @@ TileDataSource::TileDataSource(size_t threadNum, unsigned int baseZoom, bool inc } } +thread_local std::vector>* tlsPendingSmallIndexObjects = nullptr; + void TileDataSource::finalize(size_t threadNum) { - finalizeObjects(threadNum, baseZoom, objects.begin(), objects.end()); - finalizeObjects(threadNum, baseZoom, objectsWithIds.begin(), objectsWithIds.end()); + uint64_t finalized = 0; + for (const auto& vec : pendingSmallIndexObjects) { + for (const auto& tuple : vec) { + finalized++; + addObjectToSmallIndexUnsafe(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple)); + } + } + + std::cout << "indexed " << finalized << " contended objects" << std::endl; + + finalizeObjects(name(), threadNum, baseZoom, objects.begin(), objects.end(), lowZoomObjects); + finalizeObjects(name(), threadNum, baseZoom, objectsWithIds.begin(), objectsWithIds.end(), lowZoomObjectsWithIds); } void TileDataSource::addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id) { @@ -87,8 +101,28 @@ void TileDataSource::addObjectToSmallIndex(const TileCoordinates& index, const O } const size_t z6index = z6x * CLUSTER_ZOOM_WIDTH + z6y; + auto& mutex = objectsMutex[z6index % objectsMutex.size()]; + + if (mutex.try_lock()) { + addObjectToSmallIndexUnsafe(index, oo, id); + mutex.unlock(); + } else { + // add to tlsPendingSmallIndexObjects + if (tlsPendingSmallIndexObjects == nullptr) { + std::lock_guard lock(objectsMutex[0]); + 
pendingSmallIndexObjects.push_back(std::vector>()); + tlsPendingSmallIndexObjects = &pendingSmallIndexObjects.back(); + } - std::lock_guard lock(objectsMutex[z6index % objectsMutex.size()]); + tlsPendingSmallIndexObjects->push_back(std::make_tuple(index, oo, id)); + } +} + +void TileDataSource::addObjectToSmallIndexUnsafe(const TileCoordinates& index, const OutputObject& oo, uint64_t id) { + // Pick the z6 index + const size_t z6x = index.x / z6OffsetDivisor; + const size_t z6y = index.y / z6OffsetDivisor; + const size_t z6index = z6x * CLUSTER_ZOOM_WIDTH + z6y; if (id == 0 || !includeID) objects[z6index].push_back({ @@ -105,32 +139,39 @@ void TileDataSource::addObjectToSmallIndex(const TileCoordinates& index, const O }); } -void TileDataSource::collectTilesWithObjectsAtZoom(uint zoom, TileCoordinatesSet& output) { +void TileDataSource::collectTilesWithObjectsAtZoom(std::vector& zooms) { // Scan through all shards. Convert to base zoom, then convert to the requested zoom. - collectTilesWithObjectsAtZoomTemplate(baseZoom, objects.begin(), objects.size(), zoom, output); - collectTilesWithObjectsAtZoomTemplate(baseZoom, objectsWithIds.begin(), objectsWithIds.size(), zoom, output); + collectTilesWithObjectsAtZoomTemplate(baseZoom, objects.begin(), objects.size(), zooms); + collectTilesWithObjectsAtZoomTemplate(baseZoom, objectsWithIds.begin(), objectsWithIds.size(), zooms); } -void addCoveredTilesToOutput(const uint baseZoom, const uint zoom, const Box& box, TileCoordinatesSet& output) { - int scale = pow(2, baseZoom-zoom); +void addCoveredTilesToOutput(const uint baseZoom, std::vector& zooms, const Box& box) { + size_t maxZoom = zooms.size() - 1; + int scale = pow(2, baseZoom - maxZoom); TileCoordinate minx = box.min_corner().x() / scale; TileCoordinate maxx = box.max_corner().x() / scale; TileCoordinate miny = box.min_corner().y() / scale; TileCoordinate maxy = box.max_corner().y() / scale; for (int x=minx; x<=maxx; x++) { for (int y=miny; y<=maxy; y++) { - 
output.set(x, y); + size_t zx = x, zy = y; + + for (int zoom = maxZoom; zoom >= 0; zoom--) { + zooms[zoom].set(zx, zy); + zx /= 2; + zy /= 2; + } } } } // Find the tiles used by the "large objects" from the rtree index -void TileDataSource::collectTilesWithLargeObjectsAtZoom(uint zoom, TileCoordinatesSet &output) { +void TileDataSource::collectTilesWithLargeObjectsAtZoom(std::vector& zooms) { for(auto const &result: boxRtree) - addCoveredTilesToOutput(baseZoom, zoom, result.first, output); + addCoveredTilesToOutput(baseZoom, zooms, result.first); for(auto const &result: boxRtreeWithIds) - addCoveredTilesToOutput(baseZoom, zoom, result.first, output); + addCoveredTilesToOutput(baseZoom, zooms, result.first); } // Copy objects from the tile at dstIndex (in the dataset srcTiles) into output @@ -139,11 +180,15 @@ void TileDataSource::collectObjectsForTile( TileCoordinates dstIndex, std::vector& output ) { + if (zoom < CLUSTER_ZOOM) { + collectLowZoomObjectsForTile(baseZoom, lowZoomObjects, zoom, dstIndex, output); + collectLowZoomObjectsForTile(baseZoom, lowZoomObjectsWithIds, zoom, dstIndex, output); + return; + } + size_t iStart = 0; size_t iEnd = objects.size(); - // TODO: we could also narrow the search space for z1..z5, too. - // They're less important, as they have fewer tiles. 
if (zoom >= CLUSTER_ZOOM) { // Compute the x, y at the base zoom level TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); @@ -188,11 +233,7 @@ Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) { switch(geomType) { case POINT_: { - auto p = retrievePoint(objectID); - if (geom::within(p, bbox.clippingBox)) { - return p; - } - return MultiLinestring(); + throw std::runtime_error("unexpected geomType in buildWayGeometry"); } case LINESTRING_: { @@ -329,22 +370,12 @@ Geometry TileDataSource::buildWayGeometry(OutputGeometryType const geomType, } } -LatpLon TileDataSource::buildNodeGeometry(OutputGeometryType const geomType, - NodeID const objectID, const TileBbox &bbox) const { - switch(geomType) { - case POINT_: { - auto p = retrievePoint(objectID); - LatpLon out; - out.latp = p.y(); - out.lon = p.x(); - return out; - } - - default: - break; - } - - throw std::runtime_error("Geometry type is not point"); +LatpLon TileDataSource::buildNodeGeometry(NodeID const objectID, const TileBbox &bbox) const { + auto p = retrievePoint(objectID); + LatpLon out; + out.latp = p.y(); + out.lon = p.x(); + return out; } @@ -366,18 +397,14 @@ void TileDataSource::reportSize() const { std::cout << "Generated points: " << (points - 1) << ", lines: " << (linestrings - 2) << ", polygons: " << (polygons - 1) << std::endl; } -TileCoordinatesSet getTilesAtZoom( +void populateTilesAtZoom( const std::vector& sources, - unsigned int zoom + std::vector& zooms ) { - TileCoordinatesSet tileCoordinates(zoom); - for(size_t i=0; icollectTilesWithObjectsAtZoom(zoom, tileCoordinates); - sources[i]->collectTilesWithLargeObjectsAtZoom(zoom, tileCoordinates); + sources[i]->collectTilesWithObjectsAtZoom(zooms); + sources[i]->collectTilesWithLargeObjectsAtZoom(zooms); } - - return tileCoordinates; } std::vector TileDataSource::getObjectsForTile( @@ -532,7 +559,7 @@ NodeID TileDataSource::storePoint(const Point& input) { NodeID 
offset = store.second->size(); store.second->emplace_back(input); - NodeID rv = (store.first << (35 - shardBits)) + offset; + NodeID rv = (store.first << (TILE_DATA_ID_SIZE - shardBits)) + offset; return rv; } @@ -542,7 +569,7 @@ NodeID TileDataSource::storeLinestring(const Linestring& src) { NodeID offset = store.second->size(); store.second->emplace_back(std::move(dst)); - NodeID rv = (store.first << (35 - shardBits)) + offset; + NodeID rv = (store.first << (TILE_DATA_ID_SIZE - shardBits)) + offset; return rv; } @@ -564,7 +591,7 @@ NodeID TileDataSource::storeMultiPolygon(const MultiPolygon& src) { NodeID offset = store.second->size(); store.second->emplace_back(std::move(dst)); - NodeID rv = (store.first << (35 - shardBits)) + offset; + NodeID rv = (store.first << (TILE_DATA_ID_SIZE - shardBits)) + offset; return rv; } @@ -579,7 +606,7 @@ NodeID TileDataSource::storeMultiLinestring(const MultiLinestring& src) { NodeID offset = store.second->size(); store.second->emplace_back(std::move(dst)); - NodeID rv = (store.first << (35 - shardBits)) + offset; + NodeID rv = (store.first << (TILE_DATA_ID_SIZE - shardBits)) + offset; return rv; } diff --git a/src/tile_worker.cpp b/src/tile_worker.cpp index 5f5c48b2..7951fcaf 100644 --- a/src/tile_worker.cpp +++ b/src/tile_worker.cpp @@ -176,7 +176,7 @@ void ProcessObjects( if (oo.oo.geomType == POINT_) { vector_tile::Tile_Feature *featurePtr = vtLayer->add_features(); - LatpLon pos = source->buildNodeGeometry(oo.oo.geomType, oo.oo.objectID, bbox); + LatpLon pos = source->buildNodeGeometry(oo.oo.objectID, bbox); featurePtr->add_geometry(9); // moveTo, repeat x1 pair xy = bbox.scaleLatpLon(pos.latp/10000000.0, pos.lon/10000000.0); featurePtr->add_geometry((xy.first << 1) ^ (xy.first >> 31)); @@ -378,13 +378,13 @@ void outputProc( // Write to file or sqlite string outputdata, compressed; - if (sharedData.outputMode == OUTPUT_MBTILES) { + if (sharedData.outputMode == OptionsParser::OutputMode::MBTiles) { // Write to sqlite 
tile.SerializeToString(&outputdata); if (sharedData.config.compress) { compressed = compress_string(outputdata, Z_DEFAULT_COMPRESSION, sharedData.config.gzip); } sharedData.mbtiles.saveTile(zoom, bbox.index.x, bbox.index.y, sharedData.config.compress ? &compressed : &outputdata, sharedData.mergeSqlite); - } else if (sharedData.outputMode == OUTPUT_PMTILES) { + } else if (sharedData.outputMode == OptionsParser::OutputMode::PMTiles) { // Write to pmtiles tile.SerializeToString(&outputdata); sharedData.pmtiles.saveTile(zoom, bbox.index.x, bbox.index.y, outputdata); diff --git a/src/tilemaker.cpp b/src/tilemaker.cpp index 852be49b..3c3f55fe 100644 --- a/src/tilemaker.cpp +++ b/src/tilemaker.cpp @@ -48,8 +48,9 @@ #include "osm_lua_processing.h" #include "mbtiles.h" +#include "options_parser.h" #include "shared_data.h" -#include "read_pbf.h" +#include "pbf_processor.h" #include "read_shp.h" #include "tile_worker.h" #include "osm_mem_tiles.h" @@ -80,89 +81,46 @@ bool verbose = false; * * Worker threads write the output tiles, and start in the outputProc function. 
*/ -int main(int argc, char* argv[]) { - +int main(const int argc, const char* argv[]) { // ---- Read command-line options - vector inputFiles; - string luaFile; - string osmStoreFile; - string jsonFile; - uint threadNum; - string outputFile; - string bbox; - bool _verbose = false, mergeSqlite = false, mapsplit = false, osmStoreCompact = false, skipIntegrity = false, osmStoreUncompressedNodes = false, osmStoreUncompressedWays = false, materializeGeometries = false; - int outputMode = OUTPUT_FILE; - bool logTileTimings = false; - - po::options_description desc("tilemaker " STR(TM_VERSION) "\nConvert OpenStreetMap .pbf files into vector tiles\n\nAvailable options"); - desc.add_options() - ("help", "show help message") - ("input", po::value< vector >(&inputFiles), "source .osm.pbf file") - ("output", po::value< string >(&outputFile), "target directory or .mbtiles/.pmtiles file") - ("bbox", po::value< string >(&bbox), "bounding box to use if input file does not have a bbox header set, example: minlon,minlat,maxlon,maxlat") - ("merge" ,po::bool_switch(&mergeSqlite), "merge with existing .mbtiles (overwrites otherwise)") - ("config", po::value< string >(&jsonFile)->default_value("config.json"), "config JSON file") - ("process",po::value< string >(&luaFile)->default_value("process.lua"), "tag-processing Lua file") - ("store", po::value< string >(&osmStoreFile), "temporary storage for node/ways/relations data") - ("compact",po::bool_switch(&osmStoreCompact), "Reduce overall memory usage (compact mode).\nNOTE: This requires the input to be renumbered (osmium renumber)") - ("no-compress-nodes", po::bool_switch(&osmStoreUncompressedNodes), "Store nodes uncompressed") - ("no-compress-ways", po::bool_switch(&osmStoreUncompressedWays), "Store ways uncompressed") - ("materialize-geometries", po::bool_switch(&materializeGeometries), "Materialize geometries - faster, but requires more memory") - ("verbose",po::bool_switch(&_verbose), "verbose error output") - 
("skip-integrity",po::bool_switch(&skipIntegrity), "don't enforce way/node integrity") - ("log-tile-timings", po::bool_switch(&logTileTimings), "log how long each tile takes") - ("threads",po::value< uint >(&threadNum)->default_value(0), "number of threads (automatically detected if 0)"); - po::positional_options_description p; - p.add("input", 1).add("output", 1); - po::variables_map vm; + OptionsParser::Options options; try { - po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm); - } catch (const po::unknown_option& ex) { - cerr << "Unknown option: " << ex.get_option_name() << endl; - return -1; + options = OptionsParser::parse(argc, argv); + } catch (OptionsParser::OptionException& e) { + cerr << e.what() << endl; + return 1; } - po::notify(vm); - - if (vm.count("help")) { cout << desc << endl; return 0; } - if (vm.count("output")==0) { cerr << "You must specify an output file or directory. Run with --help to find out more." << endl; return -1; } - if (vm.count("input")==0) { cout << "No source .osm.pbf file supplied" << endl; } - vector bboxElements = parseBox(bbox); + if (options.showHelp) { OptionsParser::showHelp(); return 0; } - if (ends_with(outputFile, ".mbtiles") || ends_with(outputFile, ".sqlite")) { outputMode = OUTPUT_MBTILES; } - else if (ends_with(outputFile, ".pmtiles")) { outputMode = OUTPUT_PMTILES; } - if (threadNum == 0) { threadNum = max(thread::hardware_concurrency(), 1u); } - verbose = _verbose; + verbose = options.verbose; - - // ---- Check config - - if (!boost::filesystem::exists(jsonFile)) { cerr << "Couldn't open .json config: " << jsonFile << endl; return -1; } - if (!boost::filesystem::exists(luaFile )) { cerr << "Couldn't open .lua script: " << luaFile << endl; return -1; } + vector bboxElements = parseBox(options.bbox); // ---- Remove existing .mbtiles if it exists - - if ((outputMode==OUTPUT_MBTILES || outputMode==OUTPUT_PMTILES) && !mergeSqlite && static_cast(std::ifstream(outputFile))) { + if 
((options.outputMode == OptionsParser::OutputMode::MBTiles || options.outputMode == OptionsParser::OutputMode::PMTiles) && !options.mergeSqlite && static_cast(std::ifstream(options.outputFile))) { cout << "Output file exists, will overwrite (Ctrl-C to abort"; - if (outputMode==OUTPUT_MBTILES) cout << ", rerun with --merge to keep"; + if (options.outputMode == OptionsParser::OutputMode::MBTiles) cout << ", rerun with --merge to keep"; cout << ")" << endl; std::this_thread::sleep_for(std::chrono::milliseconds(2000)); - if (remove(outputFile.c_str()) != 0) { + if (remove(options.outputFile.c_str()) != 0) { cerr << "Couldn't remove existing file" << endl; return 0; } - } else if (mergeSqlite && outputMode!=OUTPUT_MBTILES) { + } else if (options.mergeSqlite && options.outputMode != OptionsParser::OutputMode::MBTiles) { cerr << "--merge only works with .mbtiles" << endl; return 0; - } else if (mergeSqlite && !static_cast(std::ifstream(outputFile))) { + } else if (options.mergeSqlite && !static_cast(std::ifstream(options.outputFile))) { cout << "--merge specified but .mbtiles file doesn't already exist, ignoring" << endl; - mergeSqlite = false; + options.mergeSqlite = false; } + // ---- Read bounding box from first .pbf (if there is one) or mapsplit file bool hasClippingBox = false; Box clippingBox; + bool mapsplit = false; MBTiles mapsplitFile; double minLon=0.0, maxLon=0.0, minLat=0.0, maxLat=0.0; if (!bboxElements.empty()) { @@ -172,14 +130,14 @@ int main(int argc, char* argv[]) { maxLon = bboxElementFromStr(bboxElements.at(2)); maxLat = bboxElementFromStr(bboxElements.at(3)); - } else if (inputFiles.size()==1 && (ends_with(inputFiles[0], ".mbtiles") || ends_with(inputFiles[0], ".sqlite") || ends_with(inputFiles[0], ".msf"))) { + } else if (options.inputFiles.size()==1 && (ends_with(options.inputFiles[0], ".mbtiles") || ends_with(options.inputFiles[0], ".sqlite") || ends_with(options.inputFiles[0], ".msf"))) { mapsplit = true; - 
mapsplitFile.openForReading(inputFiles[0]); + mapsplitFile.openForReading(options.inputFiles[0]); mapsplitFile.readBoundingBox(minLon, maxLon, minLat, maxLat); hasClippingBox = true; - } else if (inputFiles.size()>0) { - int ret = ReadPbfBoundingBox(inputFiles[0], minLon, maxLon, minLat, maxLat, hasClippingBox); + } else if (options.inputFiles.size()>0) { + int ret = ReadPbfBoundingBox(options.inputFiles[0], minLon, maxLon, minLat, maxLat, hasClippingBox); if(ret != 0) return ret; } @@ -193,7 +151,7 @@ int main(int argc, char* argv[]) { rapidjson::Document jsonConfig; class Config config; try { - FILE* fp = fopen(jsonFile.c_str(), "r"); + FILE* fp = fopen(options.jsonFile.c_str(), "r"); char readBuffer[65536]; rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer)); jsonConfig.ParseStream(is); @@ -211,52 +169,73 @@ int main(int argc, char* argv[]) { } // For each tile, objects to be used in processing - shared_ptr nodeStore; - bool allPbfsHaveSortTypeThenID = true; bool anyPbfHasLocationsOnWays = false; - for (const std::string& file: inputFiles) { + for (const std::string& file: options.inputFiles) { if (ends_with(file, ".pbf")) { allPbfsHaveSortTypeThenID = allPbfsHaveSortTypeThenID && PbfHasOptionalFeature(file, OptionSortTypeThenID); anyPbfHasLocationsOnWays = anyPbfHasLocationsOnWays || PbfHasOptionalFeature(file, OptionLocationsOnWays); } } - if (osmStoreCompact) - nodeStore = make_shared(); - else { - if (allPbfsHaveSortTypeThenID) - nodeStore = make_shared(!osmStoreUncompressedNodes); - else - nodeStore = make_shared(); + auto createNodeStore = [allPbfsHaveSortTypeThenID, options]() { + if (options.osm.compact) { + std::shared_ptr rv = make_shared(); + return rv; + } + + if (allPbfsHaveSortTypeThenID) { + std::shared_ptr rv = make_shared(!options.osm.uncompressedNodes); + return rv; + } + std::shared_ptr rv = make_shared(); + return rv; + }; + + shared_ptr nodeStore; + + if (options.osm.shardStores) { + nodeStore = 
std::make_shared(createNodeStore); + } else { + nodeStore = createNodeStore(); } + auto createWayStore = [anyPbfHasLocationsOnWays, allPbfsHaveSortTypeThenID, options, &nodeStore]() { + if (!anyPbfHasLocationsOnWays && allPbfsHaveSortTypeThenID) { + std::shared_ptr rv = make_shared(!options.osm.uncompressedWays, *nodeStore.get()); + return rv; + } + + std::shared_ptr rv = make_shared(); + return rv; + }; + shared_ptr wayStore; - if (!anyPbfHasLocationsOnWays && allPbfsHaveSortTypeThenID) { - wayStore = make_shared(!osmStoreUncompressedNodes, *nodeStore.get()); + if (options.osm.shardStores) { + wayStore = std::make_shared(createWayStore, *nodeStore.get()); } else { - wayStore = make_shared(); + wayStore = createWayStore(); } OSMStore osmStore(*nodeStore.get(), *wayStore.get()); - osmStore.use_compact_store(osmStoreCompact); - osmStore.enforce_integrity(!skipIntegrity); - if(!osmStoreFile.empty()) { - std::cout << "Using osm store file: " << osmStoreFile << std::endl; - osmStore.open(osmStoreFile); + osmStore.use_compact_store(options.osm.compact); + osmStore.enforce_integrity(!options.osm.skipIntegrity); + if(!options.osm.storeFile.empty()) { + std::cout << "Using osm store file: " << options.osm.storeFile << std::endl; + osmStore.open(options.osm.storeFile); } AttributeStore attributeStore; class LayerDefinition layers(config.layers); - class OsmMemTiles osmMemTiles(threadNum, config.baseZoom, config.includeID, *nodeStore, *wayStore); - class ShpMemTiles shpMemTiles(threadNum, config.baseZoom); + class OsmMemTiles osmMemTiles(options.threadNum, config.baseZoom, config.includeID, *nodeStore, *wayStore); + class ShpMemTiles shpMemTiles(options.threadNum, config.baseZoom); osmMemTiles.open(); shpMemTiles.open(); - OsmLuaProcessing osmLuaProcessing(osmStore, config, layers, luaFile, - shpMemTiles, osmMemTiles, attributeStore, materializeGeometries); + OsmLuaProcessing osmLuaProcessing(osmStore, config, layers, options.luaFile, + shpMemTiles, osmMemTiles, 
attributeStore, options.osm.materializeGeometries); // ---- Load external shp files @@ -274,7 +253,7 @@ int main(int argc, char* argv[]) { readShapefile(clippingBox, layers, config.baseZoom, layerNum, - threadNum, + options.threadNum, shpMemTiles, osmLuaProcessing); } } @@ -287,28 +266,31 @@ int main(int argc, char* argv[]) { // ---- Read all PBFs - PbfReader pbfReader(osmStore); + PbfProcessor pbfProcessor(osmStore); std::vector sortOrders = layers.getSortOrders(); if (!mapsplit) { - for (auto inputFile : inputFiles) { + for (auto inputFile : options.inputFiles) { cout << "Reading .pbf " << inputFile << endl; ifstream infile(inputFile, ios::in | ios::binary); if (!infile) { cerr << "Couldn't open .pbf file " << inputFile << endl; return -1; } const bool hasSortTypeThenID = PbfHasOptionalFeature(inputFile, OptionSortTypeThenID); - int ret = pbfReader.ReadPbfFile( + int ret = pbfProcessor.ReadPbfFile( + nodeStore->shards(), hasSortTypeThenID, nodeKeys, - threadNum, + options.threadNum, [&]() { thread_local std::shared_ptr pbfStream(new ifstream(inputFile, ios::in | ios::binary)); return pbfStream; }, [&]() { - thread_local std::shared_ptr osmLuaProcessing(new OsmLuaProcessing(osmStore, config, layers, luaFile, shpMemTiles, osmMemTiles, attributeStore, materializeGeometries)); + thread_local std::shared_ptr osmLuaProcessing(new OsmLuaProcessing(osmStore, config, layers, options.luaFile, shpMemTiles, osmMemTiles, attributeStore, options.osm.materializeGeometries)); return osmLuaProcessing; - } + }, + *nodeStore, + *wayStore ); if (ret != 0) return ret; } @@ -319,16 +301,16 @@ int main(int argc, char* argv[]) { // ---- Initialise SharedData SourceList sources = {&osmMemTiles, &shpMemTiles}; class SharedData sharedData(config, layers); - sharedData.outputFile = outputFile; - sharedData.outputMode = outputMode; - sharedData.mergeSqlite = mergeSqlite; + sharedData.outputFile = options.outputFile; + sharedData.outputMode = options.outputMode; + sharedData.mergeSqlite = 
options.mergeSqlite; // ---- Initialise mbtiles/pmtiles if required - if (sharedData.outputMode==OUTPUT_MBTILES) { + if (sharedData.outputMode == OptionsParser::OutputMode::MBTiles) { sharedData.mbtiles.openForWriting(sharedData.outputFile); sharedData.writeMBTilesProjectData(); - } else if (sharedData.outputMode==OUTPUT_PMTILES) { + } else if (sharedData.outputMode == OptionsParser::OutputMode::PMTiles) { sharedData.pmtiles.open(sharedData.outputFile); } @@ -361,7 +343,8 @@ int main(int argc, char* argv[]) { cout << "Reading tile " << srcZ << ": " << srcX << "," << srcY << " (" << (run+1) << "/" << runs << ")" << endl; vector pbf = mapsplitFile.readTile(srcZ,srcX,tmsY); - int ret = pbfReader.ReadPbfFile( + int ret = pbfProcessor.ReadPbfFile( + nodeStore->shards(), false, nodeKeys, 1, @@ -369,8 +352,10 @@ int main(int argc, char* argv[]) { return make_unique(pbf.data(), pbf.size(), ios::in | ios::binary); }, [&]() { - return std::make_unique(osmStore, config, layers, luaFile, shpMemTiles, osmMemTiles, attributeStore, materializeGeometries); - } + return std::make_unique(osmStore, config, layers, options.luaFile, shpMemTiles, osmMemTiles, attributeStore, options.osm.materializeGeometries); + }, + *nodeStore, + *wayStore ); if (ret != 0) return ret; @@ -378,7 +363,7 @@ int main(int argc, char* argv[]) { } // Launch the pool with threadNum threads - boost::asio::thread_pool pool(threadNum); + boost::asio::thread_pool pool(options.threadNum); // Mutex is hold when IO is performed std::mutex io_mutex; @@ -387,14 +372,14 @@ int main(int argc, char* argv[]) { std::atomic tilesWritten(0); for (auto source : sources) { - source->finalize(threadNum); + source->finalize(options.threadNum); } // tiles by zoom level // The clipping bbox check is expensive - as an optimization, compute the set of // z6 tiles that are wholly covered by the clipping box. Membership in this // set is quick to test. 
- std::set coveredZ6Tiles; + TileCoordinatesSet coveredZ6Tiles(6); if (hasClippingBox) { for (int x = 0; x < 1 << 6; x++) { for (int y = 0; y < 1 << 6; y++) { @@ -402,20 +387,47 @@ int main(int argc, char* argv[]) { TileBbox(TileCoordinates(x, y), 6, false, false).getTileBox(), clippingBox )) - coveredZ6Tiles.insert(TileCoordinates(x, y)); + coveredZ6Tiles.set(x, y); } } } // For large areas (arbitrarily defined as 100 z6 tiles), use a dense index for pmtiles - if (coveredZ6Tiles.size()>100 && outputMode==OUTPUT_PMTILES) { + if (coveredZ6Tiles.size()>100 && options.outputMode == OptionsParser::OutputMode::PMTiles) { std::cout << "Using dense index for .pmtiles" << std::endl; sharedData.pmtiles.isSparse = false; } std::deque> tileCoordinates; + std::vector zoomResults; + for (uint zoom = 0; zoom <= sharedData.config.endZoom; zoom++) { + zoomResults.push_back(TileCoordinatesSet(zoom)); + } + + { +#ifdef CLOCK_MONOTONIC + timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); +#endif + std::cout << "collecting tiles" << std::flush; + populateTilesAtZoom(sources, zoomResults); +#ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &end); + uint64_t tileNs = 1e9 * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; + std::cout << ": " << (uint32_t)(tileNs / 1e6) << "ms"; +#endif + } + + std::cout << ", filtering tiles:" << std::flush; for (uint zoom=sharedData.config.startZoom; zoom <= sharedData.config.endZoom; zoom++) { - auto zoomResult = getTilesAtZoom(sources, zoom); + std::cout << " z" << std::to_string(zoom) << std::flush; +#ifdef CLOCK_MONOTONIC + timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); +#endif + + const auto& zoomResult = zoomResults[zoom]; + int numTiles = 0; for (int x = 0; x < 1 << zoom; x++) { for (int y = 0; y < 1 << zoom; y++) { if (!zoomResult.test(x, y)) @@ -433,7 +445,7 @@ int main(int argc, char* argv[]) { if (zoom >= 6) { TileCoordinate z6x = x / (1 << (zoom - 6)); TileCoordinate z6y = y / (1 << (zoom - 6)); - 
isInAWhollyCoveredZ6Tile = coveredZ6Tiles.find(TileCoordinates(z6x, z6y)) != coveredZ6Tiles.end(); + isInAWhollyCoveredZ6Tile = coveredZ6Tiles.test(z6x, z6y); } if(!isInAWhollyCoveredZ6Tile && !boost::geometry::intersects(TileBbox(TileCoordinates(x, y), zoom, false, false).getTileBox(), clippingBox)) @@ -441,9 +453,22 @@ int main(int argc, char* argv[]) { } tileCoordinates.push_back(std::make_pair(zoom, TileCoordinates(x, y))); + numTiles++; } } + + std::cout << " (" << numTiles; +#ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &end); + uint64_t tileNs = 1e9 * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; + std::cout << ", " << (uint32_t)(tileNs / 1e6) << "ms"; + +#endif + std::cout << ")" << std::flush; } + zoomResults.clear(); + + std::cout << std::endl; // Cluster tiles: breadth-first for z0..z5, depth-first for z6 const size_t baseZoom = config.baseZoom; @@ -494,7 +519,7 @@ int main(int argc, char* argv[]) { return false; }, - threadNum); + options.threadNum); std::size_t batchSize = 0; for(std::size_t startIndex = 0; startIndex < tileCoordinates.size(); startIndex += batchSize) { @@ -523,9 +548,9 @@ int main(int argc, char* argv[]) { unsigned int zoom = tileCoordinates[i].first; TileCoordinates coords = tileCoordinates[i].second; -#ifndef _WIN32 +#ifdef CLOCK_MONOTONIC timespec start, end; - if (logTileTimings) + if (options.logTileTimings) clock_gettime(CLOCK_MONOTONIC, &start); #endif @@ -535,8 +560,8 @@ int main(int argc, char* argv[]) { } outputProc(sharedData, sources, attributeStore, data, coords, zoom); -#ifndef _WIN32 - if (logTileTimings) { +#ifdef CLOCK_MONOTONIC + if (options.logTileTimings) { clock_gettime(CLOCK_MONOTONIC, &end); uint64_t tileNs = 1e9 * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; std::string output = "z" + std::to_string(zoom) + "/" + std::to_string(coords.x) + "/" + std::to_string(coords.y) + " took " + std::to_string(tileNs/1e6) + " ms"; @@ -545,7 +570,7 @@ int main(int argc, char* argv[]) { 
#endif } - if (logTileTimings) { + if (options.logTileTimings) { const std::lock_guard lock(io_mutex); std::cout << std::endl; for (const auto& output : tileTimings) @@ -575,10 +600,10 @@ int main(int argc, char* argv[]) { // ---- Close tileset - if (outputMode==OUTPUT_MBTILES) { + if (options.outputMode == OptionsParser::OutputMode::MBTiles) { sharedData.writeMBTilesMetadata(jsonConfig); sharedData.mbtiles.closeForWriting(); - } else if (outputMode==OUTPUT_PMTILES) { + } else if (options.outputMode == OptionsParser::OutputMode::PMTiles) { sharedData.writePMTilesBounds(); std::string metadata = sharedData.pmTilesMetadata(); sharedData.pmtiles.close(metadata); diff --git a/src/way_stores.cpp b/src/way_stores.cpp index 05d884d0..790ad816 100644 --- a/src/way_stores.cpp +++ b/src/way_stores.cpp @@ -14,6 +14,14 @@ void BinarySearchWayStore::reopen() { mLatpLonLists = std::make_unique(); } +bool BinarySearchWayStore::contains(size_t shard, WayID id) const { + auto iter = std::lower_bound(mLatpLonLists->begin(), mLatpLonLists->end(), id, [](auto const &e, auto id) { + return e.first < id; + }); + + return !(iter == mLatpLonLists->end() || iter->first != id); +} + std::vector BinarySearchWayStore::at(WayID wayid) const { std::lock_guard lock(mutex); @@ -39,7 +47,7 @@ void BinarySearchWayStore::insertLatpLons(std::vector &n std::copy(std::make_move_iterator(newWays.begin()), std::make_move_iterator(newWays.end()), mLatpLonLists->begin() + i); } -const void BinarySearchWayStore::insertNodes(const std::vector>>& newWays) { +void BinarySearchWayStore::insertNodes(const std::vector>>& newWays) { throw std::runtime_error("BinarySearchWayStore does not support insertNodes"); } diff --git a/test/append_vector.test.cpp b/test/append_vector.test.cpp new file mode 100644 index 00000000..db4949e2 --- /dev/null +++ b/test/append_vector.test.cpp @@ -0,0 +1,98 @@ +#include +#include +#include "external/minunit.h" +#include "append_vector.h" + +using namespace AppendVectorNS; + 
+MU_TEST(test_append_vector) { + AppendVector vec; + AppendVector vec2; + mu_check(vec.size() == 0); + mu_check(vec.begin() == vec.end()); + mu_check(vec.begin() != vec2.begin()); + + for (int i = 0; i < 10000; i++) { + vec.push_back(i); + } + mu_check(vec.size() == 10000); + + mu_check(vec[25] == 25); + + const AppendVector::Iterator& it = vec.begin(); + mu_check(*it == 0); + mu_check(*(it + 1) == 1); + mu_check(*(it + 2) == 2); + mu_check(*(it + 9000) == 9000); + mu_check(*(it + 1 - 1) == 0); + mu_check(*(vec.end() + -1) == 9999); + mu_check(*(vec.end() - 1) == 9999); + mu_check(*(vec.end() - 2) == 9998); + mu_check(*(vec.end() - 9000) == 1000); + mu_check(*(vec.begin() - -1) == 1); + + boost::sort::block_indirect_sort( + vec.begin(), + vec.end(), + [](auto const &a, auto const&b) { return b < a; }, + 1 + ); + + mu_check(vec[0] == 9999); + mu_check(vec[9999] == 0); + + boost::sort::block_indirect_sort( + vec.begin(), + vec.end(), + [](auto const &a, auto const&b) { return a < b; }, + 1 + ); + + mu_check(vec[0] == 0); + mu_check(vec[9999] == 9999); + + auto iter = std::lower_bound( + vec.begin(), + vec.end(), + 123, + [](const int32_t& a, const int32_t& toFind) { + return a < toFind; + } + ); + + mu_check(iter != vec.end()); + mu_check(*iter == 123); + + iter = std::lower_bound( + vec.begin(), + vec.end(), + 123123, + [](const int32_t& a, const int32_t& toFind) { + return a < toFind; + } + ); + + mu_check(iter == vec.end()); + + iter = std::lower_bound( + vec.begin(), + vec.end(), + -2, + [](const int32_t& a, const int32_t& toFind) { + return a < toFind; + } + ); + + mu_check(iter == vec.begin()); +} + +MU_TEST_SUITE(test_suite_append_vector) { + MU_RUN_TEST(test_append_vector); +} + +int main() { + MU_RUN_SUITE(test_suite_append_vector); + MU_REPORT(); + return MU_EXIT_CODE; +} + diff --git a/test/attribute_store.test.cpp b/test/attribute_store.test.cpp new file mode 100644 index 00000000..db104a14 --- /dev/null +++ b/test/attribute_store.test.cpp @@ -0,0 +1,103 
@@ +#include +#include +#include "external/minunit.h" +#include "attribute_store.h" + +MU_TEST(test_attribute_store) { + AttributeStore store; + store.reset(); + + mu_check(store.size() == 0); + + AttributeSet s1; + store.addAttribute(s1, "str1", std::string("someval"), 0); + store.addAttribute(s1, "str2", std::string("a very long string"), 0); + store.addAttribute(s1, "bool1", false, 0); + store.addAttribute(s1, "bool2", true, 0); + store.addAttribute(s1, "float1", (float)42.0, 0); + + const auto s1Index = store.add(s1); + + mu_check(store.size() == 1); + + const auto s1Pairs = store.getUnsafe(s1Index); + mu_check(s1Pairs.size() == 5); + const auto str1 = std::find_if(s1Pairs.begin(), s1Pairs.end(), [&store](auto ap) { + return ap->keyIndex == store.keyStore.key2index("str1"); + }); + mu_check(str1 != s1Pairs.end()); + mu_check((*str1)->hasStringValue()); + mu_check((*str1)->stringValue() == "someval"); + + const auto str2 = std::find_if(s1Pairs.begin(), s1Pairs.end(), [&store](auto ap) { + return ap->keyIndex == store.keyStore.key2index("str2"); + }); + mu_check(str2 != s1Pairs.end()); + mu_check((*str2)->hasStringValue()); + mu_check((*str2)->stringValue() == "a very long string"); + + const auto bool1 = std::find_if(s1Pairs.begin(), s1Pairs.end(), [&store](auto ap) { + return ap->keyIndex == store.keyStore.key2index("bool1"); + }); + mu_check(bool1 != s1Pairs.end()); + mu_check((*bool1)->hasBoolValue()); + mu_check((*bool1)->boolValue() == false); + + const auto bool2 = std::find_if(s1Pairs.begin(), s1Pairs.end(), [&store](auto ap) { + return ap->keyIndex == store.keyStore.key2index("bool2"); + }); + mu_check(bool2 != s1Pairs.end()); + mu_check((*bool2)->hasBoolValue()); + mu_check((*bool2)->boolValue() == true); + + const auto float1 = std::find_if(s1Pairs.begin(), s1Pairs.end(), [&store](auto ap) { + return ap->keyIndex == store.keyStore.key2index("float1"); + }); + mu_check(float1 != s1Pairs.end()); + mu_check((*float1)->hasFloatValue()); + 
mu_check((*float1)->floatValue() == 42); +} + +MU_TEST(test_attribute_store_reuses) { + AttributeStore store; + store.reset(); + + mu_check(store.size() == 0); + + { + AttributeSet s1a; + store.addAttribute(s1a, "str1", std::string("someval"), 0); + const auto s1aIndex = store.add(s1a); + + AttributeSet s1b; + store.addAttribute(s1b, "str1", std::string("someval"), 0); + const auto s1bIndex = store.add(s1b); + + mu_check(s1aIndex == s1bIndex); + } + + { + AttributeSet s1a; + store.addAttribute(s1a, "str1", std::string("this is a very long string"), 0); + const auto s1aIndex = store.add(s1a); + + AttributeSet s1b; + store.addAttribute(s1b, "str1", std::string("this is a very long string"), 0); + const auto s1bIndex = store.add(s1b); + + mu_check(s1aIndex == s1bIndex); + } + + +} + +MU_TEST_SUITE(test_suite_attribute_store) { + MU_RUN_TEST(test_attribute_store); + MU_RUN_TEST(test_attribute_store_reuses); +} + +int main() { + MU_RUN_SUITE(test_suite_attribute_store); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/deque_map.test.cpp b/test/deque_map.test.cpp new file mode 100644 index 00000000..28023542 --- /dev/null +++ b/test/deque_map.test.cpp @@ -0,0 +1,67 @@ +#include <string> +#include <vector> +#include "external/minunit.h" +#include "deque_map.h" + +MU_TEST(test_deque_map) { + DequeMap<std::string> strs; + + mu_check(strs.size() == 0); + mu_check(!strs.full()); + mu_check(strs.find("foo") == -1); + mu_check(strs.add("foo") == 0); + mu_check(!strs.full()); + mu_check(strs.find("foo") == 0); + mu_check(strs.size() == 1); + mu_check(strs.add("foo") == 0); + mu_check(strs.size() == 1); + mu_check(strs.add("bar") == 1); + mu_check(strs.size() == 2); + mu_check(strs.add("aardvark") == 2); + mu_check(strs.size() == 3); + mu_check(strs.add("foo") == 0); + mu_check(strs.add("bar") == 1); + mu_check(strs.add("quux") == 3); + mu_check(strs.size() == 4); + + mu_check(strs.at(0) == "foo"); + mu_check(strs[0] == "foo"); + mu_check(strs.at(1) == "bar"); + mu_check(strs[1] == "bar"); + 
mu_check(strs.at(2) == "aardvark"); + mu_check(strs[2] == "aardvark"); + mu_check(strs.at(3) == "quux"); + mu_check(strs[3] == "quux"); + + std::vector<std::string> rv; + for (std::string x : strs) { + rv.push_back(x); + } + mu_check(rv[0] == "aardvark"); + mu_check(rv[1] == "bar"); + mu_check(rv[2] == "foo"); + mu_check(rv[3] == "quux"); + + DequeMap<std::string> boundedMap(1); + mu_check(!boundedMap.full()); + mu_check(boundedMap.add("foo") == 0); + mu_check(boundedMap.add("foo") == 0); + mu_check(boundedMap.full()); + mu_check(boundedMap.add("bar") == -1); + boundedMap.clear(); + mu_check(!boundedMap.full()); + mu_check(boundedMap.find("foo") == -1); + mu_check(boundedMap.add("bar") == 0); + mu_check(boundedMap.add("bar") == 0); + mu_check(boundedMap.full()); +} + +MU_TEST_SUITE(test_suite_deque_map) { + MU_RUN_TEST(test_deque_map); +} + +int main() { + MU_RUN_SUITE(test_suite_deque_map); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/monaco.pbf b/test/monaco.pbf new file mode 100644 index 00000000..6e6c3122 Binary files /dev/null and b/test/monaco.pbf differ diff --git a/test/options_parser.test.cpp b/test/options_parser.test.cpp new file mode 100644 index 00000000..e230fc0d --- /dev/null +++ b/test/options_parser.test.cpp @@ -0,0 +1,107 @@ +#include <vector> +#include "external/minunit.h" +#include "options_parser.h" + +const char* PROGRAM_NAME = "./tilemaker"; +using namespace OptionsParser; + +Options parse(std::vector<std::string>& args) { + const char* argv[100]; + + argv[0] = PROGRAM_NAME; + for(int i = 0; i < args.size(); i++) + argv[1 + i] = args[i].data(); + + return parse(1 + args.size(), argv); +} + +#define ASSERT_THROWS(MESSAGE, ...) 
\ +{ \ + std::vector<std::string> args = { __VA_ARGS__ }; \ + bool threw = false; \ + try { \ + auto opts = parse(args); \ + } catch(OptionsParser::OptionException& e) { \ + threw = std::string(e.what()).find(MESSAGE) != std::string::npos; \ + } \ + if (!threw) mu_check((std::string("expected exception with ") + MESSAGE).empty()); \ +} + +MU_TEST(test_options_parser) { + // No args is invalid. + ASSERT_THROWS("You must specify an output file"); + + // Output without input is invalid + ASSERT_THROWS("No source .osm.pbf", "--output", "foo.mbtiles"); + + // You can ask for --help. + { + std::vector<std::string> args = {"--help"}; + auto opts = parse(args); + mu_check(opts.showHelp); + } + + // Minimal valid is output and input + { + std::vector<std::string> args = {"--output", "foo.mbtiles", "--input", "ontario.pbf"}; + auto opts = parse(args); + mu_check(opts.inputFiles.size() == 1); + mu_check(opts.inputFiles[0] == "ontario.pbf"); + mu_check(opts.outputFile == "foo.mbtiles"); + mu_check(opts.outputMode == OutputMode::MBTiles); + mu_check(opts.osm.materializeGeometries); + mu_check(!opts.osm.shardStores); + } + + // --lazy-geometries overrides default + { + std::vector<std::string> args = {"--output", "foo.mbtiles", "--input", "ontario.pbf", "--lazy-geometries"}; + auto opts = parse(args); + mu_check(opts.inputFiles.size() == 1); + mu_check(opts.inputFiles[0] == "ontario.pbf"); + mu_check(opts.outputFile == "foo.mbtiles"); + mu_check(opts.outputMode == OutputMode::MBTiles); + mu_check(!opts.osm.materializeGeometries); + mu_check(opts.osm.lazyGeometries); + mu_check(!opts.osm.shardStores); + } + + // --store should optimize for reduced memory + { + std::vector<std::string> args = {"--output", "foo.mbtiles", "--input", "ontario.pbf", "--store", "/tmp/store"}; + auto opts = parse(args); + mu_check(opts.inputFiles.size() == 1); + mu_check(opts.inputFiles[0] == "ontario.pbf"); + mu_check(opts.outputFile == "foo.mbtiles"); + mu_check(opts.outputMode == OutputMode::MBTiles); + mu_check(opts.osm.storeFile == "/tmp/store"); + 
mu_check(!opts.osm.materializeGeometries); + mu_check(opts.osm.shardStores); + } + + // --store --fast should optimize for speed + { + std::vector<std::string> args = {"--output", "foo.pmtiles", "--input", "ontario.pbf", "--store", "/tmp/store", "--fast"}; + auto opts = parse(args); + mu_check(opts.inputFiles.size() == 1); + mu_check(opts.inputFiles[0] == "ontario.pbf"); + mu_check(opts.outputFile == "foo.pmtiles"); + mu_check(opts.outputMode == OutputMode::PMTiles); + mu_check(opts.osm.storeFile == "/tmp/store"); + mu_check(!opts.osm.materializeGeometries); + mu_check(!opts.osm.shardStores); + } + + ASSERT_THROWS("Couldn't open .json config", "--input", "foo", "--output", "bar", "--config", "nonexistent-config.json"); + ASSERT_THROWS("Couldn't open .lua script", "--input", "foo", "--output", "bar", "--process", "nonexistent-script.lua"); +} + +MU_TEST_SUITE(test_suite_options_parser) { + MU_RUN_TEST(test_options_parser); +} + +int main() { + MU_RUN_SUITE(test_suite_options_parser); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/pbf_reader.test.cpp b/test/pbf_reader.test.cpp new file mode 100644 index 00000000..8d4c8fad --- /dev/null +++ b/test/pbf_reader.test.cpp @@ -0,0 +1,135 @@ +#include <fstream> +#include <iostream> +#include <string> +#include "external/minunit.h" +#include "pbf_reader.h" + +MU_TEST(test_pbf_reader) { + std::string filename; + filename = "test/monaco.pbf"; +// filename = "/home/cldellow/Downloads/north-america-latest.osm.pbf"; +// filename = "/home/cldellow/Downloads/great-britain-latest.osm.pbf"; +// filename = "/home/cldellow/Downloads/nova-scotia-latest.osm.pbf"; + std::ifstream monaco(filename, std::ifstream::in); + + PbfReader::PbfReader reader; + PbfReader::BlobHeader bh = reader.readBlobHeader(monaco); + protozero::data_view blob = reader.readBlob(bh.datasize, monaco); + PbfReader::HeaderBlock header = reader.readHeaderBlock(blob); + + mu_check(header.hasBbox); + mu_check(header.optionalFeatures.size() == 1); + 
mu_check(header.optionalFeatures.find("Sort.Type_then_ID") != header.optionalFeatures.end()); + + mu_check(header.bbox.minLon == 7.409205); + mu_check(header.bbox.maxLon == 7.448637); + mu_check(header.bbox.minLat == 43.723350); + mu_check(header.bbox.maxLat == 43.751690); + + + bool foundNode = false, foundWay = false, foundRelation = false; + int blocks = 0, groups = 0, strings = 0, nodes = 0, ways = 0, relations = 0; + while (!monaco.eof()) { + bh = reader.readBlobHeader(monaco); + if (bh.type == "eof") + break; + + + blocks++; + blob = reader.readBlob(bh.datasize, monaco); + + PbfReader::PrimitiveBlock pb = reader.readPrimitiveBlock(blob); + + for (const auto str : pb.stringTable) { + if (strings == 200) { + std::string s(str.data(), str.size()); + mu_check(s == "description:FR"); + } + strings++; + } + + for (const auto& group : pb.groups()) { + groups++; + for (const auto& node : group.nodes()) { + nodes++; + + if (node.id == 21911886) { + foundNode = true; + + bool foundHighwayCrossing = false; + + for (int i = node.tagStart; i < node.tagEnd; i += 2) { + const auto keyIndex = group.translateNodeKeyValue(i); + const auto valueIndex = group.translateNodeKeyValue(i + 1); + std::string key(pb.stringTable[keyIndex].data(), pb.stringTable[keyIndex].size()); + std::string value(pb.stringTable[valueIndex].data(), pb.stringTable[valueIndex].size()); + + if (key == "highway" && value == "crossing") + foundHighwayCrossing = true; + } + mu_check(foundHighwayCrossing); + } + } + + for (const auto& way : group.ways()) { + ways++; + + if (way.id == 4224978) { + foundWay = true; + + bool foundSportSoccer = false; + for (int i = 0; i < way.keys.size(); i++) { + std::string key(pb.stringTable[way.keys[i]].data(), pb.stringTable[way.keys[i]].size()); + std::string value(pb.stringTable[way.vals[i]].data(), pb.stringTable[way.vals[i]].size()); + + if (key == "sport" && value == "soccer") + foundSportSoccer = true; + } + mu_check(foundSportSoccer); + + mu_check(way.refs.size() == 
5); + mu_check(way.refs[0] == 25178088); + mu_check(way.refs[2] == 25178045); + mu_check(way.refs[4] == 25178088); + } + } + + for (const auto& relation : group.relations()) { + relations++; + + if (relation.id == 1124039) { + foundRelation = true; + mu_check(relation.memids.size() == 17); + mu_check(relation.types.size() == 17); + mu_check(relation.roles_sid.size() == 17); + mu_check(relation.types[0] == PbfReader::Relation::MemberType::NODE); + mu_check(relation.types[2] == PbfReader::Relation::MemberType::WAY); + mu_check(relation.types[16] == PbfReader::Relation::MemberType::RELATION); + } + } + } + } + + //std::cout << blocks << " blocks, " << groups << " groups, " << nodes << " nodes, " << ways << " ways, " << relations << " relations" << std::endl; + + mu_check(foundNode); + mu_check(foundWay); + mu_check(foundRelation); + + mu_check(blocks == 6); + mu_check(groups == 6); + mu_check(strings == 8236); + mu_check(nodes == 30477); + mu_check(ways == 4825); + mu_check(relations == 285); +} + +MU_TEST_SUITE(test_suite_pbf_reader) { + MU_RUN_TEST(test_pbf_reader); +} + +int main() { + MU_RUN_SUITE(test_suite_pbf_reader); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/pooled_string.test.cpp b/test/pooled_string.test.cpp new file mode 100644 index 00000000..91fb2da5 --- /dev/null +++ b/test/pooled_string.test.cpp @@ -0,0 +1,55 @@ +#include +#include "external/minunit.h" +#include "pooled_string.h" + +MU_TEST(test_pooled_string) { + mu_check(PooledString("").size() == 0); + mu_check(PooledString("").toString() == ""); + mu_check(PooledString("f").size() == 1); + mu_check(PooledString("f").toString() == "f"); + mu_check(PooledString("hi").size() == 2); + mu_check(PooledString("f") == PooledString("f")); + mu_check(PooledString("f") != PooledString("g")); + + mu_check(PooledString("this is more than fifteen bytes").size() == 31); + mu_check(PooledString("this is more than fifteen bytes") != PooledString("f")); + + PooledString big("this is also a really 
long string"); + mu_check(big == big); + mu_check(big.toString() == "this is also a really long string"); + + PooledString big2("this is also a quite long string"); + mu_check(big != big2); + mu_check(big.toString() != big2.toString()); + + std::string shortString("short"); + std::string longString("this is a very long string"); + + PooledString stdShortString(&shortString); + mu_check(stdShortString.size() == 5); + mu_check(stdShortString.toString() == "short"); + + PooledString stdLongString(&longString); + mu_check(stdLongString.size() == 26); + mu_check(stdLongString.toString() == "this is a very long string"); + + // PooledStrings that are backed by std::string have the usual + // == semantics. + mu_check(stdShortString == PooledString("short")); + mu_check(PooledString("short") == stdShortString); + + mu_check(stdLongString == PooledString("this is a very long string")); + mu_check(PooledString("this is a very long string") == stdLongString); + + mu_check(stdShortString != stdLongString); +} + +MU_TEST_SUITE(test_suite_pooled_string) { + MU_RUN_TEST(test_pooled_string); +} + +int main() { + MU_RUN_SUITE(test_suite_pooled_string); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/sorted_node_store.test.cpp b/test/sorted_node_store.test.cpp new file mode 100644 index 00000000..de66445f --- /dev/null +++ b/test/sorted_node_store.test.cpp @@ -0,0 +1,41 @@ +#include +#include "external/minunit.h" +#include "sorted_node_store.h" + +MU_TEST(test_sorted_node_store) { + bool compressed = true; + + for (int i = 0; i < 2; i++) { + compressed = !compressed; + SortedNodeStore s1(compressed), s2(compressed); + mu_check(s1.size() == 0); + mu_check(s2.size() == 0); + + s1.batchStart(); + s2.batchStart(); + + s1.insert({ {1, {2, 3 } } }); + s2.insert({ {2, {3, 4 } } }); + + s1.finalize(1); + s2.finalize(1); + + mu_check(s1.size() == 1); + mu_check(s1.at(1) == LatpLon({2, 3})); + mu_check(s1.contains(0, 1)); + mu_check(!s1.contains(0, 2)); + mu_check(!s1.contains(0, 
1ull << 34)); + mu_check(s2.size() == 1); + mu_check(s2.at(2) == LatpLon({3, 4})); + } +} + +MU_TEST_SUITE(test_suite_sorted_node_store) { + MU_RUN_TEST(test_sorted_node_store); +} + +int main() { + MU_RUN_SUITE(test_suite_sorted_node_store); + MU_REPORT(); + return MU_EXIT_CODE; +} diff --git a/test/sorted_way_store.test.cpp b/test/sorted_way_store.test.cpp index 1c50a494..65d34816 100644 --- a/test/sorted_way_store.test.cpp +++ b/test/sorted_way_store.test.cpp @@ -13,6 +13,10 @@ class TestNodeStore : public NodeStore { return { (int32_t)id, -(int32_t)id }; } void insert(const std::vector>& elements) override {} + + bool contains(size_t shard, NodeID id) const override { return true; } + size_t shard() const override { return 0; } + size_t shards() const override { return 1; } }; void roundtripWay(const std::vector& way) { @@ -70,6 +74,39 @@ MU_TEST(test_encode_way) { } } +MU_TEST(test_multiple_stores) { + bool compressed = false; + + for (int i = 0; i < 2; i++) { + compressed = !compressed; + TestNodeStore ns; + SortedWayStore s1(compressed, ns), s2(compressed, ns); + s1.batchStart(); + s2.batchStart(); + + s1.insertNodes({{ 1, { 1 } }}); + + // We store small ways differently than large ways, so + // store both kinds for testing. + std::vector longWay; + for (int i = 200; i < 2048; i++) + longWay.push_back(i + 3 * (i % 37)); + + s1.insertNodes({{ 42, longWay }}); + s2.insertNodes({{ 2, { 2 } }}); + + s1.finalize(1); + s2.finalize(1); + + mu_check(s1.size() == 2); + mu_check(s2.size() == 1); + + mu_check(s1.contains(0, 1)); + mu_check(s1.contains(0, 42)); + mu_check(!s1.contains(0, 2)); + } +} + MU_TEST(test_way_store) { TestNodeStore ns; SortedWayStore sws(true, ns); @@ -178,6 +215,7 @@ MU_TEST(test_populate_mask) { MU_TEST_SUITE(test_suite_sorted_way_store) { MU_RUN_TEST(test_encode_way); + MU_RUN_TEST(test_multiple_stores); MU_RUN_TEST(test_way_store); }