diff --git a/CMakeLists.txt b/CMakeLists.txt index ad353d84..423980c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,10 +33,6 @@ ENDIF () find_package(Boost 1.66 REQUIRED COMPONENTS system filesystem program_options iostreams) -set(CMAKE_FIND_PACKAGE_PREFER_CONFIG ON) -find_package(Protobuf REQUIRED) -set(CMAKE_FIND_PACKAGE_PREFER_CONFIG OFF) - find_package(libshp REQUIRED) find_package(Rapidjson REQUIRED) @@ -72,19 +68,6 @@ else() set(THREAD_LIB pthread) endif() -if(NOT PROTOBUF_PROTOC_EXECUTABLE) - set (PROTOBUF_PROTOC_EXECUTABLE "protobuf::protoc") -endif() - - -ADD_CUSTOM_COMMAND(OUTPUT vector_tile.pb.cc vector_tile.pb.h - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --cpp_out ${CMAKE_BINARY_DIR} -I ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/vector_tile.proto) - -ADD_CUSTOM_COMMAND(OUTPUT osmformat.pb.cc osmformat.pb.h - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --cpp_out ${CMAKE_BINARY_DIR} -I ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/osmformat.proto) - file(GLOB tilemaker_src_files src/attribute_store.cpp src/coordinates.cpp @@ -97,31 +80,34 @@ file(GLOB tilemaker_src_files src/mbtiles.cpp src/mmap_allocator.cpp src/node_stores.cpp + src/options_parser.cpp src/osm_lua_processing.cpp src/osm_mem_tiles.cpp src/osm_store.cpp src/output_object.cpp - src/pbf_blocks.cpp + src/pbf_processor.cpp + src/pbf_reader.cpp src/pmtiles.cpp - src/read_pbf.cpp + src/pooled_string.cpp src/read_shp.cpp + src/sharded_node_store.cpp + src/sharded_way_store.cpp src/shared_data.cpp src/shp_mem_tiles.cpp src/sorted_node_store.cpp src/sorted_way_store.cpp + src/tag_map.cpp src/tile_data.cpp src/tilemaker.cpp src/tile_worker.cpp src/way_stores.cpp - src/write_geometry.cpp ) -add_executable(tilemaker vector_tile.pb.cc osmformat.pb.cc ${tilemaker_src_files}) +add_executable(tilemaker ${tilemaker_src_files}) target_include_directories(tilemaker PRIVATE include) target_include_directories(tilemaker PRIVATE ${CMAKE_BINARY_DIR}) # for generated files 
target_link_libraries(tilemaker ${THREAD_LIB} ${CMAKE_DL_LIBS} ${LUA_LIBRARIES} - protobuf::libprotobuf shapelib::shp SQLite::SQLite3 ZLIB::ZLIB diff --git a/Dockerfile b/Dockerfile index 82fd4d50..dc172f7d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,7 @@ RUN apt-get update && \ build-essential \ liblua5.1-0 \ liblua5.1-0-dev \ - libprotobuf-dev \ libsqlite3-dev \ - protobuf-compiler \ shapelib \ libshp-dev \ libboost-program-options-dev \ @@ -36,7 +34,6 @@ FROM debian:bullseye-slim RUN apt-get update && \ apt-get install -y --no-install-recommends \ liblua5.1-0 \ - libprotobuf-dev \ libshp-dev \ libsqlite3-dev \ libboost-filesystem-dev \ @@ -49,4 +46,4 @@ COPY process.lua . COPY config.json . # Entrypoint for docker, wrapped with /bin/sh to remove requirement for executable permissions on script -ENTRYPOINT ["/bin/sh", "/resources/docker-entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/bin/sh", "/resources/docker-entrypoint.sh"] diff --git a/Makefile b/Makefile index 45b7c8af..d98b5df4 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,7 @@ prefix = /usr/local MANPREFIX := /usr/share/man TM_VERSION ?= $(shell git describe --tags --abbrev=0) CXXFLAGS ?= -O3 -Wall -Wno-unknown-pragmas -Wno-sign-compare -std=c++14 -pthread -fPIE -DTM_VERSION=$(TM_VERSION) $(CONFIG) -LIB := -L$(PLATFORM_PATH)/lib -lz $(LUA_LIBS) -lboost_program_options -lsqlite3 -lboost_filesystem -lboost_system -lboost_iostreams -lprotobuf -lshp -pthread +LIB := -L$(PLATFORM_PATH)/lib -lz $(LUA_LIBS) -lboost_program_options -lsqlite3 -lboost_filesystem -lboost_system -lboost_iostreams -lshp -pthread INC := -I$(PLATFORM_PATH)/include -isystem ./include -I./src $(LUA_CFLAGS) # Targets @@ -93,8 +93,6 @@ INC := -I$(PLATFORM_PATH)/include -isystem ./include -I./src $(LUA_CFLAGS) all: tilemaker tilemaker: \ - include/osmformat.pb.o \ - include/vector_tile.pb.o \ src/attribute_store.o \ src/coordinates_geom.o \ src/coordinates.o \ @@ -106,26 +104,73 @@ tilemaker: \ src/mbtiles.o \ 
src/mmap_allocator.o \ src/node_stores.o \ + src/options_parser.o \ src/osm_lua_processing.o \ src/osm_mem_tiles.o \ src/osm_store.o \ src/output_object.o \ - src/pbf_blocks.o \ + src/pbf_processor.o \ + src/pbf_reader.o \ src/pmtiles.o \ - src/read_pbf.o \ + src/pooled_string.o \ src/read_shp.o \ + src/sharded_node_store.o \ + src/sharded_way_store.o \ src/shared_data.o \ src/shp_mem_tiles.o \ src/sorted_node_store.o \ src/sorted_way_store.o \ + src/tag_map.o \ src/tile_data.o \ src/tilemaker.o \ src/tile_worker.o \ - src/way_stores.o \ - src/write_geometry.o + src/way_stores.o $(CXX) $(CXXFLAGS) -o tilemaker $^ $(INC) $(LIB) $(LDFLAGS) -test: test_sorted_way_store +test: \ + test_append_vector \ + test_attribute_store \ + test_deque_map \ + test_pbf_reader \ + test_pooled_string \ + test_sorted_node_store \ + test_sorted_way_store + +test_append_vector: \ + src/mmap_allocator.o \ + test/append_vector.test.o + $(CXX) $(CXXFLAGS) -o test.append_vector $^ $(INC) $(LIB) $(LDFLAGS) && ./test.append_vector + +test_attribute_store: \ + src/mmap_allocator.o \ + src/attribute_store.o \ + src/pooled_string.o \ + test/attribute_store.test.o + $(CXX) $(CXXFLAGS) -o test.attribute_store $^ $(INC) $(LIB) $(LDFLAGS) && ./test.attribute_store + +test_deque_map: \ + test/deque_map.test.o + $(CXX) $(CXXFLAGS) -o test.deque_map $^ $(INC) $(LIB) $(LDFLAGS) && ./test.deque_map + +test_options_parser: \ + src/options_parser.o \ + test/options_parser.test.o + $(CXX) $(CXXFLAGS) -o test.options_parser $^ $(INC) $(LIB) $(LDFLAGS) && ./test.options_parser + +test_pooled_string: \ + src/mmap_allocator.o \ + src/pooled_string.o \ + test/pooled_string.test.o + $(CXX) $(CXXFLAGS) -o test.pooled_string $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pooled_string + +test_sorted_node_store: \ + src/external/streamvbyte_decode.o \ + src/external/streamvbyte_encode.o \ + src/external/streamvbyte_zigzag.o \ + src/mmap_allocator.o \ + src/sorted_node_store.o \ + test/sorted_node_store.test.o + $(CXX) 
$(CXXFLAGS) -o test.sorted_node_store $^ $(INC) $(LIB) $(LDFLAGS) && ./test.sorted_node_store test_sorted_way_store: \ src/external/streamvbyte_decode.o \ @@ -133,9 +178,14 @@ test_sorted_way_store: \ src/external/streamvbyte_zigzag.o \ src/mmap_allocator.o \ src/sorted_way_store.o \ - src/sorted_way_store.test.o + test/sorted_way_store.test.o $(CXX) $(CXXFLAGS) -o test.sorted_way_store $^ $(INC) $(LIB) $(LDFLAGS) && ./test.sorted_way_store +test_pbf_reader: \ + src/helpers.o \ + src/pbf_reader.o \ + test/pbf_reader.test.o + $(CXX) $(CXXFLAGS) -o test.pbf_reader $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pbf_reader %.o: %.cpp $(CXX) $(CXXFLAGS) -o $@ -c $< $(INC) @@ -143,9 +193,6 @@ test_sorted_way_store: \ %.o: %.cc $(CXX) $(CXXFLAGS) -o $@ -c $< $(INC) -%.pb.cc: %.proto - protoc --proto_path=include --cpp_out=include $< - install: install -m 0755 -d $(DESTDIR)$(prefix)/bin/ install -m 0755 tilemaker $(DESTDIR)$(prefix)/bin/ @@ -153,6 +200,6 @@ install: install docs/man/tilemaker.1 ${DESTDIR}${MANPREFIX}/man1/ clean: - rm -f tilemaker src/*.o src/external/*.o include/*.o include/*.pb.h + rm -f tilemaker src/*.o src/external/*.o include/*.o include/*.pb.h test/*.o .PHONY: install diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index d41fba9b..d605d153 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -107,13 +107,16 @@ For example: ### Lua processing -Your Lua file needs to supply 5 things: +Your Lua file needs to supply a few things: 1. `node_keys`, a list of those OSM keys which indicate that a node should be processed -2. `init_function(name)` (optional), a function to initialize Lua logic -2. `node_function(node)`, a function to process an OSM node and add it to layers -3. `way_function(way)`, a function to process an OSM way and add it to layers -3. `exit_function` (optional), a function to finalize Lua logic (useful to show statistics) +2. `node_function()`, a function to process an OSM node and add it to layers +3. 
`way_function()`, a function to process an OSM way and add it to layers +4. (optional) `init_function(name)`, a function to initialize Lua logic +5. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics) +6. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation +7. (optional) `relation_function`, a function to process an OSM relation and add it to layers +8. (optional) `attribute_function`, a function to remap attributes from shapefiles `node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tag keys. If a node has one of those keys, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.) @@ -127,28 +130,30 @@ Note the order: you write to a layer first, then set attributes after. To do that, you use these methods: -* `node:Find(key)` or `way:Find(key)`: get the value for a tag, or the empty string if not present. For example, `way:Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. -* `node:Holds(key)` or `way:Holds(key)`: returns true if that key exists, false otherwise. -* `node:Layer("layer_name", false)` or `way:Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. -* `way:LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). -* `node:Attribute(key,value,minzoom)` or `node:Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. 
Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. -* `node:AttributeNumeric(key,value,minzoom)`, `node:AttributeBoolean(key,value,minzoom)` (and `way:`...): for numeric/boolean columns. -* `node:Id()` or `way:Id()`: get the OSM ID of the current object. -* `node:ZOrder(number)` or `way:ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. -* `node:MinZoom(zoom)` or `way:MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `:MinZoom(5)` will have no effect if your layer only starts at z6). -* `way:Length()` and `way:Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. -* `way:Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). +* `Find(key)`: get the value for a tag, or the empty string if not present. For example, `Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. +* `Holds(key)`: returns true if that key exists, false otherwise. +* `Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. +* `LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). +* `Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. 
Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. +* `AttributeNumeric(key,value,minzoom)`, `AttributeBoolean(key,value,minzoom)`: for numeric/boolean columns. +* `Id()`: get the OSM ID of the current object. +* `ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. +* `MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `MinZoom(5)` will have no effect if your layer only starts at z6). +* `Length()` and `Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. +* `Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). The simplest possible function, to include roads/paths and nothing else, might look like this: - function way_function(way) - local highway = way:Find("highway") +```lua + function way_function() + local highway = Find("highway") if highway~="" then - way:Layer("roads", false) - way:Attribute("name", way:Find("name")) - way:Attribute("type", highway) + Layer("roads", false) + Attribute("name", Find("name")) + Attribute("type", highway) end end +``` Take a look at the supplied process.lua for a simple example, or the more complex OpenMapTiles-compatible script in `resources/`. You can specify another filename with the `--process` option. 
@@ -197,11 +202,11 @@ When processing OSM objects with your Lua script, you can perform simple spatial You can then find out whether a node is within one of these polygons using the `Intersects` method: - if node:Intersects("countries") then print("Looks like it's on land"); end + if Intersects("countries") then print("Looks like it's on land"); end Or you can find out what country(/ies) the node is within using `FindIntersecting`, which returns a table: - names = node:FindIntersecting("countries") + names = FindIntersecting("countries") print(table.concat(name,",")) To enable these functions, set `index` to true in your shapefile layer definition. `index_column` is not needed for `Intersects` but required for `FindIntersecting`. diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 497cfa48..b9aa2d74 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -4,7 +4,7 @@ Install all dependencies with Homebrew: - brew install protobuf boost lua51 shapelib rapidjson + brew install boost lua51 shapelib rapidjson Then: @@ -15,7 +15,7 @@ Then: Start with: - sudo apt install build-essential libboost-dev libboost-filesystem-dev libboost-iostreams-dev libboost-program-options-dev libboost-system-dev liblua5.1-0-dev libprotobuf-dev libshp-dev libsqlite3-dev protobuf-compiler rapidjson-dev + sudo apt install build-essential libboost-dev libboost-filesystem-dev libboost-iostreams-dev libboost-program-options-dev libboost-system-dev liblua5.1-0-dev libshp-dev libsqlite3-dev rapidjson-dev Once you've installed those, then `cd` back to your Tilemaker directory and simply: @@ -28,7 +28,7 @@ If it fails, check that the LIB and INC lines in the Makefile correspond with yo Start with: - dnf install lua-devel luajit-devel sqlite-devel protobuf-devel protobuf-compiler shapelib-devel rapidjson + dnf install lua-devel luajit-devel sqlite-devel shapelib-devel rapidjson then build either with lua: diff --git a/docs/RELATIONS.md b/docs/RELATIONS.md index 6e436b68..6fc3b557 100644 --- 
a/docs/RELATIONS.md +++ b/docs/RELATIONS.md @@ -22,26 +22,30 @@ This is a two-stage process: first, when reading relations, indicate that these To define which relations should be accepted, add a `relation_scan_function`: - function relation_scan_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - local network = relation:Find("network") - if network=="ncn" then relation:Accept() end +```lua + function relation_scan_function() + if Find("type")=="route" and Find("route")=="bicycle" then + local network = Find("network") + if network=="ncn" then Accept() end end end +``` -This function takes the relation as its sole argument. Examine the tags using `relation:Find(key)` as normal. (You can also use `relation:Holds(key)` and `relation:Id()`.) If you want to use this relation, call `relation:Accept()`. +Examine the tags using `Find(key)` as normal. (You can also use `Holds(key)` and `Id()`.) If you want to use this relation, call `Accept()`. #### Stage 2: accessing relations from ways -Now that you've accepted the relations, they will be available from `way_function`. They are accessed using an iterator (`way:NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `way:FindInRelation(key)`. For example: +Now that you've accepted the relations, they will be available from `way_function`. They are accessed using an iterator (`NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `FindInRelation(key)`. 
For example: +```lua while true do - local rel = way:NextRelation() + local rel = NextRelation() if not rel then break end - print ("Part of route "..way:FindInRelation("ref")) + print ("Part of route "..FindInRelation("ref")) end +``` -(Should you need to re-read the relations, you can reset the iterator with `way:RestartRelations()`.) +(Should you need to re-read the relations, you can reset the iterator with `RestartRelations()`.) ### Writing relation geometries @@ -52,13 +56,15 @@ First, make sure that you have accepted the relations using `relation_scan_funct Then write a `relation_function`, which works in the same way as `way_function` would: - function relation_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - relation:Layer("bike_routes", false) - relation:Attribute("class", relation:Find("network")) - relation:Attribute("ref", relation:Find("ref")) +```lua + function relation_function() + if Find("type")=="route" and Find("route")=="bicycle" then + Layer("bike_routes", false) + Attribute("class", Find("network")) + Attribute("ref", Find("ref")) end end +``` ### Not supported diff --git a/include/append_vector.h b/include/append_vector.h new file mode 100644 index 00000000..07531217 --- /dev/null +++ b/include/append_vector.h @@ -0,0 +1,195 @@ +#ifndef _APPEND_VECTOR_H +#define _APPEND_VECTOR_H + +#include "mmap_allocator.h" +#include +#include + +// Tilemaker collects OutputObjects in a list that +// - spills to disk +// - only gets appended to +// +// Vector is great for linear access, but resizes cause expensive disk I/O to +// copy elements. +// +// Deque is great for growing without disk I/O, but it allocates in blocks of 512, +// which is inefficient for linear access. +// +// Instead, we author a limited vector-of-vectors class that allocates in bigger chunks, +// to get the best of both worlds. 
+ +#define APPEND_VECTOR_SIZE 8192 +namespace AppendVectorNS { + template <class T> + class AppendVector { + public: + struct Iterator { + using iterator_category = std::random_access_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + + Iterator(AppendVector& appendVector, int32_t vec, int32_t offset): + appendVector(&appendVector), vec(vec), offset(offset) {} + + Iterator(): + appendVector(nullptr), vec(0), offset(0) {} + + + bool operator<(const Iterator& other) const { + if (vec < other.vec) + return true; + + if (vec > other.vec) + return false; + + return offset < other.offset; + } + + bool operator>=(const Iterator& other) const { + return !(*this < other); + } + + Iterator operator-(int delta) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute -= delta; + return Iterator(*appendVector, absolute / APPEND_VECTOR_SIZE, absolute % APPEND_VECTOR_SIZE); + } + + Iterator operator+(int delta) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute += delta; + return Iterator(*appendVector, absolute / APPEND_VECTOR_SIZE, absolute % APPEND_VECTOR_SIZE); + } + + bool operator==(const Iterator& other) const { + return appendVector == other.appendVector && vec == other.vec && offset == other.offset; + } + + bool operator!=(const Iterator& other) const { + return !(*this == other); + } + + std::ptrdiff_t operator-(const Iterator& other) const { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + int64_t otherAbsolute = other.vec * APPEND_VECTOR_SIZE + other.offset; + + return absolute - otherAbsolute; + } + + reference operator*() const { + auto& vector = appendVector->vecs[vec]; + auto& el = vector[offset]; + return el; + } + + pointer operator->() const { + auto& vector = appendVector->vecs[vec]; + auto& el = vector[offset]; + return &el; + } + + Iterator& operator+= (int delta) { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute += delta; 
+ + vec = absolute / APPEND_VECTOR_SIZE; + offset = absolute % APPEND_VECTOR_SIZE; + return *this; + } + + Iterator& operator-= (int delta) { + int64_t absolute = vec * APPEND_VECTOR_SIZE + offset; + absolute -= delta; + + vec = absolute / APPEND_VECTOR_SIZE; + offset = absolute % APPEND_VECTOR_SIZE; + return *this; + } + + // Prefix increment + Iterator& operator++() { + offset++; + if (offset == APPEND_VECTOR_SIZE) { + offset = 0; + vec++; + } + return *this; + } + + // Postfix increment + Iterator operator++(int) { Iterator tmp = *this; ++(*this); return tmp; } + + // Prefix decrement + Iterator& operator--() { + if (offset > 0) { + offset--; + } else { + vec--; + offset = APPEND_VECTOR_SIZE - 1; + } + + return *this; + } + + // Postfix decrement + Iterator operator--(int) { Iterator tmp = *this; --(*this); return tmp; } + + private: + mutable AppendVector* appendVector; + int32_t vec, offset; + }; + + AppendVector(): + count(0), + vecs(1) { + } + + void clear() { + count = 0; + vecs.clear(); + vecs.push_back(std::vector>()); + vecs.back().reserve(APPEND_VECTOR_SIZE); + } + + size_t size() const { + return count; + } + + T& operator [](int idx) { + auto& vec = vecs[idx / APPEND_VECTOR_SIZE]; + auto& el = vec[idx % APPEND_VECTOR_SIZE]; + return el; + } + + Iterator begin() { + return Iterator(*this, 0, 0); + } + + Iterator end() { + return Iterator(*this, vecs.size() - 1, count % APPEND_VECTOR_SIZE); + } + + void push_back(const T& el) { + if (vecs.back().capacity() == 0) + vecs.back().reserve(APPEND_VECTOR_SIZE); + + vecs.back().push_back(el); + + if (vecs.back().size() == vecs.back().capacity()) { + vecs.push_back(std::vector>()); + vecs.back().reserve(APPEND_VECTOR_SIZE); + } + + count++; + } + + size_t count; + std::deque>> vecs; + }; +} + +#undef APPEND_VECTOR_SIZE + +#endif diff --git a/include/attribute_store.h b/include/attribute_store.h index ad1aa4e1..6f11ba00 100644 --- a/include/attribute_store.h +++ b/include/attribute_store.h @@ -10,6 +10,8 @@ 
#include #include #include +#include "pooled_string.h" +#include "deque_map.h" /* AttributeStore - global dictionary for attributes */ @@ -39,26 +41,67 @@ class AttributeKeyStore { std::map keys2index; }; -enum class AttributePairType: char { False = 0, True = 1, Float = 2, String = 3 }; +enum class AttributePairType: char { Bool = 0, Float = 1, String = 2 }; // AttributePair is a key/value pair (with minzoom) +#pragma pack(push, 1) struct AttributePair { - std::string stringValue_; - float floatValue_; - short keyIndex; - char minzoom; - AttributePairType valueType; + short keyIndex : 9; + AttributePairType valueType : 3; + char minzoom : 4; + union { + float floatValue_; + PooledString stringValue_; + }; AttributePair(uint32_t keyIndex, bool value, char minzoom) - : keyIndex(keyIndex), valueType(value ? AttributePairType::True : AttributePairType::False), minzoom(minzoom) + : keyIndex(keyIndex), valueType(AttributePairType::Bool), minzoom(minzoom), floatValue_(value ? 1 : 0) { } - AttributePair(uint32_t keyIndex, const std::string& value, char minzoom) + AttributePair(uint32_t keyIndex, const PooledString& value, char minzoom) : keyIndex(keyIndex), valueType(AttributePairType::String), stringValue_(value), minzoom(minzoom) { } AttributePair(uint32_t keyIndex, float value, char minzoom) - : keyIndex(keyIndex), valueType(AttributePairType::Float), floatValue_(value), minzoom(minzoom) + : keyIndex(keyIndex), valueType(AttributePairType::Float), minzoom(minzoom), floatValue_(value) + { + } + + AttributePair(const AttributePair& other): + keyIndex(other.keyIndex), valueType(other.valueType), minzoom(other.minzoom) { + if (valueType == AttributePairType::Bool || valueType == AttributePairType::Float) { + floatValue_ = other.floatValue_; + return; + } + + stringValue_ = other.stringValue_; + } + + AttributePair& operator=(const AttributePair& other) { + keyIndex = other.keyIndex; + valueType = other.valueType; + minzoom = other.minzoom; + + if (valueType == 
AttributePairType::Bool || valueType == AttributePairType::Float) { + floatValue_ = other.floatValue_; + return *this; + } + + stringValue_ = other.stringValue_; + return *this; + } + + bool operator<(const AttributePair& other) const { + if (minzoom != other.minzoom) + return minzoom < other.minzoom; + if (keyIndex != other.keyIndex) + return keyIndex < other.keyIndex; + if (valueType != other.valueType) return valueType < other.valueType; + + if (hasStringValue()) return pooledString() < other.pooledString(); + if (hasBoolValue()) return boolValue() < other.boolValue(); + if (hasFloatValue()) return floatValue() < other.floatValue(); + throw std::runtime_error("Invalid type in attribute store"); } bool operator==(const AttributePair &other) const { @@ -66,7 +109,7 @@ struct AttributePair { if (valueType == AttributePairType::String) return stringValue_ == other.stringValue_; - if (valueType == AttributePairType::Float) + if (valueType == AttributePairType::Float || valueType == AttributePairType::Bool) return floatValue_ == other.floatValue_; return true; @@ -74,13 +117,16 @@ struct AttributePair { bool hasStringValue() const { return valueType == AttributePairType::String; } bool hasFloatValue() const { return valueType == AttributePairType::Float; } - bool hasBoolValue() const { return valueType == AttributePairType::True || valueType == AttributePairType::False; }; + bool hasBoolValue() const { return valueType == AttributePairType::Bool; } - const std::string& stringValue() const { return stringValue_; } + const PooledString& pooledString() const { return stringValue_; } + const std::string stringValue() const { return stringValue_.toString(); } float floatValue() const { return floatValue_; } - bool boolValue() const { return valueType == AttributePairType::True; } + bool boolValue() const { return floatValue_; } - static bool isHot(const AttributePair& pair, const std::string& keyName) { + void ensureStringIsOwned(); + + static bool isHot(const std::string& 
keyName, const std::string& value) { // Is this pair a candidate for the hot pool? // Hot pairs are pairs that we think are likely to be re-used, like @@ -89,25 +135,11 @@ struct AttributePair { // The trick is that we commit to putting them in the hot pool // before we know if we were right. - // All boolean pairs are eligible. - if (pair.hasBoolValue()) - return true; - - // Small integers are eligible. - if (pair.hasFloatValue()) { - float v = pair.floatValue(); - - if (ceil(v) == v && v >= 0 && v <= 25) - return true; - } - - // The remaining things should be strings, but just in case... - if (!pair.hasStringValue()) - return false; + // The rules for floats/booleans are managed in their addAttribute call. // Only strings that are IDish are eligible: only lowercase letters. bool ok = true; - for (const auto& c: pair.stringValue()) { + for (const auto& c: value) { if (c != '-' && c != '_' && (c < 'a' || c > 'z')) return false; } @@ -124,9 +156,10 @@ struct AttributePair { boost::hash_combine(rv, keyIndex); boost::hash_combine(rv, valueType); - if(hasStringValue()) - boost::hash_combine(rv, stringValue()); - else if(hasFloatValue()) + if(hasStringValue()) { + const char* data = pooledString().data(); + boost::hash_range(rv, data, data + pooledString().size()); + } else if(hasFloatValue()) boost::hash_combine(rv, floatValue()); else if(hasBoolValue()) boost::hash_combine(rv, boolValue()); @@ -137,6 +170,7 @@ struct AttributePair { return rv; } }; +#pragma pack(pop) // We shard the cold pools to reduce the odds of lock contention on @@ -149,46 +183,32 @@ struct AttributePair { #define SHARD_BITS 14 #define ATTRIBUTE_SHARDS (1 << SHARD_BITS) +class AttributeStore; class AttributePairStore { public: AttributePairStore(): finalized(false), - pairs(ATTRIBUTE_SHARDS), - pairsMaps(ATTRIBUTE_SHARDS), pairsMutex(ATTRIBUTE_SHARDS), - hotShardSize(0) + lookups(0), + lookupsUncached(0) { - // NB: the hot shard is stored in its own, pre-allocated vector. 
- // pairs[0] is _not_ the hot shard - hotShard.reserve(1 << 16); - for (size_t i = 0; i < 1 << 16; i++) - hotShard.push_back(AttributePair(0, false, 0)); + // The "hot" shard has a capacity of 64K, the others are unbounded. + pairs.push_back(DequeMap(1 << 16)); + // Reserve offset 0 as a sentinel + pairs[0].add(AttributePair(0, false, 0)); + for (size_t i = 1; i < ATTRIBUTE_SHARDS; i++) + pairs.push_back(DequeMap()); } void finalize() { finalized = true; } const AttributePair& getPair(uint32_t i) const; const AttributePair& getPairUnsafe(uint32_t i) const; - uint32_t addPair(const AttributePair& pair, bool isHot); - - struct key_value_less_ptr { - bool operator()(AttributePair const* lhs, AttributePair const* rhs) const { - if (lhs->minzoom != rhs->minzoom) - return lhs->minzoom < rhs->minzoom; - if (lhs->keyIndex != rhs->keyIndex) - return lhs->keyIndex < rhs->keyIndex; - if (lhs->valueType != rhs->valueType) return lhs->valueType < rhs->valueType; - - if (lhs->hasStringValue()) return lhs->stringValue() < rhs->stringValue(); - if (lhs->hasBoolValue()) return lhs->boolValue() < rhs->boolValue(); - if (lhs->hasFloatValue()) return lhs->floatValue() < rhs->floatValue(); - throw std::runtime_error("Invalid type in attribute store"); - } - }; + uint32_t addPair(AttributePair& pair, bool isHot); - std::vector> pairs; - std::vector> pairsMaps; private: + friend class AttributeStore; + std::vector> pairs; bool finalized; // We refer to all attribute pairs by index. // @@ -198,41 +218,39 @@ class AttributePairStore { // we suspect will be popular. It only ever has 64KB items, // so that we can reference it with a short. 
mutable std::vector pairsMutex; - std::atomic hotShardSize; - std::vector hotShard; + std::atomic lookupsUncached; + std::atomic lookups; }; // AttributeSet is a set of AttributePairs // = the complete attributes for one object struct AttributeSet { - struct less_ptr { - bool operator()(const AttributeSet* lhs, const AttributeSet* rhs) const { - if (lhs->useVector != rhs->useVector) - return lhs->useVector < rhs->useVector; - - if (lhs->useVector) { - if (lhs->intValues.size() != rhs->intValues.size()) - return lhs->intValues.size() < rhs->intValues.size(); - - for (int i = 0; i < lhs->intValues.size(); i++) { - if (lhs->intValues[i] != rhs->intValues[i]) { - return lhs->intValues[i] < rhs->intValues[i]; - } - } + bool operator<(const AttributeSet& other) const { + if (useVector != other.useVector) + return useVector < other.useVector; - return false; - } + if (useVector) { + if (intValues.size() != other.intValues.size()) + return intValues.size() < other.intValues.size(); - for (int i = 0; i < sizeof(lhs->shortValues)/sizeof(lhs->shortValues[0]); i++) { - if (lhs->shortValues[i] != rhs->shortValues[i]) { - return lhs->shortValues[i] < rhs->shortValues[i]; + for (int i = 0; i < intValues.size(); i++) { + if (intValues[i] != other.intValues[i]) { + return intValues[i] < other.intValues[i]; } } return false; } - }; + + for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++) { + if (shortValues[i] != other.shortValues[i]) { + return shortValues[i] < other.shortValues[i]; + } + } + + return false; + } size_t hash() const { // Values are in canonical form after finalizeSet is called, so @@ -253,6 +271,7 @@ struct AttributeSet { return idx; } + bool operator!=(const AttributeSet& other) const { return !(*this == other); } bool operator==(const AttributeSet &other) const { // Equivalent if, for every value in values, there is a value in other.values // whose pair is the same. 
@@ -380,6 +399,8 @@ struct AttributeSet { struct AttributeStore { AttributeIndex add(AttributeSet &attributes); std::vector getUnsafe(AttributeIndex index) const; + void reset(); // used for testing + size_t size() const; void reportSize() const; void finalize(); @@ -390,9 +411,9 @@ struct AttributeStore { AttributeStore(): finalized(false), sets(ATTRIBUTE_SHARDS), - setsMaps(ATTRIBUTE_SHARDS), setsMutex(ATTRIBUTE_SHARDS), - lookups(0) { + lookups(0), + lookupsUncached(0) { } AttributeKeyStore keyStore; @@ -400,11 +421,11 @@ struct AttributeStore { private: bool finalized; - std::vector> sets; - std::vector> setsMaps; + std::vector> sets; mutable std::vector setsMutex; mutable std::mutex mutex; + std::atomic lookupsUncached; std::atomic lookups; }; diff --git a/include/deque_map.h b/include/deque_map.h new file mode 100644 index 00000000..ea57f669 --- /dev/null +++ b/include/deque_map.h @@ -0,0 +1,132 @@ +#ifndef DEQUE_MAP_H +#define DEQUE_MAP_H + +#include +#include +#include +#include +#include + +// A class which looks deep within the soul of some instance of +// a class T and assigns it a number based on the order in which +// it joined (or reminds it of its number). +// +// Used to translate an 8-byte pointer into a 4-byte ID that can be +// used repeatedly. +template +class DequeMap { +public: + DequeMap(): maxSize(0) {} + DequeMap(uint32_t maxSize): maxSize(maxSize) {} + + bool full() const { + return maxSize != 0 && size() == maxSize; + } + + // If `entry` is already in the map, return its index. + // Otherwise, if maxSize is `0`, or greater than the number of entries in the map, + // add the item and return its index. + // Otherwise, return -1. + int32_t add(const T& entry) { + // Search to see if we've already got this entry. 
+ const auto offsets = boost::irange(0, keys.size()); + const auto it = std::lower_bound( + offsets.begin(), + offsets.end(), + entry, + [&](const auto &e, auto id) { + return objects.at(keys[e]) < id; + } + ); + + // We do, return its index. + if (it != offsets.end() && objects[keys[*it]] == entry) + return keys[*it]; + + if (maxSize > 0 && objects.size() >= maxSize) + return -1; + + // We don't, so store it... + const uint32_t newIndex = objects.size(); + objects.push_back(entry); + + // ...and add its index to our keys vector. + const uint32_t keysOffset = it == offsets.end() ? offsets.size() : *it; + + const uint32_t desiredSize = keys.size() + 1; + + // Amortize growth + if (keys.capacity() < desiredSize) + keys.reserve(keys.capacity() * 1.5); + + keys.resize(desiredSize); + + // Unless we're adding to the end, we need to shuffle existing keys down + // to make room for our new index. + if (keysOffset != newIndex) { + std::memmove(&keys[keysOffset + 1], &keys[keysOffset], sizeof(uint32_t) * (keys.size() - 1 - keysOffset)); + } + + keys[keysOffset] = newIndex; + return newIndex; + } + + void clear() { + objects.clear(); + keys.clear(); + } + + // Returns the index of `entry` if present, -1 otherwise. + int32_t find(const T& entry) const { + const auto offsets = boost::irange(0, keys.size()); + const auto it = std::lower_bound( + offsets.begin(), + offsets.end(), + entry, + [&](const auto &e, auto id) { + return objects.at(keys[e]) < id; + } + ); + + // We do, return its index. 
+ if (it != offsets.end() && objects[keys[*it]] == entry) + return keys[*it]; + + return -1; + } + + inline const T& operator[](uint32_t index) const { + return objects[index]; + } + + inline const T& at(uint32_t index) const { + return objects.at(index); + } + + size_t size() const { return objects.size(); } + + struct iterator { + const DequeMap& dm; + size_t offset; + iterator(const DequeMap& dm, size_t offset): dm(dm), offset(offset) {} + void operator++() { offset++; } + bool operator!=(iterator& other) { return offset != other.offset; } + const T& operator*() const { return dm.objects[dm.keys[offset]]; } + }; + + iterator begin() const { return iterator{*this, 0}; } + iterator end() const { return iterator{*this, keys.size()}; } + +private: + uint32_t maxSize; + + // Using a deque is necessary, as it provides pointer-stability for previously + // added objects when it grows the storage (as opposed to, e.g., vector). + std::deque objects; + + // Whereas `objects` is ordered by insertion-time, `keys` is sorted such that + // objects[key[0]] < objects[key[1]] < ... < objects[key[$]] + // operator< of T. 
+ std::vector keys; +}; +#endif diff --git a/include/helpers.h b/include/helpers.h index 7cb9c027..de490874 100644 --- a/include/helpers.h +++ b/include/helpers.h @@ -3,7 +3,8 @@ #define _HELPERS_H #include -#include "geom.h" +#include +#include // General helper routines @@ -27,12 +28,11 @@ inline std::vector split_string(std::string &inputStr, char sep) { return res; } +void decompress_string(std::string& output, const char* input, uint32_t inputSize, bool asGzip = false); double bboxElementFromStr(const std::string& number); std::vector parseBox(const std::string& bbox); -std::string decompress_string(const std::string& str, bool asGzip = false); - std::string compress_string(const std::string& str, int compressionlevel = Z_DEFAULT_COMPRESSION, bool asGzip = false); diff --git a/include/node_store.h b/include/node_store.h index cc84aba2..76fe18b3 100644 --- a/include/node_store.h +++ b/include/node_store.h @@ -23,6 +23,11 @@ class NodeStore // Accessors virtual size_t size() const = 0; virtual LatpLon at(NodeID i) const = 0; + + virtual bool contains(size_t shard, NodeID id) const = 0; + virtual NodeStore& shard(size_t shard) = 0; + virtual const NodeStore& shard(size_t shard) const = 0; + virtual size_t shards() const = 0; }; #endif diff --git a/include/node_stores.h b/include/node_stores.h index c5151bec..05d00f4e 100644 --- a/include/node_stores.h +++ b/include/node_stores.h @@ -5,6 +5,7 @@ #include #include "node_store.h" #include "sorted_node_store.h" +#include "sharded_node_store.h" #include "mmap_allocator.h" class BinarySearchNodeStore : public NodeStore @@ -19,10 +20,16 @@ class BinarySearchNodeStore : public NodeStore LatpLon at(NodeID i) const override; size_t size() const override; void insert(const std::vector& elements) override; - void clear() { + void clear() override { reopen(); } - void batchStart() {} + void batchStart() override {} + + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return 
*this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } + private: mutable std::mutex mutex; @@ -49,7 +56,14 @@ class CompactNodeStore : public NodeStore void insert(const std::vector& elements) override; void clear() override; void finalize(size_t numThreads) override {} - void batchStart() {} + void batchStart() override {} + + // CompactNodeStore has no metadata to know whether or not it contains + // a node, so it's not suitable for used in sharded scenarios. + bool contains(size_t shard, NodeID id) const override { return true; } + NodeStore& shard(size_t shard) override { return *this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } private: // @brief Insert a latp/lon pair. diff --git a/include/options_parser.h b/include/options_parser.h new file mode 100644 index 00000000..3ca73785 --- /dev/null +++ b/include/options_parser.h @@ -0,0 +1,58 @@ +#ifndef OPTIONS_PARSER_H +#define OPTIONS_PARSER_H + +#include +#include +#include + +namespace OptionsParser { + struct OptionException : std::exception { + OptionException(std::string message): message(message) {} + + /// Returns the explanatory string. + const char* what() const noexcept override { + return message.data(); + } + + private: + std::string message; + }; + + enum class OutputMode: char { File = 0, MBTiles = 1, PMTiles = 2 }; + + struct OsmOptions { + std::string storeFile; + bool fast = false; + bool compact = false; + bool skipIntegrity = false; + bool uncompressedNodes = false; + bool uncompressedWays = false; + bool materializeGeometries = false; + // lazyGeometries is the inverse of materializeGeometries. It can be passed + // to override an implicit materializeGeometries, as in the non-store case. 
+ bool lazyGeometries = false; + bool shardStores = false; + }; + + struct Options { + std::vector inputFiles; + std::string luaFile; + std::string jsonFile; + uint32_t threadNum = 0; + std::string outputFile; + std::string bbox; + + OsmOptions osm; + bool showHelp = false; + bool verbose = false; + bool mergeSqlite = false; + bool mapsplit = false; + OutputMode outputMode = OutputMode::File; + bool logTileTimings = false; + }; + + Options parse(const int argc, const char* argv[]); + void showHelp(); +}; + +#endif diff --git a/include/osm_lua_processing.h b/include/osm_lua_processing.h index b646bc2e..6a6a1d5d 100644 --- a/include/osm_lua_processing.h +++ b/include/osm_lua_processing.h @@ -13,9 +13,12 @@ #include "shp_mem_tiles.h" #include "osm_mem_tiles.h" #include "helpers.h" +#include #include +class TagMap; + // Lua extern "C" { #include "lua.h" @@ -31,6 +34,20 @@ extern bool verbose; class AttributeStore; class AttributeSet; +// A string, which might be in `currentTags` as a value. If Lua +// code refers to an absent value, it'll fallback to passing +// it as a std::string. +// +// The intent is that Attribute("name", Find("name")) is a common +// pattern, and we ought to avoid marshalling a string back and +// forth from C++ to Lua when possible. +struct PossiblyKnownTagValue { + bool found; + uint32_t index; + std::string fallback; +}; + + /** \brief OsmLuaProcessing - converts OSM objects into OutputObjects. 
@@ -71,34 +88,28 @@ class OsmLuaProcessing { // ---- Data loading methods - using tag_map_t = boost::container::flat_map; + using tag_map_t = boost::container::flat_map; // Scan non-MP relation - bool scanRelation(WayID id, const tag_map_t &tags); + bool scanRelation(WayID id, const TagMap& tags); /// \brief We are now processing a significant node - void setNode(NodeID id, LatpLon node, const tag_map_t &tags); + void setNode(NodeID id, LatpLon node, const TagMap& tags); /// \brief We are now processing a way - bool setWay(WayID wayId, LatpLonVec const &llVec, const tag_map_t &tags); + bool setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags); /** \brief We are now processing a relation * (note that we store relations as ways with artificial IDs, and that * we use decrementing positive IDs to give a bit more space for way IDs) */ - void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const tag_map_t &tags, bool isNativeMP, bool isInnerOuter); + void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const TagMap& tags, bool isNativeMP, bool isInnerOuter); // ---- Metadata queries called from Lua // Get the ID of the current object std::string Id() const; - // Check if there's a value for a given key - bool Holds(const std::string& key) const; - - // Get an OSM tag for a given key (or return empty string if none) - const std::string& Find(const std::string& key) const; - // ---- Spatial queries called from Lua // Find intersecting shapefile layer @@ -160,11 +171,8 @@ class OsmLuaProcessing { void LayerAsCentroid(const std::string &layerName); // Set attributes in a vector tile's Attributes table - void Attribute(const std::string &key, const std::string &val); - void AttributeWithMinZoom(const std::string &key, const std::string &val, const char minzoom); - void AttributeNumeric(const std::string &key, const float val); + void AttributeWithMinZoom(const std::string &key, const 
PossiblyKnownTagValue& val, const char minzoom); void AttributeNumericWithMinZoom(const std::string &key, const float val, const char minzoom); - void AttributeBoolean(const std::string &key, const bool val); void AttributeBooleanWithMinZoom(const std::string &key, const bool val, const char minzoom); void MinZoom(const double z); void ZOrder(const double z); @@ -199,6 +207,7 @@ class OsmLuaProcessing { inline AttributeStore &getAttributeStore() { return attributeStore; } struct luaProcessingException :std::exception {}; + const TagMap* currentTags; private: /// Internal: clear current cached state @@ -216,6 +225,8 @@ class OsmLuaProcessing { lastStoredGeometryId = 0; } + void removeAttributeIfNeeded(const std::string& key); + const inline Point getPoint() { return Point(lon/10000000.0,latp/10000000.0); } @@ -258,7 +269,7 @@ class OsmLuaProcessing { class LayerDefinition &layers; std::vector> outputs; // All output objects that have been created - const boost::container::flat_map* currentTags; + std::vector outputKeys; std::vector finalizeOutputs(); diff --git a/include/osm_mem_tiles.h b/include/osm_mem_tiles.h index a6266ea3..3c920b08 100644 --- a/include/osm_mem_tiles.h +++ b/include/osm_mem_tiles.h @@ -6,10 +6,15 @@ #include "osm_store.h" #include "geometry_cache.h" -#define OSM_THRESHOLD (1ull << 35) -#define USE_WAY_STORE (1ull << 35) -#define IS_WAY(x) (((x) >> 35) == (USE_WAY_STORE >> 35)) -#define OSM_ID(x) ((x) & 0b111111111111111111111111111111111) +// NB: Currently, USE_NODE_STORE and USE_WAY_STORE are equivalent. +// If we permit LayerAsCentroid to be generated from the OSM stores, +// this will have to change. 
+#define OSM_THRESHOLD (1ull << TILE_DATA_ID_SIZE) +#define USE_NODE_STORE (2ull << TILE_DATA_ID_SIZE) +#define IS_NODE(x) (((x) >> TILE_DATA_ID_SIZE) == (USE_NODE_STORE >> TILE_DATA_ID_SIZE)) +#define USE_WAY_STORE (1ull << TILE_DATA_ID_SIZE) +#define IS_WAY(x) (((x) >> TILE_DATA_ID_SIZE) == (USE_WAY_STORE >> TILE_DATA_ID_SIZE)) +#define OSM_ID(x) ((x) & 0b1111111111111111111111111111111111) class NodeStore; class WayStore; @@ -32,18 +37,21 @@ class OsmMemTiles : public TileDataSource { const WayStore& wayStore ); + std::string name() const override { return "osm"; } + Geometry buildWayGeometry( const OutputGeometryType geomType, const NodeID objectID, const TileBbox &bbox ) override; + LatpLon buildNodeGeometry(NodeID const objectID, const TileBbox &bbox) const override; void Clear(); private: - void populateLinestring(Linestring& ls, NodeID objectID); - Linestring& getOrBuildLinestring(NodeID objectID); + void populateLinestring(Linestring& ls, NodeID objectID) const; + Linestring& getOrBuildLinestring(NodeID objectID) const; void populateMultiPolygon(MultiPolygon& dst, NodeID objectID) override; const NodeStore& nodeStore; diff --git a/include/osm_store.h b/include/osm_store.h index 11158bb2..5bb74272 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -11,12 +11,21 @@ #include #include #include +#include extern bool verbose; class NodeStore; class WayStore; +// A comparator for data_view so it can be used in boost's flat_map +struct DataViewLessThan { + bool operator()(const protozero::data_view& a, const protozero::data_view& b) const { + return a < b; + } +}; + + // // Internal data structures. 
// @@ -72,37 +81,39 @@ class RelationScanStore { private: using tag_map_t = boost::container::flat_map; - std::map> relationsForWays; - std::map relationTags; - mutable std::mutex mutex; + std::vector>> relationsForWays; + std::vector> relationTags; + mutable std::vector mutex; public: + RelationScanStore(): relationsForWays(128), relationTags(128), mutex(128) {} void relation_contains_way(WayID relid, WayID wayid) { - std::lock_guard lock(mutex); - relationsForWays[wayid].emplace_back(relid); + const size_t shard = wayid % mutex.size(); + + std::lock_guard lock(mutex[shard]); + relationsForWays[shard][wayid].emplace_back(relid); } void store_relation_tags(WayID relid, const tag_map_t &tags) { - std::lock_guard lock(mutex); - relationTags[relid] = tags; + const size_t shard = relid % mutex.size(); + std::lock_guard lock(mutex[shard]); + relationTags[shard][relid] = tags; } bool way_in_any_relations(WayID wayid) { - return relationsForWays.find(wayid) != relationsForWays.end(); + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard].find(wayid) != relationsForWays[shard].end(); } std::vector relations_for_way(WayID wayid) { - return relationsForWays[wayid]; + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard][wayid]; } std::string get_relation_tag(WayID relid, const std::string &key) { - auto it = relationTags.find(relid); - if (it==relationTags.end()) return ""; + const size_t shard = relid % mutex.size(); + auto it = relationTags[shard].find(relid); + if (it==relationTags[shard].end()) return ""; auto jt = it->second.find(key); if (jt==it->second.end()) return ""; return jt->second; } - void clear() { - std::lock_guard lock(mutex); - relationsForWays.clear(); - relationTags.clear(); - } }; diff --git a/include/osmformat.proto b/include/osmformat.proto deleted file mode 100644 index 93060586..00000000 --- a/include/osmformat.proto +++ /dev/null @@ -1,226 +0,0 @@ -syntax = "proto2"; - -option java_package = 
"crosby.binary"; - -/* OSM Binary file format - -This is the master schema file of the OSM binary file format. This -file is designed to support limited random-access and future -extendability. - -A binary OSM file consists of a sequence of FileBlocks (please see -fileformat.proto). The first fileblock contains a serialized instance -of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that -contain the primitives. - -Each primitiveblock is designed to be independently parsable. It -contains a string table storing all strings in that block (keys and -values in tags, roles in relations, usernames, etc.) as well as -metadata containing the precision of coordinates or timestamps in that -block. - -A primitiveblock contains a sequence of primitive groups, each -containing primitives of the same type (nodes, densenodes, ways, -relations). Coordinates are stored in signed 64-bit integers. Lat&lon -are measured in units nanodegrees. The default of -granularity of 100 nanodegrees corresponds to about 1cm on the ground, -and a full lat or lon fits into 32 bits. - -Converting an integer to a lattitude or longitude uses the formula: -$OUT = IN * granularity / 10**9$. Many encoding schemes use delta -coding when representing nodes and relations. - -*/ - -/* Added */ - -message BlobHeader { - required string type = 1; - optional bytes indexdata = 2; - required int32 datasize = 3; -} -message Blob { - optional bytes raw = 1; // No compression - optional int32 raw_size = 2; // Only set when compressed, to the uncompressed size - optional bytes zlib_data = 3; - // optional bytes lzma_data = 4; // PROPOSED. - // optional bytes OBSOLETE_bzip2_data = 5; // Deprecated. -} - - -////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////// - -/* Contains the file header. 
*/ - -message HeaderBlock { - optional HeaderBBox bbox = 1; - /* Additional tags to aid in parsing this dataset */ - repeated string required_features = 4; - repeated string optional_features = 5; - - optional string writingprogram = 16; - optional string source = 17; // From the bbox field. -} - - -/** The bounding box field in the OSM header. BBOX, as used in the OSM -header. Units are always in nanodegrees -- they do not obey -granularity rules. */ - -message HeaderBBox { - required sint64 left = 1; - required sint64 right = 2; - required sint64 top = 3; - required sint64 bottom = 4; -} - - -/////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - - -message PrimitiveBlock { - required StringTable stringtable = 1; - repeated PrimitiveGroup primitivegroup = 2; - - // Granularity, units of nanodegrees, used to store coordinates in this block - optional int32 granularity = 17 [default=100]; - // Offset value between the output coordinates coordinates and the granularity grid in unites of nanodegrees. - optional int64 lat_offset = 19 [default=0]; - optional int64 lon_offset = 20 [default=0]; - -// Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. - optional int32 date_granularity = 18 [default=1000]; - - - // Proposed extension: - //optional BBox bbox = 19; -} - -// Group of OSMPrimitives. All primitives in a group must be the same type. -message PrimitiveGroup { - repeated Node nodes = 1; - optional DenseNodes dense = 2; - repeated Way ways = 3; - repeated Relation relations = 4; - repeated ChangeSet changesets = 5; -} - - -/** String table, contains the common strings in each block. - - Note that we reserve index '0' as a delimiter, so the entry at that - index in the table is ALWAYS blank and unused. - - */ -message StringTable { - repeated bytes s = 1; -} - -/* Optional metadata that may be included into each primitive. 
*/ -message Info { - optional int32 version = 1 [default = -1]; - optional int32 timestamp = 2; - optional int64 changeset = 3; - optional int32 uid = 4; - optional int32 user_sid = 5; // String IDs -} - -/** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */ -message DenseInfo { - repeated int32 version = 1 [packed = true]; - repeated sint64 timestamp = 2 [packed = true]; // DELTA coded - repeated sint64 changeset = 3 [packed = true]; // DELTA coded - repeated sint32 uid = 4 [packed = true]; // DELTA coded - repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded -} - - -// TODO: REMOVE THIS? NOT in osmosis schema. -message ChangeSet { - required int64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; // String IDs. - repeated uint32 vals = 3 [packed = true]; // String IDs. - - optional Info info = 4; - - required int64 created_at = 8; - optional int64 closetime_delta = 9; - required bool open = 10; - optional HeaderBBox bbox = 11; -} - - -message Node { - required sint64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; // String IDs. - repeated uint32 vals = 3 [packed = true]; // String IDs. - - optional Info info = 4; // May be omitted in omitmeta - - required sint64 lat = 8; - required sint64 lon = 9; -} - -/* Used to densly represent a sequence of nodes that do not have any tags. - -We represent these nodes columnwise as five columns: ID's, lats, and -lons, all delta coded. When metadata is not omitted, - -We encode keys & vals for all nodes as a single array of integers -containing key-stringid and val-stringid, using a stringid of 0 as a -delimiter between nodes. 
- - ( ( )* '0' )* - */ - -message DenseNodes { - repeated sint64 id = 1 [packed = true]; // DELTA coded - - //repeated Info info = 4; - optional DenseInfo denseinfo = 5; - - repeated sint64 lat = 8 [packed = true]; // DELTA coded - repeated sint64 lon = 9 [packed = true]; // DELTA coded - - // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. - repeated int32 keys_vals = 10 [packed = true]; -} - - -message Way { - required int64 id = 1; - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; - repeated uint32 vals = 3 [packed = true]; - - optional Info info = 4; - - repeated sint64 refs = 8 [packed = true]; // DELTA coded - repeated sint64 lats = 9 [packed = true]; - repeated sint64 lons = 10 [packed = true]; -} - -message Relation { - enum MemberType { - NODE = 0; - WAY = 1; - RELATION = 2; - } - required int64 id = 1; - - // Parallel arrays. - repeated uint32 keys = 2 [packed = true]; - repeated uint32 vals = 3 [packed = true]; - - optional Info info = 4; - - // Parallel arrays - repeated int32 roles_sid = 8 [packed = true]; - repeated sint64 memids = 9 [packed = true]; // DELTA encoded - repeated MemberType types = 10 [packed = true]; -} - diff --git a/include/output_object.h b/include/output_object.h index 3d2d862e..d994fbfa 100644 --- a/include/output_object.h +++ b/include/output_object.h @@ -10,10 +10,7 @@ #include "coordinates.h" #include "attribute_store.h" #include "osm_store.h" - -// Protobuf -#include "osmformat.pb.h" -#include "vector_tile.pb.h" +#include enum OutputGeometryType : unsigned int { POINT_, LINESTRING_, MULTILINESTRING_, POLYGON_ }; @@ -22,9 +19,6 @@ std::ostream& operator<<(std::ostream& os, OutputGeometryType geomType); /** * \brief OutputObject - any object (node, linestring, polygon) to be outputted to tiles - - * Possible future improvements to save memory: - * - use a global dictionary for attribute key/values */ #pragma pack(push, 4) class OutputObject { @@ -77,18 +71,11 
@@ class OutputObject { this->attributes = attributes; } - //\brief Write attribute key/value pairs (dictionary-encoded) - void writeAttributes(std::vector *keyList, - std::vector *valueList, - AttributeStore const &attributeStore, - vector_tile::Tile_Feature *featurePtr, char zoom) const; - - /** - * \brief Find a value in the value dictionary - * (we can't easily use find() because of the different value-type encoding - - * should be possible to improve this though) - */ - int findValue(const std::vector* valueList, const AttributePair& value) const; + void writeAttributes( + const AttributeStore& attributeStore, + vtzero::feature_builder& fbuilder, + char zoom + ) const; }; #pragma pack(pop) @@ -101,9 +88,4 @@ struct OutputObjectID { bool operator==(const OutputObject& x, const OutputObject& y); bool operator==(const OutputObjectID& x, const OutputObjectID& y); -namespace vector_tile { - bool operator==(const vector_tile::Tile_Value &x, const vector_tile::Tile_Value &y); - bool operator<(const vector_tile::Tile_Value &x, const vector_tile::Tile_Value &y); -} - #endif //_OUTPUT_OBJECT_H diff --git a/include/pbf_blocks.h b/include/pbf_blocks.h deleted file mode 100644 index 5cc28969..00000000 --- a/include/pbf_blocks.h +++ /dev/null @@ -1,48 +0,0 @@ -/*! 
\file */ -#ifndef _PBF_BLOCKS_H -#define _PBF_BLOCKS_H - -#include -#include -#include -#include - -// Protobuf -#include "osmformat.pb.h" -#include "vector_tile.pb.h" - -/* ------------------- - Protobuf handling - ------------------- */ - -// Read and parse a protobuf message -void readMessage(google::protobuf::Message *message, std::istream &input, unsigned int size); - -// Read an osm.pbf sequence of header length -> BlobHeader -> Blob -// and parse the unzipped contents into a message -BlobHeader readHeader(std::istream &input); -void readBlock(google::protobuf::Message *messagePtr, std::size_t datasize, std::istream &input); - -void writeBlock(google::protobuf::Message *messagePtr, std::ostream &output, std::string headerType); -/* ------------------- - Tag handling - ------------------- */ - -// Populate an array with the contents of a StringTable -void readStringTable(std::vector *strPtr, PrimitiveBlock *pbPtr); - -/// Populate a map with the reverse contents of a StringTable (i.e. 
string->num) -void readStringMap(std::map *mapPtr, PrimitiveBlock *pbPtr); - -/// Read the tags for a way into a hash -/// requires strings array to have been populated by readStringTable -std::map getTags(std::vector *strPtr, Way *wayPtr); - -/// Find the index of a string in the StringTable, adding it if it's not there -unsigned int findStringInTable(std::string *strPtr, std::map *mapPtr, PrimitiveBlock *pbPtr); - -/// Set a tag for a way to a new value -void setTag(Way *wayPtr, unsigned int keyIndex, unsigned int valueIndex); - -#endif //_PBF_BLOCKS_H - diff --git a/include/read_pbf.h b/include/pbf_processor.h similarity index 55% rename from include/read_pbf.h rename to include/pbf_processor.h index b934a563..4ae72c97 100644 --- a/include/read_pbf.h +++ b/include/pbf_processor.h @@ -8,10 +8,10 @@ #include #include #include "osm_store.h" +#include "pbf_reader.h" +#include -// Protobuf -#include "osmformat.pb.h" -#include "vector_tile.pb.h" +#include "tag_map.h" class OsmLuaProcessing; @@ -20,7 +20,7 @@ extern const std::string OptionLocationsOnWays; struct BlockMetadata { long int offset; - google::protobuf::int32 length; + int32_t length; bool hasNodes; bool hasWays; bool hasRelations; @@ -42,33 +42,34 @@ struct IndexedBlockMetadata: BlockMetadata { * * The output class is typically OsmMemTiles, which is derived from OsmLuaProcessing */ -class PbfReader +class PbfProcessor { public: enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8 }; - PbfReader(OSMStore &osmStore); + PbfProcessor(OSMStore &osmStore); using pbfreader_generate_output = std::function< std::shared_ptr () >; using pbfreader_generate_stream = std::function< std::shared_ptr () >; int ReadPbfFile( + uint shards, bool hasSortTypeThenID, const std::unordered_set& nodeKeys, unsigned int threadNum, const pbfreader_generate_stream& generate_stream, - const pbfreader_generate_output& generate_output + const pbfreader_generate_output& generate_output, + const NodeStore& nodeStore, 
+ const WayStore& wayStore ); // Read tags into a map from a way/node/relation - using tag_map_t = boost::container::flat_map; template - void readTags(T &pbfObject, PrimitiveBlock const &pb, tag_map_t &tags) { - tags.reserve(pbfObject.keys_size()); - auto keysPtr = pbfObject.mutable_keys(); - auto valsPtr = pbfObject.mutable_vals(); - for (uint n=0; n < pbfObject.keys_size(); n++) { - tags[pb.stringtable().s(keysPtr->Get(n))] = pb.stringtable().s(valsPtr->Get(n)); + void readTags(T &pbfObject, PbfReader::PrimitiveBlock const &pb, TagMap& tags) { + for (uint n=0; n < pbfObject.keys.size(); n++) { + auto keyIndex = pbfObject.keys[n]; + auto valueIndex = pbfObject.vals[n]; + tags.addTag(pb.stringTable[keyIndex], pb.stringTable[valueIndex]); } } @@ -79,29 +80,40 @@ class PbfReader const BlockMetadata& blockMetadata, const std::unordered_set& nodeKeys, bool locationsOnWays, - ReadPhase phase + ReadPhase phase, + uint shard, + uint effectiveShard ); - bool ReadNodes(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, const std::unordered_set &nodeKeyPositions); + bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const std::unordered_set& nodeKeyPositions); - bool ReadWays(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb, bool locationsOnWays); - bool ScanRelations(OsmLuaProcessing &output, PrimitiveGroup &pg, PrimitiveBlock const &pb); + bool ReadWays( + OsmLuaProcessing& output, + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + bool locationsOnWays, + uint shard, + uint effectiveShards + ); + bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb); bool ReadRelations( OsmLuaProcessing& output, - PrimitiveGroup& pg, - const PrimitiveBlock& pb, - const BlockMetadata& blockMetadata + PbfReader::PrimitiveGroup& pg, + const PbfReader::PrimitiveBlock& pb, + const BlockMetadata& blockMetadata, + 
uint shard, + uint effectiveShards ); - inline bool RelationIsType(Relation const &rel, int typeKey, int val) { - if (typeKey==-1 || val==-1) return false; - auto typeI = std::find(rel.keys().begin(), rel.keys().end(), typeKey); - if (typeI==rel.keys().end()) return false; - int typePos = typeI - rel.keys().begin(); - return rel.vals().Get(typePos) == val; + inline bool relationIsType(const PbfReader::Relation& rel, int typeKey, int val) { + if (typeKey == -1 || val == -1) return false; + auto typeI = std::find(rel.keys.begin(), rel.keys.end(), typeKey); + if (typeI == rel.keys.end()) return false; + int typePos = typeI - rel.keys.begin(); + return rel.vals[typePos] == val; } /// Find a string in the dictionary - static int findStringPosition(PrimitiveBlock const &pb, char const *str); + static int findStringPosition(const PbfReader::PrimitiveBlock& pb, const std::string& str); OSMStore &osmStore; std::mutex ioMutex; diff --git a/include/pbf_reader.h b/include/pbf_reader.h new file mode 100644 index 00000000..9af930c5 --- /dev/null +++ b/include/pbf_reader.h @@ -0,0 +1,296 @@ +#ifndef _PBF_READER_H +#define _PBF_READER_H + +#include +#include +#include +#include +#include +#include + +namespace PbfReader { + namespace Schema { + // See https://wiki.openstreetmap.org/wiki/PBF_Format#Definition_of_the_OSMHeader_fileblock + // for more background on the PBF schema. + enum class BlobHeader : protozero::pbf_tag_type { + required_string_type = 1, + optional_bytes_indexdata = 2, + required_int32_datasize = 3 + }; + + enum class Blob : protozero::pbf_tag_type { + optional_int32_raw_size = 2, // When compressed, the uncompressed size + oneof_data_bytes_raw = 1, // No compression + oneof_data_bytes_zlib_data = 3, + oneof_data_bytes_lzma_data = 4, + // Formerly used for bzip2 compressed data. Deprecated in 2010. + // bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. 
+ oneof_data_bytes_lz4_data = 6, + oneof_data_bytes_zstd_data = 7, + }; + + enum class HeaderBBox : protozero::pbf_tag_type { + // These units are always in nanodegrees, they don't obey granularity rules. + required_sint64_left = 1, + required_sint64_right = 2, + required_sint64_top = 3, + required_sint64_bottom = 4 + }; + + enum class HeaderBlock : protozero::pbf_tag_type { + optional_HeaderBBox_bbox = 1, + repeated_string_optional_features = 5 + }; + + enum class StringTable : protozero::pbf_tag_type { + repeated_bytes_s = 1 + }; + + enum class PrimitiveBlock : protozero::pbf_tag_type { + required_StringTable_stringtable = 1, + repeated_PrimitiveGroup_primitivegroup = 2, + optional_int32_granularity = 17, + optional_int32_date_granularity = 18, + optional_int64_lat_offset = 19, + optional_int64_lon_offset = 20 + }; + + enum class PrimitiveGroup : protozero::pbf_tag_type { + repeated_Node_nodes = 1, + optional_DenseNodes_dense = 2, + repeated_Way_ways = 3, + repeated_Relation_relations = 4, + repeated_ChangeSet_changesets = 5 + }; + + enum class DenseNodes : protozero::pbf_tag_type { + repeated_sint64_id = 1, + repeated_sint64_lat = 8, + repeated_sint64_lon = 9, + repeated_int32_keys_vals = 10 + }; + + enum class Way : protozero::pbf_tag_type { + required_int64_id = 1, + repeated_uint32_keys = 2, + repeated_uint32_vals = 3, + repeated_sint64_refs = 8, + repeated_sint64_lats = 9, + repeated_sint64_lons = 10 + }; + + enum class Relation : protozero::pbf_tag_type { + required_int64_id = 1, + repeated_uint32_keys = 2, + repeated_uint32_vals = 3, + repeated_int32_roles_sid = 8, + repeated_sint64_memids = 9, + repeated_MemberType_types = 10 + }; + } + + struct BlobHeader { + std::string type; + int32_t datasize; + }; + + struct HeaderBBox { + double minLon, maxLon, minLat, maxLat; + }; + + struct HeaderBlock { + bool hasBbox; + HeaderBBox bbox; + std::set optionalFeatures; + }; + + enum class PrimitiveGroupType: char { Node = 1, DenseNodes = 2, Way = 3, Relation = 4, 
ChangeSet = 5}; + + struct DenseNodes { + struct Node { + uint64_t id; + int32_t lon; + int32_t lat; + uint32_t tagStart; + uint32_t tagEnd; + }; + + struct Iterator { + int32_t offset; + Node node; + DenseNodes& nodes; + + bool operator!=(Iterator& other) const; + void operator++(); + Node& operator*(); + }; + + std::vector ids; + std::vector lons; + std::vector lats; + std::vector tagStart; + std::vector tagEnd; + std::vector keyValues; + Iterator begin(); + Iterator end(); + bool empty(); + void clear(); + void readDenseNodes(protozero::data_view data); + }; + + struct Way { + uint64_t id; + std::vector keys; + std::vector vals; + std::vector refs; + std::vector lats; + std::vector lons; + }; + + struct Relation { + enum MemberType: int { NODE = 0, WAY = 1, RELATION = 2 }; + uint64_t id; + std::vector keys; + std::vector vals; + std::vector memids; + std::vector roles_sid; + std::vector types; + }; + + class PrimitiveGroup; + struct Ways { + struct Iterator { + protozero::pbf_message message; + int offset; + Way& way; + + bool operator!=(Iterator& other) const; + void operator++(); + PbfReader::Way& operator*(); + + private: + void readWay(protozero::data_view data); + }; + + Ways(PrimitiveGroup* pg, Way& way): pg(pg), way(way) {} + Iterator begin(); + Iterator end(); + bool empty(); + + private: + friend PrimitiveGroup; + PrimitiveGroup* pg; + Way& way; + }; + + struct Relations { + struct Iterator { + protozero::pbf_message message; + int offset; + Relation& relation; + + bool operator!=(Iterator& other) const; + void operator++(); + PbfReader::Relation& operator*(); + + private: + void readRelation(protozero::data_view data); + }; + + + Relations(PrimitiveGroup* pg, Relation& relation): pg(pg), relation(relation) {} + Iterator begin(); + Iterator end(); + bool empty(); + + private: + friend PrimitiveGroup; + PrimitiveGroup* pg; + Relation& relation; + }; + + struct PrimitiveGroup { + PrimitiveGroup( + protozero::data_view data, + DenseNodes& nodes, + Way& 
way, + Relation& relation + ); + DenseNodes& nodes() const; + Ways& ways() const; + Relations& relations() const; + PrimitiveGroupType type() const; + + int32_t translateNodeKeyValue(int32_t i) const; + + // Only meant to be called by our iterator, not by client code. + void ensureData(); + protozero::data_view getDataView(); + private: + protozero::data_view data; + DenseNodes& denseNodes; + mutable Ways internalWays; + mutable Relations internalRelations; + PrimitiveGroupType internalType; + bool denseNodesInitialized; + + }; + + class PbfReader; + struct PrimitiveBlock { + struct PrimitiveGroups { + struct Iterator { + int offset; + std::vector* groups; + + Iterator(): offset(0), groups(nullptr) {} + Iterator(int offset, std::vector& groups): offset(offset), groups(&groups) {} + bool operator!=(Iterator& other) const; + void operator++(); + PrimitiveGroup& operator*(); + }; + + + PrimitiveGroups(): groups(nullptr) {} + PrimitiveGroups(std::vector& groups): groups(&groups) {} + Iterator begin(); + Iterator end(); + + private: + std::vector* groups; + }; + + std::vector stringTable; + PrimitiveGroups& groups(); + + private: + friend PbfReader; + std::vector internalGroups; + PrimitiveGroups groupsImpl; + }; + + // This is a little weird: we use a class only to get private storage + // for multiple PBF readers. Due to the way we plumb the input files + // elsewhere in the system, the readers don't own them, and are not + // responsible for closing them. 
+ class PbfReader { + public: + BlobHeader readBlobHeader(std::istream& input); + protozero::data_view readBlob(int32_t datasize, std::istream& input); + HeaderBlock readHeaderBlock(protozero::data_view data); + HeaderBBox readHeaderBBox(protozero::data_view data); + PrimitiveBlock& readPrimitiveBlock(protozero::data_view data); + void readStringTable(protozero::data_view data, std::vector& stringTable); + HeaderBlock readHeaderFromFile(std::istream& input); + + private: + std::string blobStorage; // the blob as stored in the PBF + std::string blobStorage2; // the blob after decompression, if needed + PrimitiveBlock pb; + DenseNodes denseNodes; + Way way; + Relation relation; + }; +} + +#endif diff --git a/include/pooled_string.h b/include/pooled_string.h new file mode 100644 index 00000000..56d44453 --- /dev/null +++ b/include/pooled_string.h @@ -0,0 +1,61 @@ +#ifndef _POOLED_STRING_H +#define _POOLED_STRING_H + +// std::string is quite general: +// - mutable +// - unlimited length +// - capacity can differ from size +// - can deallocate its dynamic memory +// +// Our use case, by contrast is immutable, bounded strings that live for the +// duration of the process. +// +// This gives us some room to have less memory overhead, especially on +// g++, whose implementation of std::string requires 32 bytes. +// +// Thus, we implement `PooledString`. It has a size of 16 bytes, and a small +// string optimization for strings <= 15 bytes. (We will separately teach +// AttributePair to encode Latin-character strings more efficiently, so that many +// strings of size 24 or less fit in 15 bytes.) +// +// If it needs to allocate memory, it does so from a shared pool. It is unable +// to free the memory once allocated. 
+ +// PooledString has one of three modes: +// - [126:127] = 00: small-string, length is in [120:125], lower 15 bytes are string +// - [126:127] = 10: pooled string, table is in bytes 1..3, offset in bytes 4..5, length in bytes 6..7 +// - [126:127] = 11: pointer to std::string, pointer is in bytes 8..15 +// +// Note that the pointer mode is not safe to be stored. It exists just to allow +// lookups in the AttributePair map before deciding to allocate a string. + +#include +#include + +namespace PooledStringNS { + class PooledString { + public: + // Create a short string or heap string, long-lived. + PooledString(const std::string& str); + + + // Create a std string - only valid so long as the string that is + // pointed to is valid. + PooledString(const std::string* str); + size_t size() const; + bool operator<(const PooledString& other) const; + bool operator==(const PooledString& other) const; + bool operator!=(const PooledString& other) const; + std::string toString() const; + const char* data() const; + void ensureStringIsOwned(); + + private: + // 0..3 is index into table, 4..5 is offset, 6..7 is length + uint8_t storage[16]; + }; +} + +using PooledString = PooledStringNS::PooledString; + +#endif diff --git a/include/protozero/basic_pbf_builder.hpp b/include/protozero/basic_pbf_builder.hpp new file mode 100644 index 00000000..0ede726f --- /dev/null +++ b/include/protozero/basic_pbf_builder.hpp @@ -0,0 +1,266 @@ +#ifndef PROTOZERO_BASIC_PBF_BUILDER_HPP +#define PROTOZERO_BASIC_PBF_BUILDER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file basic_pbf_builder.hpp + * + * @brief Contains the basic_pbf_builder template class. 
+ */ + +#include "basic_pbf_writer.hpp" +#include "types.hpp" + +#include + +namespace protozero { + +/** + * The basic_pbf_builder is used to write PBF formatted messages into a buffer. + * It is based on the basic_pbf_writer class and has all the same methods. The + * difference is that while the pbf_writer class takes an integer tag, + * this template class takes a tag of the template type T. The idea is that + * T will be an enumeration value and this helps reduce the possibility of + * programming errors. + * + * Almost all methods in this class can throw an std::bad_alloc exception if + * the underlying buffer class wants to resize. + * + * Read the tutorial to understand how this class is used. In most cases you + * want to use the pbf_builder class which uses a std::string as buffer type. + */ +template +class basic_pbf_builder : public basic_pbf_writer { + + static_assert(std::is_same::type>::value, + "T must be enum with underlying type protozero::pbf_tag_type"); + +public: + + /// The type of messages this class will build. + using enum_type = T; + + basic_pbf_builder() = default; + + /** + * Create a builder using the given string as a data store. The object + * stores a reference to that string and adds all data to it. The string + * doesn't have to be empty. The basic_pbf_builder object will just append data. + */ + explicit basic_pbf_builder(TBuffer& data) noexcept : + basic_pbf_writer{data} { + } + + /** + * Construct a pbf_builder for a submessage from the pbf_message or + * pbf_writer of the parent message.
+ * + * @param parent_writer The parent pbf_message or pbf_writer + * @param tag Tag of the field that will be written + */ + template + basic_pbf_builder(basic_pbf_writer& parent_writer, P tag) noexcept : + basic_pbf_writer{parent_writer, pbf_tag_type(tag)} { + } + +/// @cond INTERNAL +#define PROTOZERO_WRITER_WRAP_ADD_SCALAR(name, type) \ + void add_##name(T tag, type value) { \ + basic_pbf_writer::add_##name(pbf_tag_type(tag), value); \ + } + + PROTOZERO_WRITER_WRAP_ADD_SCALAR(bool, bool) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(enum, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(int32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint32, uint32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(int64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint64, uint64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed32, uint32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed32, int32_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed64, uint64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed64, int64_t) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(float, float) + PROTOZERO_WRITER_WRAP_ADD_SCALAR(double, double) + +#undef PROTOZERO_WRITER_WRAP_ADD_SCALAR +/// @endcond + + /** + * Add "bytes" field to data. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_bytes(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value, size); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_bytes(T tag, const data_view& value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data. 
+ * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_bytes(T tag, const std::string& value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag of the field + * @param value Pointer to zero-delimited value to be written + */ + void add_bytes(T tag, const char* value) { + basic_pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + /** + * Add "bytes" field to data using vectored input. All the data in the + * 2nd and further arguments is "concatenated" with only a single copy + * into the final buffer. + * + * This will work with objects of any type supporting the data() and + * size() methods like std::string or protozero::data_view. + * + * Example: + * @code + * std::string data1 = "abc"; + * std::string data2 = "xyz"; + * builder.add_bytes_vectored(1, data1, data2); + * @endcode + * + * @tparam Ts List of types supporting data() and size() methods. + * @param tag Tag of the field + * @param values List of objects of types Ts with data to be appended. + */ + template + void add_bytes_vectored(T tag, Ts&&... values) { + basic_pbf_writer::add_bytes_vectored(pbf_tag_type(tag), std::forward(values)...); + } + + /** + * Add "string" field to data. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_string(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value, size); + } + + /** + * Add "string" field to data. + * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_string(T tag, const data_view& value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "string" field to data. 
+ * + * @param tag Tag of the field + * @param value Value to be written + */ + void add_string(T tag, const std::string& value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "string" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag of the field + * @param value Pointer to value to be written + */ + void add_string(T tag, const char* value) { + basic_pbf_writer::add_string(pbf_tag_type(tag), value); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Pointer to message to be written + * @param size Length of the message + */ + void add_message(T tag, const char* value, std::size_t size) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value, size); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(T tag, const data_view& value) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value); + } + + /** + * Add "message" field to data. + * + * @param tag Tag of the field + * @param value Value to be written. The value must be a complete message. 
+ */ + void add_message(T tag, const std::string& value) { + basic_pbf_writer::add_message(pbf_tag_type(tag), value); + } + +/// @cond INTERNAL +#define PROTOZERO_WRITER_WRAP_ADD_PACKED(name) \ + template \ + void add_packed_##name(T tag, InputIterator first, InputIterator last) { \ + basic_pbf_writer::add_packed_##name(pbf_tag_type(tag), first, last); \ + } + + PROTOZERO_WRITER_WRAP_ADD_PACKED(bool) + PROTOZERO_WRITER_WRAP_ADD_PACKED(enum) + PROTOZERO_WRITER_WRAP_ADD_PACKED(int32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sint32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(uint32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(int64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sint64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(uint64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed32) + PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed64) + PROTOZERO_WRITER_WRAP_ADD_PACKED(float) + PROTOZERO_WRITER_WRAP_ADD_PACKED(double) + +#undef PROTOZERO_WRITER_WRAP_ADD_PACKED +/// @endcond + +}; // class basic_pbf_builder + +} // end namespace protozero + +#endif // PROTOZERO_BASIC_PBF_BUILDER_HPP diff --git a/include/protozero/basic_pbf_writer.hpp b/include/protozero/basic_pbf_writer.hpp new file mode 100644 index 00000000..f167c4d1 --- /dev/null +++ b/include/protozero/basic_pbf_writer.hpp @@ -0,0 +1,1054 @@ +#ifndef PROTOZERO_BASIC_PBF_WRITER_HPP +#define PROTOZERO_BASIC_PBF_WRITER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file basic_pbf_writer.hpp + * + * @brief Contains the basic_pbf_writer template class. 
+ */ + +#include "buffer_tmpl.hpp" +#include "config.hpp" +#include "data_view.hpp" +#include "types.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace protozero { + +namespace detail { + + template class packed_field_varint; + template class packed_field_svarint; + template class packed_field_fixed; + +} // end namespace detail + +/** + * The basic_pbf_writer is used to write PBF formatted messages into a buffer. + * + * This uses TBuffer as the type for the underlying buffer. In typical uses + * this is std::string, but you can use a different type that must support + * the right interface. Please see the documentation for details. + * + * Almost all methods in this class can throw an std::bad_alloc exception if + * the underlying buffer class wants to resize. + */ +template +class basic_pbf_writer { + + // A pointer to a buffer holding the data already written to the PBF + // message. For default constructed writers or writers that have been + // rolled back, this is a nullptr. + TBuffer* m_data = nullptr; + + // A pointer to a parent writer object if this is a submessage. If this + // is a top-level writer, it is a nullptr. + basic_pbf_writer* m_parent_writer = nullptr; + + // This is usually 0. If there is an open submessage, this is set in the + // parent to the rollback position, ie. the last position before the + // submessage was started. This is the position where the header of the + // submessage starts. + std::size_t m_rollback_pos = 0; + + // This is usually 0. If there is an open submessage, this is set in the + // parent to the position where the data of the submessage is written to.
+ std::size_t m_pos = 0; + + void add_varint(uint64_t value) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + add_varint_to_buffer(m_data, value); + } + + void add_field(pbf_tag_type tag, pbf_wire_type type) { + protozero_assert(((tag > 0 && tag < 19000) || (tag > 19999 && tag <= ((1U << 29U) - 1))) && "tag out of range"); + const uint32_t b = (tag << 3U) | uint32_t(type); + add_varint(b); + } + + void add_tagged_varint(pbf_tag_type tag, uint64_t value) { + add_field(tag, pbf_wire_type::varint); + add_varint(value); + } + + template + void add_fixed(T value) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&value); +#endif + buffer_customization::append(m_data, reinterpret_cast(&value), sizeof(T)); + } + + template + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag /*unused*/) { + if (first == last) { + return; + } + + basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_fixed(*first++); + } + } + + template + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag /*unused*/) { + if (first == last) { + return; + } + + const auto length = std::distance(first, last); + add_length_varint(tag, sizeof(T) * pbf_length_type(length)); + reserve(sizeof(T) * std::size_t(length)); + + while (first != last) { + add_fixed(*first++); + } + } + + template + void add_packed_varint(pbf_tag_type tag, It first, It last) { + if (first == last) { + return; + } + + basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_varint(uint64_t(*first++)); + } + } + + template + void add_packed_svarint(pbf_tag_type tag, It first, It last) { + if (first == last) { + return; + } + + 
basic_pbf_writer sw{*this, tag}; + + while (first != last) { + sw.add_varint(encode_zigzag64(*first++)); + } + } + + // The number of bytes to reserve for the varint holding the length of + // a length-delimited field. The length has to fit into pbf_length_type, + // and a varint needs 8 bits for every 7 bits. + enum : int { + reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1 + }; + + // If m_rollback_pos is set to this special value, it means that when + // the submessage is closed, nothing needs to be done, because the length + // of the submessage has already been written correctly. + enum : std::size_t { + size_is_known = std::numeric_limits::max() + }; + + void open_submessage(pbf_tag_type tag, std::size_t size) { + protozero_assert(m_pos == 0); + protozero_assert(m_data); + if (size == 0) { + m_rollback_pos = buffer_customization::size(m_data); + add_field(tag, pbf_wire_type::length_delimited); + buffer_customization::append_zeros(m_data, std::size_t(reserve_bytes)); + } else { + m_rollback_pos = size_is_known; + add_length_varint(tag, pbf_length_type(size)); + reserve(size); + } + m_pos = buffer_customization::size(m_data); + } + + void rollback_submessage() { + protozero_assert(m_pos != 0); + protozero_assert(m_rollback_pos != size_is_known); + protozero_assert(m_data); + buffer_customization::resize(m_data, m_rollback_pos); + m_pos = 0; + } + + void commit_submessage() { + protozero_assert(m_pos != 0); + protozero_assert(m_rollback_pos != size_is_known); + protozero_assert(m_data); + const auto length = pbf_length_type(buffer_customization::size(m_data) - m_pos); + + protozero_assert(buffer_customization::size(m_data) >= m_pos - reserve_bytes); + const auto n = add_varint_to_buffer(buffer_customization::at_pos(m_data, m_pos - reserve_bytes), length); + + buffer_customization::erase_range(m_data, m_pos - reserve_bytes + n, m_pos); + m_pos = 0; + } + + void close_submessage() { + protozero_assert(m_data); + if (m_pos == 0 || m_rollback_pos == size_is_known)
{ + return; + } + if (buffer_customization::size(m_data) - m_pos == 0) { + rollback_submessage(); + } else { + commit_submessage(); + } + } + + void add_length_varint(pbf_tag_type tag, pbf_length_type length) { + add_field(tag, pbf_wire_type::length_delimited); + add_varint(length); + } + +public: + + /** + * Create a writer using the specified buffer as a data store. The + * basic_pbf_writer stores a pointer to that buffer and adds all data to + * it. The buffer doesn't have to be empty. The basic_pbf_writer will just + * append data. + */ + explicit basic_pbf_writer(TBuffer& buffer) noexcept : + m_data{&buffer} { + } + + /** + * Create a writer without a data store. In this form the writer can not + * be used! + */ + basic_pbf_writer() noexcept = default; + + /** + * Construct a basic_pbf_writer for a submessage from the basic_pbf_writer + * of the parent message. + * + * @param parent_writer The basic_pbf_writer + * @param tag Tag (field number) of the field that will be written + * @param size Optional size of the submessage in bytes (use 0 for unknown). + * Setting this allows some optimizations but is only possible in + * a few very specific cases. + */ + basic_pbf_writer(basic_pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size = 0) : + m_data{parent_writer.m_data}, + m_parent_writer{&parent_writer} { + m_parent_writer->open_submessage(tag, size); + } + + /// A basic_pbf_writer object can not be copied + basic_pbf_writer(const basic_pbf_writer&) = delete; + + /// A basic_pbf_writer object can not be copied + basic_pbf_writer& operator=(const basic_pbf_writer&) = delete; + + /** + * A basic_pbf_writer object can be moved. After this the other + * basic_pbf_writer will be invalid. 
+ */ + basic_pbf_writer(basic_pbf_writer&& other) noexcept : + m_data{other.m_data}, + m_parent_writer{other.m_parent_writer}, + m_rollback_pos{other.m_rollback_pos}, + m_pos{other.m_pos} { + other.m_data = nullptr; + other.m_parent_writer = nullptr; + other.m_rollback_pos = 0; + other.m_pos = 0; + } + + /** + * A basic_pbf_writer object can be moved. After this the other + * basic_pbf_writer will be invalid. + */ + basic_pbf_writer& operator=(basic_pbf_writer&& other) noexcept { + m_data = other.m_data; + m_parent_writer = other.m_parent_writer; + m_rollback_pos = other.m_rollback_pos; + m_pos = other.m_pos; + other.m_data = nullptr; + other.m_parent_writer = nullptr; + other.m_rollback_pos = 0; + other.m_pos = 0; + return *this; + } + + ~basic_pbf_writer() noexcept { + try { + if (m_parent_writer != nullptr) { + m_parent_writer->close_submessage(); + } + } catch (...) { + // This try/catch is used to make the destructor formally noexcept. + // close_submessage() is not noexcept, but will not throw the way + // it is called here, so we are good. But to be paranoid, call... + std::terminate(); + } + } + + /** + * Check if this writer is valid. A writer is invalid if it was default + * constructed, moved from, or if commit() has been called on it. + * Otherwise it is valid. + */ + bool valid() const noexcept { + return m_data != nullptr; + } + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(basic_pbf_writer& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_parent_writer, other.m_parent_writer); + swap(m_rollback_pos, other.m_rollback_pos); + swap(m_pos, other.m_pos); + } + + /** + * Reserve size bytes in the underlying message store in addition to + * whatever the message store already holds. So unlike + * the `std::string::reserve()` method this is not an absolute size, + * but additional memory that should be reserved. 
+ * + * @param size Number of bytes to reserve in underlying message store. + */ + void reserve(std::size_t size) { + protozero_assert(m_data); + buffer_customization::reserve_additional(m_data, size); + } + + /** + * Commit this submessage. This does the same as when the basic_pbf_writer + * goes out of scope and is destructed. + * + * @pre Must be a basic_pbf_writer of a submessage, ie one opened with the + * basic_pbf_writer constructor taking a parent message. + * @post The basic_pbf_writer is invalid and can't be used any more. + */ + void commit() { + protozero_assert(m_parent_writer && "you can't call commit() on a basic_pbf_writer without a parent"); + protozero_assert(m_pos == 0 && "you can't call commit() on a basic_pbf_writer that has an open nested submessage"); + m_parent_writer->close_submessage(); + m_parent_writer = nullptr; + m_data = nullptr; + } + + /** + * Cancel writing of this submessage. The complete submessage will be + * removed as if it was never created and no fields were added. + * + * @pre Must be a basic_pbf_writer of a submessage, ie one opened with the + * basic_pbf_writer constructor taking a parent message. + * @post The basic_pbf_writer is invalid and can't be used any more. + */ + void rollback() { + protozero_assert(m_parent_writer && "you can't call rollback() on a basic_pbf_writer without a parent"); + protozero_assert(m_pos == 0 && "you can't call rollback() on a basic_pbf_writer that has an open nested submessage"); + m_parent_writer->rollback_submessage(); + m_parent_writer = nullptr; + m_data = nullptr; + } + + ///@{ + /** + * @name Scalar field writer functions + */ + + /** + * Add "bool" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bool(pbf_tag_type tag, bool value) { + add_field(tag, pbf_wire_type::varint); + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + m_data->push_back(char(value)); + } + + /** + * Add "enum" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_enum(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "int32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_int32(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "sint32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sint32(pbf_tag_type tag, int32_t value) { + add_tagged_varint(tag, encode_zigzag32(value)); + } + + /** + * Add "uint32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_uint32(pbf_tag_type tag, uint32_t value) { + add_tagged_varint(tag, value); + } + + /** + * Add "int64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_int64(pbf_tag_type tag, int64_t value) { + add_tagged_varint(tag, uint64_t(value)); + } + + /** + * Add "sint64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sint64(pbf_tag_type tag, int64_t value) { + add_tagged_varint(tag, encode_zigzag64(value)); + } + + /** + * Add "uint64" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_uint64(pbf_tag_type tag, uint64_t value) { + add_tagged_varint(tag, value); + } + + /** + * Add "fixed32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_fixed32(pbf_tag_type tag, uint32_t value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "sfixed32" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sfixed32(pbf_tag_type tag, int32_t value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "fixed64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_fixed64(pbf_tag_type tag, uint64_t value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "sfixed64" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_sfixed64(pbf_tag_type tag, int64_t value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "float" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_float(pbf_tag_type tag, float value) { + add_field(tag, pbf_wire_type::fixed32); + add_fixed(value); + } + + /** + * Add "double" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_double(pbf_tag_type tag, double value) { + add_field(tag, pbf_wire_type::fixed64); + add_fixed(value); + } + + /** + * Add "bytes" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_bytes(pbf_tag_type tag, const char* value, std::size_t size) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + protozero_assert(size <= std::numeric_limits::max()); + add_length_varint(tag, pbf_length_type(size)); + buffer_customization::append(m_data, value, size); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bytes(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "bytes" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bytes(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "bytes" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag (field number) of the field + * @param value Pointer to zero-delimited value to be written + */ + void add_bytes(pbf_tag_type tag, const char* value) { + add_bytes(tag, value, std::strlen(value)); + } + + /** + * Add "bytes" field to data using vectored input. All the data in the + * 2nd and further arguments is "concatenated" with only a single copy + * into the final buffer. + * + * This will work with objects of any type supporting the data() and + * size() methods like std::string or protozero::data_view. + * + * Example: + * @code + * std::string data1 = "abc"; + * std::string data2 = "xyz"; + * writer.add_bytes_vectored(1, data1, data2); + * @endcode + * + * @tparam Ts List of types supporting data() and size() methods. 
+ * @param tag Tag (field number) of the field + * @param values List of objects of types Ts with data to be appended. + */ + template + void add_bytes_vectored(pbf_tag_type tag, Ts&&... values) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent basic_pbf_writer if there is an existing basic_pbf_writer for a submessage"); + protozero_assert(m_data); + size_t sum_size = 0; + (void)std::initializer_list{sum_size += values.size()...}; + protozero_assert(sum_size <= std::numeric_limits::max()); + add_length_varint(tag, pbf_length_type(sum_size)); + buffer_customization::reserve_additional(m_data, sum_size); + (void)std::initializer_list{(buffer_customization::append(m_data, values.data(), values.size()), 0)...}; + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + * @param size Number of bytes to be written + */ + void add_string(pbf_tag_type tag, const char* value, std::size_t size) { + add_bytes(tag, value, size); + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_string(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_string(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "string" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag (field number) of the field + * @param value Pointer to value to be written + */ + void add_string(pbf_tag_type tag, const char* value) { + add_bytes(tag, value, std::strlen(value)); + } + + /** + * Add "message" field to data. 
+ * + * @param tag Tag (field number) of the field + * @param value Pointer to message to be written + * @param size Length of the message + */ + void add_message(pbf_tag_type tag, const char* value, std::size_t size) { + add_bytes(tag, value, size); + } + + /** + * Add "message" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "message" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + ///@} + + ///@{ + /** + * @name Repeated packed field writer functions + */ + + /** + * Add "repeated packed bool" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to bool. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed enum" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed int32" field to data. 
+ * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed sint32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_svarint(tag, first, last); + } + + /** + * Add "repeated packed uint32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed int64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add "repeated packed sint64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_svarint(tag, first, last); + } + + /** + * Add "repeated packed uint64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_varint(tag, first, last); + } + + /** + * Add a "repeated packed" fixed-size field to data. The following + * fixed-size fields are available: + * + * uint32_t -> repeated packed fixed32 + * int32_t -> repeated packed sfixed32 + * uint64_t -> repeated packed fixed64 + * int64_t -> repeated packed sfixed64 + * double -> repeated packed double + * float -> repeated packed float + * + * @tparam ValueType One of the following types: (u)int32/64_t, double, float. + * @tparam InputIterator A type satisfying the InputIterator concept. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed(pbf_tag_type tag, InputIterator first, InputIterator last) { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, "Only some types are allowed"); + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed fixed32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed sfixed32" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int32_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed fixed64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to uint64_t. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed sfixed64" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to int64_t. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed float" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to float. + * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + /** + * Add "repeated packed double" field to data. + * + * @tparam InputIterator A type satisfying the InputIterator concept. + * Dereferencing the iterator must yield a type assignable to double. 
+ * @param tag Tag (field number) of the field + * @param first Iterator pointing to the beginning of the data + * @param last Iterator pointing one past the end of data + */ + template + void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) { + add_packed_fixed(tag, first, last, + typename std::iterator_traits::iterator_category{}); + } + + ///@} + + template friend class detail::packed_field_varint; + template friend class detail::packed_field_svarint; + template friend class detail::packed_field_fixed; + +}; // class basic_pbf_writer + +/** + * Swap two basic_pbf_writer objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +template +inline void swap(basic_pbf_writer& lhs, basic_pbf_writer& rhs) noexcept { + lhs.swap(rhs); +} + +namespace detail { + + template + class packed_field { + + basic_pbf_writer m_writer{}; + + public: + + packed_field(const packed_field&) = delete; + packed_field& operator=(const packed_field&) = delete; + + packed_field(packed_field&&) noexcept = default; + packed_field& operator=(packed_field&&) noexcept = default; + + packed_field() = default; + + packed_field(basic_pbf_writer& parent_writer, pbf_tag_type tag) : + m_writer{parent_writer, tag} { + } + + packed_field(basic_pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size) : + m_writer{parent_writer, tag, size} { + } + + ~packed_field() noexcept = default; + + bool valid() const noexcept { + return m_writer.valid(); + } + + void commit() { + m_writer.commit(); + } + + void rollback() { + m_writer.rollback(); + } + + basic_pbf_writer& writer() noexcept { + return m_writer; + } + + }; // class packed_field + + template + class packed_field_fixed : public packed_field { + + public: + + packed_field_fixed() : + packed_field{} { + } + + template + packed_field_fixed(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + template + packed_field_fixed(basic_pbf_writer& parent_writer, P 
tag, std::size_t size) : + packed_field{parent_writer, static_cast(tag), size * sizeof(T)} { + } + + void add_element(T value) { + this->writer().template add_fixed(value); + } + + }; // class packed_field_fixed + + template + class packed_field_varint : public packed_field { + + public: + + packed_field_varint() : + packed_field{} { + } + + template + packed_field_varint(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + void add_element(T value) { + this->writer().add_varint(uint64_t(value)); + } + + }; // class packed_field_varint + + template + class packed_field_svarint : public packed_field { + + public: + + packed_field_svarint() : + packed_field{} { + } + + template + packed_field_svarint(basic_pbf_writer& parent_writer, P tag) : + packed_field{parent_writer, static_cast(tag)} { + } + + void add_element(T value) { + this->writer().add_varint(encode_zigzag64(value)); + } + + }; // class packed_field_svarint + +} // end namespace detail + +} // end namespace protozero + +#endif // PROTOZERO_BASIC_PBF_WRITER_HPP diff --git a/include/protozero/buffer_fixed.hpp b/include/protozero/buffer_fixed.hpp new file mode 100644 index 00000000..b2e6d1d2 --- /dev/null +++ b/include/protozero/buffer_fixed.hpp @@ -0,0 +1,222 @@ +#ifndef PROTOZERO_BUFFER_FIXED_HPP +#define PROTOZERO_BUFFER_FIXED_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file buffer_fixed.hpp + * + * @brief Contains the fixed_size_buffer_adaptor class. 
+ */
+
+#include "buffer_tmpl.hpp"
+#include "config.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <stdexcept>
+
+namespace protozero {
+
+/**
+ * This class can be used instead of std::string if you want to create a
+ * vector tile in a fixed-size buffer. Any operation that needs more space
+ * than is available will fail with a std::length_error exception.
+ */
+class fixed_size_buffer_adaptor {
+
+    char* m_data;
+    std::size_t m_capacity;
+    std::size_t m_size = 0;
+
+public:
+
+    /// @cond usual container typedefs not documented
+
+    using size_type = std::size_t;
+
+    using value_type = char;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+
+    using iterator = pointer;
+    using const_iterator = const_pointer;
+
+    /// @endcond
+
+    /**
+     * Constructor.
+     *
+     * @param data Pointer to some memory allocated for the buffer.
+     * @param capacity Number of bytes available.
+     */
+    fixed_size_buffer_adaptor(char* data, std::size_t capacity) noexcept :
+        m_data(data),
+        m_capacity(capacity) {
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param container Some container class supporting the member functions
+     *        data() and size().
+     */
+    template <typename T>
+    explicit fixed_size_buffer_adaptor(T& container) :
+        m_data(container.data()),
+        m_capacity(container.size()) {
+    }
+
+    /// Returns a pointer to the data in the buffer.
+    const char* data() const noexcept {
+        return m_data;
+    }
+
+    /// Returns a pointer to the data in the buffer.
+    char* data() noexcept {
+        return m_data;
+    }
+
+    /// The capacity this buffer was created with.
+    std::size_t capacity() const noexcept {
+        return m_capacity;
+    }
+
+    /// The number of bytes used in the buffer. Always <= capacity().
+    std::size_t size() const noexcept {
+        return m_size;
+    }
+
+    /// Return iterator to beginning of data.
+    char* begin() noexcept {
+        return m_data;
+    }
+
+    /// Return iterator to beginning of data.
+    const char* begin() const noexcept {
+        return m_data;
+    }
+
+    /// Return iterator to beginning of data.
+    const char* cbegin() const noexcept {
+        return m_data;
+    }
+
+    /// Return iterator to end of data.
+    char* end() noexcept {
+        return m_data + m_size;
+    }
+
+    /// Return iterator to end of data.
+    const char* end() const noexcept {
+        return m_data + m_size;
+    }
+
+    /// Return iterator to end of data.
+    const char* cend() const noexcept {
+        return m_data + m_size;
+    }
+
+/// @cond INTERNAL
+
+    // Do not rely on anything beyond this point
+
+    void append(const char* data, std::size_t count) {
+        if (m_size + count > m_capacity) {
+            throw std::length_error{"fixed size data store exhausted"};
+        }
+        std::copy_n(data, count, m_data + m_size);
+        m_size += count;
+    }
+
+    void append_zeros(std::size_t count) {
+        if (m_size + count > m_capacity) {
+            throw std::length_error{"fixed size data store exhausted"};
+        }
+        std::fill_n(m_data + m_size, count, '\0');
+        m_size += count;
+    }
+
+    void resize(std::size_t size) {
+        protozero_assert(size < m_size);
+        if (size > m_capacity) {
+            throw std::length_error{"fixed size data store exhausted"};
+        }
+        m_size = size;
+    }
+
+    void erase_range(std::size_t from, std::size_t to) {
+        protozero_assert(from <= m_size);
+        protozero_assert(to <= m_size);
+        protozero_assert(from < to);
+        std::copy(m_data + to, m_data + m_size, m_data + from);
+        m_size -= (to - from);
+    }
+
+    char* at_pos(std::size_t pos) {
+        protozero_assert(pos <= m_size);
+        return m_data + pos;
+    }
+
+    void push_back(char ch) {
+        if (m_size >= m_capacity) {
+            throw std::length_error{"fixed size data store exhausted"};
+        }
+        m_data[m_size++] = ch;
+    }
+/// @endcond
+
+}; // class fixed_size_buffer_adaptor
+
+/// @cond INTERNAL
+template <>
+struct buffer_customization<fixed_size_buffer_adaptor> {
+
+    static std::size_t size(const fixed_size_buffer_adaptor* buffer) noexcept {
+        return buffer->size();
+    }
+
+    static void append(fixed_size_buffer_adaptor* buffer, const char* data, std::size_t count) {
+        buffer->append(data, count);
+    }
+
+    static void append_zeros(fixed_size_buffer_adaptor* buffer, std::size_t count) {
+        buffer->append_zeros(count);
+    }
+
+    static void resize(fixed_size_buffer_adaptor* buffer, std::size_t size) {
+        buffer->resize(size);
+    }
+
+    static void reserve_additional(fixed_size_buffer_adaptor* /*buffer*/, std::size_t /*size*/) {
+        /* nothing to be done for fixed-size buffers */
+    }
+
+    static void erase_range(fixed_size_buffer_adaptor* buffer, std::size_t from, std::size_t to) {
+        buffer->erase_range(from, to);
+    }
+
+    static char* at_pos(fixed_size_buffer_adaptor* buffer, std::size_t pos) {
+        return buffer->at_pos(pos);
+    }
+
+    static void push_back(fixed_size_buffer_adaptor* buffer, char ch) {
+        buffer->push_back(ch);
+    }
+
+};
+/// @endcond
+
+} // namespace protozero
+
+#endif // PROTOZERO_BUFFER_FIXED_HPP
diff --git a/include/protozero/buffer_string.hpp b/include/protozero/buffer_string.hpp
new file mode 100644
index 00000000..02e8ad25
--- /dev/null
+++ b/include/protozero/buffer_string.hpp
@@ -0,0 +1,78 @@
+#ifndef PROTOZERO_BUFFER_STRING_HPP
+#define PROTOZERO_BUFFER_STRING_HPP
+
+/*****************************************************************************
+
+protozero - Minimalistic protocol buffer decoder and encoder in C++.
+
+This file is from https://github.com/mapbox/protozero where you can find more
+documentation.
+
+*****************************************************************************/
+
+/**
+ * @file buffer_string.hpp
+ *
+ * @brief Contains the customization points for buffer implementation based
+ *        on std::string
+ */
+
+#include "buffer_tmpl.hpp"
+#include "config.hpp"
+
+#include <cstddef>
+#include <iterator>
+#include <string>
+
+namespace protozero {
+
+// Implementation of buffer customizations points for std::string
+
+/// @cond INTERNAL
+template <>
+struct buffer_customization<std::string> {
+
+    static std::size_t size(const std::string* buffer) noexcept {
+        return buffer->size();
+    }
+
+    static void append(std::string* buffer, const char* data, std::size_t count) {
+        buffer->append(data, count);
+    }
+
+    static void append_zeros(std::string* buffer, std::size_t count) {
+        buffer->append(count, '\0');
+    }
+
+    static void resize(std::string* buffer, std::size_t size) {
+        protozero_assert(size < buffer->size());
+        buffer->resize(size);
+    }
+
+    static void reserve_additional(std::string* buffer, std::size_t size) {
+        buffer->reserve(buffer->size() + size);
+    }
+
+    static void erase_range(std::string* buffer, std::size_t from, std::size_t to) {
+        protozero_assert(from <= buffer->size());
+        protozero_assert(to <= buffer->size());
+        protozero_assert(from <= to);
+        buffer->erase(std::next(buffer->begin(), static_cast<std::string::difference_type>(from)),
+                      std::next(buffer->begin(), static_cast<std::string::difference_type>(to)));
+    }
+
+    static char* at_pos(std::string* buffer, std::size_t pos) {
+        protozero_assert(pos <= buffer->size());
+        return (&*buffer->begin()) + pos;
+    }
+
+    static void push_back(std::string* buffer, char ch) {
+        buffer->push_back(ch);
+    }
+
+};
+/// @endcond
+
+} // namespace protozero
+
+#endif // PROTOZERO_BUFFER_STRING_HPP
diff --git a/include/protozero/buffer_tmpl.hpp b/include/protozero/buffer_tmpl.hpp
new file mode 100644
index 00000000..ac223996
--- /dev/null
+++ b/include/protozero/buffer_tmpl.hpp
@@ -0,0 +1,113 @@
+#ifndef PROTOZERO_BUFFER_TMPL_HPP
+#define PROTOZERO_BUFFER_TMPL_HPP
+
+/*****************************************************************************
+
+protozero - Minimalistic protocol buffer decoder and encoder in C++.
+
+This file is from https://github.com/mapbox/protozero where you can find more
+documentation.
+
+*****************************************************************************/
+
+/**
+ * @file buffer_tmpl.hpp
+ *
+ * @brief Contains the customization points for buffer implementations.
+ */
+
+#include <cstddef>
+#include <iterator>
+#include <string>
+
+namespace protozero {
+
+// Implementation of buffer customizations points for std::string
+
+/// @cond INTERNAL
+template <typename T>
+struct buffer_customization {
+
+    /**
+     * Get the number of bytes currently used in the buffer.
+     *
+     * @param buffer Pointer to the buffer.
+     * @returns number of bytes used in the buffer.
+     */
+    static std::size_t size(const std::string* buffer);
+
+    /**
+     * Append count bytes from data to the buffer.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param data Pointer to the data.
+     * @param count Number of bytes to be added to the buffer.
+     */
+    static void append(std::string* buffer, const char* data, std::size_t count);
+
+    /**
+     * Append count zero bytes to the buffer.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param count Number of bytes to be added to the buffer.
+     */
+    static void append_zeros(std::string* buffer, std::size_t count);
+
+    /**
+     * Shrink the buffer to the specified size. The new size will always be
+     * smaller than the current size.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param size New size of the buffer.
+     *
+     * @pre size < current size of buffer
+     */
+    static void resize(std::string* buffer, std::size_t size);
+
+    /**
+     * Reserve an additional size bytes for use in the buffer. This is used for
+     * variable-sized buffers to tell the buffer implementation that soon more
+     * memory will be used. The implementation can ignore this.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param size Number of bytes to reserve.
+     */
+    static void reserve_additional(std::string* buffer, std::size_t size);
+
+    /**
+     * Delete data from the buffer. This must move back the data after the
+     * part being deleted and resize the buffer accordingly.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param from Offset into the buffer where we want to erase from.
+     * @param to Offset into the buffer one past the last byte we want to erase.
+     *
+     * @pre from, to <= size of the buffer, from < to
+     */
+    static void erase_range(std::string* buffer, std::size_t from, std::size_t to);
+
+    /**
+     * Return a pointer to the memory at the specified position in the buffer.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param pos The position in the buffer.
+     * @returns pointer to the memory in the buffer at the specified position.
+     *
+     * @pre pos <= size of the buffer
+     */
+    static char* at_pos(std::string* buffer, std::size_t pos);
+
+    /**
+     * Add a char to the buffer incrementing the number of chars in the buffer.
+     *
+     * @param buffer Pointer to the buffer.
+     * @param ch The character to add.
+     */
+    static void push_back(std::string* buffer, char ch);
+
+};
+/// @endcond
+
+} // namespace protozero
+
+#endif // PROTOZERO_BUFFER_TMPL_HPP
diff --git a/include/protozero/buffer_vector.hpp b/include/protozero/buffer_vector.hpp
new file mode 100644
index 00000000..c163300c
--- /dev/null
+++ b/include/protozero/buffer_vector.hpp
@@ -0,0 +1,78 @@
+#ifndef PROTOZERO_BUFFER_VECTOR_HPP
+#define PROTOZERO_BUFFER_VECTOR_HPP
+
+/*****************************************************************************
+
+protozero - Minimalistic protocol buffer decoder and encoder in C++.
+
+This file is from https://github.com/mapbox/protozero where you can find more
+documentation.
+
+*****************************************************************************/
+
+/**
+ * @file buffer_vector.hpp
+ *
+ * @brief Contains the customization points for buffer implementation based
+ *        on std::vector<char>
+ */
+
+#include "buffer_tmpl.hpp"
+#include "config.hpp"
+
+#include <cstddef>
+#include <iterator>
+#include <vector>
+
+namespace protozero {
+
+// Implementation of buffer customizations points for std::vector<char>
+
+/// @cond INTERNAL
+template <>
+struct buffer_customization<std::vector<char>> {
+
+    static std::size_t size(const std::vector<char>* buffer) noexcept {
+        return buffer->size();
+    }
+
+    static void append(std::vector<char>* buffer, const char* data, std::size_t count) {
+        buffer->insert(buffer->end(), data, data + count);
+    }
+
+    static void append_zeros(std::vector<char>* buffer, std::size_t count) {
+        buffer->insert(buffer->end(), count, '\0');
+    }
+
+    static void resize(std::vector<char>* buffer, std::size_t size) {
+        protozero_assert(size < buffer->size());
+        buffer->resize(size);
+    }
+
+    static void reserve_additional(std::vector<char>* buffer, std::size_t size) {
+        buffer->reserve(buffer->size() + size);
+    }
+
+    static void erase_range(std::vector<char>* buffer, std::size_t from, std::size_t to) {
+        protozero_assert(from <= buffer->size());
+        protozero_assert(to <= buffer->size());
+        protozero_assert(from <= to);
+        buffer->erase(std::next(buffer->begin(), static_cast<std::vector<char>::difference_type>(from)),
+                      std::next(buffer->begin(), static_cast<std::vector<char>::difference_type>(to)));
+    }
+
+    static char* at_pos(std::vector<char>* buffer, std::size_t pos) {
+        protozero_assert(pos <= buffer->size());
+        return (&*buffer->begin()) + pos;
+    }
+
+    static void push_back(std::vector<char>* buffer, char ch) {
+        buffer->push_back(ch);
+    }
+
+};
+/// @endcond
+
+} // namespace protozero
+
+#endif // PROTOZERO_BUFFER_VECTOR_HPP
diff --git a/include/protozero/byteswap.hpp b/include/protozero/byteswap.hpp
new file mode 100644
index 00000000..75cae691
--- /dev/null
+++ b/include/protozero/byteswap.hpp
@@ -0,0 +1,108 @@
+#ifndef PROTOZERO_BYTESWAP_HPP
+#define PROTOZERO_BYTESWAP_HPP
+
+/*****************************************************************************
+
+protozero - Minimalistic protocol buffer decoder and encoder in C++.
+
+This file is from https://github.com/mapbox/protozero where you can find more
+documentation.
+
+*****************************************************************************/
+
+/**
+ * @file byteswap.hpp
+ *
+ * @brief Contains functions to swap bytes in values (for different endianness).
+ */
+
+#include "config.hpp"
+
+#include <cstdint>
+#include <cstring>
+
+namespace protozero {
+namespace detail {
+
+inline uint32_t byteswap_impl(uint32_t value) noexcept {
+#ifdef PROTOZERO_USE_BUILTIN_BSWAP
+    return __builtin_bswap32(value);
+#else
+    return ((value & 0xff000000U) >> 24U) |
+           ((value & 0x00ff0000U) >>  8U) |
+           ((value & 0x0000ff00U) <<  8U) |
+           ((value & 0x000000ffU) << 24U);
+#endif
+}
+
+inline uint64_t byteswap_impl(uint64_t value) noexcept {
+#ifdef PROTOZERO_USE_BUILTIN_BSWAP
+    return __builtin_bswap64(value);
+#else
+    return ((value & 0xff00000000000000ULL) >> 56U) |
+           ((value & 0x00ff000000000000ULL) >> 40U) |
+           ((value & 0x0000ff0000000000ULL) >> 24U) |
+           ((value & 0x000000ff00000000ULL) >>  8U) |
+           ((value & 0x00000000ff000000ULL) <<  8U) |
+           ((value & 0x0000000000ff0000ULL) << 24U) |
+           ((value & 0x000000000000ff00ULL) << 40U) |
+           ((value & 0x00000000000000ffULL) << 56U);
+#endif
+}
+
+} // end namespace detail
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(uint32_t* ptr) noexcept {
+    *ptr = detail::byteswap_impl(*ptr);
+}
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(uint64_t* ptr) noexcept {
+    *ptr = detail::byteswap_impl(*ptr);
+}
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(int32_t* ptr) noexcept {
+    auto* bptr = reinterpret_cast<uint32_t*>(ptr);
+    *bptr = detail::byteswap_impl(*bptr);
+}
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(int64_t* ptr) noexcept {
+    auto* bptr = reinterpret_cast<uint64_t*>(ptr);
+    *bptr = detail::byteswap_impl(*bptr);
+}
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(float* ptr) noexcept {
+    static_assert(sizeof(float) == 4, "Expecting four byte float");
+
+    uint32_t tmp = 0;
+    std::memcpy(&tmp, ptr, 4);
+    tmp = detail::byteswap_impl(tmp); // uint32 overload
+    std::memcpy(ptr, &tmp, 4);
+}
+
+/// byteswap the data pointed to by ptr in-place.
+inline void byteswap_inplace(double* ptr) noexcept {
+    static_assert(sizeof(double) == 8, "Expecting eight byte double");
+
+    uint64_t tmp = 0;
+    std::memcpy(&tmp, ptr, 8);
+    tmp = detail::byteswap_impl(tmp); // uint64 overload
+    std::memcpy(ptr, &tmp, 8);
+}
+
+namespace detail {
+
+    // Added for backwards compatibility with any code that might use this
+    // function (even if it shouldn't have). Will be removed in a later
+    // version of protozero.
+    using ::protozero::byteswap_inplace;
+
+} // end namespace detail
+
+} // end namespace protozero
+
+#endif // PROTOZERO_BYTESWAP_HPP
diff --git a/include/protozero/config.hpp b/include/protozero/config.hpp
new file mode 100644
index 00000000..6fc77490
--- /dev/null
+++ b/include/protozero/config.hpp
@@ -0,0 +1,48 @@
+#ifndef PROTOZERO_CONFIG_HPP
+#define PROTOZERO_CONFIG_HPP
+
+/*****************************************************************************
+
+protozero - Minimalistic protocol buffer decoder and encoder in C++.
+
+This file is from https://github.com/mapbox/protozero where you can find more
+documentation.
+
+*****************************************************************************/
+
+#include <cassert>
+
+/**
+ * @file config.hpp
+ *
+ * @brief Contains macro checks for different configurations.
+ */
+
+#define PROTOZERO_LITTLE_ENDIAN 1234
+#define PROTOZERO_BIG_ENDIAN    4321
+
+// Find out which byte order the machine has.
+#if defined(__BYTE_ORDER) +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN +# endif +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define PROTOZERO_BYTE_ORDER PROTOZERO_BIG_ENDIAN +# endif +#else +// This probably isn't a very good default, but might do until we figure +// out something better. +# define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN +#endif + +// Check whether __builtin_bswap is available +#if defined(__GNUC__) || defined(__clang__) +# define PROTOZERO_USE_BUILTIN_BSWAP +#endif + +// Wrapper for assert() used for testing +#ifndef protozero_assert +# define protozero_assert(x) assert(x) +#endif + +#endif // PROTOZERO_CONFIG_HPP diff --git a/include/protozero/data_view.hpp b/include/protozero/data_view.hpp new file mode 100644 index 00000000..3ec87af3 --- /dev/null +++ b/include/protozero/data_view.hpp @@ -0,0 +1,236 @@ +#ifndef PROTOZERO_DATA_VIEW_HPP +#define PROTOZERO_DATA_VIEW_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file data_view.hpp + * + * @brief Contains the implementation of the data_view class. + */ + +#include "config.hpp" + +#include +#include +#include +#include +#include + +namespace protozero { + +#ifdef PROTOZERO_USE_VIEW +using data_view = PROTOZERO_USE_VIEW; +#else + +/** + * Holds a pointer to some data and a length. + * + * This class is supposed to be compatible with the std::string_view + * that will be available in C++17. + */ +class data_view { + + const char* m_data = nullptr; + std::size_t m_size = 0; + +public: + + /** + * Default constructor. Construct an empty data_view. + */ + constexpr data_view() noexcept = default; + + /** + * Create data_view from pointer and size. 
+ * + * @param ptr Pointer to the data. + * @param length Length of the data. + */ + constexpr data_view(const char* ptr, std::size_t length) noexcept + : m_data{ptr}, + m_size{length} { + } + + /** + * Create data_view from string. + * + * @param str String with the data. + */ + data_view(const std::string& str) noexcept // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + : m_data{str.data()}, + m_size{str.size()} { + } + + /** + * Create data_view from zero-terminated string. + * + * @param ptr Pointer to the data. + */ + data_view(const char* ptr) noexcept // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + : m_data{ptr}, + m_size{std::strlen(ptr)} { + } + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(data_view& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_size, other.m_size); + } + + /// Return pointer to data. + constexpr const char* data() const noexcept { + return m_data; + } + + /// Return length of data in bytes. + constexpr std::size_t size() const noexcept { + return m_size; + } + + /// Returns true if size is 0. + constexpr bool empty() const noexcept { + return m_size == 0; + } + +#ifndef PROTOZERO_STRICT_API + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + * + * @deprecated to_string() is not available in C++17 string_view so it + * should not be used to make conversion to that class easier + * in the future. + */ + std::string to_string() const { + protozero_assert(m_data); + return {m_data, m_size}; + } +#endif + + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + */ + explicit operator std::string() const { + protozero_assert(m_data); + return {m_data, m_size}; + } + + /** + * Compares the contents of this object with the given other object. 
+ * + * @returns 0 if they are the same, <0 if this object is smaller than + * the other or >0 if it is larger. If both objects have the + * same size returns <0 if this object is lexicographically + * before the other, >0 otherwise. + * + * @pre Must not be default constructed data_view. + */ + int compare(data_view other) const noexcept { + assert(m_data && other.m_data); + const int cmp = std::memcmp(data(), other.data(), + std::min(size(), other.size())); + if (cmp == 0) { + if (size() == other.size()) { + return 0; + } + return size() < other.size() ? -1 : 1; + } + return cmp; + } + +}; // class data_view + +/** + * Swap two data_view objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline void swap(data_view& lhs, data_view& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * Two data_view instances are equal if they have the same size and the + * same content. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline constexpr bool operator==(const data_view lhs, const data_view rhs) noexcept { + return lhs.size() == rhs.size() && + std::equal(lhs.data(), lhs.data() + lhs.size(), rhs.data()); +} + +/** + * Two data_view instances are not equal if they have different sizes or the + * content differs. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline constexpr bool operator!=(const data_view lhs, const data_view rhs) noexcept { + return !(lhs == rhs); +} + +/** + * Returns true if lhs.compare(rhs) < 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator<(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) < 0; +} + +/** + * Returns true if lhs.compare(rhs) <= 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator<=(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) <= 0; +} + +/** + * Returns true if lhs.compare(rhs) > 0. + * + * @param lhs First object. 
+ * @param rhs Second object. + */ +inline bool operator>(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) > 0; +} + +/** + * Returns true if lhs.compare(rhs) >= 0. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator>=(const data_view lhs, const data_view rhs) noexcept { + return lhs.compare(rhs) >= 0; +} + +#endif + +} // end namespace protozero + +#endif // PROTOZERO_DATA_VIEW_HPP diff --git a/include/protozero/exception.hpp b/include/protozero/exception.hpp new file mode 100644 index 00000000..a3cd0f15 --- /dev/null +++ b/include/protozero/exception.hpp @@ -0,0 +1,101 @@ +#ifndef PROTOZERO_EXCEPTION_HPP +#define PROTOZERO_EXCEPTION_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file exception.hpp + * + * @brief Contains the exceptions used in the protozero library. + */ + +#include + +/** + * @brief All parts of the protozero header-only library are in this namespace. + */ +namespace protozero { + +/** + * All exceptions explicitly thrown by the functions of the protozero library + * derive from this exception. + */ +struct exception : std::exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "pbf exception"; + } +}; + +/** + * This exception is thrown when parsing a varint that's larger than allowed. + * This should never happen unless the data is corrupted. + */ +struct varint_too_long_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "varint too long exception"; + } +}; + +/** + * This exception is thrown when the wire type of a pbf field is unknown.
+ * This should never happen unless the data is corrupted. + */ +struct unknown_pbf_wire_type_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "unknown pbf field type exception"; + } +}; + +/** + * This exception is thrown when we are trying to read a field and there + * are not enough bytes left in the buffer to read it. Almost all functions + * of the pbf_reader class can throw this exception. + * + * This should never happen unless the data is corrupted or you have + * initialized the pbf_reader object with incomplete data. + */ +struct end_of_buffer_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "end of buffer exception"; + } +}; + +/** + * This exception is thrown when a tag has an invalid value. Tags must be + * unsigned integers between 1 and 2^29-1. Tags between 19000 and 19999 are + * not allowed. See + * https://developers.google.com/protocol-buffers/docs/proto#assigning-tags + */ +struct invalid_tag_exception : exception { + /// Returns the explanatory string. + const char* what() const noexcept override { + return "invalid tag exception"; + } +}; + +/** + * This exception is thrown when a length field of a packed repeated field is + * invalid. For fixed size types the length must be a multiple of the size of + * the type. + */ +struct invalid_length_exception : exception { + /// Returns the explanatory string. 
+ const char* what() const noexcept override { + return "invalid length exception"; + } +}; + +} // end namespace protozero + +#endif // PROTOZERO_EXCEPTION_HPP diff --git a/include/protozero/iterators.hpp b/include/protozero/iterators.hpp new file mode 100644 index 00000000..ee8ef8ec --- /dev/null +++ b/include/protozero/iterators.hpp @@ -0,0 +1,481 @@ +#ifndef PROTOZERO_ITERATORS_HPP +#define PROTOZERO_ITERATORS_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file iterators.hpp + * + * @brief Contains the iterators for access to packed repeated fields. + */ + +#include "config.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include + +namespace protozero { + +/** + * A range of iterators based on std::pair. Created from beginning and + * end iterators. Used as a return type from some pbf_reader methods + * that is easy to use with range-based for loops. + */ +template > +class iterator_range : +#ifdef PROTOZERO_STRICT_API + protected +#else + public +#endif + P { + +public: + + /// The type of the iterators in this range. + using iterator = T; + + /// The value type of the underlying iterator. + using value_type = typename std::iterator_traits::value_type; + + /** + * Default constructor. Create empty iterator_range. + */ + constexpr iterator_range() : + P{iterator{}, iterator{}} { + } + + /** + * Create iterator range from two iterators. + * + * @param first_iterator Iterator to beginning of range. + * @param last_iterator Iterator to end of range. 
+ */ + constexpr iterator_range(iterator&& first_iterator, iterator&& last_iterator) : + P{std::forward(first_iterator), + std::forward(last_iterator)} { + } + + /// Return iterator to beginning of range. + constexpr iterator begin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator end() const noexcept { + return this->second; + } + + /// Return iterator to beginning of range. + constexpr iterator cbegin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator cend() const noexcept { + return this->second; + } + + /** + * Return true if this range is empty. + * + * Complexity: Constant. + */ + constexpr bool empty() const noexcept { + return begin() == end(); + } + + /** + * Get the size of the range, ie the number of elements it contains. + * + * Complexity: Constant or linear depending on the underlying iterator. + */ + std::size_t size() const noexcept { + return static_cast(std::distance(begin(), end())); + } + + /** + * Get element at the beginning of the range. + * + * @pre Range must not be empty. + */ + value_type front() const { + protozero_assert(!empty()); + return *(this->first); + } + + /** + * Advance beginning of range by one. + * + * @pre Range must not be empty. + */ + void drop_front() { + protozero_assert(!empty()); + ++this->first; + } + + /** + * Swap the contents of this range with the other. + * + * @param other Other range to swap data with. + */ + void swap(iterator_range& other) noexcept { + using std::swap; + swap(this->first, other.first); + swap(this->second, other.second); + } + +}; // struct iterator_range + +/** + * Swap two iterator_ranges. + * + * @param lhs First range. + * @param rhs Second range.
+ */ +template +inline void swap(iterator_range& lhs, iterator_range& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * A forward iterator used for accessing packed repeated fields of fixed + * length (fixed32, sfixed32, float, double). + */ +template +class const_fixed_iterator { + + /// Pointer to current iterator position + const char* m_data = nullptr; + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_fixed_iterator() noexcept = default; + + explicit const_fixed_iterator(const char* data) noexcept : + m_data{data} { + } + + const_fixed_iterator(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator(const_fixed_iterator&&) noexcept = default; + + const_fixed_iterator& operator=(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator& operator=(const_fixed_iterator&&) noexcept = default; + + ~const_fixed_iterator() noexcept = default; + + value_type operator*() const noexcept { + value_type result; + std::memcpy(&result, m_data, sizeof(value_type)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&result); +#endif + return result; + } + + const_fixed_iterator& operator++() noexcept { + m_data += sizeof(value_type); + return *this; + } + + const_fixed_iterator operator++(int) noexcept { + const const_fixed_iterator tmp{*this}; + ++(*this); + return tmp; + } + + const_fixed_iterator& operator--() noexcept { + m_data -= sizeof(value_type); + return *this; + } + + const_fixed_iterator operator--(int) noexcept { + const const_fixed_iterator tmp{*this}; + --(*this); + return tmp; + } + + friend bool operator==(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return lhs.m_data == rhs.m_data; + } + + friend bool operator!=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + 
return !(lhs == rhs); + } + + friend bool operator<(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return lhs.m_data < rhs.m_data; + } + + friend bool operator>(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return rhs < lhs; + } + + friend bool operator<=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return !(lhs > rhs); + } + + friend bool operator>=(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return !(lhs < rhs); + } + + const_fixed_iterator& operator+=(difference_type val) noexcept { + m_data += (sizeof(value_type) * val); + return *this; + } + + friend const_fixed_iterator operator+(const_fixed_iterator lhs, difference_type rhs) noexcept { + const_fixed_iterator tmp{lhs}; + tmp.m_data += (sizeof(value_type) * rhs); + return tmp; + } + + friend const_fixed_iterator operator+(difference_type lhs, const_fixed_iterator rhs) noexcept { + const_fixed_iterator tmp{rhs}; + tmp.m_data += (sizeof(value_type) * lhs); + return tmp; + } + + const_fixed_iterator& operator-=(difference_type val) noexcept { + m_data -= (sizeof(value_type) * val); + return *this; + } + + friend const_fixed_iterator operator-(const_fixed_iterator lhs, difference_type rhs) noexcept { + const_fixed_iterator tmp{lhs}; + tmp.m_data -= (sizeof(value_type) * rhs); + return tmp; + } + + friend difference_type operator-(const_fixed_iterator lhs, const_fixed_iterator rhs) noexcept { + return static_cast(lhs.m_data - rhs.m_data) / static_cast(sizeof(T)); + } + + value_type operator[](difference_type n) const noexcept { + return *(*this + n); + } + + /// @endcond + +}; // class const_fixed_iterator + +/** + * A forward iterator used for accessing packed repeated varint fields + * (int32, uint32, int64, uint64, bool, enum). 
+ */ +template +class const_varint_iterator { + +protected: + + /// Pointer to current iterator position + const char* m_data = nullptr; // NOLINT(misc-non-private-member-variables-in-classes, cppcoreguidelines-non-private-member-variables-in-classes,-warnings-as-errors) + + /// Pointer to end iterator position + const char* m_end = nullptr; // NOLINT(misc-non-private-member-variables-in-classes, cppcoreguidelines-non-private-member-variables-in-classes,-warnings-as-errors) + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + static difference_type distance(const_varint_iterator begin, const_varint_iterator end) noexcept { + // The "distance" between default initialized const_varint_iterator's + // is always 0. + if (!begin.m_data) { + return 0; + } + // We know that each varint contains exactly one byte with the most + // significant bit not set. We can use this to quickly figure out + // how many varints there are without actually decoding the varints. 
+ return std::count_if(begin.m_data, end.m_data, [](char c) noexcept { + return (static_cast(c) & 0x80U) == 0; + }); + } + + const_varint_iterator() noexcept = default; + + const_varint_iterator(const char* data, const char* end) noexcept : + m_data{data}, + m_end{end} { + } + + const_varint_iterator(const const_varint_iterator&) noexcept = default; + const_varint_iterator(const_varint_iterator&&) noexcept = default; + + const_varint_iterator& operator=(const const_varint_iterator&) noexcept = default; + const_varint_iterator& operator=(const_varint_iterator&&) noexcept = default; + + ~const_varint_iterator() noexcept = default; + + value_type operator*() const { + protozero_assert(m_data); + const char* d = m_data; // will be thrown away + return static_cast(decode_varint(&d, m_end)); + } + + const_varint_iterator& operator++() { + protozero_assert(m_data); + skip_varint(&m_data, m_end); + return *this; + } + + const_varint_iterator operator++(int) { + protozero_assert(m_data); + const const_varint_iterator tmp{*this}; + ++(*this); + return tmp; + } + + bool operator==(const const_varint_iterator& rhs) const noexcept { + return m_data == rhs.m_data && m_end == rhs.m_end; + } + + bool operator!=(const const_varint_iterator& rhs) const noexcept { + return !(*this == rhs); + } + + /// @endcond + +}; // class const_varint_iterator + +/** + * A forward iterator used for accessing packed repeated svarint fields + * (sint32, sint64). 
+ */ +template +class const_svarint_iterator : public const_varint_iterator { + +public: + + /// @cond usual iterator functions not documented + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_svarint_iterator() noexcept : + const_varint_iterator{} { + } + + const_svarint_iterator(const char* data, const char* end) noexcept : + const_varint_iterator{data, end} { + } + + const_svarint_iterator(const const_svarint_iterator&) = default; + const_svarint_iterator(const_svarint_iterator&&) noexcept = default; + + const_svarint_iterator& operator=(const const_svarint_iterator&) = default; + const_svarint_iterator& operator=(const_svarint_iterator&&) noexcept = default; + + ~const_svarint_iterator() = default; + + value_type operator*() const { + protozero_assert(this->m_data); + const char* d = this->m_data; // will be thrown away + return static_cast(decode_zigzag64(decode_varint(&d, this->m_end))); + } + + const_svarint_iterator& operator++() { + protozero_assert(this->m_data); + skip_varint(&this->m_data, this->m_end); + return *this; + } + + const_svarint_iterator operator++(int) { + protozero_assert(this->m_data); + const const_svarint_iterator tmp{*this}; + ++(*this); + return tmp; + } + + /// @endcond + +}; // class const_svarint_iterator + +} // end namespace protozero + +namespace std { + + // Specialize std::distance for all the protozero iterators. Because + // functions can't be partially specialized, we have to do this for + // every value_type we are using. 
+ + /// @cond individual overloads do not need to be documented + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_varint_iterator::difference_type + distance>(protozero::const_varint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_varint_iterator last) { + return protozero::const_varint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_svarint_iterator::difference_type + distance>(protozero::const_svarint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_svarint_iterator last) { + return protozero::const_svarint_iterator::distance(first, last); + } + + template <> + inline typename protozero::const_svarint_iterator::difference_type + distance>(protozero::const_svarint_iterator first, // NOLINT(readability-inconsistent-declaration-parameter-name) + protozero::const_svarint_iterator last) { + return protozero::const_svarint_iterator::distance(first, last); + } + + /// @endcond + +} // end namespace std + +#endif // 
PROTOZERO_ITERATORS_HPP diff --git a/include/protozero/pbf_builder.hpp b/include/protozero/pbf_builder.hpp new file mode 100644 index 00000000..71a2dec2 --- /dev/null +++ b/include/protozero/pbf_builder.hpp @@ -0,0 +1,32 @@ +#ifndef PROTOZERO_PBF_BUILDER_HPP +#define PROTOZERO_PBF_BUILDER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_builder.hpp + * + * @brief Contains the pbf_builder template class. + */ + +#include "basic_pbf_builder.hpp" +#include "pbf_writer.hpp" + +#include + +namespace protozero { + +/// Specialization of basic_pbf_builder using std::string as buffer type. +template +using pbf_builder = basic_pbf_builder; + +} // end namespace protozero + +#endif // PROTOZERO_PBF_BUILDER_HPP diff --git a/include/protozero/pbf_message.hpp b/include/protozero/pbf_message.hpp new file mode 100644 index 00000000..d7fd8b5d --- /dev/null +++ b/include/protozero/pbf_message.hpp @@ -0,0 +1,184 @@ +#ifndef PROTOZERO_PBF_MESSAGE_HPP +#define PROTOZERO_PBF_MESSAGE_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_message.hpp + * + * @brief Contains the pbf_message template class. + */ + +#include "pbf_reader.hpp" +#include "types.hpp" + +#include + +namespace protozero { + +/** + * This class represents a protobuf message. Either a top-level message or + * a nested sub-message. 
Top-level messages can be created from any buffer + * with a pointer and length: + * + * @code + * enum class Message : protozero::pbf_tag_type { + * ... + * }; + * + * std::string buffer; + * // fill buffer... + * pbf_message message{buffer.data(), buffer.size()}; + * @endcode + * + * Sub-messages are created using get_message(): + * + * @code + * enum class SubMessage : protozero::pbf_tag_type { + * ... + * }; + * + * pbf_message message{...}; + * message.next(); + * pbf_message submessage = message.get_message(); + * @endcode + * + * All methods of the pbf_message class except get_bytes() and get_string() + * provide the strong exception guarantee, ie they either succeed or do not + * change the pbf_message object they are called on. Use the get_data() method + * instead of get_bytes() or get_string(), if you need this guarantee. + * + * This template class is based on the pbf_reader class and has all the same + * methods. The difference is that wherever the pbf_reader class takes an + * integer tag, this template class takes a tag of the template type T. + * + * Read the tutorial to understand how this class is used. + */ +template +class pbf_message : public pbf_reader { + + static_assert(std::is_same::type>::value, + "T must be enum with underlying type protozero::pbf_tag_type"); + +public: + + /// The type of messages this class will read. + using enum_type = T; + + /** + * Construct a pbf_message. All arguments are forwarded to the pbf_reader + * parent class. + */ + template + pbf_message(Args&&... args) noexcept : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + pbf_reader{std::forward(args)...} { + } + + /** + * Set next field in the message as the current field. This is usually + * called in a while loop: + * + * @code + * pbf_message<...> message(...); + * while (message.next()) { + * // handle field + * } + * @endcode + * + * @returns `true` if there is a next field, `false` if not. + * @pre There must be no current field.
+ * @post If it returns `true` there is a current field now. + */ + bool next() { + return pbf_reader::next(); + } + + /** + * Set next field with given tag in the message as the current field. + * Fields with other tags are skipped. This is usually called in a while + * loop for repeated fields: + * + * @code + * pbf_message message{...}; + * while (message.next(Example1::repeated_fixed64_r)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_message message{...}; + * if (message.next(Example1::required_uint32_x)) { + * // handle field + * } + * @endcode + * + * Note that this will not check the wire type. The two-argument version + * of this function will also check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(T next_tag) { + return pbf_reader::next(pbf_tag_type(next_tag)); + } + + /** + * Set next field with given tag and wire type in the message as the + * current field. Fields with other tags are skipped. This is usually + * called in a while loop for repeated fields: + * + * @code + * pbf_message message{...}; + * while (message.next(Example1::repeated_fixed64_r, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_message message{...}; + * if (message.next(Example1::required_uint32_x, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * Note that this will also check the wire type. The one-argument version + * of this function will not check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. 
+ */ + bool next(T next_tag, pbf_wire_type type) { + return pbf_reader::next(pbf_tag_type(next_tag), type); + } + + /** + * The tag of the current field. The tag is the enum value for the field + * number from the description in the .proto file. + * + * Call next() before calling this function to set the current field. + * + * @returns tag of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + T tag() const noexcept { + return T(pbf_reader::tag()); + } + +}; // class pbf_message + +} // end namespace protozero + +#endif // PROTOZERO_PBF_MESSAGE_HPP diff --git a/include/protozero/pbf_reader.hpp b/include/protozero/pbf_reader.hpp new file mode 100644 index 00000000..92bfdee5 --- /dev/null +++ b/include/protozero/pbf_reader.hpp @@ -0,0 +1,977 @@ +#ifndef PROTOZERO_PBF_READER_HPP +#define PROTOZERO_PBF_READER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_reader.hpp + * + * @brief Contains the pbf_reader class. + */ + +#include "config.hpp" +#include "data_view.hpp" +#include "exception.hpp" +#include "iterators.hpp" +#include "types.hpp" +#include "varint.hpp" + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +#include +#include +#include +#include +#include + +namespace protozero { + +/** + * This class represents a protobuf message. Either a top-level message or + * a nested sub-message. Top-level messages can be created from any buffer + * with a pointer and length: + * + * @code + * std::string buffer; + * // fill buffer... 
+ * pbf_reader message{buffer.data(), buffer.size()}; + * @endcode + * + * Sub-messages are created using get_message(): + * + * @code + * pbf_reader message{...}; + * message.next(); + * pbf_reader submessage = message.get_message(); + * @endcode + * + * All methods of the pbf_reader class except get_bytes() and get_string() + * provide the strong exception guarantee, ie they either succeed or do not + * change the pbf_reader object they are called on. Use the get_view() method + * instead of get_bytes() or get_string(), if you need this guarantee. + */ +class pbf_reader { + + // A pointer to the next unread data. + const char* m_data = nullptr; + + // A pointer to one past the end of data. + const char* m_end = nullptr; + + // The wire type of the current field. + pbf_wire_type m_wire_type = pbf_wire_type::unknown; + + // The tag of the current field. + pbf_tag_type m_tag = 0; + + template + T get_fixed() { + T result; + const char* data = m_data; + skip_bytes(sizeof(T)); + std::memcpy(&result, data, sizeof(T)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + byteswap_inplace(&result); +#endif + return result; + } + + template + iterator_range> packed_fixed() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + const auto len = get_len_and_skip(); + if (len % sizeof(T) != 0) { + throw invalid_length_exception{}; + } + return {const_fixed_iterator(m_data - len), + const_fixed_iterator(m_data)}; + } + + template + T get_varint() { + const auto val = static_cast(decode_varint(&m_data, m_end)); + return val; + } + + template + T get_svarint() { + protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint"); + return static_cast(decode_zigzag64(decode_varint(&m_data, m_end))); + } + + pbf_length_type get_length() { + return get_varint(); + } + + void skip_bytes(pbf_length_type len) { + if (m_end - m_data < static_cast(len)) { + throw end_of_buffer_exception{}; + } + 
m_data += len; + +#ifndef NDEBUG + // In debug builds reset the tag to zero so that we can detect (some) + // wrong code. + m_tag = 0; +#endif + } + + pbf_length_type get_len_and_skip() { + const auto len = get_length(); + skip_bytes(len); + return len; + } + + template + iterator_range get_packed() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + const auto len = get_len_and_skip(); + return {T{m_data - len, m_data}, + T{m_data, m_data}}; + } + +public: + + /** + * Construct a pbf_reader message from a data_view. The pointer from the + * data_view will be stored inside the pbf_reader object, no data is + * copied. So you must make sure the view stays valid as long as the + * pbf_reader object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + explicit pbf_reader(const data_view& view) noexcept + : m_data{view.data()}, + m_end{view.data() + view.size()} { + } + + /** + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader + * object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + pbf_reader(const char* data, std::size_t size) noexcept + : m_data{data}, + m_end{data + size} { + } + +#ifndef PROTOZERO_STRICT_API + /** + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader + * object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + * @deprecated Use one of the other constructors. 
+ */ + explicit pbf_reader(const std::pair& data) noexcept + : m_data{data.first}, + m_end{data.first + data.second} { + } +#endif + + /** + * Construct a pbf_reader message from a std::string. A pointer to the + * string internals will be stored inside the pbf_reader object, no data + * is copied. So you must make sure the string is unchanged as long as the + * pbf_reader object is used. + * + * The string must contain a complete protobuf message. + * + * @post There is no current field. + */ + explicit pbf_reader(const std::string& data) noexcept + : m_data{data.data()}, + m_end{data.data() + data.size()} { + } + + /** + * pbf_reader can be default constructed and behaves like it has an empty + * buffer. + */ + pbf_reader() noexcept = default; + + /// pbf_reader messages can be copied trivially. + pbf_reader(const pbf_reader&) noexcept = default; + + /// pbf_reader messages can be moved trivially. + pbf_reader(pbf_reader&&) noexcept = default; + + /// pbf_reader messages can be copied trivially. + pbf_reader& operator=(const pbf_reader& other) noexcept = default; + + /// pbf_reader messages can be moved trivially. + pbf_reader& operator=(pbf_reader&& other) noexcept = default; + + ~pbf_reader() = default; + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(pbf_reader& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_end, other.m_end); + swap(m_wire_type, other.m_wire_type); + swap(m_tag, other.m_tag); + } + + /** + * In a boolean context the pbf_reader class evaluates to `true` if there + * are still fields available and to `false` if the last field has been + * read. + */ + operator bool() const noexcept { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) + return m_data != m_end; + } + + /** + * Get a view of the not yet read data. 
+ */ + data_view data() const noexcept { + return {m_data, static_cast(m_end - m_data)}; + } + + /** + * Return the length in bytes of the current message. If you have + * already called next() and/or any of the get_*() functions, this will + * return the remaining length. + * + * This can, for instance, be used to estimate the space needed for a + * buffer. Of course you have to know reasonably well what data to expect + * and how it is encoded for this number to have any meaning. + */ + std::size_t length() const noexcept { + return std::size_t(m_end - m_data); + } + + /** + * Set next field in the message as the current field. This is usually + * called in a while loop: + * + * @code + * pbf_reader message(...); + * while (message.next()) { + * // handle field + * } + * @endcode + * + * @returns `true` if there is a next field, `false` if not. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now. + */ + bool next() { + if (m_data == m_end) { + return false; + } + + const auto value = get_varint(); + m_tag = pbf_tag_type(value >> 3U); + + // tags 0 and 19000 to 19999 are not allowed as per + // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags + if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) { + throw invalid_tag_exception{}; + } + + m_wire_type = pbf_wire_type(value & 0x07U); + switch (m_wire_type) { + case pbf_wire_type::varint: + case pbf_wire_type::fixed64: + case pbf_wire_type::length_delimited: + case pbf_wire_type::fixed32: + break; + default: + throw unknown_pbf_wire_type_exception{}; + } + + return true; + } + + /** + * Set next field with given tag in the message as the current field. + * Fields with other tags are skipped. 
This is usually called in a while + * loop for repeated fields: + * + * @code + * pbf_reader message{...}; + * while (message.next(17)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_reader message{...}; + * if (message.next(17)) { + * // handle field + * } + * @endcode + * + * Note that this will not check the wire type. The two-argument version + * of this function will also check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(pbf_tag_type next_tag) { + while (next()) { + if (m_tag == next_tag) { + return true; + } + skip(); + } + return false; + } + + /** + * Set next field with given tag and wire type in the message as the + * current field. Fields with other tags are skipped. This is usually + * called in a while loop for repeated fields: + * + * @code + * pbf_reader message{...}; + * while (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_reader message{...}; + * if (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * Note that this will also check the wire type. The one-argument version + * of this function will not check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(pbf_tag_type next_tag, pbf_wire_type type) { + while (next()) { + if (m_tag == next_tag && m_wire_type == type) { + return true; + } + skip(); + } + return false; + } + + /** + * The tag of the current field. The tag is the field number from the + * description in the .proto file. 
+ * + * Call next() before calling this function to set the current field. + * + * @returns tag of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + pbf_tag_type tag() const noexcept { + return m_tag; + } + + /** + * Get the wire type of the current field. The wire types are: + * + * * 0 - varint + * * 1 - 64 bit + * * 2 - length-delimited + * * 5 - 32 bit + * + * All other types are illegal. + * + * Call next() before calling this function to set the current field. + * + * @returns wire type of the current field. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + pbf_wire_type wire_type() const noexcept { + return m_wire_type; + } + + /** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * Use it like this: + * + * @code + * pbf_reader message{...}; + * while (message.next()) { + * switch (message.tag_and_type()) { + * case tag_and_type(17, pbf_wire_type::length_delimited): + * .... + * break; + * case tag_and_type(21, pbf_wire_type::varint): + * .... + * break; + * default: + * message.skip(); + * } + * } + * @endcode + */ + uint32_t tag_and_type() const noexcept { + return protozero::tag_and_type(tag(), wire_type()); + } + + /** + * Check the wire type of the current field. + * + * @returns `true` if the current field has the given wire type. + * @pre There must be a current field (ie. next() must have returned `true`). + */ + bool has_wire_type(pbf_wire_type type) const noexcept { + return wire_type() == type; + } + + /** + * Consume the current field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @post The current field was consumed and there is no current field now. 
+ */ + void skip() { + protozero_assert(tag() != 0 && "call next() before calling skip()"); + switch (wire_type()) { + case pbf_wire_type::varint: + skip_varint(&m_data, m_end); + break; + case pbf_wire_type::fixed64: + skip_bytes(8); + break; + case pbf_wire_type::length_delimited: + skip_bytes(get_length()); + break; + case pbf_wire_type::fixed32: + skip_bytes(4); + break; + default: + break; + } + } + + ///@{ + /** + * @name Scalar field accessor functions + */ + + /** + * Consume and return value of current "bool" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bool". + * @post The current field was consumed and there is no current field now. + */ + bool get_bool() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + const bool result = m_data[0] != 0; + skip_varint(&m_data, m_end); + return result; + } + + /** + * Consume and return value of current "enum" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "enum". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_enum() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "int32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "int32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_int32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "sint32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). 
+ * @pre The current field must be of type "sint32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_sint32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_svarint(); + } + + /** + * Consume and return value of current "uint32" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "uint32". + * @post The current field was consumed and there is no current field now. + */ + uint32_t get_uint32() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "int64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "int64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_int64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "sint64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sint64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_sint64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_svarint(); + } + + /** + * Consume and return value of current "uint64" varint field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "uint64". + * @post The current field was consumed and there is no current field now. + */ + uint64_t get_uint64() { + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + return get_varint(); + } + + /** + * Consume and return value of current "fixed32" field. 
+ * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "fixed32". + * @post The current field was consumed and there is no current field now. + */ + uint32_t get_fixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "sfixed32" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sfixed32". + * @post The current field was consumed and there is no current field now. + */ + int32_t get_sfixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "fixed64" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "fixed64". + * @post The current field was consumed and there is no current field now. + */ + uint64_t get_fixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "sfixed64" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "sfixed64". + * @post The current field was consumed and there is no current field now. + */ + int64_t get_sfixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "float" field. 
+ * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "float". + * @post The current field was consumed and there is no current field now. + */ + float get_float() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "double" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "double". + * @post The current field was consumed and there is no current field now. + */ + double get_double() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + + /** + * Consume and return value of current "bytes", "string", or "message" + * field. + * + * @returns A data_view object. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes", "string", or "message". + * @post The current field was consumed and there is no current field now. + */ + data_view get_view() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return {m_data - len, len}; + } + +#ifndef PROTOZERO_STRICT_API + /** + * Consume and return value of current "bytes" or "string" field. + * + * @returns A pair with a pointer to the data and the length of the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes" or "string". + * @post The current field was consumed and there is no current field now. 
+ */ + std::pair get_data() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return {m_data - len, len}; + } +#endif + + /** + * Consume and return value of current "bytes" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes". + * @post The current field was consumed and there is no current field now. + */ + std::string get_bytes() { + return std::string(get_view()); + } + + /** + * Consume and return value of current "string" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "string". + * @post The current field was consumed and there is no current field now. + */ + std::string get_string() { + return std::string(get_view()); + } + + /** + * Consume and return value of current "message" field. + * + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "message". + * @post The current field was consumed and there is no current field now. + */ + pbf_reader get_message() { + return pbf_reader{get_view()}; + } + + ///@} + + /// Forward iterator for iterating over bool (int32 varint) values. + using const_bool_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over enum (int32 varint) values. + using const_enum_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over int32 (varint) values. + using const_int32_iterator = const_varint_iterator< int32_t>; + + /// Forward iterator for iterating over sint32 (varint) values. + using const_sint32_iterator = const_svarint_iterator; + + /// Forward iterator for iterating over uint32 (varint) values. 
+ using const_uint32_iterator = const_varint_iterator; + + /// Forward iterator for iterating over int64 (varint) values. + using const_int64_iterator = const_varint_iterator< int64_t>; + + /// Forward iterator for iterating over sint64 (varint) values. + using const_sint64_iterator = const_svarint_iterator; + + /// Forward iterator for iterating over uint64 (varint) values. + using const_uint64_iterator = const_varint_iterator; + + /// Forward iterator for iterating over fixed32 values. + using const_fixed32_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over sfixed32 values. + using const_sfixed32_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over fixed64 values. + using const_fixed64_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over sfixed64 values. + using const_sfixed64_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over float values. + using const_float_iterator = const_fixed_iterator; + + /// Forward iterator for iterating over double values. + using const_double_iterator = const_fixed_iterator; + + ///@{ + /** + * @name Repeated packed field accessor functions + */ + + /** + * Consume current "repeated packed bool" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed bool". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_bool() { + return get_packed(); + } + + /** + * Consume current "repeated packed enum" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed enum". 
+ * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_enum() { + return get_packed(); + } + + /** + * Consume current "repeated packed int32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed int32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_int32() { + return get_packed(); + } + + /** + * Consume current "repeated packed sint32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sint32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sint32() { + return get_packed(); + } + + /** + * Consume current "repeated packed uint32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed uint32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_uint32() { + return get_packed(); + } + + /** + * Consume current "repeated packed int64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed int64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_int64() { + return get_packed(); + } + + /** + * Consume current "repeated packed sint64" field. 
+ * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sint64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sint64() { + return get_packed(); + } + + /** + * Consume current "repeated packed uint64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed uint64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_uint64() { + return get_packed(); + } + + /** + * Consume current "repeated packed fixed32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed fixed32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_fixed32() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed sfixed32" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sfixed32". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sfixed32() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed fixed64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). 
+ * @pre The current field must be of type "repeated packed fixed64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_fixed64() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed sfixed64" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed sfixed64". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_sfixed64() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed float" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed float". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_float() { + return packed_fixed(); + } + + /** + * Consume current "repeated packed double" field. + * + * @returns a pair of iterators to the beginning and one past the end of + * the data. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "repeated packed double". + * @post The current field was consumed and there is no current field now. + */ + iterator_range get_packed_double() { + return packed_fixed(); + } + + ///@} + +}; // class pbf_reader + +/** + * Swap two pbf_reader objects. + * + * @param lhs First object. + * @param rhs Second object. 
+ */ +inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept { + lhs.swap(rhs); +} + +} // end namespace protozero + +#endif // PROTOZERO_PBF_READER_HPP diff --git a/include/protozero/pbf_writer.hpp b/include/protozero/pbf_writer.hpp new file mode 100644 index 00000000..9a07bd5b --- /dev/null +++ b/include/protozero/pbf_writer.hpp @@ -0,0 +1,76 @@ +#ifndef PROTOZERO_PBF_WRITER_HPP +#define PROTOZERO_PBF_WRITER_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file pbf_writer.hpp + * + * @brief Contains the pbf_writer class. + */ + +#include "basic_pbf_writer.hpp" +#include "buffer_string.hpp" + +#include +#include + +namespace protozero { + +/** + * Specialization of basic_pbf_writer using std::string as buffer type. + */ +using pbf_writer = basic_pbf_writer; + +/// Class for generating packed repeated bool fields. +using packed_field_bool = detail::packed_field_varint; + +/// Class for generating packed repeated enum fields. +using packed_field_enum = detail::packed_field_varint; + +/// Class for generating packed repeated int32 fields. +using packed_field_int32 = detail::packed_field_varint; + +/// Class for generating packed repeated sint32 fields. +using packed_field_sint32 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint32 fields. +using packed_field_uint32 = detail::packed_field_varint; + +/// Class for generating packed repeated int64 fields. +using packed_field_int64 = detail::packed_field_varint; + +/// Class for generating packed repeated sint64 fields. +using packed_field_sint64 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint64 fields. 
+using packed_field_uint64 = detail::packed_field_varint; + +/// Class for generating packed repeated fixed32 fields. +using packed_field_fixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed32 fields. +using packed_field_sfixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated fixed64 fields. +using packed_field_fixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed64 fields. +using packed_field_sfixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated float fields. +using packed_field_float = detail::packed_field_fixed; + +/// Class for generating packed repeated double fields. +using packed_field_double = detail::packed_field_fixed; + +} // end namespace protozero + +#endif // PROTOZERO_PBF_WRITER_HPP diff --git a/include/protozero/types.hpp b/include/protozero/types.hpp new file mode 100644 index 00000000..3aefddfb --- /dev/null +++ b/include/protozero/types.hpp @@ -0,0 +1,66 @@ +#ifndef PROTOZERO_TYPES_HPP +#define PROTOZERO_TYPES_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file types.hpp + * + * @brief Contains the declaration of low-level types used in the pbf format. + */ + +#include "config.hpp" + +#include +#include +#include +#include +#include +#include + +namespace protozero { + +/** + * The type used for field tags (field numbers). + */ +using pbf_tag_type = uint32_t; + +/** + * The type used to encode type information. 
+ * See the table on + * https://developers.google.com/protocol-buffers/docs/encoding + */ +enum class pbf_wire_type : uint32_t { + varint = 0, // int32/64, uint32/64, sint32/64, bool, enum + fixed64 = 1, // fixed64, sfixed64, double + length_delimited = 2, // string, bytes, nested messages, packed repeated fields + fixed32 = 5, // fixed32, sfixed32, float + unknown = 99 // used for default setting in this library +}; + +/** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * See pbf_reader.tag_and_type() for an example how to use this. + */ +template +constexpr inline uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept { + return (static_cast(static_cast(tag)) << 3U) | static_cast(wire_type); +} + +/** + * The type used for length values, such as the length of a field. + */ +using pbf_length_type = uint32_t; + +} // end namespace protozero + +#endif // PROTOZERO_TYPES_HPP diff --git a/include/protozero/varint.hpp b/include/protozero/varint.hpp new file mode 100644 index 00000000..b4648a44 --- /dev/null +++ b/include/protozero/varint.hpp @@ -0,0 +1,245 @@ +#ifndef PROTOZERO_VARINT_HPP +#define PROTOZERO_VARINT_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file varint.hpp + * + * @brief Contains low-level varint and zigzag encoding and decoding functions. + */ + +#include "buffer_tmpl.hpp" +#include "exception.hpp" + +#include + +namespace protozero { + +/** + * The maximum length of a 64 bit varint. 
+ */ +constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1; + +namespace detail { + + // from https://github.com/facebook/folly/blob/master/folly/Varint.h + inline uint64_t decode_varint_impl(const char** data, const char* end) { + const auto* begin = reinterpret_cast(*data); + const auto* iend = reinterpret_cast(end); + const int8_t* p = begin; + uint64_t val = 0; + + if (iend - begin >= max_varint_length) { // fast path + do { + int64_t b = *p++; + val = ((uint64_t(b) & 0x7fU) ); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 7U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 14U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 21U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 28U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 35U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 42U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 49U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x7fU) << 56U); if (b >= 0) { break; } + b = *p++; val |= ((uint64_t(b) & 0x01U) << 63U); if (b >= 0) { break; } + throw varint_too_long_exception{}; + } while (false); + } else { + unsigned int shift = 0; + while (p != iend && *p < 0) { + val |= (uint64_t(*p++) & 0x7fU) << shift; + shift += 7; + } + if (p == iend) { + throw end_of_buffer_exception{}; + } + val |= uint64_t(*p++) << shift; + } + + *data = reinterpret_cast(p); + return val; + } + +} // end namespace detail + +/** + * Decode a 64 bit varint. + * + * Strong exception guarantee: if there is an exception the data pointer will + * not be changed. + * + * @param[in,out] data Pointer to pointer to the input data. After the function + * returns this will point to the next data to be read. + * @param[in] end Pointer one past the end of the input data. 
+ * @returns The decoded integer + * @throws varint_too_long_exception if the varint is longer then the maximum + * length that would fit in a 64 bit int. Usually this means your data + * is corrupted or you are trying to read something as a varint that + * isn't. + * @throws end_of_buffer_exception if the *end* of the buffer was reached + * before the end of the varint. + */ +inline uint64_t decode_varint(const char** data, const char* end) { + // If this is a one-byte varint, decode it here. + if (end != *data && ((static_cast(**data) & 0x80U) == 0)) { + const auto val = static_cast(**data); + ++(*data); + return val; + } + // If this varint is more than one byte, defer to complete implementation. + return detail::decode_varint_impl(data, end); +} + +/** + * Skip over a varint. + * + * Strong exception guarantee: if there is an exception the data pointer will + * not be changed. + * + * @param[in,out] data Pointer to pointer to the input data. After the function + * returns this will point to the next data to be read. + * @param[in] end Pointer one past the end of the input data. + * @throws end_of_buffer_exception if the *end* of the buffer was reached + * before the end of the varint. + */ +inline void skip_varint(const char** data, const char* end) { + const auto* begin = reinterpret_cast(*data); + const auto* iend = reinterpret_cast(end); + const int8_t* p = begin; + + while (p != iend && *p < 0) { + ++p; + } + + if (p - begin >= max_varint_length) { + throw varint_too_long_exception{}; + } + + if (p == iend) { + throw end_of_buffer_exception{}; + } + + ++p; + + *data = reinterpret_cast(p); +} + +/** + * Varint encode a 64 bit integer. + * + * @tparam T An output iterator type. + * @param data Output iterator the varint encoded value will be written to + * byte by byte. + * @param value The integer that will be encoded. + * @returns the number of bytes written + * @throws Any exception thrown by increment or dereference operator on data. 
+ * @deprecated Use add_varint_to_buffer() instead. + */ +template +inline int write_varint(T data, uint64_t value) { + int n = 1; + + while (value >= 0x80U) { + *data++ = char((value & 0x7fU) | 0x80U); + value >>= 7U; + ++n; + } + *data = char(value); + + return n; +} + +/** + * Varint encode a 64 bit integer. + * + * @tparam TBuffer A buffer type. + * @param buffer Output buffer the varint will be written to. + * @param value The integer that will be encoded. + * @returns the number of bytes written + * @throws Any exception thrown by calling the buffer_push_back() function. + */ +template +inline void add_varint_to_buffer(TBuffer* buffer, uint64_t value) { + while (value >= 0x80U) { + buffer_customization::push_back(buffer, char((value & 0x7fU) | 0x80U)); + value >>= 7U; + } + buffer_customization::push_back(buffer, char(value)); +} + +/** + * Varint encode a 64 bit integer. + * + * @param data Where to add the varint. There must be enough space available! + * @param value The integer that will be encoded. + * @returns the number of bytes written + */ +inline int add_varint_to_buffer(char* data, uint64_t value) noexcept { + int n = 1; + + while (value >= 0x80U) { + *data++ = char((value & 0x7fU) | 0x80U); + value >>= 7U; + ++n; + } + *data = char(value); + + return n; +} + +/** + * Get the length of the varint the specified value would produce. + * + * @param value The integer to be encoded. + * @returns the number of bytes the varint would have if we created it. + */ +inline int length_of_varint(uint64_t value) noexcept { + int n = 1; + + while (value >= 0x80U) { + value >>= 7U; + ++n; + } + + return n; +} + +/** + * ZigZag encodes a 32 bit integer. + */ +inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept { + return (static_cast(value) << 1U) ^ static_cast(-static_cast(static_cast(value) >> 31U)); +} + +/** + * ZigZag encodes a 64 bit integer. 
+ */ +inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept { + return (static_cast(value) << 1U) ^ static_cast(-static_cast(static_cast(value) >> 63U)); +} + +/** + * Decodes a 32 bit ZigZag-encoded integer. + */ +inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept { + return static_cast((value >> 1U) ^ static_cast(-static_cast(value & 1U))); +} + +/** + * Decodes a 64 bit ZigZag-encoded integer. + */ +inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept { + return static_cast((value >> 1U) ^ static_cast(-static_cast(value & 1U))); +} + +} // end namespace protozero + +#endif // PROTOZERO_VARINT_HPP diff --git a/include/protozero/version.hpp b/include/protozero/version.hpp new file mode 100644 index 00000000..fc9b9287 --- /dev/null +++ b/include/protozero/version.hpp @@ -0,0 +1,34 @@ +#ifndef PROTOZERO_VERSION_HPP +#define PROTOZERO_VERSION_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file version.hpp + * + * @brief Contains macros defining the protozero version. 
+ */ + +/// The major version number +#define PROTOZERO_VERSION_MAJOR 1 + +/// The minor version number +#define PROTOZERO_VERSION_MINOR 7 + +/// The patch number +#define PROTOZERO_VERSION_PATCH 1 + +/// The complete version number +#define PROTOZERO_VERSION_CODE (PROTOZERO_VERSION_MAJOR * 10000 + PROTOZERO_VERSION_MINOR * 100 + PROTOZERO_VERSION_PATCH) + +/// Version number as string +#define PROTOZERO_VERSION_STRING "1.7.1" + +#endif // PROTOZERO_VERSION_HPP diff --git a/include/sharded_node_store.h b/include/sharded_node_store.h new file mode 100644 index 00000000..836c34ef --- /dev/null +++ b/include/sharded_node_store.h @@ -0,0 +1,32 @@ +#ifndef _SHARDED_NODE_STORE +#define _SHARDED_NODE_STORE + +#include +#include +#include "node_store.h" + +class ShardedNodeStore : public NodeStore { +public: + ShardedNodeStore(std::function()> createNodeStore); + ~ShardedNodeStore(); + void reopen() override; + void finalize(size_t threadNum) override; + LatpLon at(NodeID i) const override; + size_t size() const override; + void batchStart() override; + void insert(const std::vector& elements) override; + void clear() override { + reopen(); + } + + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return *stores[shard]; } + const NodeStore& shard(size_t shard) const override { return *stores[shard]; } + size_t shards() const override; + +private: + std::function()> createNodeStore; + std::vector> stores; +}; + +#endif diff --git a/include/sharded_way_store.h b/include/sharded_way_store.h new file mode 100644 index 00000000..40a3d331 --- /dev/null +++ b/include/sharded_way_store.h @@ -0,0 +1,35 @@ +#ifndef _SHARDED_WAY_STORE +#define _SHARDED_WAY_STORE + +#include +#include +#include "way_store.h" + +class NodeStore; + +class ShardedWayStore : public WayStore { +public: + ShardedWayStore(std::function()> createWayStore, const NodeStore& nodeStore); + ~ShardedWayStore(); + void reopen() override; + void batchStart() override; 
+ std::vector at(WayID wayid) const override; + bool requiresNodes() const override; + void insertLatpLons(std::vector &newWays) override; + void insertNodes(const std::vector>>& newWays) override; + void clear() override; + std::size_t size() const override; + void finalize(unsigned int threadNum) override; + + bool contains(size_t shard, WayID id) const override; + WayStore& shard(size_t shard) override; + const WayStore& shard(size_t shard) const override; + size_t shards() const override; + +private: + std::function()> createWayStore; + const NodeStore& nodeStore; + std::vector> stores; +}; + +#endif diff --git a/include/shared_data.h b/include/shared_data.h index 23ba9a06..45c6e34b 100644 --- a/include/shared_data.h +++ b/include/shared_data.h @@ -7,6 +7,7 @@ #include "rapidjson/document.h" +#include "options_parser.h" #include "osm_store.h" #include "output_object.h" #include "mbtiles.h" @@ -61,10 +62,6 @@ class LayerDefinition { std::string serialiseToJSON() const; }; -const int OUTPUT_FILE = 0; -const int OUTPUT_MBTILES = 1; -const int OUTPUT_PMTILES = 2; - ///\brief Config read from JSON to control behavior of program class Config { @@ -91,7 +88,7 @@ class SharedData { public: const class LayerDefinition &layers; - int outputMode; + OptionsParser::OutputMode outputMode; bool mergeSqlite; MBTiles mbtiles; PMTiles pmtiles; diff --git a/include/shp_mem_tiles.h b/include/shp_mem_tiles.h index 267a0090..508921ff 100644 --- a/include/shp_mem_tiles.h +++ b/include/shp_mem_tiles.h @@ -11,6 +11,8 @@ class ShpMemTiles : public TileDataSource public: ShpMemTiles(size_t threadNum, uint baseZoom); + std::string name() const override { return "shp"; } + void CreateNamedLayerIndex(const std::string& layerName); // Used in shape file loading diff --git a/include/sorted_node_store.h b/include/sorted_node_store.h index 5c156ad3..61fdfad3 100644 --- a/include/sorted_node_store.h +++ b/include/sorted_node_store.h @@ -3,6 +3,7 @@ #include "node_store.h" #include 
"mmap_allocator.h" +#include #include #include #include @@ -65,10 +66,15 @@ class SortedNodeStore : public NodeStore size_t size() const override; void batchStart() override; void insert(const std::vector& elements) override; - void clear() { + void clear() override { reopen(); } + bool contains(size_t shard, NodeID id) const override; + NodeStore& shard(size_t shard) override { return *this; } + const NodeStore& shard(size_t shard) const override { return *this; } + size_t shards() const override { return 1; } + private: // When true, store chunks compressed. Only store compressed if the // chunk is sufficiently large. @@ -82,6 +88,15 @@ class SortedNodeStore : public NodeStore // multiple threads. They'll get folded into the index during finalize() std::map> orphanage; std::vector> workerBuffers; + + std::atomic totalGroups; + std::atomic totalNodes; + std::atomic totalGroupSpace; + std::atomic totalAllocatedSpace; + std::atomic totalChunks; + std::atomic chunkSizeFreqs[257]; + std::atomic groupSizeFreqs[257]; + void collectOrphans(const std::vector& orphans); void publishGroup(const std::vector& nodes); }; diff --git a/include/sorted_way_store.h b/include/sorted_way_store.h index 145e467b..b99ba7de 100644 --- a/include/sorted_way_store.h +++ b/include/sorted_way_store.h @@ -1,6 +1,7 @@ #ifndef _SORTED_WAY_STORE_H #define _SORTED_WAY_STORE_H +#include #include #include #include @@ -89,10 +90,15 @@ class SortedWayStore: public WayStore { std::vector at(WayID wayid) const override; bool requiresNodes() const override { return true; } void insertLatpLons(std::vector &newWays) override; - const void insertNodes(const std::vector>>& newWays) override; + void insertNodes(const std::vector>>& newWays) override; void clear() override; std::size_t size() const override; void finalize(unsigned int threadNum) override; + + bool contains(size_t shard, WayID id) const override; + WayStore& shard(size_t shard) override { return *this; } + const WayStore& shard(size_t shard) 
const override { return *this; } + size_t shards() const override { return 1; } static uint16_t encodeWay( const std::vector& way, @@ -113,6 +119,13 @@ class SortedWayStore: public WayStore { // multiple threads. They'll get folded into the index during finalize() std::map>>> orphanage; std::vector>>> workerBuffers; + + std::atomic totalWays; + std::atomic totalNodes; + std::atomic totalGroups; + std::atomic totalGroupSpace; + std::atomic totalChunks; + void collectOrphans(const std::vector>>& orphans); void publishGroup(const std::vector>>& ways); }; diff --git a/include/tag_map.h b/include/tag_map.h new file mode 100644 index 00000000..f951d9e9 --- /dev/null +++ b/include/tag_map.h @@ -0,0 +1,56 @@ +#ifndef _TAG_MAP_H +#define _TAG_MAP_H + +#include +#include +#include +#include + +// We track tags in a special structure, which enables some tricks when +// doing Lua interop. +// +// The alternative is a std::map - but often, our map is quite small. +// It's preferable to have a small set of vectors and do linear search. +// +// Further, we can avoid passing std::string from Lua -> C++ in some cases +// by first checking to see if the string we would have passed is already +// stored in our tag map, and passing a reference to its location. + +// Assumptions: +// 1. Not thread-safe +// This is OK because we have 1 instance of OsmLuaProcessing per thread. +// 2. Lifetime of map is less than lifetime of keys/values that are passed +// This is true since the strings are owned by the protobuf block reader +// 3. Max number of tag values will fit in a short +// OSM limit is 5,000 tags per object +class TagMap { +public: + TagMap(); + void reset(); + + void addTag(const protozero::data_view& key, const protozero::data_view& value); + + // Return -1 if key not found, else return its keyLoc. + int64_t getKey(const char* key, size_t size) const; + + // Return -1 if value not found, else return its keyLoc. 
+ int64_t getValue(const char* key, size_t size) const; + + const protozero::data_view* getValueFromKey(uint32_t keyLoc) const; + const protozero::data_view* getValue(uint32_t valueLoc) const; + + boost::container::flat_map exportToBoostMap() const; + +private: + uint32_t ensureString( + std::vector>& vector, + const protozero::data_view& value + ); + + + std::vector> keys; + std::vector> key2value; + std::vector> values; +}; + +#endif _TAG_MAP_H diff --git a/include/tile_data.h b/include/tile_data.h index 814b53ce..b78463e2 100644 --- a/include/tile_data.h +++ b/include/tile_data.h @@ -8,7 +8,11 @@ #include #include #include "output_object.h" +#include "append_vector.h" #include "clip_cache.h" +#include "mmap_allocator.h" + +#define TILE_DATA_ID_SIZE 34 typedef std::vector SourceList; @@ -45,16 +49,40 @@ struct OutputObjectXYID { }; template void finalizeObjects( + const std::string& name, const size_t& threadNum, const unsigned int& baseZoom, - typename std::vector>::iterator begin, - typename std::vector>::iterator end + typename std::vector>::iterator begin, + typename std::vector>::iterator end, + typename std::vector>& lowZoom ) { - for (typename std::vector>::iterator it = begin; it != end; it++) { + size_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; +#ifdef CLOCK_MONOTONIC + timespec startTs, endTs; + clock_gettime(CLOCK_MONOTONIC, &startTs); +#endif + + int i = -1; + for (auto it = begin; it != end; it++) { + i++; + if (it->size() > 0 || i % 10 == 0 || i == 4095) { + std::cout << "\r" << name << ": finalizing z6 tile " << (i + 1) << "/" << CLUSTER_ZOOM_AREA; + +#ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &endTs); + uint64_t elapsedNs = 1e9 * (endTs.tv_sec - startTs.tv_sec) + endTs.tv_nsec - startTs.tv_nsec; + std::cout << " (" << std::to_string((uint32_t)(elapsedNs / 1e6)) << " ms)"; +#endif + std::cout << std::flush; + } if (it->size() == 0) continue; - it->shrink_to_fit(); + // We track a separate copy of low zoom objects to avoid scanning large + // lists of objects that may be on slow disk storage. + for (auto objectIt = it->begin(); objectIt != it->end(); objectIt++) + if (objectIt->oo.minZoom < CLUSTER_ZOOM) + lowZoom[i].push_back(*objectIt); // If the user is doing a a small extract, there are few populated // entries in `object`. @@ -102,17 +130,18 @@ template void finalizeObjects( }, threadNum ); - } + + std::cout << std::endl; } template void collectTilesWithObjectsAtZoomTemplate( const unsigned int& baseZoom, - const typename std::vector>::iterator objects, + const typename std::vector>::iterator objects, const size_t size, - const unsigned int zoom, - TileCoordinatesSet& output + std::vector& zooms ) { + size_t maxZoom = zooms.size() - 1; uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; int64_t lastX = -1; int64_t lastY = -1; @@ -126,13 +155,18 @@ template void collectTilesWithObjectsAtZoomTemplate( TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + TileCoordinate x = baseX / (1 << (baseZoom - maxZoom)); + TileCoordinate y = baseY / (1 << (baseZoom - maxZoom)); if (lastX != x || lastY != y) { - output.set(x, y); lastX = x; lastY = y; + + for (int zoom = maxZoom; zoom >= 0; zoom--) { + zooms[zoom].set(x, y); + x /= 2; + y /= 2; + } } } } @@ -148,107 +182,124 @@ inline OutputObjectID outputObjectWithId(const OutputObjectXYI return OutputObjectID({ input.oo, input.id }); } +template void collectLowZoomObjectsForTile( + const unsigned int& baseZoom, + typename std::vector> objects, + unsigned int zoom, + const TileCoordinates& dstIndex, + std::vector& output +) { + if (zoom >= CLUSTER_ZOOM) + throw std::runtime_error("collectLowZoomObjectsForTile should not be called for high zooms"); + + uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? 
(1 << (baseZoom - CLUSTER_ZOOM)) : 1; + + for (size_t i = 0; i < objects.size(); i++) { + const size_t z6x = i / CLUSTER_ZOOM_WIDTH; + const size_t z6y = i % CLUSTER_ZOOM_WIDTH; + + for (size_t j = 0; j < objects[i].size(); j++) { + // Compute the x, y at the base zoom level + TileCoordinate baseX = z6x * z6OffsetDivisor + objects[i][j].x; + TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; + + // Translate the x, y at the requested zoom level + TileCoordinate x = baseX / (1 << (baseZoom - zoom)); + TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + + if (dstIndex.x == x && dstIndex.y == y) { + if (objects[i][j].oo.minZoom <= zoom) { + output.push_back(outputObjectWithId(objects[i][j])); + } + } + } + } +} + template void collectObjectsForTileTemplate( const unsigned int& baseZoom, - typename std::vector>::iterator objects, + typename std::vector>::iterator objects, size_t iStart, size_t iEnd, unsigned int zoom, const TileCoordinates& dstIndex, std::vector& output ) { + if (zoom < CLUSTER_ZOOM) + throw std::runtime_error("collectObjectsForTileTemplate should not be called for low zooms"); + uint16_t z6OffsetDivisor = baseZoom >= CLUSTER_ZOOM ? (1 << (baseZoom - CLUSTER_ZOOM)) : 1; for (size_t i = iStart; i < iEnd; i++) { - const size_t z6x = i / CLUSTER_ZOOM_WIDTH; - const size_t z6y = i % CLUSTER_ZOOM_WIDTH; + // If z >= 6, we can compute the exact bounds within the objects array. + // Translate to the base zoom, then do a binary search to find + // the starting point. + TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); + TileCoordinate z6y = dstIndex.y / (1 << (zoom - CLUSTER_ZOOM)); + + TileCoordinate baseX = dstIndex.x * (1 << (baseZoom - zoom)); + TileCoordinate baseY = dstIndex.y * (1 << (baseZoom - zoom)); + + Z6Offset needleX = baseX - z6x * z6OffsetDivisor; + Z6Offset needleY = baseY - z6y * z6OffsetDivisor; + + // Kind of gross that we have to do this. 
Might be better if we split + // into two arrays, one of x/y and one of OOs. Would have better locality for + // searching, too. + OutputObject dummyOo(POINT_, 0, 0, 0, 0); + const size_t bz = baseZoom; + + const OO targetXY = {dummyOo, needleX, needleY }; + auto iter = std::lower_bound( + objects[i].begin(), + objects[i].end(), + targetXY, + [bz](const OO& a, const OO& b) { + // Cluster by parent zoom, so that a subsequent search + // can find a contiguous range of entries for any tile + // at zoom 6 or higher. + const size_t aX = a.x; + const size_t aY = a.y; + const size_t bX = b.x; + const size_t bY = b.y; + for (size_t z = CLUSTER_ZOOM; z <= bz; z++) { + const auto aXz = aX / (1 << (bz - z)); + const auto aYz = aY / (1 << (bz - z)); + const auto bXz = bX / (1 << (bz - z)); + const auto bYz = bY / (1 << (bz - z)); - if (zoom >= CLUSTER_ZOOM) { - // If z >= 6, we can compute the exact bounds within the objects array. - // Translate to the base zoom, then do a binary search to find - // the starting point. - TileCoordinate z6x = dstIndex.x / (1 << (zoom - CLUSTER_ZOOM)); - TileCoordinate z6y = dstIndex.y / (1 << (zoom - CLUSTER_ZOOM)); - - TileCoordinate baseX = dstIndex.x * (1 << (baseZoom - zoom)); - TileCoordinate baseY = dstIndex.y * (1 << (baseZoom - zoom)); - - Z6Offset needleX = baseX - z6x * z6OffsetDivisor; - Z6Offset needleY = baseY - z6y * z6OffsetDivisor; - - // Kind of gross that we have to do this. Might be better if we split - // into two arrays, one of x/y and one of OOs. Would have better locality for - // searching, too. - OutputObject dummyOo(POINT_, 0, 0, 0, 0); - const size_t bz = baseZoom; - - const OO targetXY = {dummyOo, needleX, needleY }; - auto iter = std::lower_bound( - objects[i].begin(), - objects[i].end(), - targetXY, - [bz](const OO& a, const OO& b) { - // Cluster by parent zoom, so that a subsequent search - // can find a contiguous range of entries for any tile - // at zoom 6 or higher. 
- const size_t aX = a.x; - const size_t aY = a.y; - const size_t bX = b.x; - const size_t bY = b.y; - for (size_t z = CLUSTER_ZOOM; z <= bz; z++) { - const auto aXz = aX / (1 << (bz - z)); - const auto aYz = aY / (1 << (bz - z)); - const auto bXz = bX / (1 << (bz - z)); - const auto bYz = bY / (1 << (bz - z)); - - if (aXz != bXz) - return aXz < bXz; - - if (aYz != bYz) - return aYz < bYz; - } - return false; - } - ); - for (; iter != objects[i].end(); iter++) { - // Compute the x, y at the base zoom level - TileCoordinate baseX = z6x * z6OffsetDivisor + iter->x; - TileCoordinate baseY = z6y * z6OffsetDivisor + iter->y; - - // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); - - if (dstIndex.x == x && dstIndex.y == y) { - if (iter->oo.minZoom <= zoom) { - output.push_back(outputObjectWithId(*iter)); - } - } else { - // Short-circuit when we're confident we'd no longer see relevant matches. - // We've ordered the entries in `objects` such that all objects that - // share the same tile at any zoom are in contiguous runs. - // - // Thus, as soon as we fail to find a match, we can stop looking. 
- break; - } + if (aXz != bXz) + return aXz < bXz; + if (aYz != bYz) + return aYz < bYz; + } + return false; } - } else { - for (size_t j = 0; j < objects[i].size(); j++) { - // Compute the x, y at the base zoom level - TileCoordinate baseX = z6x * z6OffsetDivisor + objects[i][j].x; - TileCoordinate baseY = z6y * z6OffsetDivisor + objects[i][j].y; - - // Translate the x, y at the requested zoom level - TileCoordinate x = baseX / (1 << (baseZoom - zoom)); - TileCoordinate y = baseY / (1 << (baseZoom - zoom)); - - if (dstIndex.x == x && dstIndex.y == y) { - if (objects[i][j].oo.minZoom <= zoom) { - output.push_back(outputObjectWithId(objects[i][j])); - } + ); + + for (; iter != objects[i].end(); iter++) { + // Compute the x, y at the base zoom level + TileCoordinate baseX = z6x * z6OffsetDivisor + iter->x; + TileCoordinate baseY = z6y * z6OffsetDivisor + iter->y; + + // Translate the x, y at the requested zoom level + TileCoordinate x = baseX / (1 << (baseZoom - zoom)); + TileCoordinate y = baseY / (1 << (baseZoom - zoom)); + + if (dstIndex.x == x && dstIndex.y == y) { + if (iter->oo.minZoom <= zoom) { + output.push_back(outputObjectWithId(*iter)); } + } else { + // Short-circuit when we're confident we'd no longer see relevant matches. + // We've ordered the entries in `objects` such that all objects that + // share the same tile at any zoom are in contiguous runs. + // + // Thus, as soon as we fail to find a match, we can stop looking. + break; } + } } } @@ -275,6 +326,7 @@ class TileDataSource { std::vector> availableMultiLinestringStoreLeases; std::vector> availableMultiPolygonStoreLeases; + virtual std::string name() const = 0; protected: size_t numShards; @@ -292,8 +344,10 @@ class TileDataSource { // // If config.include_ids is true, objectsWithIds will be populated. // Otherwise, objects. 
- std::vector> objects; - std::vector> objectsWithIds; + std::vector> objects; + std::vector> lowZoomObjects; + std::vector> objectsWithIds; + std::vector> lowZoomObjectsWithIds; // rtree index of large objects using oo_rtree_param_type = boost::geometry::index::quadratic<128>; @@ -310,12 +364,14 @@ class TileDataSource { ClipCache multiPolygonClipCache; ClipCache multiLinestringClipCache; + std::deque>> pendingSmallIndexObjects; + public: TileDataSource(size_t threadNum, unsigned int baseZoom, bool includeID); - void collectTilesWithObjectsAtZoom(uint zoom, TileCoordinatesSet& output); + void collectTilesWithObjectsAtZoom(std::vector& zooms); - void collectTilesWithLargeObjectsAtZoom(uint zoom, TileCoordinatesSet& output); + void collectTilesWithLargeObjectsAtZoom(std::vector& zooms); void collectObjectsForTile(uint zoom, TileCoordinates dstIndex, std::vector& output); void finalize(size_t threadNum); @@ -337,6 +393,8 @@ class TileDataSource { ); void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id); + void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id, bool needsLock); + void addObjectToSmallIndexUnsafe(const TileCoordinates& index, const OutputObject& oo, uint64_t id); void addObjectToLargeIndex(const Box& envelope, const OutputObject& oo, uint64_t id) { std::lock_guard lock(mutex); @@ -355,7 +413,7 @@ class TileDataSource { ); virtual Geometry buildWayGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox); - LatpLon buildNodeGeometry(OutputGeometryType const geomType, NodeID const objectID, const TileBbox &bbox) const; + virtual LatpLon buildNodeGeometry(NodeID const objectID, const TileBbox &bbox) const; void open() { // Put something at index 0 of all stores so that 0 can be used @@ -373,18 +431,18 @@ class TileDataSource { NodeID storePoint(Point const &input); inline size_t getShard(NodeID id) const { - // Note: we only allocate 35 bits for 
the IDs. This allows us to - // use bit 36 for TileDataSource-specific handling (e.g., + // Note: we only allocate 34 bits for the IDs. This allows us to + // use bits 35 and 36 for TileDataSource-specific handling (e.g., // OsmMemTiles may want to generate points/ways on the fly by // referring to the WayStore). - return id >> (35 - shardBits); + return id >> (TILE_DATA_ID_SIZE - shardBits); } virtual void populateMultiPolygon(MultiPolygon& dst, NodeID objectID); inline size_t getId(NodeID id) const { - return id & (~(~0ull << (35 - shardBits))); + return id & (~(~0ull << (TILE_DATA_ID_SIZE - shardBits))); } const Point& retrievePoint(NodeID id) const { @@ -426,9 +484,9 @@ class TileDataSource { } }; -TileCoordinatesSet getTilesAtZoom( +void populateTilesAtZoom( const std::vector& sources, - unsigned int zoom + std::vector& zooms ); #endif //_TILE_DATA_H diff --git a/include/vector_tile.proto b/include/vector_tile.proto deleted file mode 100644 index a4be957b..00000000 --- a/include/vector_tile.proto +++ /dev/null @@ -1,102 +0,0 @@ -syntax = "proto2"; - -// Protocol Version 1 - -package vector_tile; - -// option optimize_for = LITE_RUNTIME; - -message Tile { - enum GeomType { - UNKNOWN = 0; - POINT = 1; - LINESTRING = 2; - POLYGON = 3; - } - - // Variant type encoding - message Value { - // Exactly one of these values may be present in a valid message - optional string string_value = 1; - optional float float_value = 2; - optional double double_value = 3; - optional int64 int_value = 4; - optional uint64 uint_value = 5; - optional sint64 sint_value = 6; - optional bool bool_value = 7; - - extensions 8 to max; - } - - message Feature { - optional uint64 id = 1 [ default = 0 ]; - - // Tags of this feature are encoded as repeated pairs of - // integers. Even indexed values (n, beginning with 0) are - // themselves indexes into the layer's keys list. Odd indexed - // values (n+1) are indexes into the layer's values list. 
- // The first (n=0) tag of a feature, therefore, has a key of - // layer.keys[feature.tags[0]] and a value of - // layer.values[feature.tags[1]]. - repeated uint32 tags = 2 [ packed = true ]; - - // The type of geometry stored in this feature. - optional GeomType type = 3 [ default = UNKNOWN ]; - - // Contains a stream of commands and parameters (vertices). The - // repeat count is shifted to the left by 3 bits. This means - // that the command has 3 bits (0-7). The repeat count - // indicates how often this command is to be repeated. Defined - // commands are: - // - MoveTo: 1 (2 parameters follow) - // - LineTo: 2 (2 parameters follow) - // - ClosePath: 7 (no parameters follow) - // - // Commands are encoded as uint32 varints. Vertex parameters - // are encoded as deltas to the previous position and, as they - // may be negative, are further "zigzag" encoded as unsigned - // 32-bit ints: - // - // n = (n << 1) ^ (n >> 31) - // - // Ex.: MoveTo(3, 6), LineTo(8, 12), LineTo(20, 34), ClosePath - // Encoded as: [ 9 6 12 18 10 12 24 44 15 ] - // | | `> [00001 111] command type 7 (ClosePath), length 1 - // | | ===== relative LineTo(+12, +22) == LineTo(20, 34) - // | | ===== relative LineTo(+5, +6) == LineTo(8, 12) - // | `> [00010 010] = command type 2 (LineTo), length 2 - // | ==== relative MoveTo(+3, +6) - // `> [00001 001] = command type 1 (MoveTo), length 1 - // - // The original position is (0,0). - repeated uint32 geometry = 4 [ packed = true ]; - } - - message Layer { - // Any compliant implementation must first read the version - // number encoded in this message and choose the correct - // implementation for this version number before proceeding to - // decode other parts of this message. - required uint32 version = 15 [ default = 1 ]; - - required string name = 1; - - // The actual features in this tile. 
- repeated Feature features = 2; - - // Dictionary encoding for keys - repeated string keys = 3; - - // Dictionary encoding for values - repeated Value values = 4; - - // The bounding box in this tile spans from 0..4095 units - optional uint32 extent = 5 [ default = 4096 ]; - - extensions 16 to max; - } - - repeated Layer layers = 3; - - extensions 16 to 8191; -} diff --git a/include/vtzero/builder.hpp b/include/vtzero/builder.hpp new file mode 100644 index 00000000..781758e8 --- /dev/null +++ b/include/vtzero/builder.hpp @@ -0,0 +1,1365 @@ +#ifndef VTZERO_BUILDER_HPP +#define VTZERO_BUILDER_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file builder.hpp + * + * @brief Contains the classes and functions to build vector tiles. + */ + +#include "builder_impl.hpp" +#include "feature_builder_impl.hpp" +#include "geometry.hpp" +#include "types.hpp" +#include "vector_tile.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace vtzero { + + /** + * Used to build vector tiles. Whenever you are building a new vector + * tile, start with an object of this class and add layers. After all + * the data is added, call serialize(). + * + * @code + * layer some_existing_layer = ...; + * + * tile_builder builder; + * layer_builder layer_roads{builder, "roads"}; + * builder.add_existing_layer(some_existing_layer); + * ... + * std::string data = builder.serialize(); + * @endcode + */ + class tile_builder { + + friend class layer_builder; + + std::vector> m_layers; + + /** + * Add a new layer to the vector tile based on an existing layer. The + * new layer will have the same name, version, and extent as the + * existing layer. 
The new layer will not contain any features. This + * method is handy when copying some (but not all) data from an + * existing layer. + */ + detail::layer_builder_impl* add_layer(const layer& layer) { + auto* ptr = new detail::layer_builder_impl{layer.name(), layer.version(), layer.extent()}; + m_layers.emplace_back(ptr); + return ptr; + } + + /** + * Add a new layer to the vector tile with the specified name, version, + * and extent. + * + * @tparam TString Some string type (const char*, std::string, + * vtzero::data_view) or something that converts to one of + * these types. + * @param name Name of this layer. + * @param version Version of this layer (only version 1 and 2 are + * supported) + * @param extent Extent used for this layer. + */ + template + detail::layer_builder_impl* add_layer(TString&& name, uint32_t version, uint32_t extent) { + auto* ptr = new detail::layer_builder_impl{std::forward(name), version, extent}; + m_layers.emplace_back(ptr); + return ptr; + } + + public: + + /// Constructor + tile_builder() = default; + + /// Destructor + ~tile_builder() noexcept = default; + + /// Tile builders can not be copied. + tile_builder(const tile_builder&) = delete; + + /// Tile builders can not be copied. + tile_builder& operator=(const tile_builder&) = delete; + + /// Tile builders can be moved. + tile_builder(tile_builder&&) = default; + + /// Tile builders can be moved. + tile_builder& operator=(tile_builder&&) = default; + + /** + * Add an existing layer to the vector tile. The layer data will be + * copied over into the new vector_tile when the serialize() method + * is called. Until then, the data referenced here must stay available. + * + * @param data Reference to some data that must be a valid encoded + * layer. + */ + void add_existing_layer(data_view&& data) { + m_layers.emplace_back(new detail::layer_builder_impl{std::forward(data)}); + } + + /** + * Add an existing layer to the vector tile. 
The layer data will be + * copied over into the new vector_tile when the serialize() method + * is called. Until then, the data referenced here must stay available. + * + * @param layer Reference to the layer to be copied. + */ + void add_existing_layer(const layer& layer) { + add_existing_layer(layer.data()); + } + + /** + * Serialize the data accumulated in this builder into a vector tile. + * The data will be appended to the specified buffer. The buffer + * doesn't have to be empty. + * + * @tparam TBuffer Type of buffer. Must be std::string or other buffer + * type supported by protozero. + * @param buffer Buffer to append the encoded vector tile to. + */ + template + void serialize(TBuffer& buffer) const { + const std::size_t estimated_size = std::accumulate(m_layers.cbegin(), m_layers.cend(), 0ULL, [](std::size_t sum, const std::unique_ptr& layer) { + return sum + layer->estimated_size(); + }); + + protozero::basic_pbf_builder pbf_tile_builder{buffer}; + pbf_tile_builder.reserve(estimated_size); + for (const auto& layer : m_layers) { + layer->build(pbf_tile_builder); + } + } + + /** + * Serialize the data accumulated in this builder into a vector_tile + * and return it. + * + * If you want to use an existing buffer instead, use the serialize() + * member function taking a TBuffer& as parameter. + * + * @returns std::string Buffer with encoded vector_tile data. + */ + std::string serialize() const { + std::string data; + serialize(data); + return data; + } + + }; // class tile_builder + + /** + * The layer_builder is used to add a new layer to a vector tile that is + * being built.
+ */ + class layer_builder { + + vtzero::detail::layer_builder_impl* m_layer; + + friend class geometry_feature_builder; + friend class point_feature_builder; + friend class linestring_feature_builder; + friend class polygon_feature_builder; + + vtzero::detail::layer_builder_impl& get_layer_impl() noexcept { + return *m_layer; + } + + template + using is_layer = std::is_same::type>::type, layer>; + + public: + + /** + * Construct a layer_builder to build a new layer with the same name, + * version, and extent as an existing layer. + * + * @param tile The tile builder we want to create this layer in. + * @param layer Existing layer we want to use the name, version, and + * extent from + */ + layer_builder(vtzero::tile_builder& tile, const layer& layer) : + m_layer(tile.add_layer(layer)) { + } + + /** + * Construct a layer_builder to build a completely new layer. + * + * @tparam TString Some string type (such as std::string or const char*) + * @param tile The tile builder we want to create this layer in. + * @param name The name of the new layer. + * @param version The vector tile spec version of the new layer. + * @param extent The extent of the new layer. + */ + template ::value, int>::type = 0> + layer_builder(vtzero::tile_builder& tile, TString&& name, uint32_t version = 2, uint32_t extent = 4096) : + m_layer(tile.add_layer(std::forward(name), version, extent)) { + } + + /** + * Add key to the keys table without checking for duplicates. This + * function is usually used when an external index is used which takes + * care of the duplication check. + * + * @param text The key. + * @returns The index value of this key. + */ + index_value add_key_without_dup_check(const data_view text) { + return m_layer->add_key_without_dup_check(text); + } + + /** + * Add key to the keys table. This function will consult the internal + * index in the layer to make sure the key is only in the table once. 
+ * It will either return the index value of an existing key or add the + * new key and return its index value. + * + * @param text The key. + * @returns The index value of this key. + */ + index_value add_key(const data_view text) { + return m_layer->add_key(text); + } + + /** + * Add value to the values table without checking for duplicates. This + * function is usually used when an external index is used which takes + * care of the duplication check. + * + * @param value The property value. + * @returns The index value of this value. + */ + index_value add_value_without_dup_check(const property_value value) { + return m_layer->add_value_without_dup_check(value); + } + + /** + * Add value to the values table without checking for duplicates. This + * function is usually used when an external index is used which takes + * care of the duplication check. + * + * @param value The property value. + * @returns The index value of this value. + */ + index_value add_value_without_dup_check(const encoded_property_value& value) { + return m_layer->add_value_without_dup_check(value); + } + + /** + * Add value to the values table. This function will consult the + * internal index in the layer to make sure the value is only in the + * table once. It will either return the index value of an existing + * value or add the new value and return its index value. + * + * @param value The property value. + * @returns The index value of this value. + */ + index_value add_value(const property_value value) { + return m_layer->add_value(value); + } + + /** + * Add value to the values table. This function will consult the + * internal index in the layer to make sure the value is only in the + * table once. It will either return the index value of an existing + * value or add the new value and return its index value. + * + * @param value The property value. + * @returns The index value of this value. 
+ */ + index_value add_value(const encoded_property_value& value) { + return m_layer->add_value(value); + } + + /** + * Add a feature from an existing layer to the new layer. The feature + * will be copied completely over to the new layer including its + * geometry and all its properties. + */ + void add_feature(const feature& feature); + + }; // class layer_builder + + /** + * Parent class for the point_feature_builder, linestring_feature_builder + * and polygon_feature_builder classes. You can not instantiate this class + * directly, use it through its derived classes. + */ + class feature_builder : public detail::feature_builder_base { + + class countdown_value { + + uint32_t m_value = 0; + + public: + + countdown_value() noexcept = default; + + ~countdown_value() noexcept { + assert_is_zero(); + } + + countdown_value(const countdown_value&) = delete; + + countdown_value& operator=(const countdown_value&) = delete; + + countdown_value(countdown_value&& other) noexcept : + m_value(other.m_value) { + other.m_value = 0; + } + + countdown_value& operator=(countdown_value&& other) noexcept { + m_value = other.m_value; + other.m_value = 0; + return *this; + } + + uint32_t value() const noexcept { + return m_value; + } + + void set(const uint32_t value) noexcept { + m_value = value; + } + + void decrement() { + vtzero_assert(m_value > 0 && "too many calls to set_point()"); + --m_value; + } + + void assert_is_zero() const noexcept { + vtzero_assert_in_noexcept_function(m_value == 0 && + "not enough calls to set_point()"); + } + + }; // countdown_value + + protected: + + /// Encoded geometry. + protozero::packed_field_uint32 m_pbf_geometry{}; + + /// Number of points still to be set for the geometry to be complete. + countdown_value m_num_points; + + /// Last point (used to calculate delta between coordinates) + point m_cursor{0, 0}; + + /// Constructor. 
+ explicit feature_builder(detail::layer_builder_impl* layer) : + feature_builder_base(layer) { + } + + /// Helper function to check size isn't too large + template + uint32_t check_num_points(T size) { + if (size >= (1UL << 29U)) { + throw geometry_exception{"Maximum of 2^29 - 1 points allowed in geometry"}; + } + return static_cast(size); + } + + /// Helper function to make sure we have everything before adding a property + void prepare_to_add_property() { + if (m_pbf_geometry.valid()) { + m_num_points.assert_is_zero(); + m_pbf_geometry.commit(); + } + if (!m_pbf_tags.valid()) { + m_pbf_tags = {m_feature_writer, detail::pbf_feature::tags}; + } + } + + public: + + /** + * If the feature was not committed, the destructor will roll back all + * the changes. + */ + ~feature_builder() { + try { + rollback(); + } catch (...) { + // ignore exceptions + } + } + + /// Builder classes can not be copied + feature_builder(const feature_builder&) = delete; + + /// Builder classes can not be copied + feature_builder& operator=(const feature_builder&) = delete; + + /// Builder classes can be moved + feature_builder(feature_builder&& other) noexcept = default; + + /// Builder classes can be moved + feature_builder& operator=(feature_builder&& other) noexcept = default; + + /** + * Set the ID of this feature. + * + * You can only call this method once and it must be before calling + * any method manipulating the geometry. + * + * @param id The ID. + */ + void set_id(uint64_t id) { + vtzero_assert(m_feature_writer.valid() && + "Can not call set_id() after commit() or rollback()"); + vtzero_assert(!m_pbf_geometry.valid() && + !m_pbf_tags.valid() && + "Call set_id() before setting the geometry or adding properties"); + set_id_impl(id); + } + + /** + * Copy the ID of an existing feature to this feature. If the + * feature doesn't have an ID, no ID is set. + * + * You can only call this method once and it must be before calling + * any method manipulating the geometry. 
+ * + * @param feature The feature to copy the ID from. + */ + void copy_id(const feature& feature) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_id() after commit() or rollback()"); + vtzero_assert(!m_pbf_geometry.valid() && + !m_pbf_tags.valid() && + "Call copy_id() before setting the geometry or adding properties"); + if (feature.has_id()) { + set_id_impl(feature.id()); + } + } + + /** + * Add a property to this feature. Can only be called after all the + * methods manipulating the geometry. + * + * @tparam TProp Can be type index_value_pair or property. + * @param prop The property to add. + */ + template + void add_property(TProp&& prop) { + vtzero_assert(m_feature_writer.valid() && + "Can not call add_property() after commit() or rollback()"); + prepare_to_add_property(); + add_property_impl(std::forward(prop)); + } + + /** + * Copy all properties of an existing feature to the one being built. + * + * @param feature The feature to copy the properties from. + */ + void copy_properties(const feature& feature) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_properties() after commit() or rollback()"); + prepare_to_add_property(); + feature.for_each_property([this](const property& prop) { + add_property_impl(prop); + return true; + }); + } + + /** + * Copy all properties of an existing feature to the one being built + * using a property_mapper. + * + * @tparam TMapper Must be the property_mapper class or something + * equivalent. + * @param feature The feature to copy the properties from. + * @param mapper Instance of the property_mapper class. 
+ */ + template + void copy_properties(const feature& feature, TMapper& mapper) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_properties() after commit() or rollback()"); + prepare_to_add_property(); + feature.for_each_property_indexes([this, &mapper](const index_value_pair& idxs) { + add_property_impl(mapper(idxs)); + return true; + }); + } + + /** + * Add a property to this feature. Can only be called after all the + * methods manipulating the geometry. + * + * @tparam TKey Can be type index_value or data_view or anything that + * converts to it. + * @tparam TValue Can be type index_value or property_value or + * encoded_property or anything that converts to it. + * @param key The key. + * @param value The value. + */ + template + void add_property(TKey&& key, TValue&& value) { + vtzero_assert(m_feature_writer.valid() && + "Can not call add_property() after commit() or rollback()"); + prepare_to_add_property(); + add_property_impl(std::forward(key), std::forward(value)); + } + + /** + * Commit this feature. Call this after all the details of this + * feature have been added. If this is not called, the feature + * will be rolled back when the destructor of the feature_builder is + * called. + * + * Once a feature has been committed or rolled back, further calls + * to commit() or rollback() don't do anything. + */ + void commit() { + if (m_feature_writer.valid()) { + vtzero_assert((m_pbf_geometry.valid() || m_pbf_tags.valid()) && + "Can not call commit before geometry was added"); + if (m_pbf_geometry.valid()) { + m_pbf_geometry.commit(); + } + do_commit(); + } + } + + /** + * Rollback this feature. Removed all traces of this feature from + * the layer_builder. Useful when you started creating a feature + * but then find out that its geometry is invalid or something like + * it. This will also happen automatically when the feature_builder + * is destructed and commit() hasn't been called on it. 
+ * + * Once a feature has been committed or rolled back, further calls + * to commit() or rollback() don't do anything. + */ + void rollback() { + if (m_feature_writer.valid()) { + if (m_pbf_geometry.valid()) { + m_pbf_geometry.rollback(); + } + do_rollback(); + } + } + + }; // class feature_builder + + /** + * Used for adding a feature with a point geometry to a layer. After + * creating an object of this class you can add data to the feature in a + * specific order: + * + * * Optionally add the ID using set_id(). + * * Add the (multi)point geometry using add_point(), add_points() and + * set_point(), or add_points_from_container(). + * * Optionally add any number of properties using add_property(). + * + * @code + * vtzero::tile_builder tb; + * vtzero::layer_builder lb{tb}; + * vtzero::point_feature_builder fb{lb}; + * fb.set_id(123); // optionally set ID + * fb.add_point(10, 20) // add point geometry + * fb.add_property("foo", "bar"); // add property + * @endcode + */ + class point_feature_builder : public feature_builder { + + public: + + /** + * Constructor + * + * @param layer The layer we want to create this feature in. + */ + explicit point_feature_builder(layer_builder layer) : + feature_builder(&layer.get_layer_impl()) { + m_feature_writer.add_enum(detail::pbf_feature::type, static_cast(GeomType::POINT)); + } + + /** + * Add a single point as the geometry to this feature. + * + * @param p The point to add. + * + * @pre You must not have any calls to add_property() before calling + * this method. 
+ */ + void add_point(const point p) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(!m_pbf_geometry.valid() && + !m_pbf_tags.valid() && + "add_point() can only be called once"); + m_pbf_geometry = {m_feature_writer, detail::pbf_feature::geometry}; + m_pbf_geometry.add_element(detail::command_move_to(1)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y)); + } + + /** + * Add a single point as the geometry to this feature. + * + * @param x X coordinate of the point to add. + * @param y Y coordinate of the point to add. + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void add_point(const int32_t x, const int32_t y) { + add_point(point{x, y}); + } + + /** + * Add a single point as the geometry to this feature. + * + * @tparam TPoint A type that can be converted to vtzero::point using + * the create_vtzero_point function. + * @param p The point to add. + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void add_point(TPoint&& p) { + add_point(create_vtzero_point(std::forward(p))); + } + + /** + * Declare the intent to add a multipoint geometry with *count* points + * to this feature. + * + * @param count The number of points in the multipoint geometry. + * + * @pre @code count > 0 && count < 2^29 @endcode + * + * @pre You must not have any calls to add_property() before calling + * this method. 
+ */ + void add_points(uint32_t count) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(!m_pbf_geometry.valid() && + "can not call add_points() twice or mix with add_point()"); + vtzero_assert(!m_pbf_tags.valid() && + "add_points() has to be called before properties are added"); + vtzero_assert(count > 0 && count < (1UL << 29U) && "add_points() must be called with 0 < count < 2^29"); + m_num_points.set(count); + m_pbf_geometry = {m_feature_writer, detail::pbf_feature::geometry}; + m_pbf_geometry.add_element(detail::command_move_to(count)); + } + + /** + * Set a point in the multipoint geometry. + * + * @param p The point. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_points(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const point p) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(m_pbf_geometry.valid() && + "call add_points() before set_point()"); + vtzero_assert(!m_pbf_tags.valid() && + "set_point() has to be called before properties are added"); + m_num_points.decrement(); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x - m_cursor.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y - m_cursor.y)); + m_cursor = p; + } + + /** + * Set a point in the multipoint geometry. + * + * @param x X coordinate of the point to set. + * @param y Y coordinate of the point to set. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_points(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const int32_t x, const int32_t y) { + set_point(point{x, y}); + } + + /** + * Set a point in the multipoint geometry. 
+ * + * @tparam TPoint A type that can be converted to vtzero::point using + * the create_vtzero_point function. + * @param p The point to add. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_points(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void set_point(TPoint&& p) { + set_point(create_vtzero_point(std::forward(p))); + } + + /** + * Add the points from the specified container as multipoint geometry + * to this feature. + * + * @tparam TContainer The container type. Must support the size() + * method, be iterable using a range for loop, and contain + * objects of type vtzero::point or something convertible to + * it. + * @param container The container to read the points from. + * + * @throws geometry_exception If there are more than 2^29-1 members in + * the container. + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void add_points_from_container(const TContainer& container) { + add_points(check_num_points(container.size())); + for (const auto& element : container) { + set_point(element); + } + } + + }; // class point_feature_builder + + /** + * Used for adding a feature with a (multi)linestring geometry to a layer. + * After creating an object of this class you can add data to the + * feature in a specific order: + * + * * Optionally add the ID using set_id(). + * * Add the (multi)linestring geometry using add_linestring() or + * add_linestring_from_container(). + * * Optionally add any number of properties using add_property().
+ * + * @code + * vtzero::tile_builder tb; + * vtzero::layer_builder lb{tb}; + * vtzero::linestring_feature_builder fb{lb}; + * fb.set_id(123); // optionally set ID + * fb.add_linestring(2); + * fb.set_point(10, 10); + * fb.set_point(10, 20); + * fb.add_property("foo", "bar"); // add property + * @endcode + */ + class linestring_feature_builder : public feature_builder { + + bool m_start_line = false; + + public: + + /** + * Constructor + * + * @param layer The layer we want to create this feature in. + */ + explicit linestring_feature_builder(layer_builder layer) : + feature_builder(&layer.get_layer_impl()) { + m_feature_writer.add_enum(detail::pbf_feature::type, static_cast(GeomType::LINESTRING)); + } + + /** + * Declare the intent to add a linestring geometry with *count* points + * to this feature. + * + * @param count The number of points in the linestring. + * + * @pre @code count > 1 && count < 2^29 @endcode + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void add_linestring(const uint32_t count) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(!m_pbf_tags.valid() && + "add_linestring() has to be called before properties are added"); + vtzero_assert(count > 1 && count < (1UL << 29U) && "add_linestring() must be called with 1 < count < 2^29"); + m_num_points.assert_is_zero(); + if (!m_pbf_geometry.valid()) { + m_pbf_geometry = {m_feature_writer, detail::pbf_feature::geometry}; + } + m_num_points.set(count); + m_start_line = true; + } + + /** + * Set a point in the multilinestring geometry opened with + * add_linestring(). + * + * @param p The point. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. 
+ * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_linestring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const point p) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(m_pbf_geometry.valid() && + "call add_linestring() before set_point()"); + vtzero_assert(!m_pbf_tags.valid() && + "set_point() has to be called before properties are added"); + m_num_points.decrement(); + if (m_start_line) { + m_pbf_geometry.add_element(detail::command_move_to(1)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x - m_cursor.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y - m_cursor.y)); + m_pbf_geometry.add_element(detail::command_line_to(m_num_points.value())); + m_start_line = false; + } else { + if (p == m_cursor) { + throw geometry_exception{"Zero-length segments in linestrings are not allowed."}; + } + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x - m_cursor.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y - m_cursor.y)); + } + m_cursor = p; + } + + /** + * Set a point in the multilinestring geometry opened with + * add_linestring(). + * + * @param x X coordinate of the point to set. + * @param y Y coordinate of the point to set. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_linestring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const int32_t x, const int32_t y) { + set_point(point{x, y}); + } + + /** + * Set a point in the multilinestring geometry opened with + * add_linestring(). 
+ * + * @tparam TPoint A type that can be converted to vtzero::point using + * the create_vtzero_point function. + * @param p The point to add. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_linestring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void set_point(TPoint&& p) { + set_point(create_vtzero_point(std::forward(p))); + } + + /** + * Add the points from the specified container as a linestring geometry + * to this feature. + * + * @tparam TContainer The container type. Must support the size() + * method, be iterable using a range for loop, and contain + * objects of type vtzero::point or something convertible to + * it. + * @param container The container to read the points from. + * + * @throws geometry_exception If there are more than 2^29-1 members in + * the container or if two consecutive points in the container + * are identical. + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void add_linestring_from_container(const TContainer& container) { + add_linestring(check_num_points(container.size())); + for (const auto& element : container) { + set_point(element); + } + } + + }; // class linestring_feature_builder + + /** + * Used for adding a feature with a (multi)polygon geometry to a layer. + * After creating an object of this class you can add data to the + * feature in a specific order: + * + * * Optionally add the ID using set_id(). + * * Add the (multi)polygon geometry using add_ring() or + * add_ring_from_container(). + * * Optionally add any number of properties using add_property().
+ * + * @code + * vtzero::tile_builder tb; + * vtzero::layer_builder lb{tb}; + * vtzero::polygon_feature_builder fb{lb}; + * fb.set_id(123); // optionally set ID + * fb.add_ring(5); + * fb.set_point(10, 10); + * ... + * fb.add_property("foo", "bar"); // add property + * @endcode + */ + class polygon_feature_builder : public feature_builder { + + point m_first_point{0, 0}; + bool m_start_ring = false; + + public: + + /** + * Constructor + * + * @param layer The layer we want to create this feature in. + */ + explicit polygon_feature_builder(layer_builder layer) : + feature_builder(&layer.get_layer_impl()) { + m_feature_writer.add_enum(detail::pbf_feature::type, static_cast(GeomType::POLYGON)); + } + + /** + * Declare the intent to add a ring with *count* points to this + * feature. + * + * @param count The number of points in the ring. + * + * @pre @code count > 3 && count < 2^29 @endcode + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void add_ring(const uint32_t count) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(!m_pbf_tags.valid() && + "add_ring() has to be called before properties are added"); + vtzero_assert(count > 3 && count < (1UL << 29U) && "add_ring() must be called with 3 < count < 2^29"); + m_num_points.assert_is_zero(); + if (!m_pbf_geometry.valid()) { + m_pbf_geometry = {m_feature_writer, detail::pbf_feature::geometry}; + } + m_num_points.set(count); + m_start_ring = true; + } + + /** + * Set a point in the ring opened with add_ring(). + * + * @param p The point. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. + * This exception is also thrown when the last point in the + * ring is not equal to the first point, because this would + * not create a closed ring. 
+ * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_ring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const point p) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(m_pbf_geometry.valid() && + "call add_ring() before set_point()"); + vtzero_assert(!m_pbf_tags.valid() && + "set_point() has to be called before properties are added"); + m_num_points.decrement(); + if (m_start_ring) { + m_first_point = p; + m_pbf_geometry.add_element(detail::command_move_to(1)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x - m_cursor.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y - m_cursor.y)); + m_pbf_geometry.add_element(detail::command_line_to(m_num_points.value() - 1)); + m_start_ring = false; + m_cursor = p; + } else if (m_num_points.value() == 0) { + if (p != m_first_point) { + throw geometry_exception{"Last point in a ring must be the same as the first point."}; + } + // spec 4.3.3.3 "A ClosePath command MUST have a command count of 1" + m_pbf_geometry.add_element(detail::command_close_path()); + } else { + if (p == m_cursor) { + throw geometry_exception{"Zero-length segments in rings are not allowed."}; + } + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.x - m_cursor.x)); + m_pbf_geometry.add_element(protozero::encode_zigzag32(p.y - m_cursor.y)); + m_cursor = p; + } + } + + /** + * Set a point in the ring opened with add_ring(). + * + * @param x X coordinate of the point to set. + * @param y Y coordinate of the point to set. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. 
+ * This exception is also thrown when the last point in the + * ring is not equal to the first point, because this would + * not create a closed ring. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_ring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + void set_point(const int32_t x, const int32_t y) { + set_point(point{x, y}); + } + + /** + * Set a point in the ring opened with add_ring(). + * + * @tparam TPoint A type that can be converted to vtzero::point using + * the create_vtzero_point function. + * @param p The point to add. + * + * @throws geometry_exception if the point set is the same as the + * previous point. This would create zero-length segments + * which are not allowed according to the vector tile spec. + * This exception is also thrown when the last point in the + * ring is not equal to the first point, because this would + * not create a closed ring. + * + * @pre There must have been less than *count* calls to set_point() + * already after a call to add_ring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void set_point(TPoint&& p) { + set_point(create_vtzero_point(std::forward(p))); + } + + /** + * Close a ring opened with add_ring(). This can be called for the + * last point (which will be equal to the first point) in the ring + * instead of calling set_point(). + * + * @pre There must have been *count* - 1 calls to set_point() + * already after a call to add_ring(count). + * + * @pre You must not have any calls to add_property() before calling + * this method. 
+ */ + void close_ring() { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(m_pbf_geometry.valid() && + "Call add_ring() before you can call close_ring()"); + vtzero_assert(!m_pbf_tags.valid() && + "close_ring() has to be called before properties are added"); + vtzero_assert(m_num_points.value() == 1 && + "wrong number of points in ring"); + m_pbf_geometry.add_element(detail::command_close_path()); + m_num_points.decrement(); + } + + /** + * Add the points from the specified container as a ring to this + * feature. + * + * @tparam TContainer The container type. Must support the size() + * method, be iterable using a range for loop, and contain + * objects of type vtzero::point or something convertible to + * it. + * @param container The container to read the points from. + * + * @throws geometry_exception If there are more than 2^32-1 members in + * the container or if two consecutive points in the container + * are identical or if the last point is not the same as the + * first point. + * + * @pre You must not have any calls to add_property() before calling + * this method. + */ + template + void add_ring_from_container(const TContainer& container) { + add_ring(check_num_points(container.size())); + for (const auto& element : container) { + set_point(element); + } + } + + }; // class polygon_feature_builder + + /** + * Used for adding a feature to a layer using an existing geometry. After + * creating an object of this class you can add data to the feature in a + * specific order: + * + * * Optionally add the ID using set_id(). + * * Add the geometry using set_geometry(). + * * Optionally add any number of properties using add_property(). + * + * @code + * auto geom = ... // get geometry from a feature you are reading + * ... 
+ * vtzero::tile_builder tb; + * vtzero::layer_builder lb{tb}; + * vtzero::geometry_feature_builder fb{lb}; + * fb.set_id(123); // optionally set ID + * fb.set_geometry(geom) // add geometry + * fb.add_property("foo", "bar"); // add property + * @endcode + */ + class geometry_feature_builder : public detail::feature_builder_base { + + public: + + /** + * Constructor + * + * @param layer The layer we want to create this feature in. + */ + explicit geometry_feature_builder(layer_builder layer) : + feature_builder_base(&layer.get_layer_impl()) { + } + + /** + * If the feature was not committed, the destructor will roll back all + * the changes. + */ + ~geometry_feature_builder() noexcept { + try { + rollback(); + } catch (...) { + // ignore exceptions + } + } + + /// Feature builders can not be copied. + geometry_feature_builder(const geometry_feature_builder&) = delete; + + /// Feature builders can not be copied. + geometry_feature_builder& operator=(const geometry_feature_builder&) = delete; + + /// Feature builders can be moved. + geometry_feature_builder(geometry_feature_builder&&) noexcept = default; + + /// Feature builders can be moved. + geometry_feature_builder& operator=(geometry_feature_builder&&) noexcept = default; + + /** + * Set the ID of this feature. + * + * You can only call this function once and it must be before calling + * set_geometry(). + * + * @param id The ID. + */ + void set_id(uint64_t id) { + vtzero_assert(m_feature_writer.valid() && + "Can not call set_id() after commit() or rollback()"); + vtzero_assert(!m_pbf_tags.valid()); + set_id_impl(id); + } + + /** + * Copy the ID of an existing feature to this feature. If the + * feature doesn't have an ID, no ID is set. + * + * You can only call this function once and it must be before calling + * set_geometry(). + * + * @param feature The feature to copy the ID from. 
+ */ + void copy_id(const feature& feature) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_id() after commit() or rollback()"); + vtzero_assert(!m_pbf_tags.valid()); + if (feature.has_id()) { + set_id_impl(feature.id()); + } + } + + /** + * Set the geometry of this feature. + * + * You can only call this method once and it must be before calling the + * add_property() method. + * + * @param geometry The geometry. + */ + void set_geometry(const geometry& geometry) { + vtzero_assert(m_feature_writer.valid() && + "Can not add geometry after commit() or rollback()"); + vtzero_assert(!m_pbf_tags.valid()); + m_feature_writer.add_enum(detail::pbf_feature::type, static_cast(geometry.type())); + m_feature_writer.add_string(detail::pbf_feature::geometry, geometry.data()); + m_pbf_tags = {m_feature_writer, detail::pbf_feature::tags}; + } + + /** + * Add a property to this feature. Can only be called after the + * set_geometry method. + * + * @tparam TProp Can be type index_value_pair or property. + * @param prop The property to add. + */ + template + void add_property(TProp&& prop) { + vtzero_assert(m_feature_writer.valid() && + "Can not call add_property() after commit() or rollback()"); + add_property_impl(std::forward(prop)); + } + + /** + * Add a property to this feature. Can only be called after the + * set_geometry method. + * + * @tparam TKey Can be type index_value or data_view or anything that + * converts to it. + * @tparam TValue Can be type index_value or property_value or + * encoded_property or anything that converts to it. + * @param key The key. + * @param value The value. + */ + template + void add_property(TKey&& key, TValue&& value) { + vtzero_assert(m_feature_writer.valid() && + "Can not call add_property() after commit() or rollback()"); + add_property_impl(std::forward(key), std::forward(value)); + } + + /** + * Copy all properties of an existing feature to the one being built. 
+ * + * @param feature The feature to copy the properties from. + */ + void copy_properties(const feature& feature) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_properties() after commit() or rollback()"); + feature.for_each_property([this](const property& prop) { + add_property_impl(prop); + return true; + }); + } + + /** + * Copy all properties of an existing feature to the one being built + * using a property_mapper. + * + * @tparam TMapper Must be the property_mapper class or something + * equivalent. + * @param feature The feature to copy the properties from. + * @param mapper Instance of the property_mapper class. + */ + template + void copy_properties(const feature& feature, TMapper& mapper) { + vtzero_assert(m_feature_writer.valid() && + "Can not call copy_properties() after commit() or rollback()"); + feature.for_each_property_indexes([this, &mapper](const index_value_pair& idxs) { + add_property_impl(mapper(idxs)); + return true; + }); + } + + /** + * Commit this feature. Call this after all the details of this + * feature have been added. If this is not called, the feature + * will be rolled back when the destructor of the feature_builder is + * called. + * + * Once a feature has been committed or rolled back, further calls + * to commit() or rollback() don't do anything. + */ + void commit() { + if (m_feature_writer.valid()) { + vtzero_assert(m_pbf_tags.valid() && + "Can not call commit before geometry was added"); + do_commit(); + } + } + + /** + * Rollback this feature. Removed all traces of this feature from + * the layer_builder. Useful when you started creating a feature + * but then find out that its geometry is invalid or something like + * it. This will also happen automatically when the feature_builder + * is destructed and commit() hasn't been called on it. + * + * Once a feature has been committed or rolled back, further calls + * to commit() or rollback() don't do anything. 
+ */ + void rollback() { + if (m_feature_writer.valid()) { + do_rollback(); + } + } + + }; // class geometry_feature_builder + + inline void layer_builder::add_feature(const feature& feature) { + geometry_feature_builder feature_builder{*this}; + if (feature.has_id()) { + feature_builder.set_id(feature.id()); + } + feature_builder.set_geometry(feature.geometry()); + feature.for_each_property([&feature_builder](const property& p) { + feature_builder.add_property(p); + return true; + }); + feature_builder.commit(); + } + +} // namespace vtzero + +#endif // VTZERO_BUILDER_HPP diff --git a/include/vtzero/builder_impl.hpp b/include/vtzero/builder_impl.hpp new file mode 100644 index 00000000..1b348df2 --- /dev/null +++ b/include/vtzero/builder_impl.hpp @@ -0,0 +1,267 @@ +#ifndef VTZERO_BUILDER_IMPL_HPP +#define VTZERO_BUILDER_IMPL_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file builder_impl.hpp + * + * @brief Contains classes internal to the builder. + */ + +#include "encoded_property_value.hpp" +#include "property_value.hpp" +#include "types.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace vtzero { + + namespace detail { + + class layer_builder_impl { + + // If this layer is copied from an existing layer, this points + // to the data of the original layer. For newly built layers, + // this is empty. 
+ data_view m_data_view{}; + + // Buffer containing the encoded layer metadata and features + std::string m_data; + + // Buffer containing the encoded keys table + std::string m_keys_data; + + // Buffer containing the encoded values table + std::string m_values_data; + + protozero::pbf_builder m_pbf_message_layer{}; + protozero::pbf_builder m_pbf_message_keys{}; + protozero::pbf_builder m_pbf_message_values{}; + + // The number of features in the layer + std::size_t m_num_features = 0; + + // Vector tile spec version + uint32_t m_version = 0; + + // The number of keys in the keys table + uint32_t m_num_keys = 0; + + // The number of values in the values table + uint32_t m_num_values = 0; + + // Below this value, no index will be used to find entries in the + // key/value tables. This number is based on some initial + // benchmarking but probably needs some tuning. + // See also https://github.com/mapbox/vtzero/issues/30 + static constexpr const uint32_t max_entries_flat = 20; + + using map_type = std::unordered_map; + map_type m_keys_index; + map_type m_values_index; + + static index_value find_in_table(const data_view text, const std::string& data) { + uint32_t index = 0; + protozero::pbf_message pbf_table{data}; + + while (pbf_table.next()) { + const auto v = pbf_table.get_view(); + if (v == text) { + return index_value{index}; + } + ++index; + } + + return index_value{}; + } + + // Read the key or value table and populate an index from its + // entries. This is done once the table becomes too large to do + // linear search in it. 
+ static void populate_index(const std::string& data, map_type& map) { + uint32_t index = 0; + protozero::pbf_message pbf_table{data}; + + while (pbf_table.next()) { + map[pbf_table.get_string()] = index++; + } + } + + index_value add_value_without_dup_check(const data_view text) { + m_pbf_message_values.add_string(detail::pbf_layer::values, text); + return m_num_values++; + } + + index_value add_value(const data_view text) { + const auto index = find_in_values_table(text); + if (index.valid()) { + return index; + } + return add_value_without_dup_check(text); + } + + index_value find_in_keys_table(const data_view text) { + if (m_num_keys < max_entries_flat) { + return find_in_table(text, m_keys_data); + } + + if (m_keys_index.empty()) { + populate_index(m_keys_data, m_keys_index); + } + + auto& v = m_keys_index[std::string(text)]; + if (!v.valid()) { + v = add_key_without_dup_check(text); + } + return v; + } + + index_value find_in_values_table(const data_view text) { + if (m_num_values < max_entries_flat) { + return find_in_table(text, m_values_data); + } + + if (m_values_index.empty()) { + populate_index(m_values_data, m_values_index); + } + + auto& v = m_values_index[std::string(text)]; + if (!v.valid()) { + v = add_value_without_dup_check(text); + } + return v; + } + + public: + + // This layer should be a copy of an existing layer + explicit layer_builder_impl(const data_view data) : + m_data_view(data) { + } + + // This layer is being created from scratch + template + layer_builder_impl(TString&& name, uint32_t version, uint32_t extent) : + m_pbf_message_layer(m_data), + m_pbf_message_keys(m_keys_data), + m_pbf_message_values(m_values_data), + m_version(version) { + m_pbf_message_layer.add_uint32(detail::pbf_layer::version, version); + m_pbf_message_layer.add_string(detail::pbf_layer::name, std::forward(name)); + m_pbf_message_layer.add_uint32(detail::pbf_layer::extent, extent); + } + + ~layer_builder_impl() noexcept = default; + + layer_builder_impl(const 
layer_builder_impl&) = delete; + layer_builder_impl& operator=(const layer_builder_impl&) = delete; + + layer_builder_impl(layer_builder_impl&&) = default; + layer_builder_impl& operator=(layer_builder_impl&&) = default; + + uint32_t version() const noexcept { + return m_version; + } + + index_value add_key_without_dup_check(const data_view text) { + m_pbf_message_keys.add_string(detail::pbf_layer::keys, text); + return m_num_keys++; + } + + index_value add_key(const data_view text) { + const auto index = find_in_keys_table(text); + if (index.valid()) { + return index; + } + return add_key_without_dup_check(text); + } + + index_value add_value_without_dup_check(const property_value value) { + return add_value_without_dup_check(value.data()); + } + + index_value add_value_without_dup_check(const encoded_property_value& value) { + return add_value_without_dup_check(value.data()); + } + + index_value add_value(const property_value value) { + return add_value(value.data()); + } + + index_value add_value(const encoded_property_value& value) { + return add_value(value.data()); + } + + const std::string& data() const noexcept { + return m_data; + } + + const std::string& keys_data() const noexcept { + return m_keys_data; + } + + const std::string& values_data() const noexcept { + return m_values_data; + } + + protozero::pbf_builder& message() noexcept { + return m_pbf_message_layer; + } + + void increment_feature_count() noexcept { + ++m_num_features; + } + + std::size_t estimated_size() const { + if (m_data_view.data()) { + // This is a layer created as copy from an existing layer + constexpr const std::size_t estimated_overhead_for_pbf_encoding = 8; + return m_data_view.size() + estimated_overhead_for_pbf_encoding; + } + + // This is a layer created from scratch + constexpr const std::size_t estimated_overhead_for_pbf_encoding = 8; + return data().size() + + keys_data().size() + + values_data().size() + + estimated_overhead_for_pbf_encoding; + } + + template + void 
build(protozero::basic_pbf_builder& pbf_tile_builder) const { + if (m_data_view.data()) { + // This is a layer created as copy from an existing layer + pbf_tile_builder.add_bytes(detail::pbf_tile::layers, m_data_view); + return; + } + + // This is a layer created from scratch + if (m_num_features > 0) { + pbf_tile_builder.add_bytes_vectored(detail::pbf_tile::layers, + data(), + keys_data(), + values_data()); + } + } + + }; // class layer_builder_impl + + } // namespace detail + +} // namespace vtzero + +#endif // VTZERO_BUILDER_IMPL_HPP diff --git a/include/vtzero/encoded_property_value.hpp b/include/vtzero/encoded_property_value.hpp new file mode 100644 index 00000000..8a573f3e --- /dev/null +++ b/include/vtzero/encoded_property_value.hpp @@ -0,0 +1,244 @@ +#ifndef VTZERO_ENCODED_PROPERTY_VALUE_HPP +#define VTZERO_ENCODED_PROPERTY_VALUE_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file encoded_property_value.hpp + * + * @brief Contains the encoded_property_value class. + */ + +#include "types.hpp" + +#include + +#include +#include + +namespace vtzero { + + /** + * A property value encoded in the vector_tile internal format. Can be + * created from values of many different types and then later added to + * a layer/feature. + */ + class encoded_property_value { + + std::string m_data; + + public: + + /// Construct from vtzero::string_value_type. + explicit encoded_property_value(string_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_string(detail::pbf_value::string_value, value.value); + } + + /// Construct from const char*. 
+ explicit encoded_property_value(const char* value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_string(detail::pbf_value::string_value, value); + } + + /// Construct from const char* and size_t. + explicit encoded_property_value(const char* value, std::size_t size) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_string(detail::pbf_value::string_value, value, size); + } + + /// Construct from std::string. + explicit encoded_property_value(const std::string& value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_string(detail::pbf_value::string_value, value); + } + + /// Construct from vtzero::data_view. + explicit encoded_property_value(const data_view& value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_string(detail::pbf_value::string_value, value); + } + + // ------------------ + + /// Construct from vtzero::float_value_type. + explicit encoded_property_value(float_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_float(detail::pbf_value::float_value, value.value); + } + + /// Construct from float. + explicit encoded_property_value(float value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_float(detail::pbf_value::float_value, value); + } + + // ------------------ + + /// Construct from vtzero::double_value_type. + explicit encoded_property_value(double_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_double(detail::pbf_value::double_value, value.value); + } + + /// Construct from double. + explicit encoded_property_value(double value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_double(detail::pbf_value::double_value, value); + } + + // ------------------ + + /// Construct from vtzero::int_value_type. 
+ explicit encoded_property_value(int_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_int64(detail::pbf_value::int_value, value.value); + } + + /// Construct from int64_t. + explicit encoded_property_value(int64_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_int64(detail::pbf_value::int_value, value); + } + + /// Construct from int32_t. + explicit encoded_property_value(int32_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_int64(detail::pbf_value::int_value, static_cast(value)); + } + + /// Construct from int16_t. + explicit encoded_property_value(int16_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_int64(detail::pbf_value::int_value, static_cast(value)); + } + + // ------------------ + + /// Construct from vtzero::uint_value_type. + explicit encoded_property_value(uint_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_uint64(detail::pbf_value::uint_value, value.value); + } + + /// Construct from uint64_t. + explicit encoded_property_value(uint64_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_uint64(detail::pbf_value::uint_value, value); + } + + /// Construct from uint32_t. + explicit encoded_property_value(uint32_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_uint64(detail::pbf_value::uint_value, static_cast(value)); + } + + /// Construct from uint16_t. + explicit encoded_property_value(uint16_t value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_uint64(detail::pbf_value::uint_value, static_cast(value)); + } + + // ------------------ + + /// Construct from vtzero::sint_value_type. 
+ explicit encoded_property_value(sint_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_sint64(detail::pbf_value::sint_value, value.value); + } + + // ------------------ + + /// Construct from vtzero::bool_value_type. + explicit encoded_property_value(bool_value_type value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_bool(detail::pbf_value::bool_value, value.value); + } + + /// Construct from bool. + explicit encoded_property_value(bool value) { + protozero::pbf_builder pbf_message_value{m_data}; + pbf_message_value.add_bool(detail::pbf_value::bool_value, value); + } + + // ------------------ + + /** + * Get view of the raw data stored inside. + */ + data_view data() const noexcept { + return {m_data.data(), m_data.size()}; + } + + /** + * Hash function compatible with std::hash. + */ + std::size_t hash() const noexcept { + return std::hash{}(m_data); + } + + }; // class encoded_property_value + + /// Encoded property values are equal if they contain the same data. + inline bool operator==(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return lhs.data() == rhs.data(); + } + + /// Encoded property values are unequal if they are not equal. + inline bool operator!=(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return !(lhs == rhs); + } + + /// Arbitrary ordering based on internal data. + inline bool operator<(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return lhs.data() < rhs.data(); + } + + /// Arbitrary ordering based on internal data. + inline bool operator<=(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return lhs.data() <= rhs.data(); + } + + /// Arbitrary ordering based on internal data. 
+ inline bool operator>(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return lhs.data() > rhs.data(); + } + + /// Arbitrary ordering based on internal data. + inline bool operator>=(const encoded_property_value& lhs, const encoded_property_value& rhs) noexcept { + return lhs.data() >= rhs.data(); + } + +} // namespace vtzero + +namespace std { + + /** + * Specialization of std::hash for encoded_property_value. + */ + template <> + struct hash { + + /// key vtzero::encoded_property_value + using argument_type = vtzero::encoded_property_value; + + /// hash result: size_t + using result_type = std::size_t; + + /// calculate the hash of the argument + std::size_t operator()(const vtzero::encoded_property_value& value) const noexcept { + return value.hash(); + } + + }; // struct hash + +} // namespace std + +#endif // VTZERO_ENCODED_PROPERTY_VALUE_HPP diff --git a/include/vtzero/exception.hpp b/include/vtzero/exception.hpp new file mode 100644 index 00000000..80b72595 --- /dev/null +++ b/include/vtzero/exception.hpp @@ -0,0 +1,134 @@ +#ifndef VTZERO_EXCEPTION_HPP +#define VTZERO_EXCEPTION_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file exception.hpp + * + * @brief Contains the exceptions used in the vtzero library. + */ + +#include +#include +#include + +namespace vtzero { + + /** + * Base class for all exceptions directly thrown by the vtzero library. 
+ */ + class exception : public std::runtime_error { + + public: + + /// Constructor + explicit exception(const char* message) : + std::runtime_error(message) { + } + + /// Constructor + explicit exception(const std::string& message) : + std::runtime_error(message) { + } + + }; // class exception + + /** + * This exception is thrown when vector tile encoding isn't valid according + * to the vector tile specification. + */ + class format_exception : public exception { + + public: + + /// Constructor + explicit format_exception(const char* message) : + exception(message) { + } + + /// Constructor + explicit format_exception(const std::string& message) : + exception(message) { + } + + }; // class format_exception + + /** + * This exception is thrown when a geometry encoding isn't valid according + * to the vector tile specification. + */ + class geometry_exception : public format_exception { + + public: + + /// Constructor + explicit geometry_exception(const char* message) : + format_exception(message) { + } + + /// Constructor + explicit geometry_exception(const std::string& message) : + format_exception(message) { + } + + }; // class geometry_exception + + /** + * This exception is thrown when a property value is accessed using the + * wrong type. + */ + class type_exception : public exception { + + public: + + /// Constructor + explicit type_exception() : + exception("wrong property value type") { + } + + }; // class type_exception + + /** + * This exception is thrown when an unknown version number is found in the + * layer. + */ + class version_exception : public exception { + + public: + + /// Constructor + explicit version_exception(const uint32_t version) : + exception(std::string{"unknown vector tile version: "} + + std::to_string(version)) { + } + + }; // version_exception + + /** + * This exception is thrown when an index into the key or value table + * in a layer is out of range. This can only happen if the tile data is + * invalid. 
+ */ + class out_of_range_exception : public exception { + + public: + + /// Constructor + explicit out_of_range_exception(const uint32_t index) : + exception(std::string{"index out of range: "} + + std::to_string(index)) { + } + + }; // out_of_range_exception + +} // namespace vtzero + +#endif // VTZERO_EXCEPTION_HPP diff --git a/include/vtzero/feature.hpp b/include/vtzero/feature.hpp new file mode 100644 index 00000000..745d49a1 --- /dev/null +++ b/include/vtzero/feature.hpp @@ -0,0 +1,315 @@ +#ifndef VTZERO_FEATURE_HPP +#define VTZERO_FEATURE_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file feature.hpp + * + * @brief Contains the feature class. + */ + +#include "exception.hpp" +#include "property.hpp" +#include "property_value.hpp" +#include "types.hpp" + +#include + +#include +#include +#include +#include + +namespace vtzero { + + class layer; + + /** + * A feature according to spec 4.2. + * + * Note that a feature will internally contain a pointer to the layer it + * came from. The layer has to stay valid as long as the feature is used. 
+ */ + class feature { + + using uint32_it_range = protozero::iterator_range; + + const layer* m_layer = nullptr; + uint64_t m_id = 0; // defaults to 0, see https://github.com/mapbox/vector-tile-spec/blob/master/2.1/vector_tile.proto#L32 + uint32_it_range m_properties{}; + protozero::pbf_reader::const_uint32_iterator m_property_iterator{}; + std::size_t m_num_properties = 0; + data_view m_geometry{}; + GeomType m_geometry_type = GeomType::UNKNOWN; // defaults to UNKNOWN, see https://github.com/mapbox/vector-tile-spec/blob/master/2.1/vector_tile.proto#L41 + bool m_has_id = false; + + public: + + /** + * Construct an invalid feature object. + */ + feature() = default; + + /** + * Construct a feature object. + * + * @throws format_exception if the layer data is ill-formed. + */ + feature(const layer* layer, const data_view data) : + m_layer(layer) { + vtzero_assert(layer); + vtzero_assert(data.data()); + + protozero::pbf_message reader{data}; + + while (reader.next()) { + switch (reader.tag_and_type()) { + case protozero::tag_and_type(detail::pbf_feature::id, protozero::pbf_wire_type::varint): + m_id = reader.get_uint64(); + m_has_id = true; + break; + case protozero::tag_and_type(detail::pbf_feature::tags, protozero::pbf_wire_type::length_delimited): + if (m_properties.begin() != protozero::pbf_reader::const_uint32_iterator{}) { + throw format_exception{"Feature has more than one tags field"}; + } + m_properties = reader.get_packed_uint32(); + m_property_iterator = m_properties.begin(); + break; + case protozero::tag_and_type(detail::pbf_feature::type, protozero::pbf_wire_type::varint): { + const auto type = reader.get_enum(); + // spec 4.3.4 "Geometry Types" + if (type < 0 || type > 3) { + throw format_exception{"Unknown geometry type (spec 4.3.4)"}; + } + m_geometry_type = static_cast(type); + } + break; + case protozero::tag_and_type(detail::pbf_feature::geometry, protozero::pbf_wire_type::length_delimited): + if (!m_geometry.empty()) { + throw 
format_exception{"Feature has more than one geometry field"}; + } + m_geometry = reader.get_view(); + break; + default: + reader.skip(); // ignore unknown fields + } + } + + // spec 4.2 "A feature MUST contain a geometry field." + if (m_geometry.empty()) { + throw format_exception{"Missing geometry field in feature (spec 4.2)"}; + } + + const auto size = m_properties.size(); + if (size % 2 != 0) { + throw format_exception{"unpaired property key/value indexes (spec 4.4)"}; + } + m_num_properties = size / 2; + } + + /** + * Is this a valid feature? Valid features are those not created from + * the default constructor. + * + * Complexity: Constant. + */ + bool valid() const noexcept { + return m_geometry.data() != nullptr; + } + + /** + * Is this a valid feature? Valid features are those not created from + * the default constructor. + * + * Complexity: Constant. + */ + explicit operator bool() const noexcept { + return valid(); + } + + /** + * The ID of this feature. According to the spec IDs should be unique + * in a layer if they are set (spec 4.2). + * + * Complexity: Constant. + * + * Always returns 0 for invalid features. + */ + uint64_t id() const noexcept { + return m_id; + } + + /** + * Does this feature have an ID? + * + * Complexity: Constant. + * + * Always returns false for invalid features. + */ + bool has_id() const noexcept { + return m_has_id; + } + + /** + * The geometry type of this feature. + * + * Complexity: Constant. + * + * Always returns GeomType::UNKNOWN for invalid features. + */ + GeomType geometry_type() const noexcept { + return m_geometry_type; + } + + /** + * Get the geometry of this feature. + * + * Complexity: Constant. + * + * @pre @code valid() @endcode + */ + vtzero::geometry geometry() const noexcept { + vtzero_assert_in_noexcept_function(valid()); + return {m_geometry, m_geometry_type}; + } + + /** + * Returns true if this feature doesn't have any properties. + * + * Complexity: Constant. 
+ * + * Always returns true for invalid features. + */ + bool empty() const noexcept { + return m_num_properties == 0; + } + + /** + * Returns the number of properties in this feature. + * + * Complexity: Constant. + * + * Always returns 0 for invalid features. + */ + std::size_t num_properties() const noexcept { + return m_num_properties; + } + + /** + * Get the next property in this feature. + * + * Complexity: Constant. + * + * @returns The next property or the invalid property if there are no + * more properties. + * @throws format_exception if the feature data is ill-formed. + * @throws any protozero exception if the protobuf encoding is invalid. + * @pre @code valid() @endcode + */ + property next_property(); + + /** + * Get the indexes into the key/value table for the next property in + * this feature. + * + * Complexity: Constant. + * + * @returns The next index_value_pair or an invalid index_value_pair + * if there are no more properties. + * @throws format_exception if the feature data is ill-formed. + * @throws out_of_range_exception if the key or value index is not + * within the range of indexes in the layer key/value table. + * @throws any protozero exception if the protobuf encoding is invalid. + * @pre @code valid() @endcode + */ + index_value_pair next_property_indexes(); + + /** + * Reset the property iterator. The next time next_property() or + * next_property_indexes() is called, it will begin from the first + * property again. + * + * Complexity: Constant. + * + * @pre @code valid() @endcode + */ + void reset_property() noexcept { + vtzero_assert_in_noexcept_function(valid()); + m_property_iterator = m_properties.begin(); + } + + /** + * Call a function for each property of this feature. + * + * @tparam TFunc The type of the function. It must take a single + * argument of type property&& and return a bool. If the + * function returns false, the iteration will be stopped. + * @param func The function to call. 
+ * @returns true if the iteration was completed and false otherwise. + * @pre @code valid() @endcode + */ + template + bool for_each_property(TFunc&& func) const; + + /** + * Call a function for each key/value index of this feature. + * + * @tparam TFunc The type of the function. It must take a single + * argument of type index_value_pair&& and return a bool. + * If the function returns false, the iteration will be stopped. + * @param func The function to call. + * @returns true if the iteration was completed and false otherwise. + * @pre @code valid() @endcode + */ + template + bool for_each_property_indexes(TFunc&& func) const; + + }; // class feature + + /** + * Create some kind of mapping from property keys to property values. + * + * This can be used to read all properties into a std::map or similar + * object. + * + * @tparam TMap Map type (std::map, std::unordered_map, ...) Must support + * the emplace() method. + * @tparam TKey Key type, usually the key of the map type. The data_view + * of the property key is converted to this type before + * adding it to the map. + * @tparam TValue Value type, usally the value of the map type. The + * property_value is converted to this type before + * adding it to the map. + * @tparam TMapping A struct derived from property_value_mapping with the + * mapping for vtzero property value types to TValue-constructing + * types. (See convert_property_value() for details.) + * @param feature The feature to get the properties from. + * @returns An object of type TMap with all the properties. 
+ * @pre @code feature.valid() @endcode + */ + template + TMap create_properties_map(const vtzero::feature& feature) { + TMap map; + + feature.for_each_property([&map](const property& p) { + map.emplace(TKey(p.key()), convert_property_value(p.value())); + return true; + }); + + return map; + } + +} // namespace vtzero + +#endif // VTZERO_FEATURE_HPP diff --git a/include/vtzero/feature_builder_impl.hpp b/include/vtzero/feature_builder_impl.hpp new file mode 100644 index 00000000..30674186 --- /dev/null +++ b/include/vtzero/feature_builder_impl.hpp @@ -0,0 +1,126 @@ +#ifndef VTZERO_FEATURE_BUILDER_IMPL_HPP +#define VTZERO_FEATURE_BUILDER_IMPL_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file feature_builder_impl.hpp + * + * @brief Contains classes internal to the builder. 
+ */ + +#include "builder_impl.hpp" +#include "encoded_property_value.hpp" +#include "geometry.hpp" +#include "property.hpp" +#include "property_value.hpp" + +#include + +namespace vtzero { + + namespace detail { + + class feature_builder_base { + + layer_builder_impl* m_layer; + + void add_key_internal(index_value idx) { + vtzero_assert(idx.valid()); + m_pbf_tags.add_element(idx.value()); + } + + template + void add_key_internal(T&& key) { + add_key_internal(m_layer->add_key(data_view{std::forward(key)})); + } + + void add_value_internal(index_value idx) { + vtzero_assert(idx.valid()); + m_pbf_tags.add_element(idx.value()); + } + + void add_value_internal(property_value value) { + add_value_internal(m_layer->add_value(value)); + } + + template + void add_value_internal(T&& value) { + encoded_property_value v{std::forward(value)}; + add_value_internal(m_layer->add_value(v)); + } + + protected: + + protozero::pbf_builder m_feature_writer; + protozero::packed_field_uint32 m_pbf_tags; + + explicit feature_builder_base(layer_builder_impl* layer) : + m_layer(layer), + m_feature_writer(layer->message(), detail::pbf_layer::features) { + } + + ~feature_builder_base() noexcept = default; + + feature_builder_base(const feature_builder_base&) = delete; // NOLINT(hicpp-use-equals-delete, modernize-use-equals-delete) + + feature_builder_base& operator=(const feature_builder_base&) = delete; // NOLINT(hicpp-use-equals-delete, modernize-use-equals-delete) + // The check wants these functions to be public... 
+ + feature_builder_base(feature_builder_base&&) noexcept = default; + + feature_builder_base& operator=(feature_builder_base&&) noexcept = default; + + uint32_t version() const noexcept { + return m_layer->version(); + } + + void set_id_impl(uint64_t id) { + m_feature_writer.add_uint64(detail::pbf_feature::id, id); + } + + void add_property_impl(const property& property) { + add_key_internal(property.key()); + add_value_internal(property.value()); + } + + void add_property_impl(const index_value_pair idxs) { + add_key_internal(idxs.key()); + add_value_internal(idxs.value()); + } + + template + void add_property_impl(TKey&& key, TValue&& value) { + add_key_internal(std::forward(key)); + add_value_internal(std::forward(value)); + } + + void do_commit() { + if (m_pbf_tags.valid()) { + m_pbf_tags.commit(); + } + m_feature_writer.commit(); + m_layer->increment_feature_count(); + } + + void do_rollback() { + if (m_pbf_tags.valid()) { + m_pbf_tags.rollback(); + } + m_feature_writer.rollback(); + } + + }; // class feature_builder_base + + } // namespace detail + +} // namespace vtzero + +#endif // VTZERO_FEATURE_BUILDER_IMPL_HPP diff --git a/include/vtzero/geometry.hpp b/include/vtzero/geometry.hpp new file mode 100644 index 00000000..42af6236 --- /dev/null +++ b/include/vtzero/geometry.hpp @@ -0,0 +1,444 @@ +#ifndef VTZERO_GEOMETRY_HPP +#define VTZERO_GEOMETRY_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file geometry.hpp + * + * @brief Contains classes and functions related to geometry handling. 
+ */ + +#include "exception.hpp" +#include "types.hpp" + +#include + +#include +#include +#include + +namespace vtzero { + + /// A simple point class + struct point { + + /// X coordinate + int32_t x = 0; + + /// Y coordinate + int32_t y = 0; + + /// Default construct to 0 coordinates + constexpr point() noexcept = default; + + /// Constructor + constexpr point(int32_t x_, int32_t y_) noexcept : + x(x_), + y(y_) { + } + + }; // struct point + + /** + * Type of a polygon ring. This can either be "outer", "inner", or + * "invalid". Invalid is used when the area of the ring is 0. + */ + enum class ring_type { + outer = 0, + inner = 1, + invalid = 2 + }; // enum class ring_type + + /** + * Helper function to create a point from any type that has members x + * and y. + * + * If your point type doesn't have members x any y, you can overload this + * function for your type and it will be used by vtzero. + */ + template + point create_vtzero_point(const TPoint& p) noexcept { + return {p.x, p.y}; + } + + /// Points are equal if their coordinates are + inline constexpr bool operator==(const point a, const point b) noexcept { + return a.x == b.x && a.y == b.y; + } + + /// Points are not equal if their coordinates aren't + inline constexpr bool operator!=(const point a, const point b) noexcept { + return !(a == b); + } + + namespace detail { + + /// The command id type as specified in the vector tile spec + enum class CommandId : uint32_t { + MOVE_TO = 1, + LINE_TO = 2, + CLOSE_PATH = 7 + }; + + inline constexpr uint32_t command_integer(CommandId id, const uint32_t count) noexcept { + return (static_cast(id) & 0x7U) | (count << 3U); + } + + inline constexpr uint32_t command_move_to(const uint32_t count) noexcept { + return command_integer(CommandId::MOVE_TO, count); + } + + inline constexpr uint32_t command_line_to(const uint32_t count) noexcept { + return command_integer(CommandId::LINE_TO, count); + } + + inline constexpr uint32_t command_close_path() noexcept { + return 
command_integer(CommandId::CLOSE_PATH, 1); + } + + inline constexpr uint32_t get_command_id(const uint32_t command_integer) noexcept { + return command_integer & 0x7U; + } + + inline constexpr uint32_t get_command_count(const uint32_t command_integer) noexcept { + return command_integer >> 3U; + } + + // The maximum value for the command count according to the spec. + inline constexpr uint32_t max_command_count() noexcept { + return get_command_count(std::numeric_limits::max()); + } + + inline constexpr int64_t det(const point a, const point b) noexcept { + return static_cast(a.x) * static_cast(b.y) - + static_cast(b.x) * static_cast(a.y); + } + + template + struct get_result { + + using type = void; + + template + void operator()(TGeomHandler&& /*geom_handler*/) const noexcept { + } + + }; + + template + struct get_result().result()), void>::value>::type> { + + using type = decltype(std::declval().result()); + + template + type operator()(TGeomHandler&& geom_handler) { + return std::forward(geom_handler).result(); + } + + }; + + /** + * Decode a geometry as specified in spec 4.3 from a sequence of 32 bit + * unsigned integers. This templated base class can be instantiated + * with a different iterator type for testing than for normal use. + */ + template + class geometry_decoder { + + public: + + using iterator_type = TIterator; + + private: + + iterator_type m_it; + iterator_type m_end; + + point m_cursor{0, 0}; + + // maximum value for m_count before we throw an exception + uint32_t m_max_count; + + /** + * The current count value is set from the CommandInteger and + * then counted down with each next_point() call. So it must be + * greater than 0 when next_point() is called and 0 when + * next_command() is called. 
+ */ + uint32_t m_count = 0; + + public: + + geometry_decoder(iterator_type begin, iterator_type end, std::size_t max) : + m_it(begin), + m_end(end), + m_max_count(static_cast(max)) { + vtzero_assert(max <= detail::max_command_count()); + } + + uint32_t count() const noexcept { + return m_count; + } + + bool done() const noexcept { + return m_it == m_end; + } + + bool next_command(const CommandId expected_command_id) { + vtzero_assert(m_count == 0); + + if (m_it == m_end) { + return false; + } + + const auto command_id = get_command_id(*m_it); + if (command_id != static_cast(expected_command_id)) { + throw geometry_exception{std::string{"expected command "} + + std::to_string(static_cast(expected_command_id)) + + " but got " + + std::to_string(command_id)}; + } + + if (expected_command_id == CommandId::CLOSE_PATH) { + // spec 4.3.3.3 "A ClosePath command MUST have a command count of 1" + if (get_command_count(*m_it) != 1) { + throw geometry_exception{"ClosePath command count is not 1"}; + } + } else { + m_count = get_command_count(*m_it); + if (m_count > m_max_count) { + throw geometry_exception{"count too large"}; + } + } + + ++m_it; + + return true; + } + + point next_point() { + vtzero_assert(m_count > 0); + + if (m_it == m_end || std::next(m_it) == m_end) { + throw geometry_exception{"too few points in geometry"}; + } + + // spec 4.3.2 "A ParameterInteger is zigzag encoded" + int64_t x = protozero::decode_zigzag32(*m_it++); + int64_t y = protozero::decode_zigzag32(*m_it++); + + // x and y are int64_t so this addition can never overflow + x += m_cursor.x; + y += m_cursor.y; + + // The cast is okay, because a valid vector tile can never + // contain values that would overflow here and we don't care + // what happens to invalid tiles here. 
+ m_cursor.x = static_cast(x); + m_cursor.y = static_cast(y); + + --m_count; + + return m_cursor; + } + + template + typename detail::get_result::type decode_point(TGeomHandler&& geom_handler) { + // spec 4.3.4.2 "MUST consist of a single MoveTo command" + if (!next_command(CommandId::MOVE_TO)) { + throw geometry_exception{"expected MoveTo command (spec 4.3.4.2)"}; + } + + // spec 4.3.4.2 "command count greater than 0" + if (count() == 0) { + throw geometry_exception{"MoveTo command count is zero (spec 4.3.4.2)"}; + } + + geom_handler.points_begin(count()); + while (count() > 0) { + geom_handler.points_point(next_point()); + } + + // spec 4.3.4.2 "MUST consist of of a single ... command" + if (!done()) { + throw geometry_exception{"additional data after end of geometry (spec 4.3.4.2)"}; + } + + geom_handler.points_end(); + + return detail::get_result{}(std::forward(geom_handler)); + } + + template + typename detail::get_result::type decode_linestring(TGeomHandler&& geom_handler) { + // spec 4.3.4.3 "1. A MoveTo command" + while (next_command(CommandId::MOVE_TO)) { + // spec 4.3.4.3 "with a command count of 1" + if (count() != 1) { + throw geometry_exception{"MoveTo command count is not 1 (spec 4.3.4.3)"}; + } + + const auto first_point = next_point(); + + // spec 4.3.4.3 "2. A LineTo command" + if (!next_command(CommandId::LINE_TO)) { + throw geometry_exception{"expected LineTo command (spec 4.3.4.3)"}; + } + + // spec 4.3.4.3 "with a command count greater than 0" + if (count() == 0) { + throw geometry_exception{"LineTo command count is zero (spec 4.3.4.3)"}; + } + + geom_handler.linestring_begin(count() + 1); + + geom_handler.linestring_point(first_point); + while (count() > 0) { + geom_handler.linestring_point(next_point()); + } + + geom_handler.linestring_end(); + } + + return detail::get_result{}(std::forward(geom_handler)); + } + + template + typename detail::get_result::type decode_polygon(TGeomHandler&& geom_handler) { + // spec 4.3.4.4 "1. 
A MoveTo command" + while (next_command(CommandId::MOVE_TO)) { + // spec 4.3.4.4 "with a command count of 1" + if (count() != 1) { + throw geometry_exception{"MoveTo command count is not 1 (spec 4.3.4.4)"}; + } + + int64_t sum = 0; + const point start_point = next_point(); + point last_point = start_point; + + // spec 4.3.4.4 "2. A LineTo command" + if (!next_command(CommandId::LINE_TO)) { + throw geometry_exception{"expected LineTo command (spec 4.3.4.4)"}; + } + + geom_handler.ring_begin(count() + 2); + + geom_handler.ring_point(start_point); + + while (count() > 0) { + const point p = next_point(); + sum += detail::det(last_point, p); + last_point = p; + geom_handler.ring_point(p); + } + + // spec 4.3.4.4 "3. A ClosePath command" + if (!next_command(CommandId::CLOSE_PATH)) { + throw geometry_exception{"expected ClosePath command (spec 4.3.4.4)"}; + } + + sum += detail::det(last_point, start_point); + + geom_handler.ring_point(start_point); + + geom_handler.ring_end(sum > 0 ? ring_type::outer : + sum < 0 ? ring_type::inner : ring_type::invalid); + } + + return detail::get_result{}(std::forward(geom_handler)); + } + + }; // class geometry_decoder + + } // namespace detail + + /** + * Decode a point geometry. + * + * @tparam TGeomHandler Handler class. See tutorial for details. + * @param geometry The geometry as returned by feature.geometry(). + * @param geom_handler An object of TGeomHandler. + * @throws geometry_error If there is a problem with the geometry. + * @pre Geometry must be a point geometry. + */ + template + typename detail::get_result::type decode_point_geometry(const geometry& geometry, TGeomHandler&& geom_handler) { + vtzero_assert(geometry.type() == GeomType::POINT); + detail::geometry_decoder decoder{geometry.begin(), geometry.end(), geometry.data().size() / 2}; + return decoder.decode_point(std::forward(geom_handler)); + } + + /** + * Decode a linestring geometry. + * + * @tparam TGeomHandler Handler class. See tutorial for details. 
+ * @param geometry The geometry as returned by feature.geometry(). + * @param geom_handler An object of TGeomHandler. + * @returns whatever geom_handler.result() returns if that function exists, + * void otherwise + * @throws geometry_error If there is a problem with the geometry. + * @pre Geometry must be a linestring geometry. + */ + template + typename detail::get_result::type decode_linestring_geometry(const geometry& geometry, TGeomHandler&& geom_handler) { + vtzero_assert(geometry.type() == GeomType::LINESTRING); + detail::geometry_decoder decoder{geometry.begin(), geometry.end(), geometry.data().size() / 2}; + return decoder.decode_linestring(std::forward(geom_handler)); + } + + /** + * Decode a polygon geometry. + * + * @tparam TGeomHandler Handler class. See tutorial for details. + * @param geometry The geometry as returned by feature.geometry(). + * @param geom_handler An object of TGeomHandler. + * @returns whatever geom_handler.result() returns if that function exists, + * void otherwise + * @throws geometry_error If there is a problem with the geometry. + * @pre Geometry must be a polygon geometry. + */ + template + typename detail::get_result::type decode_polygon_geometry(const geometry& geometry, TGeomHandler&& geom_handler) { + vtzero_assert(geometry.type() == GeomType::POLYGON); + detail::geometry_decoder decoder{geometry.begin(), geometry.end(), geometry.data().size() / 2}; + return decoder.decode_polygon(std::forward(geom_handler)); + } + + /** + * Decode a geometry. + * + * @tparam TGeomHandler Handler class. See tutorial for details. + * @param geometry The geometry as returned by feature.geometry(). + * @param geom_handler An object of TGeomHandler. + * @returns whatever geom_handler.result() returns if that function exists, + * void otherwise + * @throws geometry_error If the geometry has type UNKNOWN of if there is + * a problem with the geometry. 
+ */ + template + typename detail::get_result::type decode_geometry(const geometry& geometry, TGeomHandler&& geom_handler) { + detail::geometry_decoder decoder{geometry.begin(), geometry.end(), geometry.data().size() / 2}; + switch (geometry.type()) { + case GeomType::POINT: + return decoder.decode_point(std::forward(geom_handler)); + case GeomType::LINESTRING: + return decoder.decode_linestring(std::forward(geom_handler)); + case GeomType::POLYGON: + return decoder.decode_polygon(std::forward(geom_handler)); + default: + break; + } + throw geometry_exception{"unknown geometry type"}; + } + +} // namespace vtzero + +#endif // VTZERO_GEOMETRY_HPP diff --git a/include/vtzero/index.hpp b/include/vtzero/index.hpp new file mode 100644 index 00000000..75f4df73 --- /dev/null +++ b/include/vtzero/index.hpp @@ -0,0 +1,264 @@ +#ifndef VTZERO_INDEX_HPP +#define VTZERO_INDEX_HPP + +/***************************************************************************** + +vtzero - Tiny and fast vector tile decoder and encoder in C++. + +This file is from https://github.com/mapbox/vtzero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file index.hpp + * + * @brief Contains classes for indexing the key/value tables inside layers. + */ + +#include "builder.hpp" +#include "types.hpp" + +#include +#include +#include + +namespace vtzero { + + /** + * Used to store the mapping between property keys and the index value + * in the table stored in a layer. + * + * @tparam TMap The map class to use (std::map, std::unordered_map or + * something compatible). + */ + template