Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
New export format for PostgreSQL COPY files.
  • Loading branch information
joto committed Dec 10, 2018
1 parent 4088b95 commit 3b0fd23
Show file tree
Hide file tree
Showing 7 changed files with 383 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Expand Up @@ -20,6 +20,9 @@ This project adheres to [Semantic Versioning](https://semver.org/).
* New option for smart extract strategy: `complete-partial-relations=X` will
complete all relations with at least X percent of their members already in
the extract.
* New export format "pg" creates a file in the PostgreSQL COPY text format
with the GEOMETRY as WKB and the tags in JSON(B) format. This can be
imported into a PostgreSQL/PostGIS database very quickly.

### Changed

Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -231,6 +231,7 @@ set(OSMIUM_SOURCE_FILES
util.cpp
command_help.cpp
export/export_format_json.cpp
export/export_format_pg.cpp
export/export_format_text.cpp
export/export_handler.cpp
extract/extract_bbox.cpp
Expand Down
5 changes: 5 additions & 0 deletions man/osmium-export.md
Expand Up @@ -287,6 +287,11 @@ The following output formats are supported:
* `geojsonseq` (alias: `jsonseq`): GeoJSON Text Sequence (RFC8142). Each line
(beginning with a RS (0x1e, record separator) and ending in a linefeed
character) contains one GeoJSON object. Used for streaming GeoJSON.
* `pg`: PostgreSQL COPY text format. One line per object containing the
WGS84 geometry as WKB, the tags in JSON format and, optionally, more columns
for id and attributes. You have to create the table manually, then use the
PostgreSQL COPY command to import the data. Enable verbose output to see
the SQL commands needed to create the table and load the data.
* `text` (alias: `txt`): A simple text format with the geometry in WKT format
followed by the comma-delimited tags. This is mainly intended for debugging
at the moment. THE FORMAT MIGHT CHANGE WITHOUT NOTICE!
Expand Down
13 changes: 11 additions & 2 deletions src/command_export.cpp
Expand Up @@ -25,6 +25,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
#include "util.hpp"

#include "export/export_format_json.hpp"
#include "export/export_format_pg.hpp"
#include "export/export_format_text.hpp"
#include "export/export_handler.hpp"

Expand Down Expand Up @@ -365,8 +366,8 @@ bool CommandExport::setup(const std::vector<std::string>& arguments) {

canonicalize_output_format();

if (m_output_format != "geojson" && m_output_format != "geojsonseq" && m_output_format != "text") {
throw argument_error{"Set output format with --output-format or -f to 'geojson', 'geojsonseq', or 'text'."};
if (m_output_format != "geojson" && m_output_format != "geojsonseq" && m_output_format != "pg" && m_output_format != "text") {
throw argument_error{"Set output format with --output-format or -f to 'geojson', 'geojsonseq', 'pg', or 'text'."};
}

if (vm.count("overwrite")) {
Expand Down Expand Up @@ -490,6 +491,10 @@ static std::unique_ptr<ExportFormat> create_handler(const std::string& output_fo
return std::unique_ptr<ExportFormat>{new ExportFormatJSON{output_format, output_filename, overwrite, fsync, options}};
}

if (output_format == "pg") {
return std::unique_ptr<ExportFormat>{new ExportFormatPg{output_format, output_filename, overwrite, fsync, options}};
}

if (output_format == "text") {
return std::unique_ptr<ExportFormat>{new ExportFormatText{output_format, output_filename, overwrite, fsync, options}};
}
Expand All @@ -511,6 +516,10 @@ bool CommandExport::run() {
m_area_ruleset.init_filter();

auto handler = create_handler(m_output_format, m_output_filename, m_output_overwrite, m_fsync, m_options);
if (m_vout.verbose()) {
handler->debug_output(m_vout, m_output_filename);
}

ExportHandler export_handler{std::move(handler), m_linear_ruleset, m_area_ruleset, m_geometry_types, m_show_errors, m_stop_on_error};
osmium::handler::CheckOrder check_order_handler;

Expand Down
4 changes: 4 additions & 0 deletions src/export/export_format.hpp
Expand Up @@ -28,6 +28,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
#include <osmium/fwd.hpp>
#include <osmium/io/writer_options.hpp>
#include <osmium/osm/object.hpp>
#include <osmium/util/verbose_output.hpp>

#include <cstdint>

Expand Down Expand Up @@ -64,6 +65,9 @@ class ExportFormat {

virtual void close() = 0;

// Hook for export formats that want to print additional information when
// verbose output is enabled. The default implementation does nothing and
// ignores both parameters; formats override it as needed.
virtual void debug_output(osmium::VerboseOutput& /*out*/, const std::string& /*filename*/) {
}

template <typename TFunc>
bool add_tags(const osmium::OSMObject& object, TFunc&& func) {
bool has_tags = false;
Expand Down
282 changes: 282 additions & 0 deletions src/export/export_format_pg.cpp
@@ -0,0 +1,282 @@
/*
Osmium -- OpenStreetMap data manipulation command line tool
http://osmcode.org/osmium-tool/
Copyright (C) 2013-2018 Jochen Topf <jochen@topf.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

#include "export_format_pg.hpp"

#include <osmium/io/detail/read_write.hpp>
#include <osmium/io/detail/string_util.hpp>

#ifndef RAPIDJSON_HAS_STDSTRING
# define RAPIDJSON_HAS_STDSTRING 1
#endif
#include <rapidjson/writer.h>
#include <rapidjson/stringbuffer.h>

#include <limits>
#include <string>

// Sizes (in bytes) governing the output buffer: the buffer is pre-allocated
// with initial_buffer_size and written out to the file as soon as it has
// grown beyond flush_buffer_size.
enum {
    initial_buffer_size = 1024u * 1024u, // 1 MiB
    flush_buffer_size   = 800u * 1024u   // 800 KiB
};

// Set up the exporter for the PostgreSQL COPY text format: opens the output
// file for writing (passing on the overwrite setting), remembers the fsync
// setting for use in close(), and pre-allocates the output buffer.
// The output_format argument is not used by this format.
ExportFormatPg::ExportFormatPg(const std::string& /*output_format*/,
const std::string& output_filename,
osmium::io::overwrite overwrite,
osmium::io::fsync fsync,
const options_type& options) :
ExportFormat(options),
m_fd(osmium::io::detail::open_for_writing(output_filename, overwrite)),
m_fsync(fsync) {
// Reserve the full buffer up front to avoid reallocations while appending.
m_buffer.reserve(initial_buffer_size);
}

/**
 * Write all committed lines to the output file and empty the buffer.
 *
 * Only the first m_commit_size bytes of the buffer are complete lines.
 * Anything after that belongs to a feature that was started but not
 * committed (finish_feature() does not commit features without tags unless
 * keep_untagged is set) and must never reach the output file. This matters
 * when called from close(), where the last feature seen might have been
 * such an uncommitted one.
 */
void ExportFormatPg::flush_to_output() {
    // Cut off the uncommitted tail, the same way start_feature() does.
    m_buffer.resize(m_commit_size);

    osmium::io::detail::reliable_write(m_fd, m_buffer.data(), m_buffer.size());
    m_buffer.clear();
    m_commit_size = 0;
}

/**
 * Begin a new output line.
 *
 * Discards anything left in the buffer behind the last committed line and,
 * depending on the unique_id option, writes the leading id column: either
 * a running counter or the type character followed by the object id.
 * Nothing is written if neither of these modes is selected.
 */
void ExportFormatPg::start_feature(const char type, const osmium::object_id_type id) {
    // Roll back a previous feature that was never committed.
    m_buffer.resize(m_commit_size);

    switch (options().unique_id) {
        case unique_id_type::counter:
            // m_count is only incremented on commit, so the id is one ahead.
            m_buffer += std::to_string(m_count + 1);
            m_buffer += '\t';
            break;
        case unique_id_type::type_id:
            m_buffer += type;
            m_buffer += std::to_string(id);
            m_buffer += '\t';
            break;
        default:
            break;
    }
}

/**
 * Append a string to the buffer, escaping the characters that are special
 * in this output format: backslash, newline, carriage return, and tab are
 * written as a backslash followed by '\\', 'n', 'r', or 't' respectively.
 *
 * @param str  Pointer to the characters to append.
 * @param size Maximum number of characters to read from str. Copying stops
 *             earlier if a NUL character is found.
 */
void ExportFormatPg::append_pg_escaped(const char* str, std::size_t size = std::numeric_limits<std::size_t>::max()) {
    for (std::size_t remaining = size; remaining > 0 && *str != '\0'; --remaining, ++str) {
        const char c = *str;
        if (c == '\\') {
            m_buffer.append("\\\\");
        } else if (c == '\n') {
            m_buffer.append("\\n");
        } else if (c == '\r') {
            m_buffer.append("\\r");
        } else if (c == '\t') {
            m_buffer.append("\\t");
        } else {
            m_buffer += c;
        }
    }
}

/**
 * Append the optional attribute columns of the object to the current line.
 *
 * A column is written (followed by a tab) only if the corresponding option
 * is set to a non-empty string. Columns are written in this order: type,
 * id, version, changeset, uid, user, timestamp, way_nodes. This is the same
 * order in which debug_output() lists the columns of the suggested table.
 *
 * Columns without a value are written as the NULL marker "\N": the
 * way_nodes column for objects that are not ways, and the timestamp column
 * if the object has no timestamp string.
 */
void ExportFormatPg::add_attributes(const osmium::OSMObject& object) {
    const bool is_area = object.type() == osmium::item_type::area;

    if (!options().type.empty()) {
        if (is_area) {
            // Areas are reported with the type of object they were made from.
            if (static_cast<const osmium::Area&>(object).from_way()) {
                m_buffer.append("way");
            } else {
                m_buffer.append("relation");
            }
        } else {
            m_buffer.append(osmium::item_type_to_name(object.type()));
        }
        m_buffer += '\t';
    }

    if (!options().id.empty()) {
        // For areas the id is converted back to the id of the original object.
        m_buffer.append(std::to_string(is_area ? osmium::area_id_to_object_id(object.id()) : object.id()));
        m_buffer += '\t';
    }

    if (!options().version.empty()) {
        m_buffer.append(std::to_string(object.version()));
        m_buffer += '\t';
    }

    if (!options().changeset.empty()) {
        m_buffer.append(std::to_string(object.changeset()));
        m_buffer += '\t';
    }

    if (!options().uid.empty()) {
        m_buffer.append(std::to_string(object.uid()));
        m_buffer += '\t';
    }

    if (!options().user.empty()) {
        // User names are free text and need escaping.
        append_pg_escaped(object.user());
        m_buffer += '\t';
    }

    if (!options().timestamp.empty()) {
        const std::string timestamp = object.timestamp().to_iso();
        if (timestamp.empty()) {
            // An empty field is not valid input for a timestamp column and
            // would make the import fail, so write NULL instead.
            m_buffer += '\\';
            m_buffer += 'N';
        } else {
            m_buffer.append(timestamp);
        }
        m_buffer += '\t';
    }

    if (!options().way_nodes.empty()) {
        if (object.type() == osmium::item_type::way) {
            // Array literal of the form {1,2,3}
            m_buffer += '{';
            for (const auto& nr : static_cast<const osmium::Way&>(object).nodes()) {
                m_buffer.append(std::to_string(nr.ref()));
                m_buffer += ',';
            }
            // Turn the trailing comma into the closing brace. If there were
            // no nodes, there is no comma and the brace has to be added.
            if (m_buffer.back() == ',') {
                m_buffer.back() = '}';
            } else {
                m_buffer += '}';
            }
        } else {
            m_buffer += '\\';
            m_buffer += 'N';
        }
        m_buffer += '\t';
    }
}

/**
 * Append the tags of the object as a JSON object to the current line.
 *
 * Only tags accepted by the tags_filter option are included. The JSON text
 * is always appended, even if it is an empty object.
 *
 * @returns true if at least one tag was written, false otherwise.
 */
bool ExportFormatPg::add_tags(const osmium::OSMObject& object) {
    rapidjson::StringBuffer json;
    rapidjson::Writer<rapidjson::StringBuffer> json_writer{json};

    bool any_tag_written = false;

    json_writer.StartObject();
    for (const auto& tag : object.tags()) {
        if (!options().tags_filter(tag)) {
            continue;
        }
        json_writer.Key(tag.key());
        json_writer.String(tag.value());
        any_tag_written = true;
    }
    json_writer.EndObject();

    // The JSON text itself still has to be escaped for the output format.
    append_pg_escaped(json.GetString(), json.GetSize());

    return any_tag_written;
}

/**
 * Complete the current line with the attribute and tag columns and commit
 * it, unless the object has no tags to export and keep_untagged is not set.
 * In that case the line stays uncommitted and is discarded when the next
 * feature is started.
 */
void ExportFormatPg::finish_feature(const osmium::OSMObject& object) {
    m_buffer += '\t';
    add_attributes(object);

    const bool has_tags = add_tags(object);
    if (!has_tags && !options().keep_untagged) {
        return;
    }

    m_buffer += '\n';

    // From here on the line is part of the output.
    m_commit_size = m_buffer.size();
    ++m_count;

    if (m_buffer.size() > flush_buffer_size) {
        flush_to_output();
    }
}

// Export a node: the geometry column is the point created by the factory.
void ExportFormatPg::node(const osmium::Node& node) {
    start_feature('n', node.id());
    const auto geometry = m_factory.create_point(node);
    m_buffer.append(geometry);
    finish_feature(node);
}

// Export a way: the geometry column is the linestring created by the factory.
void ExportFormatPg::way(const osmium::Way& way) {
    start_feature('w', way.id());
    const auto geometry = m_factory.create_linestring(way);
    m_buffer.append(geometry);
    finish_feature(way);
}

// Export an area: the geometry column is the multipolygon created by the
// factory.
void ExportFormatPg::area(const osmium::Area& area) {
    start_feature('a', area.id());
    const auto geometry = m_factory.create_multipolygon(area);
    m_buffer.append(geometry);
    finish_feature(area);
}

/**
 * Write out the remaining buffer contents, sync the file to disk if that
 * was requested, and close the file. The file descriptor is set to -1
 * afterwards, so calling this again does nothing.
 */
void ExportFormatPg::close() {
    if (m_fd <= 0) {
        return;
    }

    flush_to_output();

    const bool sync_requested = (m_fsync == osmium::io::fsync::yes);
    if (sync_requested) {
        osmium::io::detail::reliable_fsync(m_fd);
    }

    ::close(m_fd);
    m_fd = -1;
}

/**
 * Print example SQL for creating a table matching the configured columns
 * and for loading the output file into it.
 *
 * The columns are listed in the order in which they are written to the
 * file: optional unique id, geometry, the optional attribute columns, and
 * the tags.
 *
 * @param out      Verbose output stream the text is written to.
 * @param filename Name of the output file, used in the \copy example.
 */
void ExportFormatPg::debug_output(osmium::VerboseOutput& out, const std::string& filename) {
    out << '\n';
    out << "Create table with something like this:\n";
    out << "CREATE TABLE osmdata (\n";

    if (options().unique_id == unique_id_type::counter) {
        out << " id BIGINT PRIMARY KEY,\n";
    } else if (options().unique_id == unique_id_type::type_id) {
        out << " id VARCHAR PRIMARY KEY,\n";
    }

    out << " geom GEOMETRY,\n";

    if (!options().type.empty()) {
        out << " osm_type VARCHAR,\n";
    }

    if (!options().id.empty()) {
        out << " osm_id BIGINT,\n";
    }

    if (!options().version.empty()) {
        out << " version INTEGER,\n";
    }

    if (!options().changeset.empty()) {
        out << " changeset INTEGER,\n";
    }

    if (!options().uid.empty()) {
        out << " uid INTEGER,\n";
    }

    if (!options().user.empty()) {
        // USER is a reserved word in PostgreSQL; without the quotes the
        // CREATE TABLE statement is rejected with a syntax error.
        out << " \"user\" VARCHAR,\n";
    }

    if (!options().timestamp.empty()) {
        out << " timestamp TIMESTAMP (0) WITH TIME ZONE,\n";
    }

    if (!options().way_nodes.empty()) {
        out << " way_nodes BIGINT[],\n";
    }

    out << " tags JSON -- or JSONB\n";
    out << ");\n";
    out << "Then load data with something like this:\n";
    out << "\\copy osmdata FROM '" << filename << "'\n";
    out << '\n';
}

0 comments on commit 3b0fd23

Please sign in to comment.