Skip to content

Commit

Permalink
Warn when input files are not sorted in "osmium merge" command
Browse files Browse the repository at this point in the history
  • Loading branch information
joto committed Sep 25, 2021
1 parent 7055149 commit 740a565
Show file tree
Hide file tree
Showing 15 changed files with 183 additions and 32 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ This project adheres to [Semantic Versioning](https://semver.org/).

### Changed

- The `osmium merge` command now checks that the input files are ordered
correctly and warns or stops if this is not the case.

### Fixed

- Fixed relation member recursion in tags-filter so that objects referenced
Expand Down
6 changes: 6 additions & 0 deletions man/osmium-merge.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ This commands reads its input file(s) only once and writes its output file
in one go so it can be streamed, ie. it can read from STDIN and write to
STDOUT.

# OPTIONS

-H, \--with-history
: Do not warn when there are multiple versions of the same object in the
input files.

@MAN_COMMON_OPTIONS@
@MAN_PROGRESS_OPTIONS@
@MAN_INPUT_OPTIONS@
@MAN_OUTPUT_OPTIONS@


# DIAGNOSTICS

**osmium merge** exits with exit code
Expand Down
102 changes: 70 additions & 32 deletions src/command_merge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

#include "command_merge.hpp"
#include "exception.hpp"
#include "util.hpp"

#include <osmium/io/file.hpp>
Expand All @@ -47,6 +48,9 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.

bool CommandMerge::setup(const std::vector<std::string>& arguments) {
po::options_description opts_cmd{"COMMAND OPTIONS"};
opts_cmd.add_options()
("with-history,H", "Do not warn about input files with multiple object versions")
;

po::options_description opts_common{add_common_options()};
po::options_description opts_input{add_multiple_inputs_options()};
Expand Down Expand Up @@ -75,6 +79,10 @@ bool CommandMerge::setup(const std::vector<std::string>& arguments) {
setup_input_files(vm);
setup_output_file(vm);

if (vm.count("with-history")) {
m_with_history = true;
}

return true;
}

Expand All @@ -89,31 +97,81 @@ namespace {

using it_type = osmium::io::InputIterator<osmium::io::Reader, osmium::OSMObject>;

std::unique_ptr<osmium::io::Reader> reader;
it_type iterator;
std::unique_ptr<osmium::io::Reader> m_reader;
std::string m_name;
it_type m_iterator;

osmium::item_type m_last_type = osmium::item_type::node;
osmium::object_id_type m_last_id = 0;
osmium::object_version_type m_last_version = 0;

bool m_warning;

public:

explicit DataSource(const osmium::io::File& file) :
reader(new osmium::io::Reader{file}),
iterator(*reader) {
explicit DataSource(const osmium::io::File& file, bool with_history) :
m_reader(new osmium::io::Reader{file}),
m_name(file.filename()),
m_iterator(*m_reader),
m_warning(!with_history) {
if (m_iterator != it_type{}) {
m_last_type = m_iterator->type();
m_last_id = m_iterator->id();
m_last_version = m_iterator->version();
}
}

bool empty() const noexcept {
return iterator == it_type{};
return m_iterator == it_type{};
}

bool next() noexcept {
++iterator;
return iterator != it_type{};
bool next() {
++m_iterator;

if (m_iterator == it_type{}) { // reached end of file
return false;
}

if (m_iterator->type() < m_last_type) {
throw std::runtime_error{"Objects in input file '" + m_name + "' out of order (must be nodes, then ways, then relations)."};
} else if (m_iterator->type() > m_last_type) {
m_last_type = m_iterator->type();
m_last_id = m_iterator->id();
m_last_version = m_iterator->version();
return true;
}

if (m_iterator->id() < m_last_id) {
throw std::runtime_error{"Objects in input file '" + m_name + "' out of order (smaller ids must come first)."};
} else if (m_iterator->id() > m_last_id) {
m_last_id = m_iterator->id();
m_last_version = m_iterator->version();
return true;
}

if (m_iterator->version() < m_last_version) {
throw std::runtime_error{"Objects in input file '" + m_name + "' out of order (smaller version must come first)."};
} else if (m_iterator->version() == m_last_version) {
throw std::runtime_error{"Two objects in input file '" + m_name + "' with same version."};
}

if (m_warning) {
std::cerr << "Warning: Multiple objects with same id in input file '" + m_name + "'!\n";
std::cerr << "If you are reading history files, this is to be expected. Use --with-history to disable warning.\n";
m_warning = false;
}

m_last_version = m_iterator->version();

return true;
}

const osmium::OSMObject* get() noexcept {
return &*iterator;
return &*m_iterator;
}

std::size_t offset() const noexcept {
return reader->offset();
return m_reader->offset();
}

}; // DataSource
Expand Down Expand Up @@ -167,27 +225,7 @@ bool CommandMerge::run() {
while (osmium::memory::Buffer buffer = reader.read()) {
writer(std::move(buffer));
}
} else if (m_input_files.size() == 2) {
// Use simpler code when there are exactly two files to merge
m_vout << "Merging 2 input files to output file...\n";

// The larger file should be first so the progress bar will work better
if (file_size(m_input_files[0]) < file_size(m_input_files[1])) {
using std::swap;
swap(m_input_files[0], m_input_files[1]);
}

osmium::io::ReaderWithProgressBar reader1(display_progress(), m_input_files[0], osmium::osm_entity_bits::object);
osmium::io::Reader reader2(m_input_files[1], osmium::osm_entity_bits::object);
auto in1 = osmium::io::make_input_iterator_range<osmium::OSMObject>(reader1);
auto in2 = osmium::io::make_input_iterator_range<osmium::OSMObject>(reader2);
auto out = osmium::io::make_output_iterator(writer);

std::set_union(in1.cbegin(), in1.cend(),
in2.cbegin(), in2.cend(),
out);
} else {
// Three or more files to merge
m_vout << "Merging " << m_input_files.size() << " input files to output file...\n";
osmium::ProgressBar progress_bar{file_size_sum(m_input_files), display_progress()};
std::vector<DataSource> data_sources;
Expand All @@ -197,7 +235,7 @@ bool CommandMerge::run() {

int index = 0;
for (const osmium::io::File& file : m_input_files) {
data_sources.emplace_back(file);
data_sources.emplace_back(file, m_with_history);

if (!data_sources.back().empty()) {
queue.emplace(data_sources.back().get(), index);
Expand Down
2 changes: 2 additions & 0 deletions src/command_merge.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.

class CommandMerge : public CommandWithMultipleOSMInputs, public with_osm_output {

bool m_with_history = false;

public:

explicit CommandMerge(const CommandFactory& command_factory) :
Expand Down
11 changes: 11 additions & 0 deletions test/merge/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,15 @@ check_merge3(i3f input1.osm input2.osm input3.osm output3.osm)
check_merge2(i2f-only-version input1-only-version.osm input2-only-version.osm output2-12-only-version.osm)
check_merge2(i2r-only-version input2-only-version.osm input1-only-version.osm output2-12-only-version.osm)

do_test(merge-unsorted-types-wn "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-types-wn.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "must be nodes, then ways, then relations")
do_test(merge-unsorted-types-nrw "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-types-nrw.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "must be nodes, then ways, then relations")
do_test(merge-unsorted-ids-n "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-ids-n.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "smaller ids must come first")
do_test(merge-unsorted-ids-w "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-ids-w.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "smaller ids must come first")
do_test(merge-unsorted-versions-21 "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-versions-21.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "smaller version must come first")
do_test(merge-unsorted-versions-132 "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-versions-132.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "smaller version must come first")
do_test(merge-unsorted-versions-133 "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/unsorted-versions-133.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "with same version.")

do_test(merge-same-ids-warning "osmium merge ${PROJECT_SOURCE_DIR}/test/merge/same-ids.osm ${PROJECT_SOURCE_DIR}/test/merge/empty.osm -f opl" "Multiple objects with same id")
check_output(merge merge-same-ids-h "merge --generator=test --with-history -f osm merge/same-ids.osm merge/empty.osm" "merge/output-same-ids.osm")

#-----------------------------------------------------------------------------
3 changes: 3 additions & 0 deletions test/merge/empty.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
</osm>
5 changes: 5 additions & 0 deletions test/merge/output-same-ids.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" generator="test">
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="10" version="2" timestamp="2015-01-01T01:00:01Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
</osm>
5 changes: 5 additions & 0 deletions test/merge/same-ids.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="10" version="2" timestamp="2015-01-01T01:00:01Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
</osm>
5 changes: 5 additions & 0 deletions test/merge/unsorted-ids-n.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="11" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="2" lon="1"/>
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
</osm>
23 changes: 23 additions & 0 deletions test/merge/unsorted-ids-w.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="11" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="2" lon="1"/>
<node id="13" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="4" lon="1"/>
<node id="14" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="5" lon="1"/>
<node id="16" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="8" lon="1"/>
<way id="24" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<nd ref="14"/>
<nd ref="16"/>
<tag k="xyz" v="abc"/>
</way>
<way id="20" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<nd ref="10"/>
<nd ref="11"/>
<nd ref="13"/>
<tag k="foo" v="bar"/>
</way>
<relation id="31" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<member type="node" ref="13" role="m1"/>
<member type="way" ref="20" role="m2"/>
</relation>
</osm>
23 changes: 23 additions & 0 deletions test/merge/unsorted-types-nrw.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="11" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="2" lon="1"/>
<node id="13" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="4" lon="1"/>
<node id="14" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="5" lon="1"/>
<node id="16" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="8" lon="1"/>
<relation id="31" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<member type="node" ref="13" role="m1"/>
<member type="way" ref="20" role="m2"/>
</relation>
<way id="20" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<nd ref="10"/>
<nd ref="11"/>
<nd ref="13"/>
<tag k="foo" v="bar"/>
</way>
<way id="24" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<nd ref="14"/>
<nd ref="16"/>
<tag k="xyz" v="abc"/>
</way>
</osm>
10 changes: 10 additions & 0 deletions test/merge/unsorted-types-wn.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<way id="20" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1">
<nd ref="10"/>
<nd ref="11"/>
<nd ref="13"/>
<tag k="foo" v="bar"/>
</way>
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
</osm>
6 changes: 6 additions & 0 deletions test/merge/unsorted-versions-132.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="1" timestamp="2015-01-01T01:00:01Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="10" version="3" timestamp="2015-01-01T01:00:03Z" uid="1" user="test" changeset="3" lat="1" lon="3"/>
<node id="10" version="2" timestamp="2015-01-01T01:00:02Z" uid="1" user="test" changeset="2" lat="1" lon="2"/>
</osm>
6 changes: 6 additions & 0 deletions test/merge/unsorted-versions-133.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="1" timestamp="2015-01-01T01:00:01Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
<node id="10" version="3" timestamp="2015-01-01T01:00:03Z" uid="1" user="test" changeset="3" lat="1" lon="3"/>
<node id="10" version="3" timestamp="2015-01-01T01:00:03Z" uid="1" user="test" changeset="3" lat="1" lon="3"/>
</osm>
5 changes: 5 additions & 0 deletions test/merge/unsorted-versions-21.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" upload="false" generator="testdata">
<node id="10" version="2" timestamp="2015-01-01T01:00:01Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
<node id="10" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="1" lat="1" lon="1"/>
</osm>

0 comments on commit 740a565

Please sign in to comment.