Skip to content

Commit

Permalink
New "merge" command to merge any number of OSM files into one.
Browse files Browse the repository at this point in the history
  • Loading branch information
joto committed Aug 5, 2016
1 parent f992ba6 commit d0ddf69
Show file tree
Hide file tree
Showing 16 changed files with 536 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ This project adheres to [Semantic Versioning](http://semver.org/).

### Added

- New `merge` command to merge any number of sorted OSM files.

### Changed

### Fixed
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ if(PANDOC)
add_man_page(1 osmium-check-refs)
add_man_page(1 osmium-fileinfo)
add_man_page(1 osmium-getid)
add_man_page(1 osmium-merge)
add_man_page(1 osmium-merge-changes)
add_man_page(1 osmium-renumber)
add_man_page(1 osmium-show)
Expand Down
2 changes: 1 addition & 1 deletion man/osmium-merge-changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,6 @@ just a single change file:

# SEE ALSO

* **osmium**(1), **osmium-file-formats**(5)
* **osmium**(1), **osmium-merge**(1), **osmium-file-formats**(5)
* [Osmium website](http://osmcode.org/osmium)

57 changes: 57 additions & 0 deletions man/osmium-merge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

# NAME

osmium-merge - merge several sorted OSM files into one


# SYNOPSIS

**osmium merge** \[*OPTIONS*\] *OSM-FILE*...


# DESCRIPTION

Merges the content of all OSM files given on the command line into one large
OSM file. Objects in all files must be sorted by type, ID, and version. The
results will also be sorted in the same way.

If there is only a single input file, its contents will be copied to the
output.


@MAN_COMMON_OPTIONS@
@MAN_INPUT_OPTIONS@
@MAN_OUTPUT_OPTIONS@

# DIAGNOSTICS

**osmium merge** exits with exit code

0
~ if everything went alright,

1
~ if there was an error processing the data, or

2
~ if there was a problem with the command line arguments.


# MEMORY USAGE

**osmium merge** doesn't keep a lot of data in memory, but if you are merging
many files, the buffers might take a noticeable amount of memory.


# EXAMPLES

Merge several extracts into one:

osmium merge washington.pbf oregon.pbf california.pbf -o westcoast.pbf


# SEE ALSO

* **osmium**(1), **osmium-merge-changes**(1), **osmium-file-formats**(5)
* [Osmium website](http://osmcode.org/osmium)

1 change: 1 addition & 0 deletions man/osmium.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ If an osmium command exits with an "Out of memory" error, try running it with
**osmium-check-refs**(1),
**osmium-fileinfo**(1),
**osmium-getid**(1),
**osmium-merge**(1),
**osmium-merge-changes**(1),
**osmium-renumber**(1),
**osmium-show**(1),
Expand Down
209 changes: 209 additions & 0 deletions src/command_merge.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
Osmium -- OpenStreetMap data manipulation command line tool
http://osmcode.org/osmium
Copyright (C) 2013-2016 Jochen Topf <jochen@topf.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <algorithm>
#include <iostream>
#include <memory>
#include <queue>

#include <boost/program_options.hpp>

#include <osmium/io/any_input.hpp>
#include <osmium/io/any_output.hpp>
#include <osmium/io/output_iterator.hpp>

#include "command_merge.hpp"

bool CommandMerge::setup(const std::vector<std::string>& arguments) {
po::options_description opts_cmd{"COMMAND OPTIONS"};

po::options_description opts_common{add_common_options()};
po::options_description opts_input{add_multiple_inputs_options()};
po::options_description opts_output{add_output_options()};

po::options_description hidden;
hidden.add_options()
("input-filenames", po::value<std::vector<std::string>>(), "Input files")
;

po::options_description desc;
desc.add(opts_cmd).add(opts_common).add(opts_input).add(opts_output);

po::options_description parsed_options;
parsed_options.add(desc).add(hidden);

po::positional_options_description positional;
positional.add("input-filenames", -1);

po::variables_map vm;
po::store(po::command_line_parser(arguments).options(parsed_options).positional(positional).run(), vm);
po::notify(vm);

setup_common(vm, desc);
setup_input_files(vm);
setup_output_file(vm);

return true;
}

// Passes the verbose output stream m_vout first to the multiple-inputs helper
// and then to the output helper. Judging by their names these report the
// configured input files and output file; the helpers themselves are defined
// elsewhere. The call order determines the order of anything they emit.
void CommandMerge::show_arguments() {
show_multiple_inputs_arguments(m_vout);
show_output_arguments(m_vout);
}

/**
 * One input file of a multi-file merge.
 *
 * Owns a Reader for the file plus an input iterator over the OSM objects
 * delivered by that Reader. The iterator always points at the "current"
 * object of this source, or equals the default-constructed (end) iterator
 * once the source is exhausted.
 */
class DataSource {

    using it_type = osmium::io::InputIterator<osmium::io::Reader, osmium::OSMObject>;

    // Declaration order matters: members are initialized in this order and
    // the iterator is constructed from *reader.
    // The Reader is held through a pointer, presumably so that its address
    // stays the same when a DataSource is moved (e.g. inside a std::vector)
    // while the iterator keeps referring to it -- TODO confirm against
    // InputIterator.
    std::unique_ptr<osmium::io::Reader> reader;
    it_type iterator;

public:

    /**
     * Open the given file and position the iterator on its first object.
     *
     * Marked explicit so that a File is never silently converted into a
     * DataSource (which would open the file as a side effect).
     *
     * @param file Input file to read from.
     */
    explicit DataSource(const osmium::io::File& file) :
        reader(new osmium::io::Reader{file}),
        iterator(*reader) {
    }

    /// True if there is no current object, i.e. the iterator is at the end.
    bool empty() const noexcept {
        return iterator == it_type{};
    }

    /**
     * Advance to the next object.
     *
     * Deliberately not noexcept: advancing the iterator pulls more data from
     * the Reader, and read or parse errors are reported as exceptions. With
     * noexcept such an error would end in std::terminate() instead of being
     * propagated to the caller.
     *
     * @returns true if there is a current object after advancing, false if
     *          the source is exhausted.
     */
    bool next() {
        ++iterator;
        return iterator != it_type{};
    }

    /**
     * Pointer to the current object.
     *
     * Must only be called while empty() is false. The pointer refers to the
     * iterator's current position and should not be used any more after the
     * next call to next().
     */
    const osmium::OSMObject* get() noexcept {
        return &*iterator;
    }

}; // DataSource

class QueueElement {

const osmium::OSMObject* m_object;
int m_data_source_index;

public:

QueueElement(const osmium::OSMObject* object, int data_source_index) noexcept :
m_object(object),
m_data_source_index(data_source_index) {
}

const osmium::OSMObject& object() const noexcept {
return *m_object;
}

int data_source_index() const noexcept {
return m_data_source_index;
}

}; // QueueElement

// Ordering used by std::priority_queue<QueueElement>. The comparison of the
// underlying objects is intentionally reversed: std::priority_queue keeps the
// "largest" element on top, so reversing the order puts the smallest object
// on top.
bool operator<(const QueueElement& lhs, const QueueElement& rhs) noexcept {
    const osmium::OSMObject& left = lhs.object();
    const osmium::OSMObject& right = rhs.object();
    return left > right;
}

// Two queue elements are equal when the objects they refer to compare equal;
// the data source index is not part of the comparison.
bool operator==(const QueueElement& lhs, const QueueElement& rhs) noexcept {
    const osmium::OSMObject& left = lhs.object();
    const osmium::OSMObject& right = rhs.object();
    return left == right;
}

// Negation of operator== for queue elements.
bool operator!=(const QueueElement& lhs, const QueueElement& rhs) noexcept {
    const bool same = (lhs == rhs);
    return !same;
}

/**
 * Merge all input files into the output file.
 *
 * Three strategies are used depending on the number of input files:
 *  - one file: its buffers are copied to the output unchanged,
 *  - two files: std::set_union over the two object streams,
 *  - three or more: a k-way merge driven by a priority queue that always
 *    yields the smallest pending object; an object is skipped when the next
 *    object in the queue compares equal to it, so equal objects are written
 *    only once.
 *
 * The merge strategies rely on every input being sorted.
 *
 * @returns Always true when the function returns normally.
 */
bool CommandMerge::run() {
    m_vout << "Opening output file...\n";
    osmium::io::Header header;
    header.set("generator", m_generator);
    osmium::io::Writer writer(m_output_file, header, m_output_overwrite, m_fsync);

    if (m_input_files.size() == 1) {
        m_vout << "Single input file. Copying to output file...\n";
        osmium::io::Reader reader(m_input_files[0]);
        while (osmium::memory::Buffer buffer = reader.read()) {
            writer(std::move(buffer));
        }
        // Close explicitly: errors surfacing while closing can only be
        // reported from here, a destructor has to swallow them.
        reader.close();
    } else if (m_input_files.size() == 2) {
        // Use simpler code when there are exactly two files to merge
        m_vout << "Merging 2 input files to output file...\n";
        osmium::io::Reader reader1(m_input_files[0], osmium::osm_entity_bits::object);
        osmium::io::Reader reader2(m_input_files[1], osmium::osm_entity_bits::object);
        auto in1 = osmium::io::make_input_iterator_range<osmium::OSMObject>(reader1);
        auto in2 = osmium::io::make_input_iterator_range<osmium::OSMObject>(reader2);
        auto out = osmium::io::make_output_iterator(writer);

        std::set_union(in1.cbegin(), in1.cend(),
                       in2.cbegin(), in2.cend(),
                       out);

        // See above: close explicitly so that errors are not lost.
        reader1.close();
        reader2.close();
    } else {
        // Three or more files to merge
        m_vout << "Merging " << m_input_files.size() << " input files to output file...\n";

        // Reserve up front so the vector never reallocates while it is
        // being filled.
        std::vector<DataSource> data_sources;
        data_sources.reserve(m_input_files.size());

        std::priority_queue<QueueElement> queue;

        // Seed the queue with the first object of every non-empty input.
        int index = 0;
        for (const osmium::io::File& file : m_input_files) {
            data_sources.emplace_back(file);

            if (!data_sources.back().empty()) {
                queue.emplace(data_sources.back().get(), index);
            }

            ++index;
        }

        while (!queue.empty()) {
            const auto element = queue.top();
            queue.pop();

            // Skip this object if the next one in the queue is equal to it;
            // the last one of a run of equal objects is the one written.
            if (queue.empty() || element != queue.top()) {
                writer(element.object());
            }

            // Only advance the source after the object has been compared
            // and written: the element merely points at the source's
            // current object.
            const int source_index = element.data_source_index();
            if (data_sources[source_index].next()) {
                queue.emplace(data_sources[source_index].get(), source_index);
            }
        }
    }

    m_vout << "Closing output file...\n";
    writer.close();

    show_memory_used();
    m_vout << "Done.\n";

    return true;
}

namespace {

// Registers this command with CommandFactory under the name "merge" together
// with a one-line description and a callback that creates a new CommandMerge.
// The call is the initializer of a namespace-scope constant, so it runs as
// part of the initialization of this translation unit's globals; the stored
// bool is whatever CommandFactory::add() returns.
// NOTE(review): the callback returns a raw pointer from `new`; ownership is
// presumably taken over by the factory or its caller -- confirm against
// CommandFactory.
const bool register_merge_command = CommandFactory::add("merge", "Merge several sorted OSM files into one", []() {
return new CommandMerge();
});

}

54 changes: 54 additions & 0 deletions src/command_merge.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef COMMAND_MERGE_HPP
#define COMMAND_MERGE_HPP

/*
Osmium -- OpenStreetMap data manipulation command line tool
http://osmcode.org/osmium
Copyright (C) 2013-2016 Jochen Topf <jochen@topf.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <string>
#include <vector>

#include "cmd.hpp"

// Implementation of the "merge" command. Combines the generic Command
// interface with the mix-ins for multiple OSM input files and one OSM output
// file.
class CommandMerge : public Command, public with_multiple_osm_inputs, public with_osm_output {

public:

CommandMerge() = default;

// Parses the command line arguments of the command.
bool setup(const std::vector<std::string>& arguments) override final;

// Shows the settings the command will run with.
void show_arguments() override final;

// Performs the merge.
bool run() override final;

// Name of the command.
const char* name() const noexcept override final {
return "merge";
}

// One-line usage synopsis of the command.
const char* synopsis() const noexcept override final {
return "osmium merge [OPTIONS] OSM-FILE...";
}

}; // class CommandMerge


#endif // COMMAND_MERGE_HPP
1 change: 1 addition & 0 deletions src/command_merge_changes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <algorithm>
#include <iostream>

#include <boost/program_options.hpp>
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ check_cmd_help(changeset-filter)
check_cmd_help(check-refs)
check_cmd_help(fileinfo)
check_cmd_help(getid)
check_cmd_help(merge)
check_cmd_help(merge-changes)
check_cmd_help(renumber)
check_cmd_help(show)
Expand Down
Loading

0 comments on commit d0ddf69

Please sign in to comment.