Navigation Menu

Skip to content

Commit

Permalink
Merge branch 'serialization'
Browse files Browse the repository at this point in the history
  • Loading branch information
Mortal committed Jun 4, 2013
2 parents a8a9854 + 36e3be6 commit 51f4da6
Show file tree
Hide file tree
Showing 31 changed files with 4,557 additions and 162 deletions.
3 changes: 3 additions & 0 deletions doc/CMakeLists.txt
Expand Up @@ -54,6 +54,9 @@ configure_file(sorting_internal.dox.in sorting_internal.dox @ONLY)

configure_file(queue.dox.in queue.dox @ONLY)

file(READ code/serialization.inl DOCCODE_SERIALIZATION)
configure_file(serialization.dox.in serialization.dox @ONLY)

else (DOXYGEN)
message(STATUS "Doxygen not found, API documentation cannot be generated.")
endif(DOXYGEN)
Expand Down
1 change: 1 addition & 0 deletions doc/Doxyfile.in
Expand Up @@ -682,6 +682,7 @@ INPUT += @tpie_BINARY_DIR@/doc/progress.dox
INPUT += @tpie_BINARY_DIR@/doc/fractiondb.dox
INPUT += @tpie_BINARY_DIR@/doc/queue.dox
INPUT += @tpie_SOURCE_DIR@/doc/pipelining.dox
INPUT += @tpie_BINARY_DIR@/doc/serialization.dox

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
Expand Down
7 changes: 7 additions & 0 deletions doc/code/code.cpp
Expand Up @@ -4,6 +4,9 @@
#include <tpie/parallel_sort.h>
#include <tpie/priority_queue.h>
#include <tpie/progress_indicator_arrow.h>
#include <tpie/serialization2.h>
#include <tpie/serialization_stream.h>
#include <tpie/serialization_sort.h>
#include <tpie/sort.h>
#include <tpie/stack.h>
#include <tpie/tpie.h>
Expand Down Expand Up @@ -71,6 +74,10 @@ namespace _j {
#include "progress3.inl"
}

namespace _k {
#include "serialization.inl"
}

int main() {
return 0;
}
76 changes: 76 additions & 0 deletions doc/code/serialization.inl
@@ -0,0 +1,76 @@
void write_lines(std::istream & is, std::string filename) {
std::string line;
tpie::serialization_writer wr;
wr.open(filename);
while (std::getline(is, line)) {
wr.serialize(line);
}
wr.close();
}

void reverse_lines(std::string filename) {
tpie::temp_file f;
{
tpie::serialization_reader rd;
rd.open(filename);
tpie::serialization_reverse_writer wr;
wr.open(f);
while (rd.can_read()) {
std::string line;
rd.unserialize(line);
wr.serialize(line);
}
wr.close();
rd.close();
}
{
tpie::serialization_reverse_reader rd;
rd.open(f);
tpie::serialization_writer wr;
wr.open(filename);
while (rd.can_read()) {
std::string line;
rd.unserialize(line);
wr.serialize(line);
}
wr.close();
rd.close();
}
}

void read_lines(std::ostream & os, std::string filename) {
tpie::serialization_reader rd;
rd.open(filename);
while (rd.can_read()) {
std::string line;
rd.unserialize(line);
os << line << '\n';
}
rd.close();
}

void sort_lines(std::string filename) {
tpie::serialization_sort<std::string> sorter;
sorter.set_available_memory(50*1024*1024);
sorter.begin();
{
tpie::serialization_reader rd;
rd.open(filename);
while (rd.can_read()) {
std::string line;
rd.unserialize(line);
sorter.push(line);
}
rd.close();
}
sorter.end();
sorter.merge_runs();
{
tpie::serialization_writer wr;
wr.open(filename);
while (sorter.can_pull()) {
wr.serialize(sorter.pull());
}
wr.close();
}
}
101 changes: 101 additions & 0 deletions doc/serialization.dox.in
@@ -0,0 +1,101 @@
/**
\page serialization Serialization streams

\section sec_serintro Motivation

If you want to read and write text strings with TPIE \c file_streams,
the interface requires a fixed string size.
In some cases this may be unreasonable:
space is wasted on strings that are smaller than the given size limit,
and it may be impossible to give a fixed upper bound on the length of the
strings a program has to operate on.

For this, TPIE provides a serialization framework with a
distinct set of stream readers and writers that support,
in essence, variable-length item types, such as strings and arrays.
With the library support for reversing and sorting such serialization streams,
it becomes reasonably easy to implement external memory algorithms operating on
variable-length items.

The goal of TPIE serialization is \em not to be portable across machines,
nor is it to provide type-checking on the serialized input.
We do not track endianness or integer widths,
so reading serialized streams that were written
on a different platform is not supported in general.
Indeed, the motivation for TPIE serialization is to support
temporary streams of variable-width items in external memory;
it is not intended as a persistent store or as a data transfer format.

TPIE serialization has built-in support for plain old data,
also known as POD types.
This built-in POD support excludes pointer types, however.
POD types are serialized and unserialized by their in-memory representation.
This is intended to be fast, not safe or portable.

The framework also supports certain library types out of the box,
such as \c std::vector, \c std::string and plain old arrays of
serializable data.

\section sec_serusage Usage

The interface and usage are straightforward.
See the included test program \c lines, the bulk of which is reproduced below.

\code
@DOCCODE_SERIALIZATION@
\endcode

\section sec_ser_user User-supplied serializable types

For types other than those supported natively by TPIE serialization,
the user can provide implementations of the \c serialize and \c unserialize
procedures.
For example, we can implement simple serialization/unserialization of a point type:

\code
namespace userland {

// A user-defined type made up of two coordinates.
struct point2 {
	double x;
	double y;
};

// Write a point2 to `dst` as its x coordinate followed by its y coordinate.
template <typename Dst>
void serialize(Dst & dst, const point2 & p) {
	// Bring the TPIE overloads for built-in types into scope.
	using tpie::serialize;
	serialize(dst, p.x);
	serialize(dst, p.y);
}

// Read a point2 from `src`, in the same x-then-y order used by serialize.
template <typename Src>
void unserialize(Src & src, point2 & p) {
	// Bring the TPIE overloads for built-in types into scope.
	using tpie::unserialize;
	unserialize(src, p.x);
	unserialize(src, p.y);
}

} // namespace userland
\endcode

For a more complicated example,
consider how we might serialize and unserialize a \c std::vector.

\code
// Write a vector to `dst`: first the element count, then the elements.
template <typename Dst, typename T, typename alloc_t>
void serialize(Dst & dst, const std::vector<T, alloc_t> & items) {
	using tpie::serialize;
	// The count goes first so that the reading side knows how many
	// elements follow.
	serialize(dst, items.size());
	serialize(dst, items.begin(), items.end());
}

// Read a vector from `src`: first the element count, then the elements.
template <typename S, typename T, typename alloc_t>
void unserialize(S & src, std::vector<T, alloc_t> & v) {
	using tpie::unserialize;
	// Use the size_type of this exact vector specialization, allocator
	// included, so that it matches the type of v.size() written by the
	// corresponding serialize overload.
	typename std::vector<T, alloc_t>::size_type s;
	unserialize(src, s);
	// Make room for s elements, then fill them in place.
	v.resize(s);
	unserialize(src, v.begin(), v.end());
}
\endcode

*/
8 changes: 8 additions & 0 deletions test/CMakeLists.txt
Expand Up @@ -38,6 +38,14 @@ add_executable(tpiecat cat.cpp)
set_target_properties(tpiecat PROPERTIES FOLDER tpie/test)
target_link_libraries(tpiecat tpie)

add_executable(sort sort.cpp)
set_target_properties(sort PROPERTIES FOLDER tpie/test)
target_link_libraries(sort tpie)

add_executable(lines lines.cpp)
set_target_properties(lines PROPERTIES FOLDER tpie/test)
target_link_libraries(lines tpie)

add_executable(test_parallel_sort_threshold test_parallel_sort_threshold.cpp)
set_target_properties(test_parallel_sort_threshold PROPERTIES FOLDER tpie/test)
target_link_libraries(test_parallel_sort_threshold tpie)
Expand Down
46 changes: 46 additions & 0 deletions test/lines.cpp
@@ -0,0 +1,46 @@
// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; c-file-style: "stroustrup"; -*-
// vi:set ts=4 sts=4 sw=4 noet cino=(0 :
// Copyright 2013, The TPIE development team
//
// This file is part of TPIE.
//
// TPIE is free software: you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
// License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with TPIE. If not, see <http://www.gnu.org/licenses/>

#include <iostream>

#include <tpie/tpie.h>
#include <tpie/serialization_sort.h>
#include <tpie/serialization_stream.h>

#include "../doc/code/serialization.inl"

// Print the command-line synopsis to standard error.
static void print_usage(const char * program) {
	std::cerr << "Usage: " << program << " <read|write|reverse|sort> <filename>\n";
}

// Entry point of the `lines` test program.
//
// Expects two arguments: a command (read, write, reverse or sort) and the
// name of the serialization stream file to operate on.
// Returns 0 on success and 1 if the arguments are missing or the command
// is not recognized.
int main(int argc, char ** argv) {
	// argv[argc] is a null pointer and anything beyond it is out of
	// bounds, so both arguments must be known to exist before either is
	// converted to a std::string.
	if (argc < 3) {
		print_usage((argc > 0 && argv[0]) ? argv[0] : "lines");
		return 1;
	}
	const std::string arg = argv[1];
	const std::string filename = argv[2];

	int status = 0;
	tpie::tpie_init();
	if (arg == "read") {
		read_lines(std::cout, filename);
	} else if (arg == "write") {
		write_lines(std::cin, filename);
	} else if (arg == "reverse") {
		reverse_lines(filename);
	} else if (arg == "sort") {
		sort_lines(filename);
	} else {
		print_usage(argv[0]);
		status = 1;
	}
	// Always pair tpie_init() with tpie_finish(), also for an unknown command.
	tpie::tpie_finish();
	return status;
}
80 changes: 80 additions & 0 deletions test/sort.cpp
@@ -0,0 +1,80 @@
// -*- mode: c++; tab-width: 4; indent-tabs-mode: t; c-file-style: "stroustrup"; -*-
// vi:set ts=4 sts=4 sw=4 noet cino=(0 :
// Copyright 2013, The TPIE development team
//
// This file is part of TPIE.
//
// TPIE is free software: you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the
// Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// TPIE is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
// License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with TPIE. If not, see <http://www.gnu.org/licenses/>

#include <iostream>

#include <tpie/tpie.h>
#include <tpie/pipelining.h>

namespace tp = tpie::pipelining;

// Pipelining node that reads text lines from standard input and pushes
// each line to the destination node it was constructed with.
template <typename dest_t>
class line_reader_type : public tp::node {
	// Copy of the node that receives the lines.
	dest_t dest;

public:
	// Store the destination and register it as a push destination.
	line_reader_type(const dest_t & next)
		: dest(next)
	{
		this->add_push_destination(next);
	}

	// Push every line available on std::cin, without its terminator,
	// until std::getline fails (end of input or stream error).
	void go() {
		for (std::string text; std::getline(std::cin, text); ) {
			dest.push(text);
		}
	}
};

// Build the pipeline fragment that starts a pipeline with a line_reader_type node.
tp::pipe_begin<tp::factory_0<line_reader_type> >
line_reader() {
	typedef tp::factory_0<line_reader_type> factory_type;
	return factory_type();
}

class line_writer_type : public tp::node {
public:
typedef std::string item_type;

line_writer_type() {
}

void push(const std::string & line) {
std::cout << line << '\n';
}
};

// Build the pipeline fragment that ends a pipeline with a line_writer_type node.
tp::pipe_end<tp::termfactory_0<line_writer_type> >
line_writer() {
	typedef tp::termfactory_0<line_writer_type> factory_type;
	return factory_type();
}

int main() {
tpie::tpie_init();
const tpie::memory_size_type memory = 100*1024*1024;
tpie::get_memory_manager().set_limit(memory);
{
tp::pipeline p = line_reader() | tp::serialization_pipesort() | line_writer();
p.plot(std::clog);
p();
}
tpie::log_info() << "Temp file usage: " << tpie::get_temp_file_usage() << std::endl;
tpie::tpie_finish();
return 0;
}
16 changes: 16 additions & 0 deletions test/speed_regression/CMakeLists.txt
Expand Up @@ -33,3 +33,19 @@ set_target_properties(pipeline_speed_test PROPERTIES FOLDER tpie/test)
add_executable(pipelining_sort_test pipelining_sort_test.cpp)
target_link_libraries(pipelining_sort_test tpie)
set_target_properties(pipelining_sort_test PROPERTIES FOLDER tpie/test)

add_executable(serialization_speed serialization.cpp ${SPEED_DEPS})
target_link_libraries(serialization_speed tpie)
set_target_properties(serialization_speed PROPERTIES FOLDER tpie/test)

add_executable(numbergen numbergen.cpp ${SPEED_DEPS})
target_link_libraries(numbergen tpie)
set_target_properties(numbergen PROPERTIES FOLDER tpie/test)

add_executable(fractile_tpie fractile_tpie.cpp ${SPEED_DEPS})
target_link_libraries(fractile_tpie tpie)
set_target_properties(fractile_tpie PROPERTIES FOLDER tpie/test)

add_executable(fractile_serialization fractile_serialization.cpp ${SPEED_DEPS})
target_link_libraries(fractile_serialization tpie)
set_target_properties(fractile_serialization PROPERTIES FOLDER tpie/test)

0 comments on commit 51f4da6

Please sign in to comment.