Skip to content

Commit

Permalink
LocalStore::addToStoreFromDump copy in chunks
Browse files Browse the repository at this point in the history
Rather than copying byte-by-byte, we let the coroutine know how much
data we would like it to send back to us.
  • Loading branch information
Ericson2314 committed Jul 14, 2020
1 parent 9de96ef commit 592851f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 22 deletions.
16 changes: 9 additions & 7 deletions src/libstore/local-store.cc
Expand Up @@ -1036,11 +1036,13 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
{
return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) {
while (1) {
uint8_t buf[1];
auto n = dump.read(buf, 1);
constexpr size_t bufSize = 1024;
uint8_t buf[bufSize];
auto n = dump.read(buf, std::min(wanted, bufSize));
sink(buf, n);
// When control is yielded back to us, `wanted` will have been updated.
}
});
}
Expand All @@ -1051,7 +1053,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
{
Path srcPath(absPath(_srcPath));

return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) {
if (method == FileIngestionMethod::Recursive)
dumpPath(srcPath, sink, filter);
else
Expand All @@ -1062,7 +1064,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,

StorePath LocalStore::addToStoreCommon(
const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
std::function<void(Sink &)> demux)
std::function<void(Sink &, size_t &)> demux)
{
/* For computing the NAR hash. */
auto sha256Sink = std::make_unique<HashSink>(htSHA256);
Expand All @@ -1083,7 +1085,7 @@ StorePath LocalStore::addToStoreCommon(
bool inMemory = true;
std::string nar;

auto source = sinkToSource([&](Sink & sink) {
auto source = sinkToSource([&](Sink & sink, size_t & wanted) {
LambdaSink sink2([&](const unsigned char * buf, size_t len) {
(*sha256Sink)(buf, len);
if (hashSink) (*hashSink)(buf, len);
Expand All @@ -1101,7 +1103,7 @@ StorePath LocalStore::addToStoreCommon(

if (!inMemory) sink(buf, len);
});
demux(sink2);
demux(sink2, wanted);
});

std::unique_ptr<AutoDelete> delTempDir;
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-store.hh
Expand Up @@ -292,7 +292,7 @@ private:

StorePath addToStoreCommon(
const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
std::function<void(Sink &)> demux);
std::function<void(Sink &, size_t &)> demux);

Path getRealStoreDir() override { return realStoreDir; }

Expand Down
33 changes: 20 additions & 13 deletions src/libutil/serialise.cc
Expand Up @@ -165,35 +165,43 @@ size_t StringSource::read(unsigned char * data, size_t len)
#endif

std::unique_ptr<Source> sinkToSource(
std::function<void(Sink &)> fun,
std::function<void(Sink &, size_t &)> fun,
std::function<void()> eof)
{
struct SinkToSource : Source
{
typedef boost::coroutines2::coroutine<std::string> coro_t;
typedef boost::coroutines2::coroutine<std::basic_string<uint8_t>> coro_t;

std::function<void(Sink &)> fun;
std::function<void(Sink &, size_t &)> fun;
std::function<void()> eof;
std::optional<coro_t::pull_type> coro;
bool started = false;

SinkToSource(std::function<void(Sink &)> fun, std::function<void()> eof)
/* It would be nicer if the coroutine could both take arguments and
return a value, but unfortunately that capability was removed from
Boost's implementation for some reason, so we use some extra state
instead.
*/
size_t wanted = 0;

SinkToSource(std::function<void(Sink &, size_t &)> fun, std::function<void()> eof)
: fun(fun), eof(eof)
{
}

std::string cur;
std::basic_string<uint8_t> cur;
size_t pos = 0;

size_t read(unsigned char * data, size_t len) override
{
if (!coro)
wanted = len < cur.size() ? 0 : len - cur.size();
if (!coro) {
coro = coro_t::pull_type([&](coro_t::push_type & yield) {
LambdaSink sink([&](const unsigned char * data, size_t len) {
if (len) yield(std::string((const char *) data, len));
LambdaSink sink([&](const uint8_t * data, size_t len) {
if (len) yield(std::basic_string<uint8_t> { data, len });
});
fun(sink);
fun(sink, wanted);
});
}

if (!*coro) { eof(); abort(); }

Expand All @@ -203,11 +211,10 @@ std::unique_ptr<Source> sinkToSource(
pos = 0;
}

auto n = std::min(cur.size() - pos, len);
memcpy(data, (unsigned char *) cur.data() + pos, n);
pos += n;
auto numCopied = cur.copy(data, len, pos);
pos += numCopied;

return n;
return numCopied;
}
};

Expand Down
11 changes: 10 additions & 1 deletion src/libutil/serialise.hh
Expand Up @@ -260,11 +260,20 @@ struct LambdaSource : Source
/* Convert a function that feeds data into a Sink into a Source. The
Source executes the function as a coroutine. */
std::unique_ptr<Source> sinkToSource(
std::function<void(Sink &)> fun,
std::function<void(Sink &, size_t &)> fun,
std::function<void()> eof = []() {
throw EndOfFile("coroutine has finished");
});

static inline std::unique_ptr<Source> sinkToSource(
std::function<void(Sink &)> fun,
std::function<void()> eof = []() {
throw EndOfFile("coroutine has finished");
})
{
return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof);
}


void writePadding(size_t len, Sink & sink);
void writeString(const unsigned char * buf, size_t len, Sink & sink);
Expand Down

0 comments on commit 592851f

Please sign in to comment.