Skip to content

Commit

Permalink
Optimize restore command for file bundling.
Browse files Browse the repository at this point in the history
Since files are stored sequentially in a bundle, it is often possible to restore multiple files with a single read. Previously, each restored file required a separate read. Reducing the number of reads is particularly beneficial for object stores, but performance should also improve on any file system.

Currently, if there is a gap between files then a new read is required. In the future we might set a limit on how large a gap can be skipped without starting a new read.
  • Loading branch information
dwsteele committed Mar 9, 2022
1 parent f7ab002 commit dca6da8
Show file tree
Hide file tree
Showing 7 changed files with 557 additions and 213 deletions.
6 changes: 6 additions & 0 deletions doc/xml/release.xml
Expand Up @@ -46,9 +46,15 @@
<release-item>
<commit subject="Remove redundant restoreFile() test and improve coverage."/>
<commit subject="Add limit parameter to ioCopyP()."/>
<commit subject="Optimize restore command for file bundling.">
<github-issue id="1149"/>
<github-pull-request id="1683"/>
</commit>

<release-item-contributor-list>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="reid.thompson"/>
<release-item-reviewer id="stefan.fercot"/>
</release-item-contributor-list>

<p>Improve small file support.</p>
Expand Down
246 changes: 149 additions & 97 deletions src/command/restore/file.c
Expand Up @@ -20,145 +20,185 @@ Restore File
#include "storage/helper.h"

/**********************************************************************************************************************************/
bool
restoreFile(
const String *const repoFile, unsigned int repoIdx, const uint64_t offset, const Variant *const limit,
const CompressType repoFileCompressType, const String *const pgFile, const String *const pgFileChecksum, const bool pgFileZero,
const uint64_t pgFileSize, const time_t pgFileModified, const mode_t pgFileMode, const String *const pgFileUser,
const String *const pgFileGroup, const time_t copyTimeBegin, const bool delta, const bool deltaForce,
const String *const cipherPass)
List *restoreFile(
const String *const repoFile, const unsigned int repoIdx, const CompressType repoFileCompressType, const time_t copyTimeBegin,
const bool delta, const bool deltaForce, const String *const cipherPass, const List *const fileList)
{
FUNCTION_LOG_BEGIN(logLevelDebug);
FUNCTION_LOG_PARAM(STRING, repoFile);
FUNCTION_LOG_PARAM(UINT, repoIdx);
FUNCTION_LOG_PARAM(UINT64, offset);
FUNCTION_LOG_PARAM(VARIANT, limit);
FUNCTION_LOG_PARAM(ENUM, repoFileCompressType);
FUNCTION_LOG_PARAM(STRING, pgFile);
FUNCTION_LOG_PARAM(STRING, pgFileChecksum);
FUNCTION_LOG_PARAM(BOOL, pgFileZero);
FUNCTION_LOG_PARAM(UINT64, pgFileSize);
FUNCTION_LOG_PARAM(TIME, pgFileModified);
FUNCTION_LOG_PARAM(MODE, pgFileMode);
FUNCTION_LOG_PARAM(STRING, pgFileUser);
FUNCTION_LOG_PARAM(STRING, pgFileGroup);
FUNCTION_LOG_PARAM(TIME, copyTimeBegin);
FUNCTION_LOG_PARAM(BOOL, delta);
FUNCTION_LOG_PARAM(BOOL, deltaForce);
FUNCTION_TEST_PARAM(STRING, cipherPass);
FUNCTION_LOG_PARAM(LIST, fileList); // List of files to restore
FUNCTION_LOG_END();

ASSERT(repoFile != NULL);
ASSERT(pgFile != NULL);
ASSERT(limit == NULL || varType(limit) == varTypeUInt64);

// Was the file copied?
bool result = true;

// Is the file compressible during the copy?
bool compressible = true;
// Restore file results
List *result = NULL;

MEM_CONTEXT_TEMP_BEGIN()
{
// Perform delta if requested. Delta zero-length files to avoid overwriting the file if the timestamp is correct.
if (delta && !pgFileZero)
result = lstNewP(sizeof(RestoreFileResult));

// Check files to determine which ones need to be restored
for (unsigned int fileIdx = 0; fileIdx < lstSize(fileList); fileIdx++)
{
// Perform delta if the file exists
StorageInfo info = storageInfoP(storagePg(), pgFile, .ignoreMissing = true, .followLink = true);
const RestoreFile *const file = lstGet(fileList, fileIdx);
ASSERT(file->name != NULL);
ASSERT(file->limit == NULL || varType(file->limit) == varTypeUInt64);

RestoreFileResult *const fileResult = lstAdd(
result, &(RestoreFileResult){.manifestFile = file->manifestFile, .result = restoreResultCopy});

if (info.exists)
// Perform delta if requested. Delta zero-length files to avoid overwriting the file if the timestamp is correct.
if (delta && !file->zero)
{
// If force then use size/timestamp delta
if (deltaForce)
{
// Make sure that timestamp/size are equal and that timestamp is before the copy start time of the backup
if (info.size == pgFileSize && info.timeModified == pgFileModified && info.timeModified < copyTimeBegin)
result = false;
}
// Else use size and checksum
else
// Perform delta if the file exists
StorageInfo info = storageInfoP(storagePg(), file->name, .ignoreMissing = true, .followLink = true);

if (info.exists)
{
// Only continue delta if the file size is as expected
if (info.size == pgFileSize)
// If force then use size/timestamp delta
if (deltaForce)
{
// Generate checksum for the file if size is not zero
IoRead *read = NULL;

if (info.size != 0)
// Make sure that timestamp/size are equal and that timestamp is before the copy start time of the backup
if (info.size == file->size && info.timeModified == file->timeModified && info.timeModified < copyTimeBegin)
fileResult->result = restoreResultPreserve;
}
// Else use size and checksum
else
{
// Only continue delta if the file size is as expected
if (info.size == file->size)
{
read = storageReadIo(storageNewReadP(storagePgWrite(), pgFile));
ioFilterGroupAdd(ioReadFilterGroup(read), cryptoHashNew(HASH_TYPE_SHA1_STR));
ioReadDrain(read);
}
// Generate checksum for the file if size is not zero
IoRead *read = NULL;

// If size and checksum are equal then no need to copy the file
if (pgFileSize == 0 ||
strEq(
pgFileChecksum,
pckReadStrP(ioFilterGroupResultP(ioReadFilterGroup(read), CRYPTO_HASH_FILTER_TYPE))))
{
// Even if hash/size are the same set the time back to backup time. This helps with unit testing, but
// also presents a pristine version of the database after restore.
if (info.timeModified != pgFileModified)
if (info.size != 0)
{
THROW_ON_SYS_ERROR_FMT(
utime(
strZ(storagePathP(storagePg(), pgFile)),
&((struct utimbuf){.actime = pgFileModified, .modtime = pgFileModified})) == -1,
FileInfoError, "unable to set time for '%s'", strZ(storagePathP(storagePg(), pgFile)));
read = storageReadIo(storageNewReadP(storagePgWrite(), file->name));
ioFilterGroupAdd(ioReadFilterGroup(read), cryptoHashNew(HASH_TYPE_SHA1_STR));
ioReadDrain(read);
}

result = false;
// If the checksum is also equal (or file is zero size) then no need to copy the file
if (file->size == 0 ||
strEq(
file->checksum,
pckReadStrP(ioFilterGroupResultP(ioReadFilterGroup(read), CRYPTO_HASH_FILTER_TYPE))))
{
// Even if hash/size are the same set the time back to backup time. This helps with unit testing,
// but also presents a pristine version of the database after restore.
if (info.timeModified != file->timeModified)
{
THROW_ON_SYS_ERROR_FMT(
utime(
strZ(storagePathP(storagePg(), file->name)),
&((struct utimbuf){.actime = file->timeModified, .modtime = file->timeModified})) == -1,
FileInfoError, "unable to set time for '%s'", strZ(storagePathP(storagePg(), file->name)));
}

fileResult->result = restoreResultPreserve;
}
}
}
}
}
}

// Copy file from repository to database or create zero-length/sparse file
if (result)
{
// Create destination file
StorageWrite *pgFileWrite = storageNewWriteP(
storagePgWrite(), pgFile, .modeFile = pgFileMode, .user = pgFileUser, .group = pgFileGroup,
.timeModified = pgFileModified, .noAtomic = true, .noCreatePath = true, .noSyncPath = true);

// If size is zero/sparse no need to actually copy
if (pgFileSize == 0 || pgFileZero)
// Create zeroed and zero-length files
if (fileResult->result == restoreResultCopy && (file->size == 0 || file->zero))
{
// Create destination file
StorageWrite *pgFileWrite = storageNewWriteP(
storagePgWrite(), file->name, .modeFile = file->mode, .user = file->user, .group = file->group,
.timeModified = file->timeModified, .noAtomic = true, .noCreatePath = true, .noSyncPath = true);

ioWriteOpen(storageWriteIo(pgFileWrite));

// Truncate the file to specified length (note in this case the file will grow, not shrink)
if (pgFileZero)
// Truncate the file to specified length (note in this case the file will grow, not shrink)
if (file->zero)
{
THROW_ON_SYS_ERROR_FMT(
ftruncate(ioWriteFd(storageWriteIo(pgFileWrite)), (off_t)pgFileSize) == -1, FileWriteError,
"unable to truncate '%s'", strZ(pgFile));

// Report the file as not copied
result = false;
ftruncate(ioWriteFd(storageWriteIo(pgFileWrite)), (off_t)file->size) == -1, FileWriteError,
"unable to truncate '%s'", strZ(file->name));
}

ioWriteClose(storageWriteIo(pgFileWrite));

// Report the file as zeroed or zero-length
fileResult->result = restoreResultZero;
}
// Else perform the copy
else
}

// Copy files from repository to database
StorageRead *repoFileRead = NULL;
uint64_t repoFileLimit = 0;

for (unsigned int fileIdx = 0; fileIdx < lstSize(fileList); fileIdx++)
{
const RestoreFile *const file = lstGet(fileList, fileIdx);
const RestoreFileResult *const fileResult = lstGet(result, fileIdx);

// Copy file from repository to database
if (fileResult->result == restoreResultCopy)
{
// If no repo file is currently open
if (repoFileLimit == 0)
{
// If a limit is specified then we need to use it, even if there is only one pg file to copy, because we might
// be reading from the middle of a repo file containing many pg files
if (file->limit != NULL)
{
ASSERT(varUInt64(file->limit) != 0);
repoFileLimit = varUInt64(file->limit);

// Determine how many files can be copied with one read
for (unsigned int fileNextIdx = fileIdx + 1; fileNextIdx < lstSize(fileList); fileNextIdx++)
{
// Only files that are being copied are considered
if (((const RestoreFileResult *)lstGet(result, fileNextIdx))->result == restoreResultCopy)
{
const RestoreFile *const fileNext = lstGet(fileList, fileNextIdx);
ASSERT(fileNext->limit != NULL && varUInt64(fileNext->limit) != 0);

// Break if the offset is not the first file's offset + the limit of all additional files so far
if (fileNext->offset != file->offset + repoFileLimit)
break;

repoFileLimit += varUInt64(fileNext->limit);
}
// Else if the file was not copied then there is a gap so break
else
break;
}
}

// Create and open the repo file
repoFileRead = storageNewReadP(
storageRepoIdx(repoIdx), repoFile,
.compressible = repoFileCompressType == compressTypeNone && cipherPass == NULL, .offset = file->offset,
.limit = repoFileLimit != 0 ? VARUINT64(repoFileLimit) : NULL);
ioReadOpen(storageReadIo(repoFileRead));
}

// Create pg file
StorageWrite *pgFileWrite = storageNewWriteP(
storagePgWrite(), file->name, .modeFile = file->mode, .user = file->user, .group = file->group,
.timeModified = file->timeModified, .noAtomic = true, .noCreatePath = true, .noSyncPath = true);

IoFilterGroup *filterGroup = ioWriteFilterGroup(storageWriteIo(pgFileWrite));

// Add decryption filter
if (cipherPass != NULL)
{
ioFilterGroupAdd(filterGroup, cipherBlockNew(cipherModeDecrypt, cipherTypeAes256Cbc, BUFSTR(cipherPass), NULL));
compressible = false;
}

// Add decompression filter
if (repoFileCompressType != compressTypeNone)
{
ioFilterGroupAdd(filterGroup, decompressFilter(repoFileCompressType));
compressible = false;
}

// Add sha1 filter
ioFilterGroupAdd(filterGroup, cryptoHashNew(HASH_TYPE_SHA1_STR));
Expand All @@ -167,23 +207,35 @@ restoreFile(
ioFilterGroupAdd(filterGroup, ioSizeNew());

// Copy file
storageCopyP(
storageNewReadP(
storageRepoIdx(repoIdx), repoFile, .compressible = compressible, .offset = offset, .limit = limit),
pgFileWrite);
ioWriteOpen(storageWriteIo(pgFileWrite));
ioCopyP(storageReadIo(repoFileRead), storageWriteIo(pgFileWrite), .limit = file->limit);
ioWriteClose(storageWriteIo(pgFileWrite));

// If more than one file is being copied from a single read then decrement the limit
if (repoFileLimit != 0)
repoFileLimit -= varUInt64(file->limit);

// Free the repo file when there are no more files to copy from it
if (repoFileLimit == 0)
storageReadFree(repoFileRead);

// Validate checksum
if (!strEq(pgFileChecksum, pckReadStrP(ioFilterGroupResultP(filterGroup, CRYPTO_HASH_FILTER_TYPE))))
if (!strEq(file->checksum, pckReadStrP(ioFilterGroupResultP(filterGroup, CRYPTO_HASH_FILTER_TYPE))))
{
THROW_FMT(
ChecksumError,
"error restoring '%s': actual checksum '%s' does not match expected checksum '%s'", strZ(pgFile),
strZ(pckReadStrP(ioFilterGroupResultP(filterGroup, CRYPTO_HASH_FILTER_TYPE))), strZ(pgFileChecksum));
"error restoring '%s': actual checksum '%s' does not match expected checksum '%s'", strZ(file->name),
strZ(pckReadStrP(ioFilterGroupResultP(filterGroup, CRYPTO_HASH_FILTER_TYPE))), strZ(file->checksum));
}

// Free the pg file
storageWriteFree(pgFileWrite);
}
}

lstMove(result, memContextPrior());
}
MEM_CONTEXT_TEMP_END();

FUNCTION_LOG_RETURN(BOOL, result);
FUNCTION_LOG_RETURN(LIST, result);
}

0 comments on commit dca6da8

Please sign in to comment.