Skip to content

Commit

Permalink
Use xxHash instead of SHA-1 for block incremental checksums.
Browse files Browse the repository at this point in the history
xxHash is significantly faster than SHA-1 so this helps reduce the overhead of the feature.

A variable number of bytes are used from the xxHash depending on the block size with a minimum of six bytes for the smallest block size. This keeps the maps smaller while still providing enough bits to detect block changes.
  • Loading branch information
dwsteele committed Mar 9, 2023
1 parent 8b5153a commit 210bed4
Show file tree
Hide file tree
Showing 36 changed files with 6,218 additions and 129 deletions.
3 changes: 3 additions & 0 deletions doc/xml/release.xml
Expand Up @@ -48,10 +48,13 @@
<commit subject="Block-level incremental backup super blocks.">
<github-pull-request id="2011"/>
</commit>
<commit subject="Use xxHash instead of SHA-1 for block incremental checksums."/>

<release-item-contributor-list>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="john.morris"/>
<release-item-reviewer id="stephen.frost"/>
<release-item-reviewer id="stefan.fercot"/>
</release-item-contributor-list>

<p>Block-level incremental backup (BETA).</p>
Expand Down
1 change: 1 addition & 0 deletions src/Makefile.in
Expand Up @@ -111,6 +111,7 @@ SRCS = \
common/crypto/cipherBlock.c \
common/crypto/common.c \
common/crypto/hash.c \
common/crypto/xxhash.c \
common/exec.c \
common/fork.c \
common/ini.c \
Expand Down
3 changes: 3 additions & 0 deletions src/build/config/config.yaml
Expand Up @@ -1866,6 +1866,9 @@ option:
repo-block-age-map:
inherit: repo-block-size-map

repo-block-checksum-size-map:
inherit: repo-block-size-map

repo-block-size-super:
section: global
group: repo
Expand Down
10 changes: 10 additions & 0 deletions src/build/help/help.xml
Expand Up @@ -532,6 +532,16 @@
<example>7=2</example>
</config-key>

<config-key id="repo-block-checksum-size-map" name="Block Incremental Checksum Size Map">
<summary>Block incremental checksum size map.</summary>

<text>
<p>Map block size to checksum size. Smaller checksums save space in the map but may not be able to reliably detect changes in the block.</p>
</text>

<example>7=2</example>
</config-key>

<config-key id="repo-block-size-map" name="Block Incremental Size Map">
<summary>Block incremental size map.</summary>

Expand Down
75 changes: 75 additions & 0 deletions src/command/backup/backup.c
Expand Up @@ -294,13 +294,26 @@ static const ManifestBlockIncrAgeMap manifestBlockIncrAgeMapDefault[] =
{.fileAge = 7 * SEC_PER_DAY, .blockMultiplier = 2},
};

// Checksum size map. Each entry pairs a block size with the number of checksum bytes kept for blocks of that size. Entries are
// listed from the largest block size to the smallest and the checksum size drops by one byte per entry, so larger blocks carry
// larger checksums. All sizes are expressed relative to BLOCK_INCR_CHECKSUM_SIZE_MIN.
//
// NOTE(review): block sizes below the last entry presumably fall back to BLOCK_INCR_CHECKSUM_SIZE_MIN -- confirm in the code that
// performs the lookup, which is not part of this file section.
static const ManifestBlockIncrChecksumSizeMap manifestBlockIncrChecksumSizeMapDefault[] =
{
{.blockSize = 4 * 1024 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 6},
{.blockSize = 2 * 1024 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 5},
{.blockSize = 1024 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 4},
{.blockSize = 512 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 3},
{.blockSize = 128 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 2},
{.blockSize = 32 * 1024, .checksumSize = BLOCK_INCR_CHECKSUM_SIZE_MIN + 1},
};

// All maps. Groups the default size, age, and checksum size tables together with the entry count of each table (taken with
// LENGTH_OF() so the counts track the array definitions).
//
// NOTE(review): presumably this is the starting value that user-configured maps replace table by table -- confirm in
// backupBlockIncrMap().
static const ManifestBlockIncrMap manifestBlockIncrMap =
{
.sizeMap = manifestBlockIncrSizeMapDefault,
.sizeMapSize = LENGTH_OF(manifestBlockIncrSizeMapDefault),
.ageMap = manifestBlockIncrAgeMapDefault,
.ageMapSize = LENGTH_OF(manifestBlockIncrAgeMapDefault),
.checksumSizeMap = manifestBlockIncrChecksumSizeMapDefault,
.checksumSizeMapSize = LENGTH_OF(manifestBlockIncrChecksumSizeMapDefault),
};

// Convert map size
Expand Down Expand Up @@ -337,6 +350,37 @@ backupBlockIncrMapSize(ConfigOption optionId, unsigned int optionKeyIdx, const S
FUNCTION_TEST_RETURN(UINT, result);
}

// Convert a checksum size map value to an unsigned int and validate it. The option id and key index are used only to name the
// option in the error message. Throws OptionInvalidValueError when the value is smaller than BLOCK_INCR_CHECKSUM_SIZE_MIN, which
// is also how a value that could not be converted at all ends up being reported since the size is still zero in that case.
//
// NOTE(review): no upper bound is enforced here. BlockMapItem.checksum holds XX_HASH_SIZE_MAX bytes, so confirm that larger
// values are rejected somewhere before they reach the block map.
static unsigned int
backupBlockIncrMapChecksumSize(ConfigOption optionId, unsigned int optionKeyIdx, const Variant *const value)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM(ENUM, optionId);
        FUNCTION_TEST_PARAM(UINT, optionKeyIdx);
        FUNCTION_TEST_PARAM(VARIANT, value);
    FUNCTION_TEST_END();

    // Stays zero when the conversion below fails
    unsigned int checksumSize = 0;

    TRY_BEGIN()
    {
        checksumSize = varUIntForce(value);
    }
    CATCH_ANY()
    {
        // The conversion error is intentionally dropped -- the check below reports the problem with the option name included
    }
    TRY_END();

    // Reject anything below the minimum, including values that failed to convert
    if (checksumSize < BLOCK_INCR_CHECKSUM_SIZE_MIN)
    {
        const String *const valueStr = varStr(value);
        const char *const optionName = cfgParseOptionKeyIdxName(optionId, optionKeyIdx);

        THROW_FMT(OptionInvalidValueError, "'%s' is not valid for '%s' option", strZ(valueStr), optionName);
    }

    FUNCTION_TEST_RETURN(UINT, checksumSize);
}

static ManifestBlockIncrMap
backupBlockIncrMap(void)
{
Expand Down Expand Up @@ -404,6 +448,36 @@ backupBlockIncrMap(void)
result.ageMap = lstGet(map, 0);
result.ageMapSize = lstSize(map);
}

// Build checksum size map. When the checksum size map option is set, each key is converted to a block size and each value to a
// checksum size, then the entries are sorted in descending order before replacing the map in the result.
const KeyValue *const manifestBlockIncrChecksumSizeKv = cfgOptionKvNull(cfgOptRepoBlockChecksumSizeMap);

if (manifestBlockIncrChecksumSizeKv != NULL)
{
    List *const map = lstNewP(sizeof(ManifestBlockIncrChecksumSizeMap), .comparator = lstComparatorUInt);
    const VariantList *const mapKeyList = kvKeyList(manifestBlockIncrChecksumSizeKv);

    for (unsigned int mapKeyIdx = 0; mapKeyIdx < varLstSize(mapKeyList); mapKeyIdx++)
    {
        const Variant *mapKey = varLstGet(mapKeyList, mapKeyIdx);

        // Pass the checksum size map option to the converters so an invalid key or value is reported against the option the
        // user actually set rather than against the block size map option
        ManifestBlockIncrChecksumSizeMap manifestBuildBlockIncrChecksumSizeMap =
        {
            .blockSize = backupBlockIncrMapSize(
                cfgOptRepoBlockChecksumSizeMap, cfgOptionIdxDefault(cfgOptRepoBlockChecksumSizeMap), varStr(mapKey)),
            .checksumSize = backupBlockIncrMapChecksumSize(
                cfgOptRepoBlockChecksumSizeMap, cfgOptionIdxDefault(cfgOptRepoBlockChecksumSizeMap),
                kvGet(manifestBlockIncrChecksumSizeKv, mapKey)),
        };

        lstAdd(map, &manifestBuildBlockIncrChecksumSizeMap);
    }

    // Largest block size first, matching the order of the default map
    lstSort(map, sortOrderDesc);

    result.checksumSizeMap = lstGet(map, 0);
    result.checksumSizeMapSize = lstSize(map);
}
}

FUNCTION_TEST_RETURN_TYPE(ManifestBlockIncrMap, result);
Expand Down Expand Up @@ -1914,6 +1988,7 @@ backupJobCallback(void *data, unsigned int clientIdx)
if (blockIncr)
{
pckWriteU64P(param, file.blockIncrSize);
pckWriteU64P(param, file.blockIncrChecksumSize);
pckWriteU64P(param, jobData->blockIncrSizeSuper);

if (file.blockIncrMapSize != 0)
Expand Down
24 changes: 15 additions & 9 deletions src/command/backup/blockIncr.c
Expand Up @@ -7,7 +7,7 @@ Block Incremental Filter
#include "command/backup/blockMap.h"
#include "common/compress/helper.h"
#include "common/crypto/cipherBlock.h"
#include "common/crypto/hash.h"
#include "common/crypto/xxhash.h"
#include "common/debug.h"
#include "common/io/bufferRead.h"
#include "common/io/bufferWrite.h"
Expand Down Expand Up @@ -39,6 +39,7 @@ typedef struct BlockIncr
uint64_t blockOffset; // Block offset
uint64_t superBlockSize; // Super block
size_t blockSize; // Block size
size_t checksumSize; // Checksum size
Buffer *block; // Block buffer

Buffer *blockOut; // Block output buffer
Expand Down Expand Up @@ -124,16 +125,15 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output)
MEM_CONTEXT_TEMP_BEGIN()
{
// Get block checksum
const Buffer *const checksum = cryptoHashOne(hashTypeSha1, this->block);
const Buffer *const checksum = xxHashOne(this->checksumSize, this->block);

// Does the block exist in the input map?
const BlockMapItem *const blockMapItemIn =
this->blockMapPrior != NULL && this->blockNo < blockMapSize(this->blockMapPrior) ?
blockMapGet(this->blockMapPrior, this->blockNo) : NULL;

// If the block is new or has changed then write it
if (blockMapItemIn == NULL ||
memcmp(blockMapItemIn->checksum, bufPtrConst(checksum), bufUsed(checksum)) != 0)
if (blockMapItemIn == NULL || memcmp(blockMapItemIn->checksum, bufPtrConst(checksum), this->checksumSize) != 0)
{
// Begin the super block
if (this->blockOutWrite == NULL)
Expand Down Expand Up @@ -273,7 +273,7 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output)

// Write the map
ioWriteOpen(write);
blockMapWrite(this->blockMapOut, write, this->blockSize == this->superBlockSize);
blockMapWrite(this->blockMapOut, write, this->blockSize == this->superBlockSize, this->checksumSize);
ioWriteClose(write);

// Get total bytes written for the map
Expand Down Expand Up @@ -382,12 +382,14 @@ blockIncrInputSame(const THIS_VOID)
/**********************************************************************************************************************************/
FN_EXTERN IoFilter *
blockIncrNew(
const uint64_t superBlockSize, const size_t blockSize, const unsigned int reference, const uint64_t bundleId,
const uint64_t bundleOffset, const Buffer *const blockMapPrior, const IoFilter *const compress, const IoFilter *const encrypt)
const uint64_t superBlockSize, const size_t blockSize, const size_t checksumSize, const unsigned int reference,
const uint64_t bundleId, const uint64_t bundleOffset, const Buffer *const blockMapPrior, const IoFilter *const compress,
const IoFilter *const encrypt)
{
FUNCTION_LOG_BEGIN(logLevelTrace);
FUNCTION_LOG_PARAM(UINT64, superBlockSize);
FUNCTION_LOG_PARAM(SIZE, blockSize);
FUNCTION_LOG_PARAM(SIZE, checksumSize);
FUNCTION_LOG_PARAM(UINT, reference);
FUNCTION_LOG_PARAM(UINT64, bundleId);
FUNCTION_LOG_PARAM(UINT64, bundleOffset);
Expand All @@ -407,6 +409,7 @@ blockIncrNew(
.memContext = memContextCurrent(),
.superBlockSize = superBlockSize,
.blockSize = blockSize,
.checksumSize = checksumSize,
.reference = reference,
.bundleId = bundleId,
.blockOffset = bundleOffset,
Expand Down Expand Up @@ -439,7 +442,7 @@ blockIncrNew(

MEM_CONTEXT_PRIOR_BEGIN()
{
driver->blockMapPrior = blockMapNewRead(read);
driver->blockMapPrior = blockMapNewRead(read, checksumSize);
}
MEM_CONTEXT_PRIOR_END();
}
Expand All @@ -455,6 +458,7 @@ blockIncrNew(

pckWriteU64P(packWrite, driver->superBlockSize);
pckWriteU64P(packWrite, blockSize);
pckWriteU64P(packWrite, checksumSize);
pckWriteU32P(packWrite, reference);
pckWriteU64P(packWrite, bundleId);
pckWriteU64P(packWrite, bundleOffset);
Expand Down Expand Up @@ -491,6 +495,7 @@ blockIncrNewPack(const Pack *const paramList)
PackRead *const paramListPack = pckReadNew(paramList);
const uint64_t superBlockSize = pckReadU64P(paramListPack);
const size_t blockSize = (size_t)pckReadU64P(paramListPack);
const size_t checksumSize = (size_t)pckReadU64P(paramListPack);
const unsigned int reference = pckReadU32P(paramListPack);
const uint64_t bundleId = pckReadU64P(paramListPack);
const uint64_t bundleOffset = pckReadU64P(paramListPack);
Expand All @@ -511,7 +516,8 @@ blockIncrNewPack(const Pack *const paramList)
encrypt = cipherBlockNewPack(encryptParam);

result = ioFilterMove(
blockIncrNew(superBlockSize, blockSize, reference, bundleId, bundleOffset, blockMapPrior, compress, encrypt),
blockIncrNew(
superBlockSize, blockSize, checksumSize, reference, bundleId, bundleOffset, blockMapPrior, compress, encrypt),
memContextPrior());
}
MEM_CONTEXT_TEMP_END();
Expand Down
12 changes: 10 additions & 2 deletions src/command/backup/blockIncr.h
Expand Up @@ -29,6 +29,14 @@ The super block list is followed by the block map, which is encrypted separately
of the filter is the stored block map size. Combined with the repo size this allows the block map to be read separately.
The block incremental should be read using BlockDelta since reconstructing the delta is quite involved.
The xxHash algorithm is used to determine which blocks have changed. A 128-bit xxHash is generated and then checksumSize bytes are
used from the hash depending on the size of the block. xxHash claims to have excellent dispersion characteristics, which has been
verified by testing with SMHasher and a custom test suite. xxHash-32 is used for up to 4MiB content blocks in lz4 and the lower
32 bits of xxHash-64 are used for content blocks in zstd. In general 32-bit checksums are pretty common defaults across filesystems
such as Btrfs and ZFS. We use at least six bytes even for the smallest blocks since we are looking for changes and not just
corruption. Ultimately if there is a collision and a block change is not detected it will almost certainly be caught by the overall
SHA1 file checksum. This will fail the backup, which is not ideal, but better than restoring corrupted data.
***********************************************************************************************************************************/
#ifndef COMMAND_BACKUP_BLOCK_INCR_H
#define COMMAND_BACKUP_BLOCK_INCR_H
Expand All @@ -50,8 +58,8 @@ Constants needed to read the block number in a super block
Constructors
***********************************************************************************************************************************/
FN_EXTERN IoFilter *blockIncrNew(
uint64_t superBlockSize, size_t blockSize, unsigned int reference, uint64_t bundleId, uint64_t bundleOffset,
const Buffer *blockMapPrior, const IoFilter *compress, const IoFilter *encrypt);
uint64_t superBlockSize, size_t blockSize, size_t checksumSize, unsigned int reference, uint64_t bundleId,
uint64_t bundleOffset, const Buffer *blockMapPrior, const IoFilter *compress, const IoFilter *encrypt);
FN_EXTERN IoFilter *blockIncrNewPack(const Pack *paramList);

#endif
9 changes: 5 additions & 4 deletions src/command/backup/blockMap.c
Expand Up @@ -80,7 +80,7 @@ lstComparatorBlockMapReference(const void *const blockMapRef1, const void *const
}

FN_EXTERN BlockMap *
blockMapNewRead(IoRead *const map)
blockMapNewRead(IoRead *const map, size_t checksumSize)
{
FUNCTION_LOG_BEGIN(logLevelTrace);
FUNCTION_LOG_PARAM(IO_READ, map);
Expand All @@ -97,7 +97,7 @@ blockMapNewRead(IoRead *const map)
// Read all references in packed format
BlockMap *const this = blockMapNew();
List *const refList = lstNewP(sizeof(BlockMapReference), .comparator = lstComparatorBlockMapReference);
Buffer *const checksum = bufNew(HASH_TYPE_SHA1_SIZE);
Buffer *const checksum = bufNew(checksumSize);
int64_t sizeLast = 0;
bool referenceContinue = false;

Expand Down Expand Up @@ -247,12 +247,13 @@ blockMapNewRead(IoRead *const map)

/**********************************************************************************************************************************/
FN_EXTERN void
blockMapWrite(const BlockMap *const this, IoWrite *const output, bool blockEqual)
blockMapWrite(const BlockMap *const this, IoWrite *const output, const bool blockEqual, const size_t checksumSize)
{
FUNCTION_LOG_BEGIN(logLevelTrace);
FUNCTION_LOG_PARAM(BLOCK_MAP, this);
FUNCTION_LOG_PARAM(IO_WRITE, output);
FUNCTION_LOG_PARAM(BOOL, blockEqual);
FUNCTION_LOG_PARAM(SIZE, checksumSize);
FUNCTION_LOG_END();

ASSERT(this != NULL);
Expand Down Expand Up @@ -422,7 +423,7 @@ blockMapWrite(const BlockMap *const this, IoWrite *const output, bool blockEqual
}

// Write checksum
ioWrite(output, BUF(block->checksum, HASH_TYPE_SHA1_SIZE));
ioWrite(output, BUF(block->checksum, checksumSize));

blockIdx++;
}
Expand Down
8 changes: 4 additions & 4 deletions src/command/backup/blockMap.h
Expand Up @@ -13,18 +13,18 @@ Object type
***********************************************************************************************************************************/
typedef struct BlockMap BlockMap;

#include "common/crypto/hash.h"
#include "common/crypto/xxhash.h"
#include "common/type/list.h"
#include "common/type/object.h"

typedef struct BlockMapItem
{
unsigned int reference; // Reference to backup where the block is stored
unsigned char checksum[HASH_TYPE_SHA1_SIZE]; // Checksum of the block
uint64_t bundleId; // Bundle where the block is stored (0 if not bundled)
uint64_t offset; // Offset of super block into the bundle
uint64_t size; // Size of the super block (including compression, etc.)
uint64_t block; // Block no inside of super block
unsigned char checksum[XX_HASH_SIZE_MAX]; // Checksum of the block
} BlockMapItem;

/***********************************************************************************************************************************
Expand All @@ -38,7 +38,7 @@ blockMapNew(void)
}

// New block map from IO
FN_EXTERN BlockMap *blockMapNewRead(IoRead *map);
FN_EXTERN BlockMap *blockMapNewRead(IoRead *map, size_t checksumSize);

/***********************************************************************************************************************************
Functions
Expand All @@ -52,7 +52,7 @@ blockMapAdd(BlockMap *const this, const BlockMapItem *const item)
}

// Write map to IO
FN_EXTERN void blockMapWrite(const BlockMap *this, IoWrite *output, bool blockEqual);
FN_EXTERN void blockMapWrite(const BlockMap *this, IoWrite *output, bool blockEqual, size_t checksumSize);

/***********************************************************************************************************************************
Getters/Setters
Expand Down
4 changes: 2 additions & 2 deletions src/command/backup/file.c
Expand Up @@ -254,8 +254,8 @@ backupFile(
ioFilterGroupAdd(
ioReadFilterGroup(storageReadIo(read)),
blockIncrNew(
file->blockIncrSuperSize, file->blockIncrSize, blockIncrReference, bundleId, bundleOffset, blockMap,
compress, encrypt));
file->blockIncrSuperSize, file->blockIncrSize, file->blockIncrChecksumSize, blockIncrReference,
bundleId, bundleOffset, blockMap, compress, encrypt));

repoChecksum = true;
}
Expand Down
1 change: 1 addition & 0 deletions src/command/backup/file.h
Expand Up @@ -34,6 +34,7 @@ typedef struct BackupFile
const Buffer *pgFileChecksum; // Expected pg file checksum
bool pgFileChecksumPage; // Validate page checksums?
size_t blockIncrSize; // Perform block incremental on this file?
size_t blockIncrChecksumSize; // Block checksum size
uint64_t blockIncrSuperSize; // Size of the super block
const String *blockIncrMapPriorFile; // File containing prior block incremental map (NULL if none)
uint64_t blockIncrMapPriorOffset; // Offset of prior block incremental map
Expand Down
1 change: 1 addition & 0 deletions src/command/backup/protocol.c
Expand Up @@ -53,6 +53,7 @@ backupFileProtocol(PackRead *const param, ProtocolServer *const server)

if (file.blockIncrSize > 0)
{
file.blockIncrChecksumSize = (size_t)pckReadU64P(param);
file.blockIncrSuperSize = pckReadU64P(param);
file.blockIncrMapPriorFile = pckReadStrP(param);

Expand Down

0 comments on commit 210bed4

Please sign in to comment.