Skip to content

Commit

Permalink
Add ZSTD_d_maxBlockSize parameter
Browse files Browse the repository at this point in the history
Reduces memory when blocks are guaranteed to be smaller than allowed by
the format. This is useful for streaming compression in conjunction with
ZSTD_c_maxBlockSize.

This PR saves 2 * (formatMaxBlockSize - paramMaxBlockSize) bytes of memory when streaming.
Once it is rebased on top of PR facebook#3616, it will save
3 * (formatMaxBlockSize - paramMaxBlockSize) bytes.
  • Loading branch information
terrelln committed Apr 15, 2023
1 parent e72e13a commit 8af1aec
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 9 deletions.
29 changes: 26 additions & 3 deletions lib/decompress/zstd_decompress.c
Expand Up @@ -245,6 +245,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0;
dctx->maxBlockSizeParam = 0;
}

static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
Expand Down Expand Up @@ -971,6 +972,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
}

/* Shrink the blockSizeMax if enabled */
if (dctx->maxBlockSizeParam != 0)
dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (size_t)dctx->maxBlockSizeParam);

/* Loop on each block */
while (1) {
BYTE* oBlockEnd = oend;
Expand Down Expand Up @@ -1819,6 +1824,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_d_maxBlockSize:
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
return bounds;

default:;
}
Expand Down Expand Up @@ -1863,6 +1872,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm;
return 0;
case ZSTD_d_maxBlockSize:
*value = dctx->maxBlockSizeParam;
return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
Expand Down Expand Up @@ -1900,6 +1912,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0;
return 0;
case ZSTD_d_maxBlockSize:
if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
dctx->maxBlockSizeParam = value;
return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
Expand Down Expand Up @@ -1927,9 +1943,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
return ZSTD_sizeof_DCtx(dctx);
}

size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
{
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
Expand All @@ -1939,6 +1955,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
return minRBSize;
}

/* Public wrapper around ZSTD_decodingBufferSize_internal().
 * Forwards windowSize and frameContentSize unchanged and passes
 * ZSTD_BLOCKSIZE_MAX as the block size cap, i.e. no reduction is
 * requested by this entry point.
 * @return : the value computed by ZSTD_decodingBufferSize_internal(). */
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
    size_t const blockSizeCap = ZSTD_BLOCKSIZE_MAX;
    return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, blockSizeCap);
}

size_t ZSTD_estimateDStreamSize(size_t windowSize)
{
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
Expand Down Expand Up @@ -2177,11 +2198,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
frameParameter_windowTooLarge, "");
if (zds->maxBlockSizeParam != 0)
zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (size_t)zds->maxBlockSizeParam);

/* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
: 0;

ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
Expand Down
6 changes: 3 additions & 3 deletions lib/decompress/zstd_decompress_block.c
Expand Up @@ -2054,6 +2054,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
{ /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src;
size_t const blockSizeMax = frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

/* Note : the wording of the specification
Expand All @@ -2064,7 +2065,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
* would consider this edge case as an error.
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
* for broader compatibility with the deployed ecosystem of zstd decoders */
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "");

/* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
Expand All @@ -2079,8 +2080,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
*/
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + MIN(dstCapacity, blockSizeMax), (BYTE const*)dctx->virtualStart);
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than ZSTD_maxShortOffset().
* We don't expect that to be the case in 64-bit mode.
Expand Down
1 change: 1 addition & 0 deletions lib/decompress/zstd_decompress_internal.h
Expand Up @@ -166,6 +166,7 @@ struct ZSTD_DCtx_s
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
int maxBlockSizeParam;

/* streaming */
ZSTD_dStreamStage streamStage;
Expand Down
20 changes: 19 additions & 1 deletion lib/zstd.h
Expand Up @@ -618,14 +618,16 @@ typedef enum {
* ZSTD_d_forceIgnoreChecksum
* ZSTD_d_refMultipleDDicts
* ZSTD_d_disableHuffmanAssembly
* ZSTD_d_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
ZSTD_d_experimentalParam1=1000,
ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002,
ZSTD_d_experimentalParam4=1003,
ZSTD_d_experimentalParam5=1004
ZSTD_d_experimentalParam5=1004,
ZSTD_d_experimentalParam6=1005

} ZSTD_dParameter;

Expand Down Expand Up @@ -2430,6 +2432,22 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
*/
#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5

/* ZSTD_d_maxBlockSize
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
* The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
*
* Forces the decompressor to reject blocks whose content size is
* larger than the configured maxBlockSize. When maxBlockSize is
* larger than the windowSize, the windowSize is used instead.
* This saves memory on the decoder when you know all blocks are small.
*
* This option is typically used in conjunction with ZSTD_c_maxBlockSize.
*
* WARNING: This causes the decoder to reject otherwise valid frames
* that have block sizes larger than the configured maxBlockSize.
*/
#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6


/*! ZSTD_DCtx_setFormat() :
* This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
Expand Down
3 changes: 3 additions & 0 deletions tests/fuzz/simple_round_trip.c
Expand Up @@ -90,6 +90,9 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
}
}
if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
}
dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
FUZZ_ZASSERT(dSize);
FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
Expand Down
24 changes: 22 additions & 2 deletions tests/fuzz/stream_round_trip.c
Expand Up @@ -63,6 +63,8 @@ static size_t compress(uint8_t *dst, size_t capacity,
size_t dstSize = 0;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
FUZZ_setRandomParameters(cctx, srcSize, producer);
int maxBlockSize;
FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));

while (srcSize > 0) {
ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer);
Expand Down Expand Up @@ -93,6 +95,8 @@ static size_t compress(uint8_t *dst, size_t capacity,
if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) {
size_t const remaining = in.size - in.pos;
FUZZ_setRandomParameters(cctx, remaining, producer);
/* Always use the same maxBlockSize */
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, maxBlockSize));
}
mode = -1;
}
Expand Down Expand Up @@ -132,6 +136,23 @@ static size_t compress(uint8_t *dst, size_t capacity,
return dstSize;
}

/* Streaming decompression helper for the round-trip fuzzer.
 *
 * Decompresses `src` (srcSize bytes) into `dst` (dstCapacity bytes) with
 * ZSTD_decompressStream(), using the `cctx` and `dctx` objects that are
 * declared outside this function.
 *
 * One bit of fuzzer data decides whether the decoder's ZSTD_d_maxBlockSize
 * is set to the compressor's current ZSTD_c_maxBlockSize. Otherwise the
 * parameter is explicitly reset to 0 (the default), so that a limit set
 * while processing an earlier input cannot persist if `dctx` is reused.
 *
 * @return : number of bytes written into `dst`.
 * Any zstd error, or a ZSTD_decompressStream() call that returns non-zero,
 * trips a fuzzer assertion. */
size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer)
{
    ZSTD_inBuffer in = {src, srcSize, 0};
    ZSTD_outBuffer out = {dst, dstCapacity, 0};
    int maxBlockSize;
    FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
    } else {
        /* 0 == default : do not restrict the block size for this input */
        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 0));
    }
    /* All input and output space is provided up front, so every call is
     * expected to return 0; loop in case more input remains afterwards. */
    while (in.pos < in.size) {
        size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
        FUZZ_ZASSERT(ret);
        FUZZ_ASSERT(ret == 0);
    }
    return out.pos;
}

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
Expand Down Expand Up @@ -163,8 +184,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)

{
size_t const cSize = compress(cBuf, neededBufSize, src, size, producer);
size_t const rSize =
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
size_t const rSize = decompress(rBuf, neededBufSize, cBuf, cSize, producer);
FUZZ_ZASSERT(rSize);
FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!");
Expand Down
19 changes: 19 additions & 0 deletions tests/fuzzer.c
Expand Up @@ -952,6 +952,25 @@ static int basicUnitTests(U32 const seed, double compressibility)
ZSTD_freeCDict(cdict);
ZSTD_freeCCtx(cctx);
}

/* Round trip with matching compressor / decompressor maxBlockSize (2048),
 * then lower the decoder limit to 1024 and expect decompression to fail. */
DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2K : ", testNb++);
{
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
    CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
    CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));

    cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
    CHECK_Z(cSize);
    /* decoder limit == compressor limit : must succeed */
    CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize));

    /* decoder limit (1024) below the compressor's limit (2048) : an error is expected.
     * NOTE(review): CHECK's definition is not visible here; confirm that it
     * fails the test when ZSTD_isError(...) evaluates to 0, otherwise this
     * expectation is never actually enforced. */
    CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
    CHECK(ZSTD_isError(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize)));

    ZSTD_freeDCtx(dctx);
    ZSTD_freeCCtx(cctx);
}
/* terminate the banner line so the next test starts on a fresh line */
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++);
{
Expand Down
75 changes: 75 additions & 0 deletions tests/zstreamtest.c
Expand Up @@ -722,6 +722,67 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
}
DISPLAYLEVEL(3, "OK \n");

/* Checks ZSTD_d_maxBlockSize in streaming mode : compares the DCtx size
 * after single-pass decompression, after streaming with a 2 KB limit, and
 * after streaming with default parameters, then verifies that a limit
 * smaller than the one used at compression time makes decompression fail.
 * NOTE(review): the messages suggest that the two-argument CHECK() used here
 * fails the test when its condition is TRUE; confirm against the macro. */
DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2KB : ", testNb++);
{
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    size_t singlePassSize, streamingSize, streaming2KSize;

    /* Compress with 2 KB blocks, windowLog 18, and no content size in the header */
    {
        ZSTD_CCtx* cctx = ZSTD_createCCtx();
        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18));
        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0));
        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
        cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize);
        CHECK_Z(cSize);
        ZSTD_freeCCtx(cctx);
    }

    /* Baseline : DCtx size after single-pass decompression */
    CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBufferSize, compressedBuffer, cSize));
    singlePassSize = ZSTD_sizeof_DCtx(dctx);
    CHECK_Z(singlePassSize);

    inBuff.src = compressedBuffer;
    inBuff.size = cSize;

    outBuff.dst = decodedBuffer;
    outBuff.size = decodedBufferSize;

    /* Streaming with the decoder limited to 2 KB blocks */
    CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));
    inBuff.pos = 0;
    outBuff.pos = 0;
    {
        size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
        CHECK_Z(r);
        CHECK(r != 0, "Entire frame must be decompressed");
    }
    streaming2KSize = ZSTD_sizeof_DCtx(dctx);
    CHECK_Z(streaming2KSize);

    /* Streaming again after resetting the session and all parameters */
    CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    inBuff.pos = 0;
    outBuff.pos = 0;
    {
        size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
        CHECK_Z(r);
        CHECK(r != 0, "Entire frame must be decompressed");
    }
    streamingSize = ZSTD_sizeof_DCtx(dctx);
    CHECK_Z(streamingSize);

    /* A 1 KB limit is below the 2 KB used at compression time */
    CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
    inBuff.pos = 0;
    outBuff.pos = 0;
    CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &outBuff, &inBuff)), "decompression must fail");

    /* Memory accounting : the 2 KB limit is expected to save exactly
     * 2 * (ZSTD_BLOCKSIZE_MAX - 2048) bytes compared to default streaming */
    CHECK(streamingSize < singlePassSize + (1 << 18) + 3 * ZSTD_BLOCKSIZE_MAX, "Streaming doesn't use the right amount of memory");
    CHECK(streamingSize != streaming2KSize + 2 * (ZSTD_BLOCKSIZE_MAX - 2048), "ZSTD_d_maxBlockSize didn't save the right amount of memory");
    DISPLAYLEVEL(3, "| %zu | %zu | %zu | ", singlePassSize, streaming2KSize, streamingSize);

    ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");

/* Decompression with ZSTD_d_stableOutBuffer */
cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1);
CHECK_Z(cSize);
Expand Down Expand Up @@ -2845,6 +2906,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );

/* Set max block size parameters */
if (FUZ_rand(&lseed) & 1) {
int maxBlockSize = (int)(FUZ_rand(&lseed) % ZSTD_BLOCKSIZE_MAX);
maxBlockSize = MAX(1024, maxBlockSize);
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_maxBlockSize, maxBlockSize, opaqueAPI) );
}

/* Apply parameters */
if (opaqueAPI) {
DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb);
Expand Down Expand Up @@ -2976,6 +3044,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
if (FUZ_rand(&lseed) & 1) {
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1));
}
if (FUZ_rand(&lseed) & 1) {
int maxBlockSize;
CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_maxBlockSize, &maxBlockSize));
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, maxBlockSize));
} else {
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, 0));
}
{ size_t decompressionResult = 1;
ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 };
ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
Expand Down

0 comments on commit 8af1aec

Please sign in to comment.