Skip to content

Commit

Permalink
fix #783
Browse files Browse the repository at this point in the history
LZ4_decompress_safe_partial()
now also supports a scenario where
nb_bytes_to_generate is <= block_decompressed_size
And
nb_bytes_to_read is >= block_compressed_size.

Previously, the only supported scenario was
nb_bytes_to_read == block_compress_size.

Pay attention that,
if nb_bytes_to_read is > block_compressed_size,
then, necessarily, it requires that
nb_bytes_to_generate is <= block_decompress_size.
If both are larger, it will generate corrupted data.
  • Loading branch information
Cyan4973 committed Aug 27, 2020
1 parent 3e3a006 commit c5d6f8a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 32 deletions.
39 changes: 23 additions & 16 deletions lib/lz4.c
Expand Up @@ -1813,7 +1813,8 @@ LZ4_decompress_generic(
if ((dict==usingExtDict) && (match < lowPrefix)) {
if (unlikely(op+length > oend-LASTLITERALS)) {
if (partialDecoding) {
length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */
DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
length = MIN(length, (size_t)(oend-op));
} else {
goto _output_error; /* end-of-block condition violated */
} }
Expand Down Expand Up @@ -1921,29 +1922,34 @@ LZ4_decompress_generic(
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
{
/* We've either hit the input parsing restriction or the output parsing restriction.
* If we've hit the input parsing condition then this must be the last sequence.
* If we've hit the output parsing condition then we are either using partialDecoding
* or we've hit the output parsing condition.
* In the normal scenario, decoding a full block, it must be the last sequence,
* otherwise it's an error (invalid input or dimensions).
* In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
*/
if (partialDecoding) {
/* Since we are partial decoding we may be in this block because of the output parsing
* restriction, which is not valid since the output buffer is allowed to be undersized.
*/
assert(endOnInput);
/* If we're in this block because of the input parsing condition, then we must be on the
* last sequence (or invalid), so we must check that we exactly consume the input.
DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
/* Finishing in the middle of a literals segment,
* due to lack of input.
*/
if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend) && (cpy != oend)) { goto _output_error; }
assert(ip+length <= iend);
/* We are finishing in the middle of a literals segment.
* Break after the copy.
if (ip+length > iend) {
length = (size_t)(iend-ip);
cpy = op + length;
}
/* Finishing in the middle of a literals segment,
* due to lack of output space.
*/
if (cpy > oend) {
cpy = oend;
assert(op<=oend);
length = (size_t)(oend-op);
}
assert(ip+length <= iend);
} else {
/* We must be on the last sequence because of the parsing limitations so check
* that we exactly regenerate the original size (must be exact when !endOnInput).
Expand All @@ -1954,14 +1960,15 @@ LZ4_decompress_generic(
*/
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
}
memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
ip += length;
op += length;
/* Necessarily EOF when !partialDecoding. When partialDecoding
* it is EOF if we've either filled the output buffer or hit
* the input parsing restriction.
/* Necessarily EOF when !partialDecoding.
* When partialDecoding, it is EOF if we've either
* filled the output buffer or
* can't proceed with reading an offset for following match.
*/
if (!partialDecoding || (cpy == oend) || (ip == iend)) {
if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
break;
}
} else {
Expand Down
34 changes: 22 additions & 12 deletions lib/lz4.h
Expand Up @@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
* into destination buffer 'dst' of size 'dstCapacity'.
* Up to 'targetOutputSize' bytes will be decoded.
* The function stops decoding on reaching this objective,
* which can boost performance when only the beginning of a block is required.
* The function stops decoding on reaching this objective.
* This can be useful to boost performance
* whenever only the beginning of a block is required.
*
* @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
* If source stream is detected malformed, function returns a negative result.
*
* Note : @return can be < targetOutputSize, if compressed block contains less data.
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
*
* Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
* and expects targetOutputSize <= dstCapacity.
* It effectively stops decoding on reaching targetOutputSize,
* Note 2 : targetOutputSize must be <= dstCapacity
*
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
* so dstCapacity is kind of redundant.
* This is because in a previous version of this function,
* decoding operation would not "break" a sequence in the middle.
* As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
* This is because in older versions of this function,
* decoding operation would still write complete sequences.
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
* it could write more bytes, though only up to dstCapacity.
* Some "margin" used to be required for this operation to work properly.
* This is no longer necessary.
* The function nonetheless keeps its signature, in an effort to not break API.
* Thankfully, this is no longer necessary.
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
*
* Note 4 : If srcSize is the exact size of the block,
* then targetOutputSize can be any value,
* including larger than the block's decompressed size.
* The function will, at most, generate block's decompressed size.
*
* Note 5 : If srcSize is _larger_ than block's compressed size,
* then targetOutputSize **MUST** be <= block's decompressed size.
* Otherwise, *silent corruption will occur*.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);

Expand Down
8 changes: 4 additions & 4 deletions tests/fuzzer.c
Expand Up @@ -618,13 +618,13 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c

/* Test partial decoding => must work */
FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
{ size_t const missingBytes = FUZ_rand(&randState) % (unsigned)blockSize;
int const targetSize = (int)((size_t)blockSize - missingBytes);
{ size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize;
int const targetSize = (int)((size_t)blockSize - missingOutBytes);
size_t const extraneousInBytes = FUZ_rand(&randState) % 2;
int const inCSize = (int)((size_t)compressedSize + extraneousInBytes);
char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
//DISPLAY("compressedSize=%i, inCSize=%i \n", compressedSize, inCSize);
//DISPLAY("decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize);
DISPLAYLEVEL(6,"compressedSize=%i, inCSize=%i \n", compressedSize, inCSize);
DISPLAYLEVEL(6,"decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize);
int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize);
FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize);
Expand Down

0 comments on commit c5d6f8a

Please sign in to comment.