Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

frame-api: add function to insert uncompressed data #1094

Merged
merged 11 commits into from
Jul 5, 2022
5 changes: 5 additions & 0 deletions doc/lz4frame_manual.html
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ <h1>1.9.4 Manual</h1>
LZ4F_OBSOLETE_ENUM(skippableFrame)
} LZ4F_frameType_t;
</b></pre><BR>
<pre><b>typedef enum {
LZ4B_COMPRESSED,
LZ4B_UNCOMPRESSED,
} LZ4F_blockCompression_t;
</b></pre><BR>
<pre><b>typedef struct {
LZ4F_blockSizeID_t blockSizeID; </b>/* max64KB, max256KB, max1MB, max4MB; 0 == default */<b>
LZ4F_blockMode_t blockMode; </b>/* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */<b>
Expand Down
133 changes: 110 additions & 23 deletions examples/frameCompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#include <errno.h>
#include <assert.h>

#include <getopt.h>
#include <lz4frame.h>


#define IN_CHUNK_SIZE (16*1024)

static const LZ4F_preferences_t kPrefs = {
Expand Down Expand Up @@ -57,10 +57,11 @@ static compressResult_t
compress_file_internal(FILE* f_in, FILE* f_out,
LZ4F_compressionContext_t ctx,
void* inBuff, size_t inChunkSize,
void* outBuff, size_t outCapacity)
void* outBuff, size_t outCapacity,
FILE* f_unc, long uncOffset)
{
compressResult_t result = { 1, 0, 0 }; /* result for an error */
unsigned long long count_in = 0, count_out;
long long count_in = 0, count_out, bytesToOffset = -1;

assert(f_in != NULL); assert(f_out != NULL);
assert(ctx != NULL);
Expand All @@ -81,22 +82,48 @@ compress_file_internal(FILE* f_in, FILE* f_out,

/* stream file */
for (;;) {
size_t const readSize = fread(inBuff, 1, IN_CHUNK_SIZE, f_in);
size_t compressedSize;
long long inSize = IN_CHUNK_SIZE;
if (uncOffset >= 0) {
bytesToOffset = uncOffset - count_in;

/* read only remaining bytes to offset position */
if (bytesToOffset < IN_CHUNK_SIZE && bytesToOffset > 0) {
inSize = bytesToOffset;
}
}

/* input data is at uncompressed data offset */
if (bytesToOffset <= 0 && uncOffset >= 0 && f_unc) {
size_t const readSize = fread(inBuff, 1, inSize, f_unc);
if (readSize == 0) {
uncOffset = -1;
continue;
}
count_in += readSize;
compressedSize = LZ4F_uncompressedUpdate(ctx,
outBuff, outCapacity,
inBuff, readSize,
NULL);
} else {
size_t const readSize = fread(inBuff, 1, inSize, f_in);
if (readSize == 0) break; /* nothing left to read from input file */
count_in += readSize;

size_t const compressedSize = LZ4F_compressUpdate(ctx,
compressedSize = LZ4F_compressUpdate(ctx,
outBuff, outCapacity,
inBuff, readSize,
NULL);
if (LZ4F_isError(compressedSize)) {
printf("Compression failed: error %u \n", (unsigned)compressedSize);
return result;
}

printf("Writing %u bytes\n", (unsigned)compressedSize);
safe_fwrite(outBuff, 1, compressedSize, f_out);
count_out += compressedSize;
}

if (LZ4F_isError(compressedSize)) {
printf("Compression failed: error %u \n", (unsigned)compressedSize);
return result;
}

printf("Writing %u bytes\n", (unsigned)compressedSize);
safe_fwrite(outBuff, 1, compressedSize, f_out);
count_out += compressedSize;
}

/* flush whatever remains within internal buffers */
Expand All @@ -120,7 +147,8 @@ compress_file_internal(FILE* f_in, FILE* f_out,
}

static compressResult_t
compress_file(FILE* f_in, FILE* f_out)
compress_file(FILE* f_in, FILE* f_out,
FILE* f_unc, int uncOffset)
{
assert(f_in != NULL);
assert(f_out != NULL);
Expand All @@ -137,7 +165,8 @@ compress_file(FILE* f_in, FILE* f_out)
result = compress_file_internal(f_in, f_out,
ctx,
src, IN_CHUNK_SIZE,
outbuff, outbufCapacity);
outbuff, outbufCapacity,
f_unc, uncOffset);
} else {
printf("error : resource allocation failed \n");
}
Expand Down Expand Up @@ -305,52 +334,106 @@ static int decompress_file(FILE* f_in, FILE* f_out)
}


/* compareFiles() :
 * Verify that `fp1` holds the byte stream that compression was fed.
 *
 * Without injection (`fpUnc == NULL` or `uncOffset < 0`) the expected stream
 * is simply the content of `fp0`.
 * With injection, the expected stream is :
 *     the first `uncOffset` bytes of `fp0`,
 *     then the whole content of `fpUnc`,
 *     then the remainder of `fp0`.
 * If `fp0` ends before `uncOffset` bytes were read, nothing is injected.
 *
 * All streams are read from their current position.
 *
 * @return : 0 if `fp1` matches the expected stream exactly (same bytes, same length),
 *           non-zero otherwise.
 */
int compareFiles(FILE* fp0, FILE* fp1, FILE* fpUnc, long uncOffset)
{
    enum { kChunkSize = 1024 };
    char b0[kChunkSize];
    char b1[kChunkSize];
    long bytesRead = 0;   /* bytes consumed from fp0 before the injection point */
    /* injection only applies when both a file and a valid offset are provided */
    int injectionPending = (fpUnc != NULL) && (uncOffset >= 0);

    for (;;) {
        FILE* src = fp0;
        size_t bytesToRead = sizeof(b0);
        size_t r0;
        size_t r1;

        if (injectionPending) {
            long const bytesToOffset = uncOffset - bytesRead;
            if (bytesToOffset <= 0) {
                /* injection point reached : expected data now comes from fpUnc */
                src = fpUnc;
            } else if (bytesToOffset < (long)sizeof(b0)) {
                /* do not read past the injection point */
                bytesToRead = (size_t)bytesToOffset;
            }
        }

        r0 = fread(b0, 1, bytesToRead, src);
        if (r0 == 0) {
            if (src == fpUnc) {
                /* injected file exhausted : resume with the remainder of fp0 */
                injectionPending = 0;
                continue;
            }
            /* expected stream exhausted : fp1 must not contain any extra byte */
            return fread(b1, 1, 1, fp1) != 0;
        }

        r1 = fread(b1, 1, r0, fp1);
        if (r1 != r0) return 1;   /* fp1 is shorter than expected */

        {   int const diff = memcmp(b0, b1, r0);
            if (diff != 0) return diff;
        }

        if (src == fp0) bytesRead += (long)r0;
    }
}


int main(int argc, const char **argv) {
int main(int argc, char **argv) {
char inpFilename[256] = { 0 };
char lz4Filename[256] = { 0 };
char decFilename[256] = { 0 };

int uncOffset = -1;
char uncFilename[256] = { 0 };
int opt;

if (argc < 2) {
printf("Please specify input filename\n");
return 0;
return EXIT_FAILURE;
}

snprintf(inpFilename, 256, "%s", argv[1]);
snprintf(lz4Filename, 256, "%s.lz4", argv[1]);
snprintf(decFilename, 256, "%s.lz4.dec", argv[1]);

while ((opt = getopt(argc, argv, "o:d:")) != -1) {
switch (opt) {
case 'd':
snprintf(uncFilename, 256, "%s", optarg);
break;
case 'o':
uncOffset = atoi(optarg);
break;
default:
printf("usage: %s <input file> [-o <offset> -d <file>]\n", argv[0]);
printf("-o uncompressed data offset\n");
printf(" inject uncompressed data at this offset into the lz4 file\n");
printf("-d uncompressed file\n");
printf(" file to inject without compression into the lz4 file\n");
return EXIT_FAILURE;
}
}

printf("inp = [%s]\n", inpFilename);
printf("lz4 = [%s]\n", lz4Filename);
printf("dec = [%s]\n", decFilename);
if (uncOffset > 0) {
printf("unc = [%s]\n", uncFilename);
printf("ofs = [%i]\n", uncOffset);
}

/* compress */
{ FILE* const inpFp = fopen(inpFilename, "rb");
FILE* const outFp = fopen(lz4Filename, "wb");
FILE* const uncFp = fopen(uncFilename, "rb");

printf("compress : %s -> %s\n", inpFilename, lz4Filename);
compressResult_t const ret = compress_file(inpFp, outFp);
compressResult_t const ret = compress_file(
inpFp, outFp,
uncFp, uncOffset);

fclose(outFp);
fclose(inpFp);
if (uncFp)
fclose(uncFp);

if (ret.error) {
printf("compress : failed with code %i\n", ret.error);
Expand Down Expand Up @@ -383,12 +466,16 @@ int main(int argc, const char **argv) {
/* verify */
{ FILE* const inpFp = fopen(inpFilename, "rb");
FILE* const decFp = fopen(decFilename, "rb");
FILE* const uncFp = fopen(uncFilename, "rb");

printf("verify : %s <-> %s\n", inpFilename, decFilename);
int const cmp = compareFiles(inpFp, decFp);
int const cmp = compareFiles(inpFp, decFp,
uncFp, uncOffset);

fclose(decFp);
fclose(inpFp);
if (uncFp)
fclose(uncFp);

if (cmp) {
printf("corruption detected : decompressed file differs from original\n");
Expand Down
14 changes: 10 additions & 4 deletions lib/lz4.c
Original file line number Diff line number Diff line change
Expand Up @@ -1679,6 +1679,15 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char*
return result;
}

/*! LZ4_DictSize() :
 *  Compute the dictionary size that can effectively be used for `LZ4_dict`.
 *  The requested `dictSize` is capped at 64 KB, then capped again at the
 *  dictionary size currently recorded in the stream state.
 *  `dictSize` is interpreted as an unsigned value, so a negative request
 *  is treated as a very large one and gets capped like any other.
 *  The stream state is only read, never modified.
 * @return : the capped dictionary size.
 */
int LZ4_DictSize (LZ4_stream_t* LZ4_dict, int dictSize)
{
    const LZ4_stream_t_internal* const streamState = &LZ4_dict->internal_donotuse;
    U32 cappedSize = (U32)dictSize;

    /* useless to define a dictionary > 64 KB */
    if (cappedSize > 64 KB) cappedSize = 64 KB;
    /* cannot exceed what the stream currently holds */
    if (cappedSize > streamState->dictSize) cappedSize = streamState->dictSize;

    return (int)cappedSize;
}

/*! LZ4_saveDict() :
* If previously compressed data block is not guaranteed to remain available at its memory location,
Expand All @@ -1690,12 +1699,9 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char*
int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
{
LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;

dictSize = LZ4_DictSize(LZ4_dict, dictSize);
DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);

if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }

if (safeBuffer == NULL) assert(dictSize == 0);
if (dictSize > 0) {
const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
Expand Down
2 changes: 2 additions & 0 deletions lib/lz4.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,8 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in
*/
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);

LZ4LIB_API int LZ4_DictSize (LZ4_stream_t* LZ4_dict, int dictSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few conventions :

  • function names start in lowercase, excluding the prefix
  • new functions shall be documented. What does it do ? Set a new dictSize ? Get a current dictSize ? What are the limitations ? What is the parameter for ? What happens in case of error ?
  • Generally, function name starts with a verb/action, to better qualify the effect, for example LZ4_setDictSize() or LZ4_reduceDictSize().
  • New symbols do not start their life directly in "stable" area. They have to spend some time in "staging" area below, to prove their worth and collect user feedback. As a consequence, the qualifier changes to LZ4LIB_STATIC_API.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I have addressed your comments. Also added a fuzzing test to make sure the changes are working properly

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also added a fuzzing test to make sure the changes are working properly

Great !


/*! LZ4_saveDict() :
* If last 64KB data cannot be guaranteed to remain available at its current memory location,
* save it into a safer place (char* safeBuffer).
Expand Down