Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions for faster preserving/restoring of streams #188

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
75 changes: 75 additions & 0 deletions lib/lz4.c
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,81 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
return dict->dictSize;
}

/*
 * LZ4_stream_preserve
 * Record the populated (non-zero) slots of a stream's hash table as a compact
 * list of (location, value) pairs, so they can later be replayed into another
 * stream by LZ4_stream_restore_preserved. Copying only the populated slots is
 * much faster than re-initialising the stream (e.g. with LZ4_loadDict) or
 * memcpy'ing the whole table, most of which is zeroes.
 *
 * stream_ : stream whose hash table is scanned (it is only read).
 * buf_out : receives a malloc'ed array of entries, terminated by a sentinel
 *           entry whose location and value are both -1. The caller owns the
 *           array and must release it with free().
 *
 * Returns the number of recorded entries (the sentinel is not counted), or -1
 * if an argument is NULL or the allocation fails. On failure *buf_out is set
 * to NULL (when buf_out itself is not NULL).
 */
int LZ4_stream_preserve(LZ4_stream_t* stream_, struct preserved_hash_table_entry_t** buf_out)
{
    LZ4_stream_t_internal* stream = (LZ4_stream_t_internal*)stream_;
    struct preserved_hash_table_entry_t* buf;
    size_t num_locs_needed = 1;   /* start at 1: room for the trailing sentinel */
    int buf_pos = 0;

    if (buf_out == NULL)
    {
        return -1;
    }
    /* Never leave the caller holding an indeterminate pointer on failure. */
    *buf_out = NULL;
    if (stream == NULL)
    {
        return -1;
    }

    /* First pass: count the populated slots, which determines how much space
     * must be allocated. */
    for (int i = 0; i < HASH_SIZE_U32; i++)
    {
        if (stream->hashTable[i] != 0)
        {
            num_locs_needed++;
        }
    }

    /* Size arithmetic is done in size_t so it cannot overflow an int. */
    buf = malloc(num_locs_needed * sizeof *buf);
    if (buf == NULL)
    {
        return -1;
    }

    /* Second pass: for each populated slot, record its position in the hash
     * table and the value stored there. */
    for (int i = 0; i < HASH_SIZE_U32; i++)
    {
        if (stream->hashTable[i] != 0)
        {
            buf[buf_pos].location = i;
            buf[buf_pos].value = stream->hashTable[i];
            buf_pos++;
        }
    }

    /* Terminate the array with the -1 sentinel. */
    buf[buf_pos].location = -1;
    buf[buf_pos].value = -1;

    *buf_out = buf;
    return buf_pos;
}

/*
 * LZ4_stream_restore_preserved
 * Rebuild the state of `stream_` from `orig_` plus the (location, value) list
 * previously produced by LZ4_stream_preserve.
 *
 * stream_ : destination stream. Only the hash-table slots named in `buf` are
 *           written; every other slot keeps whatever it already held.
 * orig_   : stream the list was taken from; its currentOffset, dictionary,
 *           bufferStart and dictSize fields are copied verbatim.
 * buf     : entry list ending with an entry whose location is -1.
 */
void LZ4_stream_restore_preserved(LZ4_stream_t* stream_, LZ4_stream_t* orig_, struct preserved_hash_table_entry_t* buf)
{
    LZ4_stream_t_internal* const src = (LZ4_stream_t_internal*)orig_;
    LZ4_stream_t_internal* const dst = (LZ4_stream_t_internal*)stream_;
    int entry;

    /* Scalar bookkeeping fields are cheap, so they are copied directly. */
    dst->currentOffset = src->currentOffset;
    dst->dictionary = src->dictionary;
    dst->bufferStart = src->bufferStart;
    dst->dictSize = src->dictSize;

    /* Replay the recorded slots. Stop at the sentinel, or after HASH_SIZE_U32
     * entries, whichever comes first. */
    for (entry = 0; entry < HASH_SIZE_U32 && buf[entry].location != -1; entry++)
    {
        dst->hashTable[buf[entry].location] = buf[entry].value;
    }
}

static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
{
Expand Down
18 changes: 18 additions & 0 deletions lib/lz4.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,24 @@ int LZ4_freeStream (LZ4_stream_t* streamPtr);
*/
int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);


/* One populated hash-table slot recorded by LZ4_stream_preserve.
 * An array of these is terminated by an entry with location == -1 and
 * value == -1. */
struct preserved_hash_table_entry_t {
/* Index of the slot within the stream's hash table (-1 marks the end of the array). */
int location;
/* Value that was stored in that slot (-1 in the terminating entry). */
int value;
};

/* LZ4_stream_preserve
 * Use this function to preserve a stream after loading a dictionary, so it can be rapidly reloaded.
 *
 * The stream's hash table is scanned and every non-zero slot is recorded as a
 * (location, value) entry. The resulting array ends with a sentinel entry
 * whose location and value are both -1.
 *
 * The array is allocated with malloc() and its address is stored in *buf, with
 * as much space as is needed, so the caller must later call free() on it.
 *
 * Returns the number of entries recorded, not counting the sentinel.
 */
int LZ4_stream_preserve(LZ4_stream_t* stream_, struct preserved_hash_table_entry_t** buf);

/* LZ4_stream_restore_preserved
 * Use this function with a new stream and a buffer created by LZ4_stream_preserve, to reload its state.
 *
 * stream_ : stream to initialise. Only the hash-table slots listed in buf are
 *           written; all other slots are left untouched.
 *           NOTE(review): this presumably requires stream_ to start with a
 *           zeroed hash table - confirm against callers.
 * orig_   : the stream that was preserved; its currentOffset, dictionary,
 *           bufferStart and dictSize fields are copied into stream_.
 * buf     : entry array produced by LZ4_stream_preserve, read up to the entry
 *           whose location is -1.
 */
void LZ4_stream_restore_preserved(LZ4_stream_t* stream_, LZ4_stream_t* orig_, struct preserved_hash_table_entry_t* buf);

/*
* LZ4_compress_fast_continue
* Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
Expand Down