Skip to content
This repository has been archived by the owner on Sep 9, 2024. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrec committed Jul 10, 2012
0 parents commit eb73a28
Show file tree
Hide file tree
Showing 25 changed files with 3,164 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node_modules/
3 changes: 3 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.gitignore
node_modules/
data/
77 changes: 77 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# LZ4

[LZ4](http://fastcompression.blogspot.fr/) is a very fast compression and decompression algorithm. This nodejs module provides a Javascript implementation of it, currently limited to decompression. Direct bindings may be provided in the future.

This is very much a __work in progress__.


## Install

npm install lz4


## Usage

### Decoding

There are 2 ways to decode:

* __asynchronous__ using nodejs Streams - slowest but can handle very large data sets (no memory limitations)
* __synchronous__ by feeding the whole LZ4 data - faster but is limited by the amount of memory

Either way, there are 2 options that the decoder takes:

* `chunkSize` (_Number_): size in bytes of the chunks that were used to compress the data (default=8Mb)
* `incrementSize` (_Number_): number of bytes by which to increment the output buffer if it becomes full and there is still data to decode. Setting it to the right value has a significant impact on performance. If the output size is known, use it as the incrementSize value for maximum performance.


#### Asynchronous decoding

First, create an LZ4 decoding stream with `LZ4#createDecoderStream()`.
The stream can then decode any data piped to it. It will emit a `data` event on each decoded sequence, which can be saved into an output stream.

The following example shows how to decode an LZ4 compressed file `test.lz4` into `test`.


```javascript
var fs = require('fs')
var lz4 = require('lz4')

var decoder = lz4.createDecoderStream()

var input = fs.createReadStream('test.lz4')
var output = fs.createWriteStream('test')

input.pipe(decoder).pipe(output)

```

#### Synchronous decoding

Read the data into memory and feed it to `LZ4#decode()`.

```javascript
var fs = require('fs')
var lz4 = require('lz4')

var input = fs.readFileSync('test.lz4')
var output = lz4.decode(input)

fs.writeFileSync('test', output)

```


## How it works

* [LZ4 stream format](http://fastcompression.blogspot.fr/2011/05/lz4-explained.html)

## Restrictions

Currently, the decoder handles pure LZ4 streams, without additional data. For instance, to compress data you can use `bin/lz4demo32`, which adds a header to the created file. In order to properly decode it with lz4-js, you need to strip it out. You can use `bin/lz4strip` for that task.

LZ4 streams have only been tested using `bin/lz4demo32`, not `bin/lz4demo64`.

## License

MIT
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* shim Buffer.concat for node versions < 0.8.0
1 change: 1 addition & 0 deletions data/empty.lz4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!L
1 change: 1 addition & 0 deletions data/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[object Uint8Array]
Binary file added data/package.json.lz4
Binary file not shown.
11 changes: 11 additions & 0 deletions doc/format.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
lz4 format

sequence = token(1) + literalslen(i) + literals(token >> 4 + i) + [match copy: offset(2) + length(token >> 4 << 4)]

match copy:
position = current position - offset (0 is invalid)
length = 4 + length

last 5 bytes = literals
last match starts 12 bytes before end of stream
last sequence is incomplete and stops after the literals
25 changes: 25 additions & 0 deletions examples/file_uncompress.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Uncompress an LZ4 stream
 *
 * Usage: node file_uncompress.js [input] [output]
 */
// Modules
var path = require('path')
var fs = require('fs')
var lz4 = require('..')

// Input defaults to test.lz4; output defaults to the input file name
// with the lz4 extension stripped off
var inputFile = process.argv[2] || 'test.lz4'
var outputFile = process.argv[3] || path.basename(inputFile, lz4.extension)

// Streaming decoder: handles arbitrarily large inputs
var decoder = lz4.createDecoderStream()
// var decoder = lz4.createDecoderStream({ incrementSize: (128 << 20), chunkSize: (128 << 20) })

var input = fs.createReadStream(inputFile)
var output = fs.createWriteStream(outputFile)

console.log('Uncompressing', inputFile, 'to', outputFile, '...')

// Time the run; the timer stops once the decoder has emitted all its data
console.time('lz4')
decoder.on('end', function () {
  console.timeEnd('lz4')
})

// Wire the pipeline: input file -> LZ4 decoder -> output file
input.pipe(decoder).pipe(output)
26 changes: 26 additions & 0 deletions examples/file_uncompressSync.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/**
 * Uncompress a Buffer containing LZ4 compressed data
 *
 * Usage: node file_uncompressSync.js [input] [output]
 */
// Modules
var path = require('path')
var fs = require('fs')
var lz4 = require('..')

// Input defaults to test.lz4; output defaults to the input file name
// with the lz4 extension stripped off
var inputFile = process.argv[2] || 'test.lz4'
var outputFile = process.argv[3] || path.basename(inputFile, lz4.extension)

// Read the whole compressed file into memory
var input = fs.readFileSync(inputFile)

// If the final uncompressed size is known, set the incrementSize with it
// for faster decoding (no time spent resizing the output buffer)
var incrementSize = (128 << 20) // 128Mb

console.log('Uncompressing', inputFile, 'to', outputFile, '...')

// Decode synchronously, timing the call
console.time('lz4')
var decoded = lz4.decode(input, incrementSize)
console.timeEnd('lz4')

// Save the uncompressed data
fs.writeFileSync(outputFile, decoded)
161 changes: 161 additions & 0 deletions lib/decoder.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/**
Sequence definition: name (bytes length)
token (1)
literals length (0-n)
literals (0-l)
offset (2)
match copy length (0-c)

Chunk definition:
size (4) = n [unsigned 32 bits little endian integer]
sequences (n)

lz4demo32 and lz4demo64 output:
magic number (4) [unsigned 32 bits little endian integer]
chunks (n)
*/

;(function (exports) {

if (!Buffer) {
var Buffer = Uint8Array

if (!Buffer.prototype.concat)
Buffer.prototype.concat = function (list, size) {
if (arguments.length < 1)
for (var i = 0, n = list.length; i < n; i++)
size += list[i].length

var res = new Buffer(size)
var pos = 0

for (i = 0; i < n; i++) {
var item = list[i]
for (var j = 0, m = item.length; j < m; j++)
res[pos++] = item[j]
}

return res
}
}

/**
 * Decode a single encoded chunk. Assumptions: input contains all sequences
 * of a chunk, output is large enough to receive the decoded data.
 * If the output buffer is too small, an error will be thrown.
 * If the returned value is negative, an error occured at the returned offset.
 *
 * @param input {Buffer} compressed sequences
 * @param output {Buffer} buffer receiving the decoded bytes
 * @return {Number} number of decoded bytes (negated error offset on failure)
 * @private
 */
function LZ4_uncompressChunk (input, output) {
  var srcLen = input.length
  var src = 0  // read cursor in input
  var dst = 0  // write cursor in output
  var ext

  // Process each sequence in the incoming data
  while (src < srcLen) {
    var token = input[src++]

    // Literals length: high nibble of the token; a nibble of 15 is
    // extended by following bytes, each 255-valued byte adding more
    var litLen = token >> 4
    if (litLen === 15) {
      do {
        ext = input[src++]
        litLen += ext
      } while (ext === 255)
    }

    // Copy the literals straight to the output
    var litEnd = src + litLen
    while (src < litEnd) output[dst++] = input[src++]

    // A chunk legitimately ends right after its last literals
    if (src === srcLen) return dst

    // Match copy: 2-byte little endian offset back into the output
    var offset = input[src++]
    offset |= input[src++] << 8

    // 0 is an invalid offset: report the position of its first byte, negated
    if (offset === 0) return -(src - 2)

    // Match length: low nibble of the token, extended the same way as the
    // literals length, plus the implicit minimum match of 4
    var matchLen = token & 0xf
    if (matchLen === 15) {
      do {
        ext = input[src++]
        matchLen += ext
      } while (ext === 255)
    }
    matchLen += 4

    // Byte-by-byte copy on purpose: source and destination may overlap
    // (offset < matchLen), which repeats the bytes just written
    var from = dst - offset
    var matchEnd = dst + matchLen
    while (dst < matchEnd) output[dst++] = output[from++]
  }

  return dst
}

// Abort decoding by throwing, reporting the input offset of the bad data.
function decodeError (offset) {
  var message = 'Invalid data at ' + offset
  throw new Error(message)
}

/**
 * Decode an encoded data set in the lz4demo32/lz4demo64 layout: a 4-byte
 * magic number followed by chunks, each chunk being a 4-byte little endian
 * compressed size followed by that many bytes of LZ4 sequences.
 * If the output size is known beforehand, set it to increase performance.
 *
 * @param input {Buffer} input data, starting with ARCHIVE_MAGICNUMBER
 * @param chunkSize {Number} size of the chunk (default=8Mb) (optional)
 * @param outputSize {Number} size of the output (optional)
 * @return {Buffer} decoded data
 * @throws {Error} via decodeError() on a bad magic number or corrupt chunk
 * @public
 */
function LZ4_uncompress (input, chunkSize, outputSize) {
  chunkSize = chunkSize || (8 << 20) // default chunk size: 8Mb

  // Magic number check
  // (readUInt32LE's second argument skips node's bounds assertion)
  if (input.length < 4
    || input.readUInt32LE(0, true) !== exports.ARCHIVE_MAGICNUMBER )
    decodeError(0)

  // Output size is known, allocate all of it in one call
  if (outputSize) {
    var output = new Buffer(outputSize)

    // Current index in the output buffer
    var pos = 0

    for (var i = 4, n = input.length; i < n;) {
      // Each chunk is prefixed with its compressed size (4 bytes LE)
      var size = input.readUInt32LE(i, true)
      i += 4
      // NOTE(review): this relies on Buffer#slice returning a view that
      // shares memory with `output`, so the chunk decodes in place.
      // Uint8Array#slice copies instead — verify the non-node/shim path.
      var decodedSize = LZ4_uncompressChunk( input.slice(i, i + size), output.slice(pos, pos + chunkSize) )
      // A negative result is an error offset, relative to this chunk's start
      if (decodedSize < 0) decodeError(-decodedSize)
      i += size
      pos += decodedSize
    }

    return output
  }

  // Unknown output size, allocate on each pass
  var output = []
  for (var i = 4, n = input.length; i < n;) {
    var size = input.readUInt32LE(i, true)
    i += 4
    // A chunk never decodes to more than chunkSize bytes
    var buf = new Buffer(chunkSize)
    var decodedSize = LZ4_uncompressChunk( input.slice(i, i + size), buf )
    if (decodedSize < 0) decodeError(-decodedSize)
    // Trim the buffer to the decoded length before collecting it
    output.push( decodedSize < chunkSize ? buf.slice(0, decodedSize) : buf )
    i += size
  }

  // Stitch all decoded chunks into a single Buffer
  return Buffer.concat(output)
}

exports.LZ4_uncompressChunk = LZ4_uncompressChunk
exports.LZ4_uncompress = LZ4_uncompress
exports.ARCHIVE_MAGICNUMBER = 0x184C2102

})( (module && module.exports) || this )
Loading

0 comments on commit eb73a28

Please sign in to comment.