Skip to content

Commit

Permalink
implement ZIP64 support. closes #6
Browse files Browse the repository at this point in the history
  • Loading branch information
thejoshwolfe committed Dec 19, 2015
1 parent e7de0a1 commit f4e5899
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 33 deletions.
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,20 @@ Any library that offers a streaming unzip API must make one of the above two com
which makes the library either dishonest or nonconformant (usually the latter).
This library insists on correctness and adherence to the spec, and so does not offer a streaming API.

### Limited ZIP64 Support

For ZIP64, only zip files smaller than `8PiB` are supported,
not the full `16EiB` range that a 64-bit integer should be able to index.
This is due to the JavaScript Number type being an IEEE 754 double precision float.

The Node.js `fs` module probably has this same limitation.

### ZIP64 Extensible Data Sector Is Ignored

The spec does not allow zip file creators to put arbitrary data here,
but rather reserves its use for PKWARE and mentions something about Z390.
This doesn't seem useful to expose in this library, so it is ignored.

### No Multi-Disk Archive Support

This library does not support multi-disk zip files.
Expand Down Expand Up @@ -336,10 +350,6 @@ only method `0` (stored with no compression)
and method `8` (deflated) are supported.
Any of the other 15 official methods will cause the `openReadStream()` `callback` to receive an `err`.

### No ZIP64 Support

A ZIP64 file will probably cause undefined behavior.

### Data Descriptors Are Ignored

There may or may not be Data Descriptor sections in a zip file.
Expand Down
111 changes: 104 additions & 7 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ function fromRandomAccessReader(reader, totalSize, options, callback) {
if (options == null) options = {autoClose: true};
if (callback == null) callback = defaultCallback;
if (typeof totalSize !== "number") throw new Error("expected totalSize parameter to be a number");
if (totalSize > Number.MAX_SAFE_INTEGER) {
throw new Error("zip file too large. only file sizes up to 2^52 are supported due to JavaScript's Number type being an IEEE 754 double.");
}

// the matching unref() call is in zipfile.close()
reader.ref();
Expand Down Expand Up @@ -104,7 +107,51 @@ function fromRandomAccessReader(reader, totalSize, options, callback) {
// 22 - Comment
// the encoding is always cp437.
var comment = bufferToString(eocdrBuffer, 22, eocdrBuffer.length, false);
return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose));

if (!(entryCount === 0xffff || centralDirectoryOffset === 0xffffffff)) {
return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose));
}

// ZIP64 format

// ZIP64 Zip64 end of central directory locator
var zip64EocdlBuffer = new Buffer(20);
var zip64EocdlOffset = bufferReadStart + i - zip64EocdlBuffer.length;
readAndAssertNoEof(reader, zip64EocdlBuffer, 0, zip64EocdlBuffer.length, zip64EocdlOffset, function(err) {
if (err) return callback(err);

// 0 - zip64 end of central dir locator signature = 0x07064b50
if (zip64EocdlBuffer.readUInt32LE(0) !== 0x07064b50) {
return callback(new Error("invalid ZIP64 End of Central Directory Locator signature"));
}
// 4 - number of the disk with the start of the zip64 end of central directory
// 8 - relative offset of the zip64 end of central directory record
var zip64EocdrOffset = readUInt64LE(zip64EocdlBuffer, 8);
// 16 - total number of disks

// ZIP64 end of central directory record
var zip64EocdrBuffer = new Buffer(56);
readAndAssertNoEof(reader, zip64EocdrBuffer, 0, zip64EocdrBuffer.length, zip64EocdrOffset, function(err) {
if (err) return callback(err);

// 0 - zip64 end of central dir signature 4 bytes (0x06064b50)
if (zip64EocdrBuffer.readUInt32LE(0) !== 0x06064b50) return callback(new Error("invalid ZIP64 end of central directory record signature"));
// 4 - size of zip64 end of central directory record 8 bytes
// 12 - version made by 2 bytes
// 14 - version needed to extract 2 bytes
// 16 - number of this disk 4 bytes
// 20 - number of the disk with the start of the central directory 4 bytes
// 24 - total number of entries in the central directory on this disk 8 bytes
// 32 - total number of entries in the central directory 8 bytes
entryCount = readUInt64LE(zip64EocdrBuffer, 32);
// 40 - size of the central directory 8 bytes
// 48 - offset of start of central directory with respect to the starting disk number 8 bytes
centralDirectoryOffset = readUInt64LE(zip64EocdrBuffer, 48);
// 56 - zip64 extensible data sector (variable size)
return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose));
});
});
return;
}
callback(new Error("end of central directory record signature not found"));
});
Expand Down Expand Up @@ -200,12 +247,6 @@ function readEntries(self) {

self.readEntryCursor += 46;

// validate file size
if (entry.compressionMethod === 0) {
var msg = "compressed/uncompressed size mismatch for stored file: " + entry.compressedSize + " != " + entry.uncompressedSize;
if (entry.compressedSize !== entry.uncompressedSize) return emitErrorAndAutoClose(self, new Error(msg));
}

buffer = new Buffer(entry.fileNameLength + entry.extraFieldLength + entry.fileCommentLength);
readAndAssertNoEof(self.reader, buffer, 0, buffer.length, self.readEntryCursor, function(err) {
if (err) return emitErrorAndAutoClose(self, err);
Expand Down Expand Up @@ -247,6 +288,50 @@ function readEntries(self) {
self.readEntryCursor += buffer.length;
self.entriesRead += 1;

if (entry.uncompressedSize === 0xffffffff ||
entry.compressedSize === 0xffffffff ||
entry.relativeOffsetOfLocalHeader === 0xffffffff) {
// ZIP64 format
// find the Zip64 Extended Information Extra Field
var zip64EiefBuffer = null;
for (var i = 0; i < entry.extraFields.length; i++) {
var extraField = entry.extraFields[i];
if (extraField.id === 0x0001) {
zip64EiefBuffer = extraField.data;
break;
}
}
if (zip64EiefBuffer == null) return emitErrorAndAutoClose(self, new Error("expected Zip64 Extended Information Extra Field"));
var index = 0;
// 0 - Original Size 8 bytes
if (entry.uncompressedSize === 0xffffffff) {
if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, new Error("Zip64 Extended Information Extra Field does not include Original Size"));
entry.uncompressedSize = readUInt64LE(zip64EiefBuffer, index);
index += 8;
}
// 8 - Compressed Size 8 bytes
if (entry.compressedSize === 0xffffffff) {
if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, new Error("Zip64 Extended Information Extra Field does not include Compressed Size"));
entry.compressedSize = readUInt64LE(zip64EiefBuffer, index);
index += 8;
}
// 16 - Relative Header Offset 8 bytes
if (entry.relativeOffsetOfLocalHeader === 0xffffffff) {
if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, new Error("Zip64 Extended Information Extra Field does not include Relative Header Offset"));
entry.relativeOffsetOfLocalHeader = readUInt64LE(zip64EiefBuffer, index);
index += 8;
}
// 24 - Disk Start Number 4 bytes
}

// validate file size
if (entry.compressionMethod === 0) {
if (entry.compressedSize !== entry.uncompressedSize) {
var msg = "compressed/uncompressed size mismatch for stored file: " + entry.compressedSize + " != " + entry.uncompressedSize;
return emitErrorAndAutoClose(self, new Error(msg));
}
}

// validate file name
if (entry.fileName.indexOf("\\") !== -1) return emitErrorAndAutoClose(self, new Error("invalid characters in fileName: " + entry.fileName));
if (/^[a-zA-Z]:/.test(entry.fileName) || /^\//.test(entry.fileName)) return emitErrorAndAutoClose(self, new Error("absolute path: " + entry.fileName));
Expand Down Expand Up @@ -469,6 +554,18 @@ function bufferToString(buffer, start, end, isUtf8) {
}
}

function readUInt64LE(buffer, offset) {
  // Node has no built-in 64-bit read, because JavaScript Numbers (IEEE 754
  // doubles) only represent integers exactly up to 53 bits. 53 bits is still
  // a whole lot more than 32, so combine the two halves arithmetically.
  // (Bitshift operators are useless here: they truncate operands to 32 bits.)
  var high = buffer.readUInt32LE(offset + 4);
  var low = buffer.readUInt32LE(offset);
  return high * 0x100000000 + low;
  // callers bounds-check this result against the total file size, which was
  // itself already validated, so any precision overflow gets caught there.
}

// Fallback for API consumers who don't supply a callback: errors must not
// vanish silently, so rethrow them; successful results are simply dropped.
function defaultCallback(err) {
  if (err) {
    throw err;
  }
}
49 changes: 27 additions & 22 deletions test/zip64.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ var util = require("util");
var Readable = require("stream").Readable;
var Writable = require("stream").Writable;
var BufferList = require("bl");
var fd_slicer = require("fd-slicer");

function usage() {
process.stdout.write("" +
Expand Down Expand Up @@ -99,29 +98,30 @@ function newLargeBinContentsProducer() {
if (!readStream.push(buffer)) return;
}
};
readStream.isLargeBinContentsStream = true;
return readStream;
}

// this is just some bytes so we can identify it.
var prefixLength = 0x100;
// Captures the first prefixLength bytes flowing out of stream and hands the
// resulting Buffer to cb. The pipe is intentionally abandoned afterward: the
// Writable's _write never invokes its completion callback, which stalls the
// source instead of draining it.
function getPrefixOfStream(stream, cb) {
  var captured = new Buffer(prefixLength);
  var sink = new Writable();
  sink._write = function(chunk, encoding, callback) {
    chunk.copy(captured, 0, 0, prefixLength);
    cb(captured);
    // deliberately never call callback(); abandon this pipe
  };
  stream.pipe(sink);
}
// Convenience wrapper: streams the synthetic large.bin generator and delivers
// its first prefixLength bytes to cb, so tests can recognize that content
// (e.g. when locating it inside the zip fixture or comparing read streams).
function getPrefixOfLargeBinContents(cb) {
  getPrefixOfStream(newLargeBinContentsProducer(), cb);
}
function compressFile(inputPath, outputPath) {
// this is just some bytes so we can go find it.
var prefixLength = 0x100;

getPrefixOfLargeBinContents(function(prefixBuffer) {
findPrefixInPath(prefixBuffer, function(largeBinContentsOffset) {
writeCompressedFile(largeBinContentsOffset);
});
});
function getPrefixOfLargeBinContents(cb) {
var prefixBuffer = new Buffer(prefixLength);
var writer = new Writable();
writer._write = function(chunk, encoding, callback) {
chunk.copy(prefixBuffer, 0, 0, prefixLength);
cb(prefixBuffer);
// abandon this pipe
};
newLargeBinContentsProducer().pipe(writer);
}
function findPrefixInPath(prefixBuffer, cb) {
var previewLength = 0x1000;
fs.createReadStream(inputPath, {
Expand Down Expand Up @@ -185,11 +185,15 @@ function runTest() {
}));
} else {
// make sure this is the big thing
if (readStream.isLargeBinContentsStream) {
console.log("test/zip64: " + entry.fileName + ": PASS");
} else {
throw new Error("large.bin contents read did not return expected stream")
}
getPrefixOfLargeBinContents(function(expectedPrefixBuffer) {
getPrefixOfStream(readStream, function(actualPrefixBuffer) {
if (buffersEqual(expectedPrefixBuffer, actualPrefixBuffer)) {
console.log("test/zip64: " + entry.fileName + ": PASS");
} else {
throw new Error("large.bin contents read did not return expected stream")
}
});
});
}
});
});
Expand All @@ -198,11 +202,12 @@ function runTest() {
}

function makeRandomAccessReader(cb) {
fs.readFile(path.join(__dirname, "zip64/zip64.zip_fragment"), function(err, backendContents) {
var fileName = "zip64/zip64.zip_fragment";
fs.readFile(path.join(__dirname, fileName), function(err, backendContents) {
if (err) return callback(err);

if (backendContents.length <= 4) throw new Error("unexpected EOF");
var largeBinContentsOffset = backendContents.readUInt32BE(0);
var largeBinContentsOffset = backendContents.readUInt32BE(0) - 4;
if (largeBinContentsOffset > backendContents.length) throw new Error(".zip_fragment header is malformed");
var largeBinContentsEnd = largeBinContentsOffset + largeBinLength;

Expand Down

0 comments on commit f4e5899

Please sign in to comment.