diff --git a/source/cli/dzipper/main.d b/source/cli/dzipper/main.d index 4d70dc0..b7ae0ff 100644 --- a/source/cli/dzipper/main.d +++ b/source/cli/dzipper/main.d @@ -42,45 +42,55 @@ private int run(in Opts opts) verbose = opts.verbose, zipFile = opts.zipFile, prependFile = opts.prependFile; + File tempFile; - auto mfile = new MmFile(zipFile); - writefln("file length: %d", mfile.length); - if (mfile.length < 22) + // start memory-mapped zip file scope { - stderr.cwriteln("Not a zip file (too short)."); - return 1; - } - auto eocd_index = findEocdIn(mfile); - if (eocd_index.isNull) - { - stderr.cwriteln("Unable to locate zip metadata (EOCD)."); - return 2; - } - if (verbose) - { - writeln("Found EOCD at offset ", eocd_index, "."); - } - auto eocd = parseEocd(cast(ubyte[]) mfile[eocd_index.get .. $]); + auto mfile = new MmFile(zipFile); + writefln("file length: %d", mfile.length); + if (mfile.length < 22) + { + stderr.cwriteln("Not a zip file (too short)."); + return 1; + } + auto eocd_index = findEocdIn(mfile); + if (eocd_index.isNull) + { + stderr.cwriteln("Unable to locate zip metadata (EOCD)."); + return 2; + } + if (verbose) + { + writeln("Found EOCD at offset ", eocd_index, "."); + } + auto eocd = parseEocd(cast(ubyte[]) mfile[eocd_index.get .. $]); - if (verbose) - { - writeln(eocd); - } + if (verbose) + { + writeln(eocd); + } - cwriteln("File appears to be a zip file."); + cwriteln("File appears to be a zip file."); - if (eocd.totalCentralDirectoriesCount == 0) - { - cwriteln("Warning: empty zip file."); - } + if (eocd.totalCentralDirectoriesCount == 0) + { + cwriteln("Warning: empty zip file."); + } - if (prependFile.length == 0) - { - mfile.printArchiveMetadata(eocd, verbose); + if (prependFile.length == 0) + { + mfile.printArchiveMetadata(eocd, verbose); + } + else + { + tempFile = mfile.prependFileToArchive(prependFile, eocd, verbose); + } } - else + + // the memory file has been closed now, so we can move the tempFile into the zip archive. 
+ if (tempFile.isOpen) { - mfile.prependFileToArchive(prependFile, zipFile, eocd, verbose); + tempFile.name = zipFile; } return 0; diff --git a/source/lib/dzipper/model.d b/source/lib/dzipper/model.d index acb2929..06ddd85 100644 --- a/source/lib/dzipper/model.d +++ b/source/lib/dzipper/model.d @@ -5,8 +5,22 @@ import std.format : FormatSpec; import std.array : appender; import std.conv : to; import std.range : put; +import std.bitmanip : nativeToLittleEndian; import std.datetime.systime : SysTime; +const(uint) EOCD_SIGNATURE_UINT = 0x06054b50; +const(uint) CD_SIGNATURE_UINT = 0x02014b50; +const(uint) LOCAL_FILE_SIGNATURE_UINT = 0x04034b50; + +/** The End of Central Directory Signature. */ +immutable(ubyte[]) EOCD_SIGNATURE = nativeToLittleEndian(EOCD_SIGNATURE_UINT)[0 .. $]; + +/** The Central Directory Signature. */ +immutable(ubyte[]) CD_SIGNATURE = nativeToLittleEndian(CD_SIGNATURE_UINT)[0 .. $]; + +/** The Local File header Signature. */ +immutable(ubyte[]) LOCAL_FILE_SIGNATURE = nativeToLittleEndian(LOCAL_FILE_SIGNATURE_UINT)[0 .. $]; + private mixin template StructToString(S) { void toString(scope void delegate(const(char)[]) sink, @@ -71,6 +85,10 @@ struct EndOfCentralDirectory ushort commentLength; ubyte[] comment; mixin StructToString!EndOfCentralDirectory; + + size_t length() const { + return 22 + comment.length; + } } private mixin template FileInformation() @@ -103,7 +121,7 @@ struct CentralDirectory /// The length of the CD in bytes (notice that the struct /// does not include the CD signature). - size_t length() + size_t length() const { return 46 + fileName.length + extraField.length + comment.length; } @@ -117,7 +135,7 @@ struct LocalFileHeader /// The length of the header in bytes (notice that the struct /// does not include the local file header signature). 
- size_t length() + size_t length() const { return 30 + fileName.length + extraField.length; } diff --git a/source/lib/dzipper/package.d b/source/lib/dzipper/package.d index d0fd610..050fe7a 100644 --- a/source/lib/dzipper/package.d +++ b/source/lib/dzipper/package.d @@ -2,3 +2,4 @@ module dzipper; public import dzipper.model; public import dzipper.parser; +public import dzipper.process; diff --git a/source/lib/dzipper/parser.d b/source/lib/dzipper/parser.d index 53ba785..1f3bc7f 100644 --- a/source/lib/dzipper/parser.d +++ b/source/lib/dzipper/parser.d @@ -3,7 +3,7 @@ module dzipper.parser; import std.typecons : Nullable; import std.algorithm.searching : find; import std.exception : enforce, basicExceptionCtors; -import std.bitmanip : nativeToLittleEndian, littleEndianToNative, peek, Endian; +import std.bitmanip : littleEndianToNative, peek, Endian; import std.range : retro, tail, take, slide; import std.conv : to; import std.string : assumeUTF; @@ -12,15 +12,6 @@ import std.datetime.systime : DosFileTimeToSysTime, SysTime; import dzipper.model; import std.string; -/** The End of Central Directory Signature. */ -immutable(ubyte[]) EOCD_SIGNATURE = nativeToLittleEndian!uint(0x06054b50)[0 .. $]; - -/** The Central Directory Signature. */ -immutable(ubyte[]) CD_SIGNATURE = nativeToLittleEndian!uint(0x02014b50)[0 .. $]; - -/** The Local File header Signature. */ -immutable(ubyte[]) LOCAL_FILE_SIGNATURE = nativeToLittleEndian!uint(0x04034b50)[0 .. $]; - /// Reason why a Zip Archive's metadata couldn't be parsed. enum ZipParseError { @@ -209,6 +200,13 @@ LocalFileHeader parseLocalFileHeader(in ubyte[] bytes) @safe // the EOCD can only appear in the last 65536 + 22 bytes private enum maxEocdLen = 65_535 + 22; +/** + * Find the End of Central Directory (EOCD). + * + * Params: + * source = a source of bytes (must support slice operator, e.g. MmFile). + * Returns: index of the End of Central Directory if it can be found, null otherwise. 
+ */ Nullable!size_t findEocdIn(S, size_t windowLen = 56)(ref S source) { auto bytes = cast(ubyte[])(source.length > maxEocdLen ? source[$ - maxEocdLen .. $] : source[]); diff --git a/source/lib/dzipper/process.d b/source/lib/dzipper/process.d index f237606..46c6a40 100644 --- a/source/lib/dzipper/process.d +++ b/source/lib/dzipper/process.d @@ -1,6 +1,13 @@ module dzipper.process; import std.stdio : File, writeln; +import std.range : iota; +import std.file : remove; +import std.bitmanip : append, Endian; +import std.array : appender; +import std.typecons : Nullable; +import std.conv : to; +import std.algorithm.comparison : min; import dzipper.model, dzipper.parser; @@ -14,8 +21,6 @@ void printArchiveMetadata(B)(ref B bytes, in EndOfCentralDirectory eocd, bool ve private void checkCentralDirectories(B)(ref B bytes, in EndOfCentralDirectory eocd, bool verbose) { - import std.range : iota; - uint offset = eocd.startOfCentralDirectory; foreach (i; iota(0, eocd.diskCentralDirectoriesCount)) { @@ -33,9 +38,182 @@ private void checkCentralDirectories(B)(ref B bytes, } } -void prependFileToArchive(B)(ref B bytes, string prependFile, string zipFile, in EndOfCentralDirectory eocd, bool verbose) +/// Prepend the contents of a file into the zip archive. +/// +/// This function works by first copying the contents of `prependFile` into a temp file, +/// then writing the zip archive's contents into the temp file while shifting the zip metadata +/// entries' offsets as necessary. +/// +/// Params: +/// bytes = zip archive source of bytes +/// prependFile = file to prepend +/// eocd = end of central directory structure +/// verbose = whether to log verbose output +/// Returns: the temp file the output is written to. 
+File prependFileToArchive(B)(ref B bytes, string prependFile, in EndOfCentralDirectory eocd, bool verbose)
 {
-    // auto prepFile = File(prependFile);
-    writeln("not able to prepend file yet!");
+    auto outfile = File.tmpfile;
+
+    // `copyFile` takes both files by `ref`, so the source must be a named
+    // lvalue (a `File(...)` temporary cannot bind to a `ref` parameter).
+    auto prefix = File(prependFile);
+    prefix.copyFile(outfile);
+
+    // Position in the output at which the zip data is going to start.
+    const long archiveStart = cast(long) outfile.tell();
+
+    const size_t cdStart = eocd.startOfCentralDirectory;
+
+    // Lowest local-file-header offset found in the central directory.
+    // It starts at the central directory offset so that an archive without
+    // entries simply has an empty data region.
+    long zipStart = cdStart;
+    size_t offset = cdStart;
+
+    // first, locate the start of the zip data (and report the entries)
+    foreach (i; iota(0, eocd.diskCentralDirectoriesCount))
+    {
+        auto cd = parseCd(cast(ubyte[]) bytes[offset .. $]);
+        auto lfh = parseLocalFileHeader(cast(ubyte[]) bytes[cd.startOfLocalFileHeader .. $]);
+        zipStart = min(zipStart, cast(long) cd.startOfLocalFileHeader);
+
+        if (verbose)
+            writeln("Adding entry: ", lfh.fileName);
+        offset += cd.length;
+    }
+
+    // Copy the whole data region (local headers, stored file data and
+    // anything in between) verbatim. Every offset below is shifted by the
+    // same constant, which is only correct if the layout of this region is
+    // preserved exactly; re-emitting entries one by one would break that for
+    // compressed entries (stored size != uncompressed size), for entries
+    // followed by a data descriptor, and for entries whose physical order
+    // differs from the central directory order.
+    outfile.rawWrite(cast(ubyte[]) bytes[cast(size_t) zipStart .. cdStart]);
+
+    const long shift = archiveStart - zipStart;
+
+    if (verbose)
+        writeln("Shifting zip archive offsets by ", shift);
+
+    // now, write all central directories with their offsets shifted
+    offset = cdStart;
+    foreach (i; iota(0, eocd.diskCentralDirectoriesCount))
+    {
+        auto cd = parseCd(cast(ubyte[]) bytes[offset .. $]);
+        offset += cd.length;
+        // `to!uint` throws if the shifted offset does not fit in 32 bits
+        cd.startOfLocalFileHeader = to!uint(cd.startOfLocalFileHeader + shift);
+        outfile.rawWrite(cd.toBytes);
+    }
+
+    // write the end-of-central-directory
+    // `eocd` is const (`in`), so a plain `auto` copy would be const as well;
+    // take a mutable copy. Only the copy's own offset field is modified,
+    // the comment bytes it refers to are never written to.
+    auto shiftEocd = cast(EndOfCentralDirectory) eocd;
+    shiftEocd.startOfCentralDirectory = to!uint(eocd.startOfCentralDirectory + shift);
+    outfile.rawWrite(shiftEocd.toBytes);
+
+    return outfile;
+}
+
+/// Append `value` to the output range `range` using little endian byte order.
+private void appends(T, R)(R range, immutable T value)
+{
+    append!(T, Endian.littleEndian, R)(range, value);
+}
+
+/// Create a byte array representing a Central Directory.
+///
+/// The bytes are returned as they would appear in a zip archive,
+/// i.e. using little endian representation.
+///
+/// Params:
+///   cd = the central directory
+/// Returns: byte array in little endian
+ubyte[] toBytes(in CentralDirectory cd)
+{
+    import std.datetime : SysTimeToDosFileTime;
+
+    // Start from an EMPTY array and only reserve capacity: an appender
+    // created over an existing array appends after its current elements, so
+    // pre-sizing the array would prefix the record with `cd.length` zeros.
+    ubyte[] bytes;
+    bytes.reserve(cd.length);
+    auto ap = appender(&bytes);
+    ap.appends(CD_SIGNATURE_UINT);
+    ap.appends(cd.versionMadeBy);
+    ap.appends(cd.versionRequired);
+    ap.appends(cd.generalPurposeBitFlag);
+    ap.appends(cd.compressionMethod);
+    const dosTime = SysTimeToDosFileTime(cd.lastModificationDateTime);
+    // time is on the lo bytes
+    ap.appends(cast(ushort)(dosTime & 0xFFFF));
+    // date is on the hi bytes
+    ap.appends(cast(ushort)((dosTime >> 16) & 0xFFFF));
+    ap.appends(cd.crc32);
+    ap.appends(cd.compressedSize);
+    ap.appends(cd.uncompressedSize);
+    ap.appends(cd.fileNameLength);
+    ap.appends(cd.extraFieldLength);
+    ap.appends(cd.commentLength);
+    ap.appends(cd.diskNumber);
+    ap.appends(cd.internalFileAttributes);
+    ap.appends(cd.externalFileAttributes);
+    ap.appends(cd.startOfLocalFileHeader);
+    // variable-length trailer: file name, extra field, comment
+    ap.put(cast(const(ubyte)[])(cd.fileName));
+    ap.put(cd.extraField);
+    ap.put(cd.comment);
+    // the appender was created from `&bytes`, so `bytes` holds the result
+    return bytes;
+}
+
+/// Create a byte array representing an End of Central Directory structure.
+///
+/// The bytes are returned as they would appear in a zip archive,
+/// i.e. using little endian representation.
+///
+/// Params:
+///   eocd = the end of central directory
+/// Returns: byte array in little endian
+ubyte[] toBytes(in EndOfCentralDirectory eocd)
+{
+    // Start from an EMPTY array and only reserve capacity: an appender
+    // created over an existing array appends after its current elements, so
+    // pre-sizing the array would prefix the record with `eocd.length` zeros.
+    ubyte[] bytes;
+    bytes.reserve(eocd.length);
+    auto ap = appender(&bytes);
+    ap.appends(EOCD_SIGNATURE_UINT);
+    ap.appends(eocd.diskNumber);
+    ap.appends(eocd.centralDirectoryDiskNumber);
+    ap.appends(eocd.diskCentralDirectoriesCount);
+    ap.appends(eocd.totalCentralDirectoriesCount);
+    ap.appends(eocd.centralDirectorySize);
+    ap.appends(eocd.startOfCentralDirectory);
+    ap.appends(eocd.commentLength);
+    ap.put(eocd.comment);
+    // the appender was created from `&bytes`, so `bytes` holds the result
+    return bytes;
+}
+
+/// Copy everything from the current position of `from` up to its end into
+/// `to`, writing at the current position of `to`.
+///
+/// Params:
+///   from = file to read from
+///   to = file to write to
+private void copyFile(scope ref File from, scope ref File to)
+{
+    ubyte[4096] buf;
+    for (;;)
+    {
+        // `rawRead` returns the slice of `buf` that was actually filled;
+        // an empty slice means the end of the file has been reached.
+        auto data = from.rawRead(buf[]);
+        if (data.length == 0)
+            break;
+        to.rawWrite(data);
+    }
+}
+
+version (unittest)
+{
+    import tested;
+    import dshould;
+
+    @name("can copy file contents")
+    unittest
+    {
+        scope (exit)
+            "temp__".remove;
+        scope (exit)
+            "temp__2".remove;
+
+        // The data must be typed as `ubyte[]`: `rawWrite` writes the memory
+        // of its argument, so an untyped `[1, 2, 3]` literal (an `int[]`)
+        // would produce 4 bytes per element.
+        ubyte[] firstPart = [1, 2, 3];
+        ubyte[] secondPart = [4, 5];
+        ubyte[] expected = [1, 2, 3, 4, 5];
+
+        // write some file
+        {
+            auto temp = File("temp__", "wb");
+
+            temp.rawWrite(firstPart);
+        }
+
+        // copy to other file then add more stuff to it
+        {
+            auto from = File("temp__", "rb");
+            auto to = File("temp__2", "wb");
+            from.copyFile(to);
+            to.rawWrite(secondPart);
+        }
+
+        auto res = File("temp__2");
+        // one byte larger than the expected content, to catch extra output
+        ubyte[6] buf;
+        auto bytes = res.rawRead(buf[]);
+        bytes.should.equal(expected);
+    }
 }