Skip to content

Commit

Permalink
Implementing file prepending on zip archive.
Browse files Browse the repository at this point in the history
Implementation is probably complete but must be tested and improved.
  • Loading branch information
renatoathaydes committed Jan 20, 2024
1 parent 89d47b2 commit 54d1f90
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 48 deletions.
72 changes: 41 additions & 31 deletions source/cli/dzipper/main.d
Expand Up @@ -42,45 +42,55 @@ private int run(in Opts opts)
verbose = opts.verbose,
zipFile = opts.zipFile,
prependFile = opts.prependFile;
File tempFile;

auto mfile = new MmFile(zipFile);
writefln("file length: %d", mfile.length);
if (mfile.length < 22)
// start memory-mapped zip file scope
{
stderr.cwriteln("<yellow>Not a zip file (too short).</yellow>");
return 1;
}
auto eocd_index = findEocdIn(mfile);
if (eocd_index.isNull)
{
stderr.cwriteln("<yellow>Unable to locate zip metadata (EOCD).</yellow>");
return 2;
}
if (verbose)
{
writeln("Found EOCD at offset ", eocd_index, ".");
}
auto eocd = parseEocd(cast(ubyte[]) mfile[eocd_index.get .. $]);
auto mfile = new MmFile(zipFile);
writefln("file length: %d", mfile.length);
if (mfile.length < 22)
{
stderr.cwriteln("<yellow>Not a zip file (too short).</yellow>");
return 1;
}
auto eocd_index = findEocdIn(mfile);
if (eocd_index.isNull)
{
stderr.cwriteln("<yellow>Unable to locate zip metadata (EOCD).</yellow>");
return 2;
}
if (verbose)
{
writeln("Found EOCD at offset ", eocd_index, ".");
}
auto eocd = parseEocd(cast(ubyte[]) mfile[eocd_index.get .. $]);

if (verbose)
{
writeln(eocd);
}
if (verbose)
{
writeln(eocd);
}

cwriteln("<green>File appears to be a zip file.</green>");
cwriteln("<green>File appears to be a zip file.</green>");

if (eocd.totalCentralDirectoriesCount == 0)
{
cwriteln("<yellow>Warning: empty zip file.</yellow>");
}
if (eocd.totalCentralDirectoriesCount == 0)
{
cwriteln("<yellow>Warning: empty zip file.</yellow>");
}

if (prependFile.length == 0)
{
mfile.printArchiveMetadata(eocd, verbose);
if (prependFile.length == 0)
{
mfile.printArchiveMetadata(eocd, verbose);
}
else
{
tempFile = mfile.prependFileToArchive(prependFile, eocd, verbose);
}
}
else

// the memory file has been closed now, so we can move the tempFile into the zip archive.
if (tempFile.isOpen)
{
mfile.prependFileToArchive(prependFile, zipFile, eocd, verbose);
tempFile.name = zipFile;
}

return 0;
Expand Down
22 changes: 20 additions & 2 deletions source/lib/dzipper/model.d
Expand Up @@ -5,8 +5,22 @@ import std.format : FormatSpec;
import std.array : appender;
import std.conv : to;
import std.range : put;
import std.bitmanip : nativeToLittleEndian;
import std.datetime.systime : SysTime;

/** The End of Central Directory Signature, as a 32-bit unsigned integer. */
const(uint) EOCD_SIGNATURE_UINT = 0x06054b50;
/** The Central Directory Signature, as a 32-bit unsigned integer. */
const(uint) CD_SIGNATURE_UINT = 0x02014b50;
/** The Local File header Signature, as a 32-bit unsigned integer. */
const(uint) LOCAL_FILE_SIGNATURE_UINT = 0x04034b50;

/** The End of Central Directory Signature (little-endian bytes of `EOCD_SIGNATURE_UINT`). */
immutable(ubyte[]) EOCD_SIGNATURE = nativeToLittleEndian(EOCD_SIGNATURE_UINT)[0 .. $];

/** The Central Directory Signature (little-endian bytes of `CD_SIGNATURE_UINT`). */
immutable(ubyte[]) CD_SIGNATURE = nativeToLittleEndian(CD_SIGNATURE_UINT)[0 .. $];

/** The Local File header Signature (little-endian bytes of `LOCAL_FILE_SIGNATURE_UINT`). */
immutable(ubyte[]) LOCAL_FILE_SIGNATURE = nativeToLittleEndian(LOCAL_FILE_SIGNATURE_UINT)[0 .. $];

private mixin template StructToString(S)
{
void toString(scope void delegate(const(char)[]) sink,
Expand Down Expand Up @@ -71,6 +85,10 @@ struct EndOfCentralDirectory
ushort commentLength;
ubyte[] comment;
mixin StructToString!EndOfCentralDirectory;

/// The length of the End of Central Directory record in bytes:
/// 22 bytes plus the length of the comment.
size_t length() const {
return 22 + comment.length;
}
}

private mixin template FileInformation()
Expand Down Expand Up @@ -103,7 +121,7 @@ struct CentralDirectory

/// The length of the CD in bytes (notice that the struct
/// does not include the CD signature).
size_t length()
size_t length() const
{
return 46 + fileName.length + extraField.length + comment.length;
}
Expand All @@ -117,7 +135,7 @@ struct LocalFileHeader

/// The length of the header in bytes (notice that the struct
/// does not include the local file header signature).
size_t length()
size_t length() const
{
return 30 + fileName.length + extraField.length;
}
Expand Down
1 change: 1 addition & 0 deletions source/lib/dzipper/package.d
Expand Up @@ -2,3 +2,4 @@ module dzipper;

public import dzipper.model;
public import dzipper.parser;
public import dzipper.process;
18 changes: 8 additions & 10 deletions source/lib/dzipper/parser.d
Expand Up @@ -3,7 +3,7 @@ module dzipper.parser;
import std.typecons : Nullable;
import std.algorithm.searching : find;
import std.exception : enforce, basicExceptionCtors;
import std.bitmanip : nativeToLittleEndian, littleEndianToNative, peek, Endian;
import std.bitmanip : littleEndianToNative, peek, Endian;
import std.range : retro, tail, take, slide;
import std.conv : to;
import std.string : assumeUTF;
Expand All @@ -12,15 +12,6 @@ import std.datetime.systime : DosFileTimeToSysTime, SysTime;
import dzipper.model;
import std.string;

/** The End of Central Directory Signature. */
immutable(ubyte[]) EOCD_SIGNATURE = nativeToLittleEndian!uint(0x06054b50)[0 .. $];

/** The Central Directory Signature. */
immutable(ubyte[]) CD_SIGNATURE = nativeToLittleEndian!uint(0x02014b50)[0 .. $];

/** The Local File header Signature. */
immutable(ubyte[]) LOCAL_FILE_SIGNATURE = nativeToLittleEndian!uint(0x04034b50)[0 .. $];

/// Reason why a Zip Archive's metadata couldn't be parsed.
enum ZipParseError
{
Expand Down Expand Up @@ -209,6 +200,13 @@ LocalFileHeader parseLocalFileHeader(in ubyte[] bytes) @safe
// the EOCD can only appear in the last 65535 + 22 bytes (largest possible comment + fixed EOCD size)
private enum maxEocdLen = 65_535 + 22;

/**
* Find the End of Central Directory (EOCD).
*
* Params:
* source = a source of bytes (must support slice operator, e.g. MmFile).
* Returns: index of the End of Central Directory if it can be found, null otherwise.
*/
Nullable!size_t findEocdIn(S, size_t windowLen = 56)(ref S source)
{
auto bytes = cast(ubyte[])(source.length > maxEocdLen ? source[$ - maxEocdLen .. $] : source[]);
Expand Down
188 changes: 183 additions & 5 deletions source/lib/dzipper/process.d
@@ -1,6 +1,13 @@
module dzipper.process;

import std.stdio : File, writeln;
import std.range : iota;
import std.file : remove;
import std.bitmanip : append, Endian;
import std.array : appender;
import std.typecons : Nullable;
import std.conv : to;
import std.algorithm.comparison : min;

import dzipper.model, dzipper.parser;

Expand All @@ -14,8 +21,6 @@ void printArchiveMetadata(B)(ref B bytes, in EndOfCentralDirectory eocd, bool ve
private void checkCentralDirectories(B)(ref B bytes,
in EndOfCentralDirectory eocd, bool verbose)
{
import std.range : iota;

uint offset = eocd.startOfCentralDirectory;
foreach (i; iota(0, eocd.diskCentralDirectoriesCount))
{
Expand All @@ -33,9 +38,182 @@ private void checkCentralDirectories(B)(ref B bytes,
}
}

void prependFileToArchive(B)(ref B bytes, string prependFile, string zipFile, in EndOfCentralDirectory eocd, bool verbose)
/// Prepend the contents of a file into the zip archive.
///
/// The output is assembled in a temp file, in this order: the contents of
/// `prependFile`, every entry's local file header followed by its (compressed) data,
/// the central directory records and finally the end-of-central-directory record.
/// Entries are written back-to-back in central directory order, and the position at
/// which each one actually lands in the output is recorded, so the offsets stored in
/// the central directory and in the EOCD always match the output file, regardless of
/// how the entries were laid out in the original archive.
///
/// NOTE(review): only the local file header and the compressed data of each entry are
/// copied. Anything stored between entries in the source archive (for example trailing
/// data descriptors) is not carried over - confirm whether such archives must be supported.
///
/// Params:
///   bytes = zip archive source of bytes
///   prependFile = file to prepend
///   eocd = end of central directory structure
///   verbose = whether to log verbose output
/// Returns: the temp file the output is written to.
/// Throws: `ConvOverflowException` if an offset in the output does not fit in 32 bits.
File prependFileToArchive(B)(ref B bytes, string prependFile, in EndOfCentralDirectory eocd, bool verbose)
{
    auto outfile = File.tmpfile;

    // `copyFile` takes both files by `ref`, so the source must be a named lvalue.
    auto prefix = File(prependFile);
    prefix.copyFile(outfile);

    // Position in the output file of each entry's local file header,
    // indexed by the entry's position in the central directory.
    auto newHeaderOffsets = new uint[](eocd.diskCentralDirectoriesCount);
    uint offset = eocd.startOfCentralDirectory;

    // first, write all local file headers and the file contents
    foreach (i; iota(0, eocd.diskCentralDirectoriesCount))
    {
        auto cd = parseCd(cast(ubyte[]) bytes[offset .. $]);
        auto lfh = parseLocalFileHeader(cast(ubyte[]) bytes[cd.startOfLocalFileHeader .. $]);

        if (verbose)
            writeln("Adding entry: ", lfh.fileName);

        newHeaderOffsets[i] = to!uint(outfile.tell());

        const size_t lfhEnd = cd.startOfLocalFileHeader + lfh.length;
        outfile.rawWrite(cast(ubyte[]) bytes[cd.startOfLocalFileHeader .. lfhEnd]);
        // The archive stores `compressedSize` bytes per entry (not the uncompressed size).
        // The central directory value is used because it is always populated.
        outfile.rawWrite(cast(ubyte[]) bytes[lfhEnd .. lfhEnd + cd.compressedSize]);
        offset += cd.length;
    }

    // The central directory starts wherever the last entry ended.
    const uint newCdStart = to!uint(outfile.tell());

    if (verbose)
        writeln("Writing central directory at offset ", newCdStart);

    // now, write all central directories
    offset = eocd.startOfCentralDirectory;
    foreach (i; iota(0, eocd.diskCentralDirectoriesCount))
    {
        auto cd = parseCd(cast(ubyte[]) bytes[offset .. $]);
        offset += cd.length;
        cd.startOfLocalFileHeader = newHeaderOffsets[i];
        outfile.rawWrite(cd.toBytes);
    }

    // write the end-of-central-directory.
    // `eocd` is const (`in`), so a mutable copy is built field by field.
    EndOfCentralDirectory shiftedEocd;
    shiftedEocd.diskNumber = eocd.diskNumber;
    shiftedEocd.centralDirectoryDiskNumber = eocd.centralDirectoryDiskNumber;
    shiftedEocd.diskCentralDirectoriesCount = eocd.diskCentralDirectoriesCount;
    shiftedEocd.totalCentralDirectoriesCount = eocd.totalCentralDirectoriesCount;
    shiftedEocd.centralDirectorySize = eocd.centralDirectorySize;
    shiftedEocd.startOfCentralDirectory = newCdStart;
    shiftedEocd.commentLength = eocd.commentLength;
    shiftedEocd.comment = eocd.comment.dup;
    outfile.rawWrite(shiftedEocd.toBytes);

    return outfile;
}

/// Append `value` to the output range `range` using little endian byte order.
///
/// Thin wrapper around `std.bitmanip.append` that fixes the endianness.
private void appends(T, R)(R range, immutable T value)
{
    alias writeLittleEndian = append!(T, Endian.littleEndian, R);
    writeLittleEndian(range, value);
}

/// Create a byte array representing a Central Directory.
///
/// The bytes are returned as they would appear in a zip archive,
/// i.e. using little endian representation, starting with the
/// Central Directory signature.
///
/// Params:
///   cd = the central directory
/// Returns: byte array in little endian, `cd.length` bytes long
ubyte[] toBytes(in CentralDirectory cd)
{
    import std.datetime : SysTimeToDosFileTime;

    // Start from an EMPTY array: an appender created from an existing array
    // appends after its current contents, so pre-sizing the array with
    // `new ubyte[cd.length]` would prefix the output with `cd.length` zero bytes.
    ubyte[] bytes;
    bytes.reserve(cd.length);
    auto ap = appender(&bytes);
    ap.appends(CD_SIGNATURE_UINT);
    ap.appends(cd.versionMadeBy);
    ap.appends(cd.versionRequired);
    ap.appends(cd.generalPurposeBitFlag);
    ap.appends(cd.compressionMethod);
    const dosTime = SysTimeToDosFileTime(cd.lastModificationDateTime);
    // time is on the lo bytes
    ap.appends(cast(ushort)(dosTime & 0xFFFF));
    // date is on the hi bytes
    ap.appends(cast(ushort)((dosTime >> 16) & 0xFFFF));
    ap.appends(cd.crc32);
    ap.appends(cd.compressedSize);
    ap.appends(cd.uncompressedSize);
    ap.appends(cd.fileNameLength);
    ap.appends(cd.extraFieldLength);
    ap.appends(cd.commentLength);
    ap.appends(cd.diskNumber);
    ap.appends(cd.internalFileAttributes);
    ap.appends(cd.externalFileAttributes);
    ap.appends(cd.startOfLocalFileHeader);
    // variable-length fields follow the fixed-size part of the record
    ap.put(cast(const(ubyte)[])(cd.fileName));
    ap.put(cd.extraField);
    ap.put(cd.comment);
    return bytes;
}

/// Create a byte array representing an End of Central Directory structure.
///
/// The bytes are returned as they would appear in a zip archive,
/// i.e. using little endian representation, starting with the
/// End of Central Directory signature.
///
/// Params:
///   eocd = the end of central directory
/// Returns: byte array in little endian, `eocd.length` bytes long
ubyte[] toBytes(in EndOfCentralDirectory eocd)
{
    // Start from an EMPTY array: an appender created from an existing array
    // appends after its current contents, so pre-sizing the array with
    // `new ubyte[eocd.length]` would prefix the output with `eocd.length` zero bytes.
    ubyte[] bytes;
    bytes.reserve(eocd.length);
    auto ap = appender(&bytes);
    ap.appends(EOCD_SIGNATURE_UINT);
    ap.appends(eocd.diskNumber);
    ap.appends(eocd.centralDirectoryDiskNumber);
    ap.appends(eocd.diskCentralDirectoriesCount);
    ap.appends(eocd.totalCentralDirectoriesCount);
    ap.appends(eocd.centralDirectorySize);
    ap.appends(eocd.startOfCentralDirectory);
    ap.appends(eocd.commentLength);
    // the variable-length comment follows the fixed-size part of the record
    ap.put(eocd.comment);
    return bytes;
}

/// Copy everything that can still be read from `from` into `to`,
/// starting at the current position of both files.
///
/// The data is transferred in chunks of up to 4096 bytes. Neither file is closed.
///
/// Params:
///   from = file to read from
///   to = file to write to
private void copyFile(scope ref File from, scope ref File to)
{
    ubyte[4096] chunkBuffer;
    for (;;)
    {
        auto chunk = from.rawRead(chunkBuffer[]);
        to.rawWrite(chunk);
        // an empty read means the end of the input was reached
        if (chunk.length == 0)
            break;
    }
}

version (unittest)
{
    import tested;
    import dshould;

    /// Checks that `copyFile` copies a file's bytes and leaves the target
    /// positioned so that further writes are appended after the copied data.
    @name("can copy file contents")
    unittest
    {
        scope (exit)
            "temp__".remove;
        scope (exit)
            "temp__2".remove;

        // write some file
        {
            auto temp = File("temp__", "wb");

            // The data must be typed as ubyte[]: rawWrite emits T.sizeof bytes per
            // element, so a bare `[1, 2, 3]` (an int[]) would write 12 bytes, not 3.
            ubyte[] firstPart = [1, 2, 3];
            temp.rawWrite(firstPart);
        }

        // copy to other file then add more stuff to it
        {
            auto from = File("temp__", "rb");
            auto to = File("temp__2", "wb");
            from.copyFile(to);
            ubyte[] secondPart = [4, 5];
            to.rawWrite(secondPart);
        }

        auto res = File("temp__2");
        // one byte larger than the expected content, so extra data would be detected
        ubyte[6] buf;
        auto bytes = res.rawRead(buf);
        bytes.should.equal([1, 2, 3, 4, 5]);
    }
}

0 comments on commit 54d1f90

Please sign in to comment.