Skip to content

Commit

Permalink
Add Dispack file-level filter in the libarchive chain.
Browse files Browse the repository at this point in the history
Add new file type for Win32-PE executables (Dispack).
Reset file type flag after filter processing for better compression.
Fix array index handling for file type list.
  • Loading branch information
moinakg committed Dec 20, 2014
1 parent 9a5361f commit 1db822d
Show file tree
Hide file tree
Showing 11 changed files with 456 additions and 15 deletions.
4 changes: 2 additions & 2 deletions Makefile.in
Expand Up @@ -156,7 +156,7 @@ PPNMOBJS = $(PPNMSRCS:.cpp=.o)
WAVPKSRCS = archive/wavpack_helper.c
WAVPKOBJS = $(WAVPKSRCS:.c=.o)

DISPACKSRCS = filters/dispack/dis.cpp
DISPACKSRCS = filters/dispack/dis.cpp archive/dispack_helper.cpp
DISPACKHDRS = filters/dispack/dis.hpp filters/dispack/types.hpp
DISPACKOBJS = $(DISPACKSRCS:.cpp=.o)

Expand Down Expand Up @@ -235,7 +235,7 @@ BASE_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT_P
-I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ -I./rabin/global \
-I./crypto/keccak -I./filters/transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) \
-I./crypto/xsalsa20 -I./archive -pedantic -Wall -I./filters -fno-strict-aliasing \
-Wno-unused-but-set-variable -Wno-enum-compare -I./filters/analyzer \
-Wno-unused-but-set-variable -Wno-enum-compare -I./filters/analyzer -I./filters/dispack \
@COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ -I@LIBARCHIVE_DIR@/libarchive -I./filters/packjpg \
-I./filters/packpnm @ENABLE_WAVPACK@
COMMON_CPPFLAGS = $(BASE_CPPFLAGS) -std=gnu99
Expand Down
151 changes: 151 additions & 0 deletions archive/dispack_helper.cpp
@@ -0,0 +1,151 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2014 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/

#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

#include "utils.h"
#include "winsupport.h"
#include "types.hpp"
#include "dis.hpp"

#ifdef __cplusplus
extern "C" {
#endif

typedef unsigned char uchar_t;

// Fixed-size header stored at the very start of a Dispack-filtered buffer.
// The encoder in this file lays the stream out as:
//   [FileHeader][transformed code][bytes before code][bytes after code]
// The struct is byte-packed and is copied to/from the buffer as raw memory
// (memcpy on encode, pointer cast on decode), so the fields are stored in
// the byte order of the machine that wrote them.
#pragma pack(1)
struct FileHeader
{
sU32 SizeBefore; // number of bytes before start of code section
sU32 SizeAfter; // number of bytes after code section
sU32 SizeTransformed; // size of transformed code section
sU32 SizeOriginal; // size of untransformed code section
sU32 Origin; // virtual address of first byte
};
#pragma pack()

// Apply the Dispack (x86 disassembling) transform to the code section of a
// 32-bit x86 PE image.
//
// inData   - complete file image.
// len      - number of valid bytes at inData.
// out_buf  - receives a malloc()ed buffer of len bytes holding
//            [FileHeader][transformed code][bytes before code][bytes after code],
//            or NULL when the input was rejected before allocation.
//            The caller owns the buffer and must free() *out_buf on every
//            return path, including failure (free(NULL) is harmless).
//
// Returns the number of bytes written to *out_buf, or 0 when the file is not
// a supported PE image, is malformed, the transform fails, or the filtered
// stream would not fit into len bytes.
size_t
dispack_filter_encode(uchar_t *inData, size_t len, uchar_t **out_buf)
{
	uchar_t *pos;
	FileHeader hdr;

	*out_buf = NULL;

	// All sizes in FileHeader are 32-bit; refuse anything that cannot be
	// represented instead of silently truncating.
	if (len > (size_t)0xFFFFFFFFu)
		return (0);

	// The DOS header and a complete NT header must lie inside the buffer
	// before any of their fields may be read.
	if (len < sizeof (IMAGE_DOS_HEADER) || len < sizeof (IMAGE_NT_HEADERS))
		return (0);

	IMAGE_DOS_HEADER *doshdr = (IMAGE_DOS_HEADER *) inData;
	// Going through an unsigned 32-bit value turns a negative e_lfanew
	// into a huge offset that fails the range check below.
	size_t ntOffs = (size_t)(sU32)doshdr->e_lfanew;
	if (ntOffs > len - sizeof (IMAGE_NT_HEADERS))
		return (0);
	IMAGE_NT_HEADERS *nthdr = (IMAGE_NT_HEADERS *) (inData + ntOffs);

	if (nthdr->FileHeader.Machine != IMAGE_FILE_MACHINE_I386 ||
	    nthdr->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
		// Only 32-bit PE files for x86 supported
		return (0);
	}

	sU32 imageBase = nthdr->OptionalHeader.ImageBase;
	sU32 codeStart = nthdr->OptionalHeader.BaseOfCode;
	sU32 codeSize = nthdr->OptionalHeader.SizeOfCode;
	sU32 fileOffs = 0; // file offset of the first code byte, 0 = not found

	// The whole section table must be inside the buffer as well.
	IMAGE_SECTION_HEADER *sec = IMAGE_FIRST_SECTION(nthdr);
	size_t secOffs = (size_t)((uchar_t *)sec - inData);
	size_t numSec = nthdr->FileHeader.NumberOfSections;
	if (secOffs > len ||
	    numSec > (len - secOffs) / sizeof (IMAGE_SECTION_HEADER))
		return (0);

	// Find the section containing the code. As before, when several
	// sections match the last one wins.
	for (size_t i = 0; i < numSec; i++) {
		sU32 va = sec[i].VirtualAddress;
		if (codeStart < va)
			continue;

		// Compare offsets rather than end addresses so that
		// VirtualAddress + SizeOfRawData cannot wrap around.
		sU32 delta = codeStart - va;
		if (delta >= sec[i].SizeOfRawData)
			continue;
		if (delta > 0xFFFFFFFFu - sec[i].PointerToRawData)
			continue;
		fileOffs = sec[i].PointerToRawData + delta;
	}

	if (fileOffs == 0) {
		// Code section not found!
		return (0);
	}

	// The code range must lie completely inside the file. Without this
	// the "bytes after code" size computed below would underflow.
	if (fileOffs > len || codeSize > len - fileOffs)
		return (0);

	*out_buf = (uchar_t *)malloc(len);
	if (*out_buf == NULL)
		return (0);

	// Keep space for header
	pos = *out_buf + sizeof (hdr);

	// Transform code. transSize carries the space available behind the
	// header into DisFilter and is used as the transformed size afterwards.
	sU32 transSize = (sU32)(len - sizeof (hdr));
	if (DisFilter(inData + fileOffs, codeSize, imageBase + codeStart, pos, transSize) == NULL)
		return (0);

	// The untransformed bytes still have to be appended behind the
	// transformed code. Make sure everything fits into the len-byte
	// output buffer before copying.
	size_t room = len - sizeof (hdr);
	size_t rest = len - codeSize; // bytes before + bytes after the code
	if (transSize > room || rest > room - transSize)
		return (0);
	pos += transSize;

	// Now plonk the header
	hdr.SizeBefore = fileOffs;
	hdr.SizeAfter = (sU32)(len - fileOffs - codeSize);
	hdr.SizeTransformed = transSize;
	hdr.SizeOriginal = codeSize;
	hdr.Origin = imageBase + codeStart;
	memcpy(*out_buf, &hdr, sizeof (hdr));

	// Copy rest of the data
	memcpy(pos, inData, hdr.SizeBefore);
	pos += hdr.SizeBefore;
	memcpy(pos, inData + (fileOffs + codeSize), hdr.SizeAfter);
	pos += hdr.SizeAfter;

	return ((size_t)(pos - *out_buf));
}

// Reverse dispack_filter_encode(): rebuild the original file image from a
// stream laid out as
//   [FileHeader][transformed code][bytes before code][bytes after code].
//
// inData   - filtered stream, starting with the FileHeader.
// len      - size bound supplied by the caller. The header and the three
//            data areas it describes must fit into len bytes.
// out_buf  - receives a malloc()ed buffer with the reconstructed image, or
//            NULL when the header was rejected or allocation failed. The
//            caller owns the buffer and must free() *out_buf on every return
//            path, including failure (free(NULL) is harmless).
//
// Returns the number of bytes reconstructed, or 0 on a malformed header,
// allocation failure or a failed inverse transform.
size_t
dispack_filter_decode(uchar_t *inData, size_t len, uchar_t **out_buf)
{
	uchar_t *decoded;

	*out_buf = NULL;
	if (len < sizeof (FileHeader))
		return (0);

	FileHeader *hdr = (FileHeader *)inData;

	// The header comes from the archive and must not be trusted: every
	// area it describes has to fit into the input bound. Subtracting step
	// by step keeps the arithmetic free of overflow.
	size_t avail = len - sizeof (FileHeader);
	if (hdr->SizeTransformed > avail)
		return (0);
	avail -= hdr->SizeTransformed;
	if (hdr->SizeBefore > avail)
		return (0);
	avail -= hdr->SizeBefore;
	if (hdr->SizeAfter > avail)
		return (0);

	// Bytes that will be written to the output buffer.
	size_t copySize = (size_t)hdr->SizeBefore + hdr->SizeAfter;
	if (hdr->SizeOriginal > (size_t)-1 - copySize)
		return (0);
	size_t outSize = copySize + hdr->SizeOriginal;

	sU8 *transformed = inData + sizeof (FileHeader);
	sU8 *before = transformed + hdr->SizeTransformed;
	sU8 *after = before + hdr->SizeBefore;

	// Alloc buffer for unfiltered code. It is sized from what is actually
	// written below and is never smaller than the len bytes that were
	// allocated previously.
	*out_buf = (uchar_t *)malloc(outSize > len ? outSize : len);
	if (*out_buf == NULL)
		return (0);

	decoded = *out_buf;
	memcpy(decoded, before, hdr->SizeBefore);
	decoded += hdr->SizeBefore;

	if (!DisUnFilter(transformed, hdr->SizeTransformed, decoded,
	    hdr->SizeOriginal, hdr->Origin)) {
		return (0);
	}
	decoded += hdr->SizeOriginal;
	memcpy(decoded, after, hdr->SizeAfter);
	decoded += hdr->SizeAfter;

	return ((size_t)(decoded - *out_buf));
}

#ifdef __cplusplus
}
#endif
118 changes: 117 additions & 1 deletion archive/pc_arc_filter.c
Expand Up @@ -61,6 +61,10 @@ extern size_t wavpack_filter_decode(uchar_t *in_buf, size_t len, uchar_t **out_b
ssize_t wavpack_filter(struct filter_info *fi, void *filter_private);
#endif

/*
 * Dispack (x86 executable) filter.
 * The encode/decode helpers are implemented in archive/dispack_helper.cpp
 * and exported with C linkage. Both return the number of bytes placed in
 * *out_buf, or 0 on failure. dispack_filter() is the filter entry point
 * defined later in this file.
 */
size_t dispack_filter_encode(uchar_t *inData, size_t len, uchar_t **out_buf);
size_t dispack_filter_decode(uchar_t *inData, size_t len, uchar_t **out_buf);
ssize_t dispack_filter(struct filter_info *fi, void *filter_private);

void
add_filters_by_type(struct type_data *typetab, struct filter_flags *ff)
{
Expand Down Expand Up @@ -90,6 +94,18 @@ add_filters_by_type(struct type_data *typetab, struct filter_flags *ff)
}
#endif

if (ff->exe_preprocess) {
if (!sdat) {
sdat = (struct scratch_buffer *)malloc(sizeof (struct scratch_buffer));
sdat->in_buff = NULL;
sdat->in_bufflen = 0;
}
slot = TYPE_EXE32_PE >> 3;
typetab[slot].filter_private = sdat;
typetab[slot].filter_func = dispack_filter;
typetab[slot].filter_name = "Dispack";
}

#ifdef _ENABLE_WAVPACK_
if (ff->enable_wavpack) {
if (!sdat) {
Expand All @@ -111,7 +127,7 @@ type_tag_from_filter_name(struct type_data *typetab, const char *fname, size_t l
{
size_t i;

for (i = 0; i < NUM_SUB_TYPES; i++)
for (i = 0; i <= NUM_SUB_TYPES; i++)
{
if (typetab[i].filter_name &&
strncmp(fname, typetab[i].filter_name, len) == 0)
Expand Down Expand Up @@ -507,3 +523,103 @@ wavpack_filter(struct filter_info *fi, void *filter_private)
}
#endif /* _ENABLE_WAVPACK_ */

/*
 * Filter entry point for 32-bit PE executables (Dispack transform).
 *
 * fi             - filter context: archive entry, direction flag, source
 *                  file descriptor (compression) or source archive
 *                  (extraction) and the output descriptor fi->fout.
 * filter_private - struct scratch_buffer used as input buffer while
 *                  extracting.
 *
 * Returns:
 *   ARCHIVE_OK               - fi->fout describes the filter output.
 *   FILTER_RETURN_SKIP       - entry is too large/small, not filterable, or
 *                              filtering does not save space; store it as is.
 *   FILTER_RETURN_SOFT_ERROR - extraction only: the inverse transform failed
 *                              and fi->fout holds the raw entry data instead.
 *   FILTER_RETURN_ERROR      - mmap, memory or archive read failure.
 */
ssize_t
dispack_filter(struct filter_info *fi, void *filter_private)
{
	struct scratch_buffer *sdat = (struct scratch_buffer *)filter_private;
	uchar_t *mapbuf, *out;
	uint64_t len, in_size = 0, len1;

	len = archive_entry_size(fi->entry);
	len1 = len;
	if (len > WVPK_FILE_SIZE_LIMIT) // Bork on massive files
		return (FILTER_RETURN_SKIP);

	/*
	 * Compression case.
	 */
	if (fi->compressing) {
		/*
		 * The filtered output must be smaller than the entry by more
		 * than the 8-byte size prefix, so tiny files can never
		 * qualify. Rejecting them here also avoids mmap() of a
		 * zero-length file.
		 */
		if (len1 <= 8)
			return (FILTER_RETURN_SKIP);

		/* mmap() reports failure with MAP_FAILED, not NULL. */
		mapbuf = mmap(NULL, len1, PROT_READ, MAP_SHARED, fi->fd, 0);
		if (mapbuf == MAP_FAILED) {
			log_msg(LOG_ERR, 1, "Mmap failed in Dispack filter.");
			return (FILTER_RETURN_ERROR);
		}

		/*
		 * No check for supported 32-bit exe here. EXE types are always
		 * detected by file header analysis. So no need to duplicate here.
		 */
		out = NULL;
		len = dispack_filter_encode(mapbuf, len1, &out);
		munmap(mapbuf, len1);

		/* Same test as len >= len1 - 8, written so it cannot wrap. */
		if (len == 0 || len + 8 >= len1) {
			free(out);
			return (FILTER_RETURN_SKIP);
		}

		fi->fout->output_type = FILTER_OUTPUT_MEM;
		fi->fout->out = out;
		fi->fout->out_size = len;
		fi->fout->hdr.in_size = LE64(len1);
		return (ARCHIVE_OK);
	}

	/*
	 * Decompression case.
	 *
	 * Allocate input buffer and read archive data stream for the entry
	 * into this buffer.
	 */
	ensure_buffer(sdat, len1);
	if (sdat->in_buff == NULL) {
		log_msg(LOG_ERR, 1, "Out of memory.");
		return (FILTER_RETURN_ERROR);
	}

	in_size = copy_archive_data(fi->source_arc, sdat->in_buff);
	if (in_size != len1) {
		log_msg(LOG_ERR, 0, "Failed to read archive data.");
		return (FILTER_RETURN_ERROR);
	}

	/* The 8-byte size prefix read below must actually be present. */
	if (len1 < 8) {
		log_msg(LOG_ERR, 0, "Dispack filter: archive entry too small.");
		return (FILTER_RETURN_ERROR);
	}

	/*
	 * The first 8 bytes of the entry data hold a little-endian size
	 * field written at compression time (fi->fout->hdr.in_size).
	 * LibArchive always zero-pads entries to their original size so
	 * the size has to be stored separately.
	 */
	in_size = LE64(U64_P(sdat->in_buff));
	mapbuf = sdat->in_buff + 8;

	/*
	 * No check for supported EXE types needed here since supported
	 * and filtered files are tagged in the archive using xattrs during
	 * compression.
	 */
	out = NULL;
	len = dispack_filter_decode(mapbuf, in_size, &out);
	if (len == 0) {
		/*
		 * If filter failed we indicate a soft error to continue the
		 * archive extraction and hand back the entry data exactly as
		 * it was read. The entry size (len1) must be used here: len
		 * is 0 on this path.
		 */
		free(out);
		out = malloc(len1);
		if (out == NULL) {
			log_msg(LOG_ERR, 1, "Out of memory.");
			return (FILTER_RETURN_ERROR);
		}
		memcpy(out, sdat->in_buff, len1);

		fi->fout->output_type = FILTER_OUTPUT_MEM;
		fi->fout->out = out;
		fi->fout->out_size = len1;
		return (FILTER_RETURN_SOFT_ERROR);
	}

	fi->fout->output_type = FILTER_OUTPUT_MEM;
	fi->fout->out = out;
	fi->fout->out_size = len;
	return (ARCHIVE_OK);
}

1 change: 1 addition & 0 deletions archive/pc_arc_filter.h
Expand Up @@ -90,6 +90,7 @@ struct filter_info {
/*
 * Switches selecting which file-level filters add_filters_by_type()
 * installs into the type table. Non-zero enables the filter.
 */
struct filter_flags {
int enable_packjpg; /* presumably enables the PackJPG filter - confirm against add_filters_by_type() */
int enable_wavpack; /* checked by add_filters_by_type() when built with _ENABLE_WAVPACK_ */
int exe_preprocess; /* registers the Dispack filter for TYPE_EXE32_PE entries */
};

typedef ssize_t (*filter_func_ptr)(struct filter_info *fi, void *filter_private);
Expand Down
6 changes: 4 additions & 2 deletions archive/pc_archive.c
Expand Up @@ -63,7 +63,7 @@ static struct ext_hash_entry {
int type;
} *exthtab = NULL;

static struct type_data typetab[NUM_SUB_TYPES];
static struct type_data typetab[NUM_SUB_TYPES+1];

/*
AE_IFREG Regular file
Expand Down Expand Up @@ -1100,6 +1100,7 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
&fout, 1, pctx->level);
if (rv != FILTER_RETURN_SKIP &&
rv != FILTER_RETURN_ERROR) {
pctx->ctype = TYPE_UNKNOWN; // Force analyzer on filter output
if (fout.output_type == FILTER_OUTPUT_MEM) {
archive_entry_xattr_add_entry(entry, FILTER_XATTR_ENTRY,
fname, strlen(fname));
Expand Down Expand Up @@ -1176,6 +1177,7 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
&fout, 1, pctx->level);
if (rv != FILTER_RETURN_SKIP &&
rv != FILTER_RETURN_ERROR) {
pctx->ctype = TYPE_UNKNOWN; // Force analyzer on filter output
if (fout.output_type == FILTER_OUTPUT_MEM) {
archive_entry_xattr_add_entry(entry,
FILTER_XATTR_ENTRY,
Expand Down Expand Up @@ -1997,7 +1999,7 @@ detect_type_by_data(uchar_t *buf, size_t len)
if (id == 0x8664)
return (TYPE_BINARY|TYPE_EXE64);
else
return (TYPE_BINARY|TYPE_EXE32);
return (TYPE_BINARY|TYPE_EXE32_PE);
} else {
return (TYPE_BINARY);
}
Expand Down

0 comments on commit 1db822d

Please sign in to comment.