Skip to content

Commit

Permalink
gunzip.c & gunzip.py: implement support for uncompressed blocks
Browse files Browse the repository at this point in the history
Mostly a boring distraction, but needed to decompress larger gz
files (which is interesting for benchmarking).
  • Loading branch information
nico committed Jan 16, 2018
1 parent 57c801c commit 2c14ef5
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 2 deletions.
42 changes: 41 additions & 1 deletion cab/gunzip.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,20 @@ static int bitstream_getbits(struct Bitstream* bs, int n) {
return bits;
}

// Parses the LEN/NLEN header of a stored (uncompressed) deflate block and
// moves the bitstream past the block's payload.
//
// The header is read starting at byte offset bs->curword, plus one byte when
// bs->curbit > 0 and a second byte when bs->curbit > 8 (curbit presumably
// counts the bits already consumed from the current 16-bit word, so this
// rounds up to the next byte boundary).
//
// Outputs:
//   *size   LEN, the number of payload bytes.
//   *nsize  NLEN exactly as stored; it is NOT validated here. Callers must
//           check that it is the one's complement of *size.
//   *data   Points at the payload inside bs->source (no copy is made).
//
// If the header or the payload would run past bs->source_len, the bitstream
// is left untouched, *size and *nsize are both set to 0 and *data to
// bs->source. 0 is never the one's complement of 0, so the caller's LEN/NLEN
// check rejects such a block instead of reading out of bounds.
static void bitstream_parse_uncompressed_block(
    struct Bitstream* bs, uint16_t* size, uint16_t* nsize, uint8_t** data) {
  const size_t header_len = 4;  // LEN and NLEN, two bytes each.
  const size_t source_len = bs->source_len;

  size_t start = bs->curword;
  if (bs->curbit > 0) ++start;
  if (bs->curbit > 8) ++start;

  uint16_t len = 0;
  uint16_t nlen = 0;
  // Written as subtractions on the known-good side so that a huge `start`
  // cannot wrap around and slip past the check.
  int truncated = source_len < header_len || start > source_len - header_len;
  if (!truncated) {
    len = little_uint16t(bs->source + start);
    nlen = little_uint16t(bs->source + start + 2);
    truncated = len > source_len - header_len - start;
  }
  if (truncated) {
    *size = 0;
    *nsize = 0;
    *data = bs->source;
    return;
  }

  *size = len;
  *nsize = nlen;
  *data = bs->source + start + header_len;
  bs->curbit = 0;
  bs->curword = start + header_len + len;

  // Preload the word that follows the payload. With a single trailing byte
  // only that byte is read (zero-extended) so we never touch memory past the
  // end of the source buffer; with nothing left, curword_val is not updated.
  const size_t remaining = source_len - bs->curword;
  if (remaining >= 2)
    bs->curword_val = little_uint16t(bs->source + bs->curword);
  else if (remaining == 1)
    bs->curword_val = bs->source[bs->curword];
}

struct HuffTree {
// XXX: storage should be owned by client; dists only need 30 codes, not 288
int last_code[16];
Expand Down Expand Up @@ -136,6 +150,23 @@ static void window_output_literal(struct Window* w, uint8_t c) {
w->win_write++;
}

// Copies a run of `size` raw bytes starting at `d` into the circular window
// at the current write position, wrapping to the start of the buffer when the
// run does not fit in the space left at the end.
static void window_output_block(struct Window* w, uint8_t* d, uint16_t size) {
  // A run longer than the window would overwrite itself; only its trailing
  // win_size bytes can survive, so skip everything before them.
  if (size > w->win_size) {
    d += size - w->win_size;
    size = w->win_size;
  }

  size_t tail_room = w->win_size - w->win_write;

  // Common case: the whole run fits before the end of the buffer.
  if (size <= tail_room) {
    memcpy(&w->window[w->win_write], d, size);
    w->win_write += size;
    return;
  }

  // Otherwise fill the end of the buffer, then continue from its start.
  size_t wrapped = size - tail_room;
  memcpy(&w->window[w->win_write], d, tail_room);
  memcpy(&w->window[0], d + tail_room, wrapped);
  w->win_write = wrapped;
}

static void window_copy_match(
struct Window* w, int match_offset, int match_length) {
// match_offset is relative to the end of the window.
Expand Down Expand Up @@ -289,7 +320,16 @@ int main(int argc, char* argv[]) {
is_last_block = bitstream_getbit(&bitstream);
int block_type = bitstream_getbits(&bitstream, 2);
if (block_type == 3) fatal("invalid block\n");
if (block_type == 0) fatal("unsupported uncompressed block\n");

if (block_type == 0) {
uint16_t size, nsize;
uint8_t* data;
bitstream_parse_uncompressed_block(&bitstream, &size, &nsize, &data);
if (size != (uint16_t)~nsize) fatal("invalid uncompressed header\n");
fwrite(data, 1, size, outfile);
window_output_block(&window, data, size);
continue;
}

int lengths[288 + 30];
int num_literals_lengths;
Expand Down
32 changes: 31 additions & 1 deletion cab/gunzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ def getbits(self, n): # n is at most 13, for extra_dist_bits
bits = bits | (self.getbit() << i)
return bits

def parse_uncompressed_block(self):
    """Parse a stored (uncompressed) deflate block and return its payload.

    The LEN/NLEN header is read at byte offset self.curword, plus one byte
    when self.curbit > 0 and a second byte when self.curbit > 8. Afterwards
    the stream is positioned on the first byte after the payload with
    curbit reset to 0.

    Returns the payload bytes. Fails with an AssertionError if NLEN is not
    the one's complement of LEN; struct.error is raised by the struct
    module if the source is too short for the header or the payload.
    """
    start = self.curword
    if self.curbit > 0: start += 1
    if self.curbit > 8: start += 1
    size, nsize = struct.unpack_from('<HH', self.source, start)
    # NLEN must be the bitwise complement of LEN (within 16 bits).
    assert size == (~nsize & 0xffff), 'invalid uncompressed header'
    data = struct.unpack_from('%ds' % size, self.source, start + 4)[0]
    self.curbit = 0
    self.curword = start + 4 + size
    # Preload the 16-bit word following the payload. A single trailing byte
    # is read on its own (zero-extended) because unpacking '<H' there would
    # raise; with nothing left, curword_val is not updated.
    remaining = len(self.source) - self.curword
    if remaining >= 2:
        self.curword_val = struct.unpack_from('<H', self.source, self.curword)[0]
    elif remaining == 1:
        self.curword_val = struct.unpack_from('<B', self.source, self.curword)[0]
    return data


class HuffTree(object):
def __init__(self, nodelengths):
Expand Down Expand Up @@ -132,6 +144,20 @@ def output_literal(self, c):
self.window[self.win_write] = c
self.win_write += 1

def output_block(self, data):
    """Copy a run of raw bytes into the circular window.

    data is a byte string (str on Python 2, bytes on Python 3). Its bytes
    are stored as integer values starting at self.win_write, wrapping to
    the start of self.window when the run does not fit in the space left
    at the end. self.win_write is advanced to just past the last byte
    written.
    """
    # A run longer than the window would overwrite itself; only its
    # trailing win_size bytes can survive.
    if len(data) > self.win_size:
        data = data[len(data) - self.win_size:]
    # bytearray yields integer byte values on both Python 2 and Python 3,
    # unlike map(ord, ...), which only works on a Python 2 str.
    data = bytearray(data)
    rightspace = self.win_size - self.win_write
    if rightspace >= len(data):
        self.window[self.win_write:self.win_write+len(data)] = data
        self.win_write += len(data)
    else:
        leftspace = len(data) - rightspace
        self.window[self.win_write:] = data[0:rightspace]
        self.window[:leftspace] = data[rightspace:]
        self.win_write = leftspace

def copy_match(self, match_offset, match_length):
# match_offset is relative to the end of the window.
no_overlap = self.win_write >= match_offset >= match_length and \
Expand Down Expand Up @@ -216,7 +242,11 @@ def deflate_decode_pretree(pretree, bitstream, num_lengths):
block_type = bitstream.getbits(2)
#print is_last_block, block_type
assert block_type != 3, 'invalid block'
assert block_type != 0, 'unsupported uncompressed block'
if block_type == 0:
data = bitstream.parse_uncompressed_block()
window.output_block(data)
outfile.write(data)
continue
if block_type == 2:
# dynamic huffman code, read huffman tree description
num_literals_lengths = bitstream.getbits(5) + 257
Expand Down

0 comments on commit 2c14ef5

Please sign in to comment.