diff --git a/.gitignore b/.gitignore index 5761abc..e490eb2 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.o +*.bro +*.unbro diff --git a/.travis.yml b/.travis.yml index 3b1f340..f81bea0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -68,6 +68,8 @@ install: # be pre-compiling them just yet # -v = verbose, the go tools are disturbingly silent by default - go get -d -v ./... + - go get -u github.com/golang/lint/golint + - go get -u golang.org/x/tools/cmd/goimports script: # gox lets us cross-compile pretty easily - go get github.com/mitchellh/gox @@ -76,5 +78,9 @@ script: # for an ldflags explanation, cf. https://github.com/kothar/brotli-go/issues/1#issuecomment-156091015 # BROTLI_EXT is just handy to be able to call `file` later - if [[ $BROTLI_OS = windows ]]; then export BROTLI_LDFLAGS="$BROTLI_LDFLAGS -extldflags \"-Wl,--allow-multiple-definition\""; export BROTLI_EXT=".exe"; fi - - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then go test -v ./...; fi + - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then go vet ./...; fi + - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then diff <(goimports -d .) <(printf ""); fi + - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then diff <(golint ./...) 
<(printf ""); fi + - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then go test -v -cpu=2 ./...; fi + - if [[ $OSARCH = "linux/amd64" || $OSARCH = "darwin/amd64" ]]; then go test -v -cpu=1,2,4 -short -race ./...; fi - (cd gbr && gox -osarch "$OSARCH" -ldflags "$BROTLI_LDFLAGS" -cgo -output="gbr" && file gbr${BROTLI_EXT}) diff --git a/VERSION.md b/VERSION.md index 46741ec..2eda2ef 100644 --- a/VERSION.md +++ b/VERSION.md @@ -3,4 +3,4 @@ Brotli Version Vendored Brotli implementation from https://github.com/google/brotli -Current upstream commit: `fb52958eae21d2ff604e702ad847a55ee1740093` +Current upstream commit: `c60f6d0d655ad8ee990cc03bdbe223910f0ebb55` diff --git a/dec/bit_reader.h b/dec/bit_reader.h index f390348..468afe1 100644 --- a/dec/bit_reader.h +++ b/dec/bit_reader.h @@ -18,6 +18,7 @@ #ifndef BROTLI_DEC_BIT_READER_H_ #define BROTLI_DEC_BIT_READER_H_ +#include #include #include "./port.h" #include "./types.h" diff --git a/dec/decode.c b/dec/decode.c index 5eb6d6c..31e6291 100644 --- a/dec/decode.c +++ b/dec/decode.c @@ -73,6 +73,34 @@ static const uint8_t kCodeLengthPrefixValue[16] = { #define NUM_DISTANCE_SHORT_CODES 16 +BrotliState* BrotliCreateState( + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { + BrotliState* state = 0; + if (!alloc_func && !free_func) { + state = (BrotliState*)malloc(sizeof(BrotliState)); + } else if (alloc_func && free_func) { + state = (BrotliState*)alloc_func(opaque, sizeof(BrotliState)); + } + if (state == 0) { + (void)BROTLI_FAILURE(); + return 0; + } + BrotliStateInitWithCustomAllocators(state, alloc_func, free_func, opaque); + return state; +} + +/* Deinitializes and frees BrotliState instance. 
*/ +void BrotliDestroyState(BrotliState* state) { + if (!state) { + return; + } else { + brotli_free_func free_func = state->free_func; + void* opaque = state->memory_manager_opaque; + BrotliStateCleanup(state); + free_func(opaque, state); + } +} + /* Decodes a number in the range [9..24], by reading 1 - 7 bits. Precondition: bit-reader accumulator has at least 7 bits. */ static uint32_t DecodeWindowBits(BrotliBitReader* br) { @@ -907,7 +935,7 @@ static BrotliResult DecodeContextMap(uint32_t context_map_size, s->context_index = 0; BROTLI_LOG_UINT(context_map_size); BROTLI_LOG_UINT(*num_htrees); - *context_map_arg = (uint8_t*)malloc((size_t)context_map_size); + *context_map_arg = (uint8_t*)BROTLI_ALLOC(s, (size_t)context_map_size); if (*context_map_arg == 0) { return BROTLI_FAILURE(); } @@ -1006,7 +1034,7 @@ static BrotliResult DecodeContextMap(uint32_t context_map_size, /* Decodes a command or literal and updates block type ringbuffer. Reads 3..54 bits. */ -static int BROTLI_INLINE DecodeBlockTypeAndLength(int safe, +static BROTLI_INLINE int DecodeBlockTypeAndLength(int safe, BrotliState* s, int tree_type) { uint32_t max_block_type = s->num_block_types[tree_type]; int tree_offset = tree_type * BROTLI_HUFFMAN_MAX_TABLE_SIZE; @@ -1048,7 +1076,7 @@ static int BROTLI_INLINE DecodeBlockTypeAndLength(int safe, /* Decodes the block type and updates the state for literal context. Reads 3..54 bits. */ -static int BROTLI_INLINE DecodeLiteralBlockSwitchInternal(int safe, +static BROTLI_INLINE int DecodeLiteralBlockSwitchInternal(int safe, BrotliState* s) { uint8_t context_mode; uint32_t context_offset; @@ -1075,7 +1103,7 @@ static int BROTLI_NOINLINE SafeDecodeLiteralBlockSwitch(BrotliState* s) { /* Block switch for insert/copy length. Reads 3..54 bits. 
*/ -static int BROTLI_INLINE DecodeCommandBlockSwitchInternal(int safe, +static BROTLI_INLINE int DecodeCommandBlockSwitchInternal(int safe, BrotliState* s) { if (!DecodeBlockTypeAndLength(safe, s, 1)) { return 0; @@ -1093,7 +1121,7 @@ static int BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(BrotliState* s) { /* Block switch for distance codes. Reads 3..54 bits. */ -static int BROTLI_INLINE DecodeDistanceBlockSwitchInternal(int safe, +static BROTLI_INLINE int DecodeDistanceBlockSwitchInternal(int safe, BrotliState* s) { if (!DecodeBlockTypeAndLength(safe, s, 2)) { return 0; @@ -1255,7 +1283,7 @@ static int BROTLI_NOINLINE BrotliAllocateRingBuffer(BrotliState* s, } s->ringbuffer_mask = s->ringbuffer_size - 1; - s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size + + s->ringbuffer = (uint8_t*)BROTLI_ALLOC(s, (size_t)(s->ringbuffer_size + kRingBufferWriteAheadSlack + kBrotliMaxDictionaryWordLength)); if (s->ringbuffer == 0) { return 0; @@ -1289,7 +1317,7 @@ static BrotliResult ReadContextModes(BrotliState* s) { return BROTLI_RESULT_SUCCESS; } -static void BROTLI_INLINE TakeDistanceFromRingBuffer(BrotliState* s) { +static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliState* s) { if (s->distance_code == 0) { --s->dist_rb_idx; s->distance_code = s->dist_rb[s->dist_rb_idx & 3]; @@ -1329,7 +1357,7 @@ static BROTLI_INLINE int SafeReadBits( } /* Precondition: s->distance_code < 0 */ -static int BROTLI_INLINE ReadDistanceInternal(int safe, +static BROTLI_INLINE int ReadDistanceInternal(int safe, BrotliState* s, BrotliBitReader* br) { int distval; BrotliBitReaderState memento; @@ -1386,15 +1414,15 @@ static int BROTLI_INLINE ReadDistanceInternal(int safe, return 1; } -static void BROTLI_INLINE ReadDistance(BrotliState* s, BrotliBitReader* br) { +static BROTLI_INLINE void ReadDistance(BrotliState* s, BrotliBitReader* br) { ReadDistanceInternal(0, s, br); } -static int BROTLI_INLINE SafeReadDistance(BrotliState* s, BrotliBitReader* br) { +static BROTLI_INLINE int 
SafeReadDistance(BrotliState* s, BrotliBitReader* br) { return ReadDistanceInternal(1, s, br); } -static int BROTLI_INLINE ReadCommandInternal(int safe, +static BROTLI_INLINE int ReadCommandInternal(int safe, BrotliState* s, BrotliBitReader* br, int* insert_length) { uint32_t cmd_code; uint32_t insert_len_extra = 0; @@ -1432,12 +1460,12 @@ static int BROTLI_INLINE ReadCommandInternal(int safe, return 1; } -static void BROTLI_INLINE ReadCommand(BrotliState* s, BrotliBitReader* br, +static BROTLI_INLINE void ReadCommand(BrotliState* s, BrotliBitReader* br, int* insert_length) { ReadCommandInternal(0, s, br, insert_length); } -static int BROTLI_INLINE SafeReadCommand(BrotliState* s, BrotliBitReader* br, +static BROTLI_INLINE int SafeReadCommand(BrotliState* s, BrotliBitReader* br, int* insert_length) { return ReadCommandInternal(1, s, br, insert_length); } @@ -1833,10 +1861,10 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output, size_t total_out; if (s->legacy_input_buffer == 0) { - s->legacy_input_buffer = (uint8_t*)malloc(kBufferSize); + s->legacy_input_buffer = (uint8_t*)BROTLI_ALLOC(s, kBufferSize); } if (s->legacy_output_buffer == 0) { - s->legacy_output_buffer = (uint8_t*)malloc(kBufferSize); + s->legacy_output_buffer = (uint8_t*)BROTLI_ALLOC(s, kBufferSize); } if (s->legacy_input_buffer == 0 || s->legacy_output_buffer == 0) { return BROTLI_FAILURE(); @@ -2010,7 +2038,7 @@ BrotliResult BrotliDecompressStream(size_t* available_in, s->max_backward_distance - s->custom_dict_size; /* Allocate memory for both block_type_trees and block_len_trees. 
*/ - s->block_type_trees = (HuffmanCode*)malloc( + s->block_type_trees = (HuffmanCode*)BROTLI_ALLOC(s, 6 * BROTLI_HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode)); if (s->block_type_trees == 0) { result = BROTLI_FAILURE(); @@ -2145,7 +2173,8 @@ BrotliResult BrotliDecompressStream(size_t* available_in, BROTLI_LOG_UINT(s->num_direct_distance_codes); BROTLI_LOG_UINT(s->distance_postfix_bits); s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits); - s->context_modes = (uint8_t*)malloc((size_t)s->num_block_types[0]); + s->context_modes = + (uint8_t*)BROTLI_ALLOC(s, (size_t)s->num_block_types[0]); if (s->context_modes == 0) { result = BROTLI_FAILURE(); break; @@ -2188,13 +2217,13 @@ BrotliResult BrotliDecompressStream(size_t* available_in, if (result != BROTLI_RESULT_SUCCESS) { break; } - BrotliHuffmanTreeGroupInit( - &s->literal_hgroup, kNumLiteralCodes, s->num_literal_htrees); - BrotliHuffmanTreeGroupInit( - &s->insert_copy_hgroup, kNumInsertAndCopyCodes, + BrotliHuffmanTreeGroupInit(s, &s->literal_hgroup, kNumLiteralCodes, + s->num_literal_htrees); + BrotliHuffmanTreeGroupInit(s, &s->insert_copy_hgroup, + kNumInsertAndCopyCodes, s->num_block_types[1]); - BrotliHuffmanTreeGroupInit( - &s->distance_hgroup, num_distance_codes, s->num_dist_htrees); + BrotliHuffmanTreeGroupInit(s, &s->distance_hgroup, num_distance_codes, + s->num_dist_htrees); if (s->literal_hgroup.codes == 0 || s->insert_copy_hgroup.codes == 0 || s->distance_hgroup.codes == 0) { diff --git a/dec/decode.go b/dec/decode.go index b6a4cb0..2fe0da5 100644 --- a/dec/decode.go +++ b/dec/decode.go @@ -1,5 +1,5 @@ -// Brotli decoder bindings -package dec +// Package dec provides Brotli decoder bindings +package dec // import "gopkg.in/kothar/brotli-go.v0/dec" /* #include "./decode.h" @@ -41,7 +41,7 @@ func init() { C.decodeBrotliDictionary = (*C.dict)(shared.GetDictionary()) } -// Decompress a Brotli-encoded buffer. 
Uses decodedBuffer as the destination buffer unless it is too small, +// DecompressBuffer decompresses a Brotli-encoded buffer. Uses decodedBuffer as the destination buffer unless it is too small, // in which case a new buffer is allocated. // Returns the slice of the decodedBuffer containing the output, or an error. func DecompressBuffer(encodedBuffer []byte, decodedBuffer []byte) ([]byte, error) { @@ -91,7 +91,7 @@ func toC(array []byte) *C.uint8_t { // cf. https://github.com/youtube/vitess/blob/071d0e649f22034ad4285c7431ac0a2c9c20090d/go/cgzip/zstream.go#L86-L89 type cBrotliState [unsafe.Sizeof(C.BrotliState{})]C.char -// Decompresses a Brotli-encoded stream using the io.Reader interface +// BrotliReader decompresses a Brotli-encoded stream using the io.Reader interface type BrotliReader struct { reader io.Reader state cBrotliState @@ -183,7 +183,7 @@ func (r *BrotliReader) Close() error { return r.err } -// Returns a Reader that decompresses the stream from another reader. +// NewBrotliReader returns a Reader that decompresses the stream from another reader. // // Ensure that you Close the stream when you are finished in order to clean up the // Brotli decompression state. @@ -193,7 +193,7 @@ func NewBrotliReader(stream io.Reader) *BrotliReader { return NewBrotliReaderSize(stream, 128*1024) } -// The same as NewBrotliReader, but allows the internal buffer size to be set. +// NewBrotliReaderSize is the same as NewBrotliReader, but allows the internal buffer size to be set. // // The size of the internal buffer may be specified which will hold compressed data // before being read by the decompressor diff --git a/dec/decode.h b/dec/decode.h index 78a156e..57b6861 100644 --- a/dec/decode.h +++ b/dec/decode.h @@ -51,6 +51,16 @@ static inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) { } #endif +/* Creates the instance of BrotliState and initializes it. alloc_func and + free_func MUST be both zero or both non-zero. 
In the case they are both zero, + default memory allocators are used. opaque parameter is passed to alloc_func + and free_func when they are called. */ +BrotliState* BrotliCreateState( + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque); + +/* Deinitializes and frees BrotliState instance. */ +void BrotliDestroyState(BrotliState* state); + /* Sets *decoded_size to the decompressed size of the given encoded stream. */ /* This function only works if the encoded buffer has a single meta block, */ /* or if it has two meta-blocks, where the first is uncompressed and the */ diff --git a/dec/huffman.c b/dec/huffman.c index 0628c36..a580aae 100644 --- a/dec/huffman.c +++ b/dec/huffman.c @@ -362,24 +362,6 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, return goal_size; } -void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group, uint32_t alphabet_size, - uint32_t ntrees) { - /* Pack two mallocs into one */ - const size_t code_size = - sizeof(HuffmanCode) * (size_t)(ntrees * BROTLI_HUFFMAN_MAX_TABLE_SIZE); - const size_t htree_size = sizeof(HuffmanCode*) * (size_t)ntrees; - char *p = (char*)malloc(code_size + htree_size); - group->alphabet_size = (uint16_t)alphabet_size; - group->num_htrees = (uint16_t)ntrees; - group->codes = (HuffmanCode*)p; - group->htrees = (HuffmanCode**)(p + code_size); -} - -void BrotliHuffmanTreeGroupRelease(HuffmanTreeGroup* group) { - BROTLI_FREE(group->codes); - group->htrees = NULL; -} - #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif diff --git a/dec/huffman.h b/dec/huffman.h index bb67f4b..783cd7d 100644 --- a/dec/huffman.h +++ b/dec/huffman.h @@ -70,10 +70,6 @@ typedef struct { uint16_t num_htrees; } HuffmanTreeGroup; -void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group, - uint32_t alphabet_size, uint32_t ntrees); -void BrotliHuffmanTreeGroupRelease(HuffmanTreeGroup* group); - #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif diff --git a/dec/port.h 
b/dec/port.h index f8fc4a8..2f5b0ce 100644 --- a/dec/port.h +++ b/dec/port.h @@ -237,8 +237,10 @@ static BROTLI_INLINE unsigned BrotliRBit(unsigned input) { #define BROTLI_HAS_UBFX 0 #endif -#define BROTLI_FREE(X) { \ - free(X); \ +#define BROTLI_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L) + +#define BROTLI_FREE(S, X) { \ + S->free_func(S->memory_manager_opaque, X); \ X = NULL; \ } diff --git a/dec/state.c b/dec/state.c index f4f239a..32c1ece 100644 --- a/dec/state.c +++ b/dec/state.c @@ -23,7 +23,32 @@ extern "C" { #endif +static void* DefaultAllocFunc(void* opaque, size_t size) { + BROTLI_UNUSED(opaque); + return malloc(size); +} + +static void DefaultFreeFunc(void* opaque, void* address) { + BROTLI_UNUSED(opaque); + free(address); +} + void BrotliStateInit(BrotliState* s) { + BrotliStateInitWithCustomAllocators(s, 0, 0, 0); +} + +void BrotliStateInitWithCustomAllocators(BrotliState* s, + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { + if (!alloc_func) { + s->alloc_func = DefaultAllocFunc; + s->free_func = DefaultFreeFunc; + s->memory_manager_opaque = 0; + } else { + s->alloc_func = alloc_func; + s->free_func = free_func; + s->memory_manager_opaque = opaque; + } + BrotliInitBitReader(&s->br); s->state = BROTLI_STATE_UNINITED; s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; @@ -120,22 +145,22 @@ void BrotliStateMetablockBegin(BrotliState* s) { } void BrotliStateCleanupAfterMetablock(BrotliState* s) { - BROTLI_FREE(s->context_modes); - BROTLI_FREE(s->context_map); - BROTLI_FREE(s->dist_context_map); + BROTLI_FREE(s, s->context_modes); + BROTLI_FREE(s, s->context_map); + BROTLI_FREE(s, s->dist_context_map); - BrotliHuffmanTreeGroupRelease(&s->literal_hgroup); - BrotliHuffmanTreeGroupRelease(&s->insert_copy_hgroup); - BrotliHuffmanTreeGroupRelease(&s->distance_hgroup); + BrotliHuffmanTreeGroupRelease(s, &s->literal_hgroup); + BrotliHuffmanTreeGroupRelease(s, &s->insert_copy_hgroup); + 
BrotliHuffmanTreeGroupRelease(s, &s->distance_hgroup); } void BrotliStateCleanup(BrotliState* s) { BrotliStateCleanupAfterMetablock(s); - BROTLI_FREE(s->ringbuffer); - BROTLI_FREE(s->block_type_trees); - BROTLI_FREE(s->legacy_input_buffer); - BROTLI_FREE(s->legacy_output_buffer); + BROTLI_FREE(s, s->ringbuffer); + BROTLI_FREE(s, s->block_type_trees); + BROTLI_FREE(s, s->legacy_input_buffer); + BROTLI_FREE(s, s->legacy_output_buffer); } int BrotliStateIsStreamStart(const BrotliState* s) { @@ -147,6 +172,23 @@ int BrotliStateIsStreamEnd(const BrotliState* s) { return s->state == BROTLI_STATE_DONE; } +void BrotliHuffmanTreeGroupInit(BrotliState* s, HuffmanTreeGroup* group, + uint32_t alphabet_size, uint32_t ntrees) { + /* Pack two allocations into one */ + const size_t code_size = + sizeof(HuffmanCode) * (size_t)(ntrees * BROTLI_HUFFMAN_MAX_TABLE_SIZE); + const size_t htree_size = sizeof(HuffmanCode*) * (size_t)ntrees; + char *p = (char*)BROTLI_ALLOC(s, code_size + htree_size); + group->alphabet_size = (uint16_t)alphabet_size; + group->num_htrees = (uint16_t)ntrees; + group->codes = (HuffmanCode*)p; + group->htrees = (HuffmanCode**)(p + code_size); +} + +void BrotliHuffmanTreeGroupRelease(BrotliState* s, HuffmanTreeGroup* group) { + BROTLI_FREE(s, group->codes); + group->htrees = NULL; +} #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ diff --git a/dec/state.h b/dec/state.h index 0d9329b..2adf26c 100644 --- a/dec/state.h +++ b/dec/state.h @@ -106,6 +106,10 @@ struct BrotliStateStruct { BrotliRunningState state; BrotliBitReader br; + brotli_alloc_func alloc_func; + brotli_free_func free_func; + void* memory_manager_opaque; + /* Temporary storage for remaining input. 
*/ union { uint64_t u64; @@ -234,10 +238,16 @@ struct BrotliStateStruct { typedef struct BrotliStateStruct BrotliState; void BrotliStateInit(BrotliState* s); +void BrotliStateInitWithCustomAllocators(BrotliState* s, + brotli_alloc_func alloc_func, + brotli_free_func free_func, + void* opaque); void BrotliStateCleanup(BrotliState* s); void BrotliStateMetablockBegin(BrotliState* s); void BrotliStateCleanupAfterMetablock(BrotliState* s); - +void BrotliHuffmanTreeGroupInit(BrotliState* s, HuffmanTreeGroup* group, + uint32_t alphabet_size, uint32_t ntrees); +void BrotliHuffmanTreeGroupRelease(BrotliState* s, HuffmanTreeGroup* group); /* Returns 1, if s is in a state where we have not read any input bytes yet, and 0 otherwise */ diff --git a/dec/types.h b/dec/types.h index 8a9cc4a..096b591 100644 --- a/dec/types.h +++ b/dec/types.h @@ -33,4 +33,15 @@ typedef __int64 int64_t; #include #endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */ +/* Allocating function pointer. Function MUST return 0 in the case of failure. + Otherwise it MUST return a valid pointer to a memory region of at least + size bytes. The size argument is not allowed to be 0. + opaque argument is a pointer provided by client and could be used to bind + function to specific object (memory pool). */ +typedef void* (*brotli_alloc_func) (void* opaque, size_t size); + +/* Deallocating function pointer. Function SHOULD be no-op in the case the + address is 0. */ +typedef void (*brotli_free_func) (void* opaque, void* address); + #endif /* BROTLI_DEC_TYPES_H_ */ diff --git a/doc.go b/doc.go index 3528d0f..3c0487b 100644 --- a/doc.go +++ b/doc.go @@ -1,4 +1,4 @@ -// Bindings for the Brotli compression library +// Package brotli contains bindings for the Brotli compression library // // This is a very basic Cgo wrapper for the enc and dec directories from the Brotli sources. I've made a few minor changes to get // things working with Go. 
diff --git a/enc/block_splitter.cc b/enc/block_splitter.cc index 47b7f64..62d80a3 100644 --- a/enc/block_splitter.cc +++ b/enc/block_splitter.cc @@ -290,12 +290,12 @@ void ClusterBlocks(const DataType* data, const size_t length, std::vector clustered_histograms; std::vector histogram_symbols; // Block ids need to fit in one byte. - static const int kMaxNumberOfBlockTypes = 256; + static const size_t kMaxNumberOfBlockTypes = 256; ClusterHistograms(histograms, 1, static_cast(histograms.size()), kMaxNumberOfBlockTypes, &clustered_histograms, &histogram_symbols); - for (int i = 0; i < length; ++i) { + for (size_t i = 0; i < length; ++i) { block_ids[i] = static_cast(histogram_symbols[block_index[i]]); } } diff --git a/enc/cluster.h b/enc/cluster.h index ab46559..108e855 100644 --- a/enc/cluster.h +++ b/enc/cluster.h @@ -30,6 +30,7 @@ #include "./entropy_encode.h" #include "./fast_log.h" #include "./histogram.h" +#include "./port.h" #include "./types.h" namespace brotli { @@ -111,22 +112,25 @@ void HistogramCombine(HistogramType* out, int* cluster_size, int* symbols, int symbols_size, - int max_clusters) { + size_t max_clusters) { double cost_diff_threshold = 0.0; - int min_cluster_size = 1; + size_t min_cluster_size = 1; std::set all_symbols; std::vector clusters; for (int i = 0; i < symbols_size; ++i) { if (all_symbols.find(symbols[i]) == all_symbols.end()) { all_symbols.insert(symbols[i]); + if (!clusters.empty()) { + BROTLI_DCHECK(clusters.back() < symbols[i]); + } clusters.push_back(symbols[i]); } } // We maintain a heap of histogram pairs, ordered by the bit cost reduction. 
std::vector pairs; - for (int idx1 = 0; idx1 < clusters.size(); ++idx1) { - for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) { + for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) { + for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) { CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2], &pairs); } @@ -149,14 +153,14 @@ void HistogramCombine(HistogramType* out, symbols[i] = best_idx1; } } - for (int i = 0; i + 1 < clusters.size(); ++i) { + for (size_t i = 0; i + 1 < clusters.size(); ++i) { if (clusters[i] >= best_idx2) { clusters[i] = clusters[i + 1]; } } clusters.pop_back(); // Invalidate pairs intersecting the just combined best pair. - for (int i = 0; i < pairs.size(); ++i) { + for (size_t i = 0; i < pairs.size(); ++i) { HistogramPair& p = pairs[i]; if (p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 || p.idx2 == best_idx2) { @@ -169,7 +173,7 @@ void HistogramCombine(HistogramType* out, pairs.pop_back(); } // Push new pairs formed with the combined histogram to the heap. 
- for (int i = 0; i < clusters.size(); ++i) { + for (size_t i = 0; i < clusters.size(); ++i) { CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs); } } @@ -232,7 +236,7 @@ void HistogramReindex(std::vector* out, std::vector tmp(*out); std::map new_index; int next_index = 0; - for (int i = 0; i < symbols->size(); ++i) { + for (size_t i = 0; i < symbols->size(); ++i) { if (new_index.find((*symbols)[i]) == new_index.end()) { new_index[(*symbols)[i]] = next_index; (*out)[next_index] = tmp[(*symbols)[i]]; @@ -240,7 +244,7 @@ void HistogramReindex(std::vector* out, } } out->resize(next_index); - for (int i = 0; i < symbols->size(); ++i) { + for (size_t i = 0; i < symbols->size(); ++i) { (*symbols)[i] = new_index[(*symbols)[i]]; } } @@ -251,10 +255,11 @@ void HistogramReindex(std::vector* out, template void ClusterHistograms(const std::vector& in, int num_contexts, int num_blocks, - int max_histograms, + size_t max_histograms, std::vector* out, std::vector* histogram_symbols) { const int in_size = num_contexts * num_blocks; + BROTLI_DCHECK(in_size == in.size()); std::vector cluster_size(in_size, 1); out->resize(in_size); histogram_symbols->resize(in_size); diff --git a/enc/encode.cc b/enc/encode.cc index f46a1c9..e77ee4d 100644 --- a/enc/encode.cc +++ b/enc/encode.cc @@ -546,8 +546,6 @@ bool BrotliCompressor::WriteMetadata(const size_t input_size, if (input_size == 0) { WriteBits(2, 0, &storage_ix, encoded_buffer); *encoded_size = (storage_ix + 7) >> 3; - } else if (input_size > (1 << 24)) { - return false; } else { int nbits = Log2Floor(static_cast(input_size) - 1) + 1; int nbytes = (nbits + 7) / 8; diff --git a/enc/encode.go b/enc/encode.go index 526162d..7f103ef 100644 --- a/enc/encode.go +++ b/enc/encode.go @@ -1,5 +1,5 @@ -// Brotli encoder bindings -package enc +// Package enc provides Brotli encoder bindings +package enc // import "gopkg.in/kothar/brotli-go.v0/enc" /* // for memcpy @@ -52,9 +52,10 @@ import ( 
"gopkg.in/kothar/brotli-go.v0/shared" ) +// Errors which may be returned when encoding var ( - ErrInputLargerThanBlockSize error = errors.New("data copied to ring buffer larger than brotli compressor block size") - ErrBrotliCompression error = errors.New("brotli compression error") + errInputLargerThanBlockSize = errors.New("data copied to ring buffer larger than brotli compressor block size") + errBrotliCompression = errors.New("brotli compression error") ) func init() { @@ -62,24 +63,25 @@ func init() { C.kBrotliDictionary = (*C.dict)(shared.GetDictionary()) } -// The operation mode of the compressor +// Mode defines the operation mode of the compressor type Mode int const ( - // Default compression mode. The compressor does not know anything in + // GENERIC is the default compression mode. The compressor does not know anything in // advance about the properties of the input. GENERIC Mode = iota - // Compression mode for UTF-8 format text input. + // TEXT is a compression mode for UTF-8 format text input. TEXT - // Compression mode used in WOFF 2.0. + // FONT is a compression mode used in WOFF 2.0. 
FONT ) +// BrotliParams describes the settings used when encoding using Brotli type BrotliParams struct { c C.struct_CBrotliParams } -// Instantiates the compressor parameters with the default settings +// NewBrotliParams instantiates the compressor parameters with the default settings func NewBrotliParams() *BrotliParams { params := &BrotliParams{C.struct_CBrotliParams{ mode: C.MODE_GENERIC, @@ -97,36 +99,44 @@ func NewBrotliParams() *BrotliParams { return params } -// The operating mode of the compressor (GENERIC, TEXT or FONT) +// Mode returns the current operating mode of the compressor func (p *BrotliParams) Mode() Mode { return Mode(p.c.mode) } + +// SetMode controls the operating mode of the compressor (GENERIC, TEXT or FONT) func (p *BrotliParams) SetMode(value Mode) { p.c.mode = C.enum_Mode(value) } -// Controls the compression-speed vs compression-density tradeoffs. The higher -// the quality, the slower the compression. Range is 0 to 11. Default is 11. +// Quality returns the quality setting of the compressor func (p *BrotliParams) Quality() int { return int(p.c.quality) } + +// SetQuality controls the compression-speed vs compression-density tradeoffs. The higher +// the quality, the slower the compression. Range is 0 to 11. Default is 11. func (p *BrotliParams) SetQuality(value int) { p.c.quality = C.int(value) } -// Base 2 logarithm of the sliding window size. Range is 10 to 24. Default is 22. +// Lgwin returns the current sliding window size setting. func (p *BrotliParams) Lgwin() int { return int(p.c.lgwin) } + +// SetLgwin sets the base 2 logarithm of the sliding window size. Range is 10 to 24. Default is 22. func (p *BrotliParams) SetLgwin(value int) { p.c.lgwin = C.int(value) } -// Base 2 logarithm of the maximum input block size. Range is 16 to 24. -// If set to 0 (default), the value will be set based on the quality. +// Lgblock returns the current maximum input block size setting. 
func (p *BrotliParams) Lgblock() int { return int(p.c.lgblock) } + +// SetLgblock sets the base 2 logarithm of the maximum input block size. Range is 16 to 24. +// If set to 0 (default), the value will be set based on the quality. func (p *BrotliParams) SetLgblock(value int) { p.c.lgblock = C.int(value) } @@ -138,8 +148,9 @@ func (p *BrotliParams) maxOutputSize(inputLength int) int { return int(C.BrotliMaxOutputSize(p.c, C.size_t(inputLength))) } -// Compress a buffer. Uses encodedBuffer as the destination buffer unless it is too small, -// in which case a new buffer is allocated. +// CompressBuffer compresses a single block of data. It uses encodedBuffer as +// the destination buffer unless it is too small, in which case a new buffer +// is allocated. // Default parameters are used if params is nil. // Returns the slice of the encodedBuffer containing the output, or an error. func CompressBuffer(params *BrotliParams, inputBuffer []byte, encodedBuffer []byte) ([]byte, error) { @@ -158,7 +169,7 @@ func CompressBuffer(params *BrotliParams, inputBuffer []byte, encodedBuffer []by encodedLength := C.size_t(len(encodedBuffer)) result := C.CBrotliCompressBuffer(params.c, C.size_t(inputLength), toC(inputBuffer), &encodedLength, toC(encodedBuffer)) if result == 0 { - return nil, ErrBrotliCompression + return nil, errBrotliCompression } return encodedBuffer[0:encodedLength], nil } @@ -208,7 +219,7 @@ func (bp *brotliCompressor) writeBrotliData(isLast bool, forceFlush bool) ([]byt var output *C.uint8_t success := C.CBrotliCompressorWriteBrotliData(bp.c, C.bool(isLast), C.bool(forceFlush), &outSize, &output) if success == false { - return nil, ErrInputLargerThanBlockSize + return nil, errInputLargerThanBlockSize } // resize buffer if output is larger than we've anticipated @@ -232,6 +243,8 @@ func brotliCompressorFinalizer(bp *brotliCompressor) { bp.free() } +// BrotliWriter implements the io.Writer interface, compressing the stream +// to an output Writer using Brotli. 
type BrotliWriter struct { compressor *brotliCompressor writer io.Writer @@ -240,6 +253,8 @@ type BrotliWriter struct { inRingBuffer int } +// NewBrotliWriter instantiates a new BrotliWriter with the provided compression +// parameters and output Writer func NewBrotliWriter(params *BrotliParams, writer io.Writer) *BrotliWriter { return &BrotliWriter{ compressor: newBrotliCompressor(params), @@ -283,6 +298,8 @@ func (w *BrotliWriter) Write(buffer []byte) (int, error) { return copied, nil } +// Close cleans up the resources used by the Brotli encoder for this +// stream. If the output buffer is an io.Closer, it will also be closed. func (w *BrotliWriter) Close() error { compressedData, err := w.compressor.writeBrotliData(true, false) if err != nil { diff --git a/enc/encode.h b/enc/encode.h index a5fb21e..319e411 100644 --- a/enc/encode.h +++ b/enc/encode.h @@ -79,7 +79,7 @@ class BrotliCompressor { ~BrotliCompressor(); // The maximum input size that can be processed at once. - size_t input_block_size() const { return 1 << params_.lgblock; } + size_t input_block_size() const { return size_t(1) << params_.lgblock; } // Encodes the data in input_buffer as a meta-block and writes it to // encoded_buffer (*encoded_size should be set to the size of diff --git a/enc/encode_test.go b/enc/encode_test.go index f3fda8e..6f880ce 100644 --- a/enc/encode_test.go +++ b/enc/encode_test.go @@ -50,7 +50,7 @@ func TestStreamEncode(T *testing.T) { inputSize := len(input1) log.Printf("q=%d, inputSize=%d\n", params.Quality(), inputSize) - for lgwin := 16; lgwin <= 22; lgwin += 1 { + for lgwin := 16; lgwin <= 22; lgwin++ { params.SetLgwin(lgwin) compressor := newBrotliCompressor(params) defer compressor.free() @@ -85,7 +85,7 @@ func TestStreamEncode(T *testing.T) { fullStreamOutput := streamBuffer.Bytes() if !bytes.Equal(fullStreamOutput, fullBufferOutput) { - T.Fatal("for lgwin %d, stream compression didn't give same result as buffer compression", params.Lgwin()) + T.Fatalf("for lgwin %d, 
stream compression didn't give same result as buffer compression", params.Lgwin()) } // then using the high-level Writer interface @@ -96,7 +96,7 @@ func TestStreamEncode(T *testing.T) { fullWriterOutput := writerBuffer.Bytes() if !bytes.Equal(fullWriterOutput, fullBufferOutput) { - T.Fatal("for lgwin %d, stream writer compression didn't give same result as buffer compression", params.Lgwin()) + T.Fatalf("for lgwin %d, stream writer compression didn't give same result as buffer compression", params.Lgwin()) } outputSize := len(fullStreamOutput) diff --git a/enc/entropy_encode.cc b/enc/entropy_encode.cc index 1b77c7a..935ac30 100644 --- a/enc/entropy_encode.cc +++ b/enc/entropy_encode.cc @@ -22,6 +22,7 @@ #include #include "./histogram.h" +#include "./port.h" #include "./types.h" namespace brotli { @@ -141,6 +142,7 @@ void CreateHuffmanTree(const int *data, // Add back the last sentinel node. tree.push_back(sentinel); } + BROTLI_DCHECK(tree.size() == 2 * n + 1); SetDepth(tree[2 * n - 1], &tree[0], depth, 0); // We need to pack the Huffman tree in tree_limit bits. diff --git a/enc/hash.h b/enc/hash.h index 4e9c896..71a4e96 100644 --- a/enc/hash.h +++ b/enc/hash.h @@ -369,7 +369,7 @@ class HashLongestMatch { if (prev_ix >= cur_ix) { continue; } - if (PREDICT_FALSE(backward > max_backward)) { + if (PREDICT_FALSE(backward > (int)max_backward)) { continue; } prev_ix &= static_cast(ring_buffer_mask); @@ -510,7 +510,7 @@ class HashLongestMatch { if (len > kMaxZopfliLen) { matches = orig_matches; } - *matches++ = BackwardMatch(backward, len); + *matches++ = BackwardMatch(static_cast(backward), len); } } const uint32_t key = HashBytes(&data[cur_ix_masked]); diff --git a/enc/metablock.cc b/enc/metablock.cc index a192f74..3e1094d 100644 --- a/enc/metablock.cc +++ b/enc/metablock.cc @@ -64,7 +64,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer, &distance_histograms); // Histogram ids need to fit in one byte. 
- static const int kMaxNumberOfHistograms = 256; + static const size_t kMaxNumberOfHistograms = 256; mb->literal_histograms = literal_histograms; ClusterHistograms(literal_histograms, diff --git a/enc/port.h b/enc/port.h index ff0ba1b..95ea648 100644 --- a/enc/port.h +++ b/enc/port.h @@ -17,6 +17,7 @@ #ifndef BROTLI_ENC_PORT_H_ #define BROTLI_ENC_PORT_H_ +#include #include #include "./types.h" @@ -149,4 +150,10 @@ inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) { #endif +#ifdef BROTLI_ENCODE_DEBUG +#define BROTLI_DCHECK(x) assert(x) +#else +#define BROTLI_DCHECK(x) +#endif + #endif // BROTLI_ENC_PORT_H_ diff --git a/enc/ringbuffer.h b/enc/ringbuffer.h index a357c41..97121fd 100644 --- a/enc/ringbuffer.h +++ b/enc/ringbuffer.h @@ -31,12 +31,12 @@ namespace brotli { class RingBuffer { public: RingBuffer(int window_bits, int tail_bits) - : window_bits_(window_bits), - mask_((1 << window_bits) - 1), - tail_size_(1 << tail_bits), + : size_((size_t(1) << window_bits)), + mask_((size_t(1) << window_bits) - 1), + tail_size_(size_t(1) << tail_bits), pos_(0) { static const int kSlackForEightByteHashingEverywhere = 7; - const size_t buflen = (1 << window_bits_) + tail_size_; + const size_t buflen = size_ + tail_size_; buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere]; for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) { buffer_[buflen + i] = 0; @@ -52,17 +52,17 @@ class RingBuffer { // The length of the writes is limited so that we do not need to worry // about a write WriteTail(bytes, n); - if (PREDICT_TRUE(masked_pos + n <= (1U << window_bits_))) { + if (PREDICT_TRUE(masked_pos + n <= size_)) { // A single write fits. memcpy(&buffer_[masked_pos], bytes, n); } else { // Split into two writes. // Copy into the end of the buffer, including the tail buffer. 
memcpy(&buffer_[masked_pos], bytes, - std::min(n, ((1 << window_bits_) + tail_size_) - masked_pos)); + std::min(n, (size_ + tail_size_) - masked_pos)); // Copy into the beginning of the buffer - memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos), - n - ((1 << window_bits_) - masked_pos)); + memcpy(&buffer_[0], bytes + (size_ - masked_pos), + n - (size_ - masked_pos)); } pos_ += n; } @@ -85,13 +85,13 @@ class RingBuffer { const size_t masked_pos = pos_ & mask_; if (PREDICT_FALSE(masked_pos < tail_size_)) { // Just fill the tail buffer with the beginning data. - const size_t p = (1 << window_bits_) + masked_pos; + const size_t p = size_ + masked_pos; memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos)); } } // Size of the ringbuffer is (1 << window_bits) + tail_size_. - const int window_bits_; + const size_t size_; const size_t mask_; const size_t tail_size_; diff --git a/enc/streams.cc b/enc/streams.cc index 426d73c..e2e96cb 100644 --- a/enc/streams.cc +++ b/enc/streams.cc @@ -119,5 +119,4 @@ bool BrotliFileOut::Write(const void* buf, size_t n) { return true; } - } // namespace brotli diff --git a/enc/streams.h b/enc/streams.h index 9fcd980..a2ee028 100644 --- a/enc/streams.h +++ b/enc/streams.h @@ -124,7 +124,6 @@ class BrotliFileOut : public BrotliOut { FILE* f_; }; - } // namespace brotli #endif // BROTLI_ENC_STREAMS_H_ diff --git a/enc/write_bits.h b/enc/write_bits.h index 09e98a1..7d4c75b 100644 --- a/enc/write_bits.h +++ b/enc/write_bits.h @@ -50,6 +50,7 @@ inline void WriteBits(int n_bits, printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos); #endif assert((bits >> n_bits) == 0); + assert(n_bits <= 56); #ifdef IS_LITTLE_ENDIAN // This branch of the code can write up to 56 bits at a time, // 7 bits are lost by being perhaps already in *p and at least @@ -66,12 +67,12 @@ inline void WriteBits(int n_bits, uint8_t *array_pos = &array[*pos >> 3]; const int bits_reserved_in_first_byte = (*pos & 7); bits <<= 
bits_reserved_in_first_byte; - *array_pos++ |= bits; + *array_pos++ |= static_cast(bits); for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte; bits_left_to_write >= 1; bits_left_to_write -= 8) { bits >>= 8; - *array_pos++ = bits; + *array_pos++ = static_cast(bits); } *array_pos = 0; *pos += n_bits; diff --git a/shared/shared.go b/shared/shared.go index c929969..4397a94 100644 --- a/shared/shared.go +++ b/shared/shared.go @@ -1,5 +1,5 @@ -// Code shared between the enc and dec packages -package shared +// Package shared contains the common dictionary used by the enc and dec packages +package shared // import "gopkg.in/kothar/brotli-go.v0/shared" /* #include "dictionary.h" @@ -8,6 +8,7 @@ import "C" import "unsafe" +// GetDictionary retrieves a pointer to the dictionary data structure func GetDictionary() unsafe.Pointer { return unsafe.Pointer(&C.sharedBrotliDictionary) }