From a91b3c41940b318277b8a8cd58a723a76f3cd536 Mon Sep 17 00:00:00 2001 From: Thomas Klausner Date: Sun, 22 Sep 2019 18:21:17 +0200 Subject: [PATCH] Add support for the XZ compression method. Based on patches provided by Alexey Bykov in https://github.com/nih-at/libzip/issues/28 --- CMakeLists.txt | 14 ++ INSTALL.md | 3 + NEWS.md | 1 + THANKS | 1 + cmake-config.h.in | 1 + lib/CMakeLists.txt | 10 +- lib/zip.h | 1 + lib/zip_algorithm_deflate.c | 1 + lib/zip_algorithm_xz.c | 241 +++++++++++++++++++++++ lib/zip_source_compress.c | 10 + lib/zipint.h | 3 + regress/CMakeLists.txt | 4 +- regress/set_compression_store_to_xz.test | 4 + regress/set_compression_xz_to_store.test | 4 + regress/testfile-lzma.zip | Bin 0 -> 153 bytes regress/testfile-stored-dos.zip | Bin 0 -> 212 bytes regress/testfile-xz.zip | Bin 0 -> 200 bytes src/ziptool.c | 13 ++ 18 files changed, 307 insertions(+), 4 deletions(-) create mode 100644 lib/zip_algorithm_xz.c create mode 100644 regress/set_compression_store_to_xz.test create mode 100644 regress/set_compression_xz_to_store.test create mode 100644 regress/testfile-lzma.zip create mode 100644 regress/testfile-stored-dos.zip create mode 100644 regress/testfile-xz.zip diff --git a/CMakeLists.txt b/CMakeLists.txt index a3e43ba39..53ef5a5da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ OPTION(ENABLE_OPENSSL "Enable use of OpenSSL" ON) OPTION(ENABLE_WINDOWS_CRYPTO "Enable use of Windows cryptography libraries" ON) OPTION(ENABLE_BZIP2 "Enable use of BZip2" ON) +OPTION(ENABLE_LZMA "Enable use of LZMA" ON) OPTION(BUILD_TOOLS "Build tools in the src directory (zipcmp, zipmerge, ziptool)" ON) OPTION(BUILD_REGRESS "Build regression tests" ON) @@ -218,6 +219,19 @@ IF(ENABLE_BZIP2) ENDIF(BZIP2_FOUND) ENDIF(ENABLE_BZIP2) +IF(ENABLE_LZMA) + FIND_PACKAGE(LibLZMA) + IF(LIBLZMA_FOUND) + SET (HAVE_LIBLZMA 1) + + INCLUDE_DIRECTORIES(${LIBLZMA_INCLUDE_DIR}) + SET (OPTIONAL_LIBRARY ${OPTIONAL_LIBRARY} ${LIBLZMA_LIBRARY}) + ELSE() + MESSAGE(WARNING "-- 
lzma library not found; lzma support disabled") + ENDIF(LIBLZMA_FOUND) +ENDIF(ENABLE_LZMA) + + IF (COMMONCRYPTO_FOUND) SET (HAVE_CRYPTO 1) SET (HAVE_COMMONCRYPTO 1) diff --git a/INSTALL.md b/INSTALL.md index eef900ef8..2cc390899 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -8,6 +8,9 @@ comes with most operating systems. For supporting bzip2-compressed zip archives, you need [bzip2](http://bzip.org/). +For supporting xz-compressed zip archives, you need +[liblzma](https://tukaani.org/xz/) which is part of xz. + For AES (encryption) support, you need one of these cryptographic libraries, listed in order of preference: diff --git a/NEWS.md b/NEWS.md index b4f6f7f8c..4694d8454 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Avoid using umask() since it's not thread-safe. * Set close-on-exec flag when opening files. * Do not accept empty files as valid zip archives any longer. +* Add support for XZ compressed files (using liblzma). 1.5.2 [2019-03-12] ================== diff --git a/THANKS b/THANKS index f92b7a2a5..d6a0f231c 100644 --- a/THANKS +++ b/THANKS @@ -6,6 +6,7 @@ Thanks to these people for suggestions, testing, and bug reports: Agostino Sarubbo Alexander Galanin Alexandr Shadchin +Alexey Bykov Andreas Falkenhahn Andrew Brampton Andrew Molyneux diff --git a/cmake-config.h.in b/cmake-config.h.in index fd2d451b0..7d5bdab49 100644 --- a/cmake-config.h.in +++ b/cmake-config.h.in @@ -30,6 +30,7 @@ #cmakedefine HAVE_GETPROGNAME #cmakedefine HAVE_GNUTLS #cmakedefine HAVE_LIBBZ2 +#cmakedefine HAVE_LIBLZMA #cmakedefine HAVE_LOCALTIME_R #cmakedefine HAVE_MBEDTLS #cmakedefine HAVE_MKSTEMP diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 0fedd177c..7dc736434 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -10,9 +10,9 @@ MACRO(GET_TARGET_PROPERTY_WITH_DEFAULT _variable _target _property _default_valu IF(${_variable} STREQUAL NOTFOUND) SET(${_variable} ${_default_value}) ENDIF() - + ENDMACRO() - + MACRO(CREATE_LIBTOOL_FILE _target _install_DIR) 
GET_TARGET_PROPERTY(_target_location ${_target} LOCATION) GET_TARGET_PROPERTY_WITH_DEFAULT(_target_static_lib ${_target} STATIC_LIB "") @@ -51,7 +51,7 @@ MACRO(CREATE_LIBTOOL_FILE _target _install_DIR) FILE(APPEND ${_laname} "libdir='${CMAKE_INSTALL_PREFIX}/${_install_DIR}'\n") INSTALL( FILES ${_laname} ${_soname} DESTINATION ${CMAKE_INSTALL_PREFIX}${_install_DIR}) ENDMACRO() - + SET(LIBZIP_SOURCES zip_add.c zip_add_dir.c @@ -192,6 +192,10 @@ IF(HAVE_LIBBZ2) SET(LIBZIP_OPTIONAL_FILES zip_algorithm_bzip2.c) ENDIF() +IF(HAVE_LIBLZMA) + SET(LIBZIP_OPTIONAL_FILES ${LIBZIP_OPTIONAL_FILES} zip_algorithm_xz.c) +ENDIF() + IF(HAVE_COMMONCRYPTO) SET(LIBZIP_OPTIONAL_FILES ${LIBZIP_OPTIONAL_FILES} zip_crypto_commoncrypto.c ) diff --git a/lib/zip.h b/lib/zip.h index 5688b5df2..597aeedd0 100644 --- a/lib/zip.h +++ b/lib/zip.h @@ -162,6 +162,7 @@ extern "C" { /* 15-17 - Reserved by PKWARE */ #define ZIP_CM_TERSE 18 /* compressed using IBM TERSE (new) */ #define ZIP_CM_LZ77 19 /* IBM LZ77 z Architecture (PFS) */ +#define ZIP_CM_LZMA2 33 #define ZIP_CM_XZ 95 /* XZ compressed data */ #define ZIP_CM_JPEG 96 /* Compressed Jpeg data */ #define ZIP_CM_WAVPACK 97 /* WavPack compressed data */ diff --git a/lib/zip_algorithm_deflate.c b/lib/zip_algorithm_deflate.c index ba5ad4394..519ef80ad 100644 --- a/lib/zip_algorithm_deflate.c +++ b/lib/zip_algorithm_deflate.c @@ -51,6 +51,7 @@ allocate(bool compress, int compression_flags, zip_error_t *error) { struct ctx *ctx; if ((ctx = (struct ctx *)malloc(sizeof(*ctx))) == NULL) { + zip_error_set(error, ZIP_ET_SYS, errno); return NULL; } diff --git a/lib/zip_algorithm_xz.c b/lib/zip_algorithm_xz.c new file mode 100644 index 000000000..bb734f460 --- /dev/null +++ b/lib/zip_algorithm_xz.c @@ -0,0 +1,241 @@ +/* + zip_algorithm_xz.c -- XZ (de)compression routines + Based on zip_algorithm_deflate.c -- deflate (de)compression routines + Copyright (C) 2017-2018 Dieter Baron and Thomas Klausner + + This file is part of libzip, a library to manipulate ZIP 
archives. + The authors can be contacted at <libzip@nih.at> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. The names of the authors may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "zipint.h" + +#include +#include +#include + +struct ctx { + zip_error_t *error; + bool compress; + int compression_flags; + bool end_of_input; + lzma_stream zstr; + zip_uint16_t method; +}; + + +static void * +allocate(bool compress, int compression_flags, zip_error_t *error, zip_uint16_t method) { + struct ctx *ctx; + + if ((ctx = (struct ctx *)malloc(sizeof(*ctx))) == NULL) { + zip_error_set(error, ZIP_ET_SYS, errno); + return NULL; + } + + ctx->error = error; + ctx->compress = compress; + ctx->compression_flags = compression_flags; + ctx->compression_flags |= LZMA_PRESET_EXTREME; + ctx->end_of_input = false; + memset(&ctx->zstr, 0, sizeof(ctx->zstr)); + ctx->method = method; + return ctx; +} + + +static void * +compress_allocate(zip_uint16_t method, int compression_flags, zip_error_t *error) { + return allocate(true, compression_flags, error, method); +} + + +static void * +decompress_allocate(zip_uint16_t method, int compression_flags, zip_error_t *error) { + return allocate(false, compression_flags, error, method); +} + + +static void +deallocate(void *ud) { + struct ctx *ctx = (struct ctx *)ud; + free(ctx); +} + + +static int +compression_flags(void *ud) { + struct ctx *ctx = (struct ctx *)ud; + return 0; +} + +static int +map_error(int ret) { + switch (ret) { + case LZMA_UNSUPPORTED_CHECK: + return ZIP_ER_COMPRESSED_DATA; + + case LZMA_MEM_ERROR: + return ZIP_ER_MEMORY; + + case LZMA_OPTIONS_ERROR: + return ZIP_ER_INVAL; + + default: + return ZIP_ER_INTERNAL; + } +} + + +static bool +start(void *ud) { + struct ctx *ctx = (struct ctx *)ud; + lzma_ret ret; + + lzma_options_lzma opt_lzma; + lzma_lzma_preset(&opt_lzma, ctx->compression_flags); + lzma_filter filters[] = { + { .id = (ctx->method == ZIP_CM_LZMA ? 
LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2), .options = &opt_lzma}, + { .id = LZMA_VLI_UNKNOWN, .options = NULL }, + }; + + ctx->zstr.avail_in = 0; + ctx->zstr.next_in = NULL; + ctx->zstr.avail_out = 0; + ctx->zstr.next_out = NULL; + + if (ctx->compress) { + if (ctx->method == ZIP_CM_LZMA) + ret = lzma_alone_encoder(&ctx->zstr, filters[0].options); + else + ret = lzma_stream_encoder(&ctx->zstr, filters, LZMA_CHECK_CRC64); + } else { + if (ctx->method == ZIP_CM_LZMA) + ret = lzma_alone_decoder(&ctx->zstr, UINT64_MAX); + else + ret = lzma_stream_decoder(&ctx->zstr, UINT64_MAX, LZMA_CONCATENATED); + } + + if (ret != LZMA_OK) { + zip_error_set(ctx->error, map_error(ret), 0); + return false; + } + + return true; +} + + +static bool +end(void *ud) { + struct ctx *ctx = (struct ctx *)ud; + int err; + + lzma_end(&ctx->zstr); + return true; +} + + +static bool +input(void *ud, zip_uint8_t *data, zip_uint64_t length) { + struct ctx *ctx = (struct ctx *)ud; + + if (length > UINT_MAX || ctx->zstr.avail_in > 0) { + zip_error_set(ctx->error, ZIP_ER_INVAL, 0); + return false; + } + + ctx->zstr.avail_in = (uInt)length; + ctx->zstr.next_in = (Bytef *)data; + + return true; +} + + +static void +end_of_input(void *ud) { + struct ctx *ctx = (struct ctx *)ud; + + ctx->end_of_input = true; +} + + +static zip_compression_status_t +process(void *ud, zip_uint8_t *data, zip_uint64_t *length) { + struct ctx *ctx = (struct ctx *)ud; + lzma_ret ret; + + ctx->zstr.avail_out = (uInt)ZIP_MIN(UINT_MAX, *length); + ctx->zstr.next_out = (Bytef *)data; + + ret = lzma_code(&ctx->zstr, ctx->end_of_input ? 
LZMA_FINISH : LZMA_RUN); + *length = *length - ctx->zstr.avail_out; + + switch (ret) { + case LZMA_OK: + return ZIP_COMPRESSION_OK; + + case LZMA_STREAM_END: + return ZIP_COMPRESSION_END; + + case LZMA_BUF_ERROR: + if (ctx->zstr.avail_in == 0) { + return ZIP_COMPRESSION_NEED_DATA; + } + + /* fallthrough */ + default: + zip_error_set(ctx->error, map_error(ret), 0); + return ZIP_COMPRESSION_ERROR; + } +} + +// clang-format off + +zip_compression_algorithm_t zip_algorithm_xz_compress = { + compress_allocate, + deallocate, + compression_flags, + start, + end, + input, + end_of_input, + process +}; + + +zip_compression_algorithm_t zip_algorithm_xz_decompress = { + decompress_allocate, + deallocate, + compression_flags, + start, + end, + input, + end_of_input, + process +}; + +// clang-format on diff --git a/lib/zip_source_compress.c b/lib/zip_source_compress.c index d2ae220ca..ed9a8db13 100644 --- a/lib/zip_source_compress.c +++ b/lib/zip_source_compress.c @@ -67,6 +67,16 @@ static struct implementation implementations[] = { #if defined(HAVE_LIBBZ2) {ZIP_CM_BZIP2, &zip_algorithm_bzip2_compress, &zip_algorithm_bzip2_decompress}, #endif +#if defined(HAVE_LIBLZMA) +/* Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 + archives made this way - and vice versa. 
+ + {ZIP_CM_LZMA, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress}, + {ZIP_CM_LZMA2, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress}, +*/ + {ZIP_CM_XZ, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress}, +#endif + }; static size_t implementations_size = sizeof(implementations) / sizeof(implementations[0]); diff --git a/lib/zipint.h b/lib/zipint.h index e52b60e94..32038793f 100644 --- a/lib/zipint.h +++ b/lib/zipint.h @@ -150,6 +150,9 @@ extern zip_compression_algorithm_t zip_algorithm_bzip2_compress; extern zip_compression_algorithm_t zip_algorithm_bzip2_decompress; extern zip_compression_algorithm_t zip_algorithm_deflate_compress; extern zip_compression_algorithm_t zip_algorithm_deflate_decompress; +extern zip_compression_algorithm_t zip_algorithm_xz_compress; +extern zip_compression_algorithm_t zip_algorithm_xz_decompress; + bool zip_compression_method_supported(zip_int32_t method, bool compress); diff --git a/regress/CMakeLists.txt b/regress/CMakeLists.txt index 756a270fa..39a3a2c63 100644 --- a/regress/CMakeLists.txt +++ b/regress/CMakeLists.txt @@ -178,7 +178,9 @@ SET(EXTRA_TESTS set_compression_store_to_bzip2.test set_compression_store_to_deflate.test set_compression_store_to_store.test + set_compression_store_to_xz.test set_compression_unknown.test + set_compression_xz_to_store.test stat_index_cp437_guess.test stat_index_cp437_raw.test stat_index_cp437_strict.test @@ -206,4 +208,4 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../lib ${CMAKE_CURRENT_SO ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND}) ADD_EXECUTABLE(fuzz_main fuzz_main.c) -TARGET_LINK_LIBRARIES(fuzz_main zip) \ No newline at end of file +TARGET_LINK_LIBRARIES(fuzz_main zip) diff --git a/regress/set_compression_store_to_xz.test b/regress/set_compression_store_to_xz.test new file mode 100644 index 000000000..88141181a --- /dev/null +++ b/regress/set_compression_store_to_xz.test @@ -0,0 +1,4 @@ +# change method from stored to xz-compressed +return 0 
+args test.zip set_file_compression 0 xz 0 +file test.zip testfile-stored-dos.zip testfile-xz.zip diff --git a/regress/set_compression_xz_to_store.test b/regress/set_compression_xz_to_store.test new file mode 100644 index 000000000..26f23e428 --- /dev/null +++ b/regress/set_compression_xz_to_store.test @@ -0,0 +1,4 @@ +# change method from xz-compressed to stored +return 0 +args test.zip set_file_compression 0 store 0 +file test.zip testfile-xz.zip testfile-stored-dos.zip diff --git a/regress/testfile-lzma.zip b/regress/testfile-lzma.zip new file mode 100644 index 0000000000000000000000000000000000000000..59abb07fbbf17a1a1e90e13f051a3557006e1b19 GIT binary patch literal 153 zcmWIWW@fQxU|`^52&*vi_i!-XAPMBz05LxhCnhB(>lURJq$ZZ=l~j~)im)=o0)-hF z4BqT@bN#=3OK%*1OZP|b0B=Snd1l<^0QCShF);jX1hLRe4e(}V0|_$%p)HVh192Dt D2%KIhY$$=UxJH&=nA85kH<9b!~g?qe&O rR|VF<5E&KV&B!FefZJNIHULkH literal 0 HcmV?d00001 diff --git a/src/ziptool.c b/src/ziptool.c index e8cedd871..1f4cea645 100644 --- a/src/ziptool.c +++ b/src/ziptool.c @@ -616,6 +616,19 @@ get_compression_method(const char *arg) { #if defined(HAVE_LIBBZ2) else if (strcmp(arg, "bzip2") == 0) return ZIP_CM_BZIP2; +#endif +#if defined(HAVE_LIBLZMA) +/* Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 + archives made this way - and vice versa. + + else if (strcmp(arg, "lzma") == 0) + return ZIP_CM_LZMA; + else if (strcmp(arg, "lzma2") == 0) + return ZIP_CM_LZMA2; +*/ + else if (strcmp(arg, "xz") == 0) + return ZIP_CM_XZ; + #endif else if (strcmp(arg, "unknown") == 0) return 100;