From 58335902966f37f82548210556095a11759bcc60 Mon Sep 17 00:00:00 2001 From: Rasmus Lerdorf Date: Thu, 27 Apr 2023 11:32:10 -0400 Subject: [PATCH] zstd support (#539) This adds zstd compression support. The current two options, zlib and fastlz, are basically a choice between performance and compression ratio. You would choose zlib if you are memory-bound and fastlz if you are cpu-bound. With zstd, you get the performance of fastlz with the compression of zlib. And often it wins on both. See this benchmark I ran on json files of varying sizes: https://gist.github.com/rlerdorf/788f3d0144f9c5514d8fee9477cbe787 Taking just a 40k json blob, we see that zstd at compression level 3 reduces it to 8862 bytes. Our current zlib 1 gets worse compression at 10091 bytes and takes longer both to compress and decompress. C Size ratio% C MB/s D MB/s SCORE Name File 8037 19.9 0.58 2130.89 0.08 zstd 22 file-39.54k-json 8204 20.3 31.85 2381.59 0.01 zstd 10 file-39.54k-json 8371 20.7 47.52 547.12 0.01 zlib 9 file-39.54k-json 8477 20.9 74.84 539.83 0.01 zlib 6 file-39.54k-json 8862 21.9 449.86 2130.89 0.01 zstd 3 file-39.54k-json 9171 22.7 554.62 2381.59 0.01 zstd 1 file-39.54k-json 10091 24.9 153.94 481.99 0.01 zlib 1 file-39.54k-json 10646 26.3 43.39 8097.40 0.01 lz4 16 file-39.54k-json 10658 26.3 72.30 8097.40 0.01 lz4 10 file-39.54k-json 13004 32.1 1396.10 6747.83 0.01 lz4 1 file-39.54k-json 13321 32.9 440.08 1306.03 0.01 fastlz 2 file-39.54k-json 14807 36.6 444.91 1156.77 0.01 fastlz 1 file-39.54k-json 15517 38.3 1190.79 4048.70 0.02 zstd -10 file-39.54k-json The fact that decompression is dramatically faster with zstd is a win for most common memcache uses since they tend to be read-heavy. The PR also adds a `memcached.compression_level` INI switch which currently only applies to zstd compression. It could probably be made to also apply to zlib and fastlz.
--- config.m4 | 10 ++++ php_memcached.c | 78 ++++++++++++++++++++++++++++++- php_memcached_private.h | 4 +- tests/compression_conditions.phpt | 2 + tests/compression_types.phpt | 34 ++++++++++++++ 5 files changed, 125 insertions(+), 3 deletions(-) diff --git a/config.m4 b/config.m4 index c7a15f11..0e4ef8cf 100644 --- a/config.m4 +++ b/config.m4 @@ -27,6 +27,9 @@ PHP_ARG_ENABLE(memcached-protocol, whether to enable memcached protocol support, PHP_ARG_WITH(system-fastlz, whether to use system FastLZ library, [ --with-system-fastlz Use system FastLZ library], no, no) +PHP_ARG_WITH(zstd, whether to use system zstd library, +[ --with-zstd Use system zstd library], no, no) + if test -z "$PHP_ZLIB_DIR"; then PHP_ARG_WITH(zlib-dir, for ZLIB, [ --with-zlib-dir=DIR Set the path to ZLIB install prefix.], no) @@ -345,6 +348,13 @@ if test "$PHP_MEMCACHED" != "no"; then PHP_MEMCACHED_FILES="${PHP_MEMCACHED_FILES} fastlz/fastlz.c" fi + if test "$PHP_ZSTD" != "no"; then + AC_CHECK_HEADERS([zstd.h], [ac_cv_have_zstd="yes"], [ac_cv_have_zstd="no"]) + PHP_CHECK_LIBRARY(zstd, ZSTD_compress, + [PHP_ADD_LIBRARY(zstd, 1, MEMCACHED_SHARED_LIBADD)], + [AC_MSG_ERROR(zstd library not found)]) + fi + if test "$PHP_MEMCACHED_SESSION" != "no"; then PHP_MEMCACHED_FILES="${PHP_MEMCACHED_FILES} php_memcached_session.c" fi diff --git a/php_memcached.c b/php_memcached.c index ece54408..e7d5736c 100644 --- a/php_memcached.c +++ b/php_memcached.c @@ -37,6 +37,10 @@ #endif #include <zlib.h> +#ifdef HAVE_ZSTD_H +#include <zstd.h> +#endif + #ifdef HAVE_JSON_API # include "ext/json/php_json.h" #endif @@ -77,6 +81,7 @@ static int php_memc_list_entry(void) { #define MEMC_OPT_COMPRESSION_TYPE -1004 #define MEMC_OPT_STORE_RETRY_COUNT -1005 #define MEMC_OPT_USER_FLAGS -1006 +#define MEMC_OPT_COMPRESSION_LEVEL -1007 /**************************************** Custom result codes @@ -107,6 +112,7 @@ static int php_memc_list_entry(void) { #define MEMC_VAL_COMPRESSED (1<<0) #define MEMC_VAL_COMPRESSION_ZLIB (1<<1) #define
MEMC_VAL_COMPRESSION_FASTLZ (1<<2) +#define MEMC_VAL_COMPRESSION_ZSTD (1<<3) #define MEMC_VAL_GET_FLAGS(internal_flags) (((internal_flags) & MEMC_MASK_INTERNAL) >> 4) #define MEMC_VAL_SET_FLAG(internal_flags, internal_flag) ((internal_flags) |= (((internal_flag) << 4) & MEMC_MASK_INTERNAL)) @@ -152,6 +158,7 @@ typedef struct { zend_long serializer; zend_long compression_type; + zend_long compression_level; zend_long store_retry_count; zend_long set_udf_flags; @@ -278,6 +285,10 @@ static PHP_INI_MH(OnUpdateCompressionType) MEMC_G(compression_type) = COMPRESSION_TYPE_FASTLZ; } else if (!strcmp(ZSTR_VAL(new_value), "zlib")) { MEMC_G(compression_type) = COMPRESSION_TYPE_ZLIB; +#ifdef HAVE_ZSTD_H + } else if (!strcmp(ZSTR_VAL(new_value), "zstd")) { + MEMC_G(compression_type) = COMPRESSION_TYPE_ZSTD; +#endif } else { return FAILURE; } @@ -408,6 +419,7 @@ PHP_INI_BEGIN() MEMC_INI_ENTRY("compression_type", "fastlz", OnUpdateCompressionType, compression_name) MEMC_INI_ENTRY("compression_factor", "1.3", OnUpdateReal, compression_factor) + MEMC_INI_ENTRY("compression_level", "3", OnUpdateLong, compression_level) MEMC_INI_ENTRY("compression_threshold", "2000", OnUpdateLong, compression_threshold) MEMC_INI_ENTRY("serializer", SERIALIZER_DEFAULT_NAME, OnUpdateSerializer, serializer_name) MEMC_INI_ENTRY("store_retry_count", "0", OnUpdateLong, store_retry_count) @@ -897,6 +909,19 @@ zend_bool s_compress_value (php_memc_compression_type compression_type, zend_str } break; +#ifdef HAVE_ZSTD_H + case COMPRESSION_TYPE_ZSTD: + { + compressed_size = ZSTD_compress((void *)buffer, buffer_size, ZSTR_VAL(payload), ZSTR_LEN(payload), MEMC_G(compression_level)); + + if (!ZSTD_isError(compressed_size)) { + compress_status = 1; + compression_type_flag = MEMC_VAL_COMPRESSION_ZSTD; + } + } + break; +#endif + case COMPRESSION_TYPE_ZLIB: { compressed_size = buffer_size; @@ -2939,6 +2964,9 @@ static PHP_METHOD(Memcached, getOption) case MEMC_OPT_COMPRESSION_TYPE: 
RETURN_LONG(memc_user_data->compression_type); + case MEMC_OPT_COMPRESSION_LEVEL: + RETURN_LONG(memc_user_data->compression_level); + case MEMC_OPT_COMPRESSION: RETURN_BOOL(memc_user_data->compression_enabled); @@ -3001,6 +3029,9 @@ int php_memc_set_option(php_memc_object_t *intern, long option, zval *value) case MEMC_OPT_COMPRESSION_TYPE: lval = zval_get_long(value); if (lval == COMPRESSION_TYPE_FASTLZ || +#ifdef HAVE_ZSTD_H + lval == COMPRESSION_TYPE_ZSTD || +#endif lval == COMPRESSION_TYPE_ZLIB) { memc_user_data->compression_type = lval; } else { @@ -3608,16 +3639,24 @@ zend_string *s_decompress_value (const char *payload, size_t payload_len, uint32 uint32_t stored_length; unsigned long length; zend_bool decompress_status = 0; - zend_bool is_fastlz = 0, is_zlib = 0; + zend_bool is_fastlz = 0, is_zlib = 0, is_zstd = 0; if (payload_len < sizeof (uint32_t)) { return NULL; } is_fastlz = MEMC_VAL_HAS_FLAG(flags, MEMC_VAL_COMPRESSION_FASTLZ); + is_zstd = MEMC_VAL_HAS_FLAG(flags, MEMC_VAL_COMPRESSION_ZSTD); is_zlib = MEMC_VAL_HAS_FLAG(flags, MEMC_VAL_COMPRESSION_ZLIB); - if (!is_fastlz && !is_zlib) { +#ifndef HAVE_ZSTD_H + if (is_zstd) { + php_error_docref(NULL, E_WARNING, "could not decompress value: value was compressed with zstd but zstd support has not been compiled in"); + return NULL; + } +#endif + + if (!is_fastlz && !is_zlib && !is_zstd) { php_error_docref(NULL, E_WARNING, "could not decompress value: unrecognised compression type"); return NULL; } @@ -3629,6 +3668,23 @@ zend_string *s_decompress_value (const char *payload, size_t payload_len, uint32 buffer = zend_string_alloc (stored_length, 0); +#ifdef HAVE_ZSTD_H + if (is_zstd) { + length = ZSTD_getFrameContentSize(payload, payload_len); + if (length == ZSTD_CONTENTSIZE_ERROR) { + php_error_docref(NULL, E_WARNING, "value was not compressed by zstd"); + zend_string_release (buffer); + return NULL; + } else if (length == ZSTD_CONTENTSIZE_UNKNOWN) { + php_error_docref(NULL, E_WARNING, "zstd streaming 
decompression not supported"); + zend_string_release (buffer); + return NULL; + } + decompress_status = !ZSTD_isError(ZSTD_decompress(&buffer->val, buffer->len, payload, payload_len)); + + } + else +#endif if (is_fastlz) { decompress_status = ((length = fastlz_decompress(payload, payload_len, &buffer->val, buffer->len)) > 0); } @@ -3955,6 +4011,7 @@ PHP_GINIT_FUNCTION(php_memcached) php_memcached_globals->memc.compression_threshold = 2000; php_memcached_globals->memc.compression_type = COMPRESSION_TYPE_FASTLZ; php_memcached_globals->memc.compression_factor = 1.30; + php_memcached_globals->memc.compression_level = 3; php_memcached_globals->memc.store_retry_count = 2; php_memcached_globals->memc.sasl_initialised = 0; @@ -4000,6 +4057,7 @@ static void php_memc_register_constants(INIT_FUNC_ARGS) REGISTER_MEMC_CLASS_CONST_LONG(OPT_COMPRESSION, MEMC_OPT_COMPRESSION); REGISTER_MEMC_CLASS_CONST_LONG(OPT_COMPRESSION_TYPE, MEMC_OPT_COMPRESSION_TYPE); + REGISTER_MEMC_CLASS_CONST_LONG(OPT_COMPRESSION_LEVEL, MEMC_OPT_COMPRESSION_LEVEL); REGISTER_MEMC_CLASS_CONST_LONG(OPT_PREFIX_KEY, MEMC_OPT_PREFIX_KEY); REGISTER_MEMC_CLASS_CONST_LONG(OPT_SERIALIZER, MEMC_OPT_SERIALIZER); @@ -4015,6 +4073,15 @@ static void php_memc_register_constants(INIT_FUNC_ARGS) REGISTER_MEMC_CLASS_CONST_BOOL(HAVE_IGBINARY, 0); #endif + /* + * Indicate whether zstd compression is available + */ +#ifdef HAVE_ZSTD_H + REGISTER_MEMC_CLASS_CONST_BOOL(HAVE_ZSTD, 1); +#else + REGISTER_MEMC_CLASS_CONST_BOOL(HAVE_ZSTD, 0); +#endif + /* * Indicate whether json serializer is available */ @@ -4186,6 +4253,7 @@ static void php_memc_register_constants(INIT_FUNC_ARGS) */ REGISTER_MEMC_CLASS_CONST_LONG(COMPRESSION_FASTLZ, COMPRESSION_TYPE_FASTLZ); REGISTER_MEMC_CLASS_CONST_LONG(COMPRESSION_ZLIB, COMPRESSION_TYPE_ZLIB); + REGISTER_MEMC_CLASS_CONST_LONG(COMPRESSION_ZSTD, COMPRESSION_TYPE_ZSTD); /* * Flags. 
@@ -4351,6 +4419,12 @@ PHP_MINFO_FUNCTION(memcached) php_info_print_table_row(2, "msgpack support", "no"); #endif +#ifdef HAVE_ZSTD_H + php_info_print_table_row(2, "zstd support", "yes"); +#else + php_info_print_table_row(2, "zstd support", "no"); +#endif + php_info_print_table_end(); DISPLAY_INI_ENTRIES(); diff --git a/php_memcached_private.h b/php_memcached_private.h index 48f1dfab..aefaf4fb 100644 --- a/php_memcached_private.h +++ b/php_memcached_private.h @@ -98,7 +98,8 @@ typedef enum { typedef enum { COMPRESSION_TYPE_ZLIB = 1, - COMPRESSION_TYPE_FASTLZ = 2 + COMPRESSION_TYPE_FASTLZ = 2, + COMPRESSION_TYPE_ZSTD = 3 } php_memc_compression_type; typedef struct { @@ -186,6 +187,7 @@ ZEND_BEGIN_MODULE_GLOBALS(php_memcached) zend_long compression_threshold; double compression_factor; zend_long store_retry_count; + zend_long compression_level; /* Converted values*/ php_memc_serializer_type serializer_type; diff --git a/tests/compression_conditions.phpt b/tests/compression_conditions.phpt index 749ebe8a..960058b6 100644 --- a/tests/compression_conditions.phpt +++ b/tests/compression_conditions.phpt @@ -21,6 +21,8 @@ function get_compression($name) { return Memcached::COMPRESSION_ZLIB; case 'fastlz': return Memcached::COMPRESSION_FASTLZ; + case 'zstd': + return Memcached::COMPRESSION_ZSTD; default: echo "Strange compression type: $name\n"; return 0; diff --git a/tests/compression_types.phpt b/tests/compression_types.phpt index ce07aed5..81d7867c 100644 --- a/tests/compression_types.phpt +++ b/tests/compression_types.phpt @@ -15,6 +15,10 @@ function get_compression($name) { return Memcached::COMPRESSION_ZLIB; case 'fastlz': return Memcached::COMPRESSION_FASTLZ; + case 'zstd': + if (Memcached::HAVE_ZSTD) { + return Memcached::COMPRESSION_ZSTD; + } else return 0; default: echo "Strange compression type: $name\n"; return 0; @@ -54,6 +58,26 @@ fetch_with_compression($m, 'hello6', $data, '', 'fastlz'); fetch_with_compression($m, 'hello7', $data, 'zlib', ''); 
fetch_with_compression($m, 'hello8', $data, 'fastlz', ''); fetch_with_compression($m, 'hello9', $data, '', ''); +if (Memcached::HAVE_ZSTD) { +fetch_with_compression($m, 'hello10', $data, 'zstd', 'zstd'); +fetch_with_compression($m, 'hello11', $data, 'zstd', 'fastlz'); +fetch_with_compression($m, 'hello12', $data, 'fastlz', 'zstd'); +fetch_with_compression($m, 'hello13', $data, '', 'zstd'); +fetch_with_compression($m, 'hello14', $data, 'zstd', ''); +} else { + echo <<<EOB +set=[zstd] get=[zstd] +bool(true) +set=[zstd] get=[fastlz] +bool(true) +set=[fastlz] get=[zstd] +bool(true) +set=[] get=[zstd] +bool(true) +set=[zstd] get=[] +bool(true) +EOB; +} ?> --EXPECT-- set=[zlib] get=[zlib] @@ -74,3 +98,13 @@ set=[fastlz] get=[] bool(true) set=[] get=[] bool(true) +set=[zstd] get=[zstd] +bool(true) +set=[zstd] get=[fastlz] +bool(true) +set=[fastlz] get=[zstd] +bool(true) +set=[] get=[zstd] +bool(true) +set=[zstd] get=[] +bool(true)