Skip to content

Commit

Permalink
Merge pull request ClickHouse#58805 from ucasfl/avro-zstd
Browse files (browse the repository at this point in the history)
Add Zstd codec support to the Avro format
  • Loading branch information
Avogar committed Jan 17, 2024
2 parents 3efe102 + 656ec1c commit c701633
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 3 deletions.
2 changes: 1 addition & 1 deletion contrib/avro
2 changes: 2 additions & 0 deletions contrib/boost-cmake/CMakeLists.txt
Expand Up @@ -44,12 +44,14 @@ set (SRCS_IOSTREAMS
"${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zstd.cpp"
)

add_library (_boost_iostreams ${SRCS_IOSTREAMS})
add_library (boost::iostreams ALIAS _boost_iostreams)
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib)
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zstd)

# program_options

Expand Down
2 changes: 1 addition & 1 deletion src/Core/Settings.h
Expand Up @@ -1061,7 +1061,7 @@ class IColumn;
M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \
M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \
M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
M(UInt64, output_format_avro_rows_in_file, 1, "Max rows in a file (if permitted by storage)", 0) \
Expand Down
5 changes: 4 additions & 1 deletion src/Processors/Formats/Impl/AvroRowOutputFormat.cpp
Expand Up @@ -520,8 +520,11 @@ static avro::Codec getCodec(const std::string & codec_name)

if (codec_name == "null") return avro::Codec::NULL_CODEC;
if (codec_name == "deflate") return avro::Codec::DEFLATE_CODEC;
if (codec_name == "zstd")
return avro::Codec::ZSTD_CODEC;
#ifdef SNAPPY_CODEC_AVAILABLE
if (codec_name == "snappy") return avro::Codec::SNAPPY_CODEC;
if (codec_name == "snappy")
return avro::Codec::SNAPPY_CODEC;
#endif

throw Exception(ErrorCodes::BAD_ARGUMENTS, "Avro codec {} is not available", codec_name);
Expand Down
@@ -0,0 +1 @@
45
@@ -0,0 +1,16 @@
-- Tags: no-fasttest

-- Round-trip check for the Avro 'zstd' output codec: write rows into a
-- File(Avro) table configured with output_format_avro_codec = 'zstd',
-- then read them back and aggregate.
DROP TABLE IF EXISTS t;
-- Single Int32 column; the table-level setting selects the codec used when writing.
CREATE TABLE t
(
`n1` Int32
)
ENGINE = File(Avro)
SETTINGS output_format_avro_codec = 'zstd';

-- Populate the table with the rows produced by numbers(10).
INSERT INTO t SELECT *
FROM numbers(10);

-- Read the data back; the result is presumably compared with the test's
-- one-line reference output (45).
SELECT sum(n1)
FROM t;

-- Clean up so the test leaves no table behind.
DROP TABLE t;

0 comments on commit c701633

Please sign in to comment.