Skip to content

Commit

Permalink
Backport C++20 countl_zero (#128)
Browse files Browse the repository at this point in the history
Backport C++20 countl_zero

1) countl_zero can be used in peek_0_bits and is 2X faster.
Note: peek_0_bits is not on a hot path, overall gain is limited.

2) Apply updates for clang-tidy v13 (ships with VS 2022 17.1 Preview 2)

3) Increase upper range of supported CMake to 3.22
  • Loading branch information
vbaderks committed Jan 6, 2022
1 parent 26478a8 commit 0b3ac70
Show file tree
Hide file tree
Showing 12 changed files with 210 additions and 10 deletions.
12 changes: 12 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@
# -altera-struct-pack-align => Rationale: Not applicable (Altera is for openCL)
# -readability-function-cognitive-complexity => Warning is too strict, manual review code review is preferred
# -concurrency-mt-unsafe => Reports a false warning for strerror in main (not useful for CharLS)
# -clang-diagnostic-unused-command-line-argument => Reports false warnings as MSVC options are passed
# -llvm-namespace-comment => complains about namespace issues in system header files
# -hicpp-named-parameter => complains about issues in system header files
# -altera-unroll-loops => Does not apply (is for openCL)
# -altera-id-dependent-backward-branch => Does not apply (is for openCL)
# -bugprone-easily-swappable-parameters => To many do not fix warnings

---
Checks: '*,
Expand All @@ -50,8 +56,10 @@ Checks: '*,
-android-*,
-llvmlibc-*,
-llvm-header-guard,
-llvm-namespace-comment,
-hicpp-no-array-decay,
-hicpp-signed-bitwise,
-hicpp-named-parameter,
-clang-diagnostic-c++98-compat*,
-clang-diagnostic-c++98-c++11-compat,
-clang-diagnostic-unused-macros,
Expand All @@ -61,6 +69,7 @@ Checks: '*,
-clang-diagnostic-pragma-once-outside-header,
-clang-diagnostic-unused-const-variable,
-clang-analyzer-core.NonNullParamChecker,
-clang-diagnostic-unused-command-line-argument,
-misc-non-private-member-variables-in-classes,
-modernize-use-trailing-return-type,
-readability-magic-numbers,
Expand All @@ -81,7 +90,10 @@ Checks: '*,
-cert-err58-cpp,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
-altera-struct-pack-align,
-altera-unroll-loops,
-altera-id-dependent-backward-branch,
-readability-function-cognitive-complexity,
-bugprone-easily-swappable-parameters,
-concurrency-mt-unsafe'
WarningsAsErrors: false
HeaderFilterRegex: ''
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) Team CharLS.
# SPDX-License-Identifier: BSD-3-Clause

cmake_minimum_required(VERSION 3.13...3.19)
cmake_minimum_required(VERSION 3.13...3.22)

# Extract the version info from version.h
file(READ "include/charls/version.h" version)
Expand Down
1 change: 1 addition & 0 deletions CharLS.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
<s:Boolean x:Key="/Default/UserDictionary/Words/=charlstest/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=cmove/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=cmyk/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=countl/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=cpixel/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=cppcoreguidelines/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=decodetest/@EntryIndexedValue">True</s:Boolean>
Expand Down
3 changes: 2 additions & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@
<PropertyGroup Condition="'$(Configuration)'=='Checked'">
<RunCodeAnalysis Condition="'$(MSBuildProjectExtension)'=='.vcxproj'" >true</RunCodeAnalysis>
<EnableCppCoreCheck>true</EnableCppCoreCheck>
<EnableClangTidyCodeAnalysis>true</EnableClangTidyCodeAnalysis>
<!-- Latest clang-tidy crashes during Win32 builds. -->
<EnableClangTidyCodeAnalysis Condition="'$(Platform)'!='Win32'">true</EnableClangTidyCodeAnalysis>
<EnableMicrosoftCodeAnalysis>true</EnableMicrosoftCodeAnalysis>
</PropertyGroup>

Expand Down
55 changes: 55 additions & 0 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,18 @@ __declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, con
}


inline int countl_zero(const uint64_t value) noexcept
{
if (value == 0)
return 64;

unsigned long index;
_BitScanReverse64(&index, value);

return 63 - index;
}


static void bm_get_predicted_value_default(benchmark::State& state)
{
for (const auto _ : state)
Expand Down Expand Up @@ -196,4 +208,47 @@ static void bm_quantize_gradient_lut(benchmark::State& state)
BENCHMARK(bm_quantize_gradient_lut);


int peek_zero_bits(uint64_t val_test) noexcept
{
for (int32_t count{}; count < 16; ++count)
{
if ((val_test & (static_cast<uint64_t>(1) << (64 - 1))) != 0)
return count;

val_test <<= 1;
}
return -1;
}

static void bm_peek_zero_bits(benchmark::State& state)
{
for (const auto _ : state)
{
benchmark::DoNotOptimize(peek_zero_bits(0));
benchmark::DoNotOptimize(peek_zero_bits(UINT64_MAX));
}
}
BENCHMARK(bm_peek_zero_bits);



int peek_zero_bits_intrinsic(const uint64_t value) noexcept
{
const auto count = countl_zero(value);
return count < 16 ? count : -1;
}


static void bm_peek_zero_bits_intrinsic(benchmark::State& state)
{
for (const auto _ : state)
{
benchmark::DoNotOptimize(peek_zero_bits_intrinsic(0));
benchmark::DoNotOptimize(peek_zero_bits_intrinsic(UINT64_MAX));
}
}
BENCHMARK(bm_peek_zero_bits_intrinsic);



BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions cpp.hint
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@
#define CHARLS_CHECK_RETURN
#define CHARLS_RET_MAY_BE_NULL
#define USE_DECL_ANNOTATIONS
#define ASSERT(x)
2 changes: 1 addition & 1 deletion fuzztest/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ int main(const int argc, const char* const argv[]) // NOLINT(bugprone-exception-

while (__AFL_LOOP(100))
{
std::vector<uint8_t> source(1024 * 1024);
std::vector<uint8_t> source(static_cast<size_t>(1024) * 1024);
const size_t input_length = _read(fd, source.data(), static_cast<unsigned int>(source.size()));
source.resize(input_length);

Expand Down
10 changes: 9 additions & 1 deletion src/decoder_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ class decoder_strategy

FORCE_INLINE void skip(const int32_t length) noexcept
{
valid_bits_ -= length;
ASSERT(length);

valid_bits_ -= length; // Note: valid_bits_ may become negative to indicate that extra bits are needed.
read_cache_ = read_cache_ << length;
}

Expand Down Expand Up @@ -140,6 +142,11 @@ class decoder_strategy
{
fill_read_cache();
}

#if defined(_MSC_VER) || defined(__GNUC__)
const auto count = countl_zero(read_cache_);
return count < 16 ? count : -1;
#else
cache_t val_test = read_cache_;

for (int32_t count{}; count < 16; ++count)
Expand All @@ -150,6 +157,7 @@ class decoder_strategy
val_test <<= 1;
}
return -1;
#endif
}

FORCE_INLINE int32_t read_high_bits()
Expand Down
64 changes: 64 additions & 0 deletions src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,4 +398,68 @@ constexpr auto to_underlying_type(Enum e) noexcept
return static_cast<std::underlying_type_t<Enum>>(e);
}

#ifdef _MSC_VER
#if defined(_M_X64) || defined(_M_ARM64)
/// <summary>
/// Custom implementation of C++20 std::countl_zero (for uint64_t)
/// </summary>
inline int countl_zero(const uint64_t value) noexcept
{
if (value == 0)
return 64;

unsigned long index;
_BitScanReverse64(&index, value);
return 63 - index;
}
#endif

/// <summary>
/// Custom implementation of C++20 std::countl_zero (for uint32_t)
/// </summary>
inline int countl_zero(const uint32_t value) noexcept
{
if (value == 0)
return 32;

unsigned long index;
_BitScanReverse(&index, value);

return 31 - index;
}
#endif

#ifdef __GNUC__

// A simple overload with uint64_t\uint32_t doesn't work for macOS. size_t is not the same type as uint64_t.

template<int Bits, class T>
constexpr bool is_uint_v = sizeof(T) == (Bits / 8) && std::is_integral<T>::value && !std::is_signed<T>::value;

/// <summary>
/// Custom implementation of C++20 std::countl_zero (for uint64_t)
/// </summary>
template<class T>
auto countl_zero(T value) noexcept -> std::enable_if_t<is_uint_v<64, T>, int>
{
if (value == 0)
return 64;

return __builtin_clzl(value);
}

/// <summary>
/// Custom implementation of C++20 std::countl_zero (for uint32_t)
/// </summary>
template<class T>
auto countl_zero(T value) noexcept -> std::enable_if_t<is_uint_v<32, T>, int>
{
if (value == 0)
return 32;

return __builtin_clz(value);
}

#endif

} // namespace charls
8 changes: 4 additions & 4 deletions test/bitstreamdamage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace {
void test_damaged_bit_stream1()
{
const vector<uint8_t> encoded_buffer = read_file("test/incorrect_images/InfiniteLoopFFMPEG.jls");
vector<uint8_t> destination(256 * 256 * 2);
vector<uint8_t> destination(static_cast<size_t>(256) * 256 * 2);

error_code error;
try
Expand All @@ -45,7 +45,7 @@ void test_damaged_bit_stream2()
encoded_buffer.resize(900);
encoded_buffer.resize(40000, 3);

vector<uint8_t> destination(512 * 512);
vector<uint8_t> destination(static_cast<size_t>(512) * 512);

error_code error;
try
Expand All @@ -68,7 +68,7 @@ void test_damaged_bit_stream3()
encoded_buffer[300] = 0xFF;
encoded_buffer[301] = 0xFF;

vector<uint8_t> destination(512 * 512);
vector<uint8_t> destination(static_cast<size_t>(512) * 512);

error_code error;
try
Expand All @@ -93,7 +93,7 @@ void test_file_with_random_header_damage(const char* filename)
MSVC_WARNING_SUPPRESS_NEXT_LINE(26496) // cannot be marked as const as operator() is not always defined const.
MSVC_CONST uniform_int_distribution<uint32_t> distribution(0, 255);

vector<uint8_t> destination(512 * 512);
vector<uint8_t> destination(static_cast<size_t>(512) * 512);

for (size_t i{}; i != 40; ++i)
{
Expand Down
4 changes: 2 additions & 2 deletions test/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void test_noise_image_with_custom_reset()

void test_fail_on_too_small_output_buffer()
{
const auto input_buffer{make_some_noise(8 * 8, 8, 21344)};
const auto input_buffer{make_some_noise(static_cast<size_t>(8) * 8, 8, 21344)};

// Trigger a "destination buffer too small" when writing the header markers.
try
Expand Down Expand Up @@ -321,7 +321,7 @@ void test_bgr()
void test_too_small_output_buffer()
{
const vector<uint8_t> encoded{read_file("test/lena8b.jls")};
vector<uint8_t> destination(512 * 511);
vector<uint8_t> destination(static_cast<size_t>(512) * 511);

jpegls_decoder decoder;
decoder.source(encoded).read_header();
Expand Down
58 changes: 58 additions & 0 deletions unittest/decoder_strategy_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,64 @@ TEST_CLASS(decoder_strategy_test)
Assert::AreEqual(data.value, actual);
}
}

TEST_METHOD(peek_byte) // NOLINT
{
constexpr frame_info frame_info{};
constexpr coding_parameters parameters{};

array<uint8_t, 4> buffer{7, 100, 23, 99};

decoder_strategy_tester decoder_strategy(frame_info, parameters, buffer.data(), buffer.size());

Assert::AreEqual(7, decoder_strategy.peek_byte());
}

TEST_METHOD(read_bit) // NOLINT
{
constexpr frame_info frame_info{};
constexpr coding_parameters parameters{};

array<uint8_t, 4> buffer{0xAA, 100, 23, 99};

decoder_strategy_tester decoder_strategy(frame_info, parameters, buffer.data(), buffer.size());

Assert::IsTrue(decoder_strategy.read_bit());
Assert::IsFalse(decoder_strategy.read_bit());
Assert::IsTrue(decoder_strategy.read_bit());
Assert::IsFalse(decoder_strategy.read_bit());
Assert::IsTrue(decoder_strategy.read_bit());
Assert::IsFalse(decoder_strategy.read_bit());
Assert::IsTrue(decoder_strategy.read_bit());
Assert::IsFalse(decoder_strategy.read_bit());
}

TEST_METHOD(peek_0_bits) // NOLINT
{
constexpr frame_info frame_info{};
constexpr coding_parameters parameters{};

{
array<uint8_t, 4> buffer{0xF, 100, 23, 99};

decoder_strategy_tester decoder_strategy(frame_info, parameters, buffer.data(), buffer.size());
Assert::AreEqual(4, decoder_strategy.peek_0_bits());
}

{
array<uint8_t, 4> buffer{0, 1, 0, 0};

decoder_strategy_tester decoder_strategy(frame_info, parameters, buffer.data(), buffer.size());
Assert::AreEqual(15, decoder_strategy.peek_0_bits());
}

{
array<uint8_t, 4> buffer{0, 0, 0, 0};

decoder_strategy_tester decoder_strategy(frame_info, parameters, buffer.data(), buffer.size());
Assert::AreEqual(-1, decoder_strategy.peek_0_bits());
}
}
};

}} // namespace charls::test

0 comments on commit 0b3ac70

Please sign in to comment.