From cf77f0ee87f76b042ef6bcd19f129054b34ac438 Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Wed, 30 Nov 2022 22:04:14 +0000 Subject: [PATCH] Enable the use of [SU]Int32Size and EnumSize templates for AArch64 When benchmarking proto_benchmark from fleetbench on an AArch64 target we found that clang is able to vectorize these functions and they offer better performance than the scalar alternative. --- src/google/protobuf/wire_format_lite.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc index 2ed92101e086..f0e6e88a129c 100644 --- a/src/google/protobuf/wire_format_lite.cc +++ b/src/google/protobuf/wire_format_lite.cc @@ -704,7 +704,7 @@ static size_t VarintSize64(const T* data, const int n) { // and other platforms are untested, in those cases using the optimized // varint size routine for each element is faster. // Hence we enable it only for clang -#if defined(__SSE__) && defined(__clang__) +#if (defined(__SSE__) || defined(__aarch64__)) && defined(__clang__) size_t WireFormatLite::Int32Size(const RepeatedField& value) { return VarintSize(value.data(), value.size()); } @@ -722,7 +722,7 @@ size_t WireFormatLite::EnumSize(const RepeatedField& value) { return VarintSize(value.data(), value.size()); } -#else // !(defined(__SSE4_1__) && defined(__clang__)) +#else // !((defined(__SSE__) || defined(__aarch64__)) && defined(__clang__)) size_t WireFormatLite::Int32Size(const RepeatedField& value) { size_t out = 0;