Skip to content

Commit

Permalink
apacheGH-36327: [C++][CI] Fix Valgrind failures
Browse files Browse the repository at this point in the history
* Silence Valgrind error in OpenSSL
* Make sure the entire data buffer of pairwise_diff is initialized
* Make sure the bitmaps of REE-encoded data are initialized
  • Loading branch information
pitrou committed Jul 4, 2023
1 parent d2fd730 commit 50c2074
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 23 deletions.
5 changes: 3 additions & 2 deletions cpp/src/arrow/compute/kernels/ree_util_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ Result<std::shared_ptr<Buffer>> AllocateValuesBuffer(int64_t length, const DataT
MemoryPool* pool,
int64_t data_buffer_size) {
if (type.bit_width() == 1) {
return AllocateBitmap(length, pool);
// Make sure the bitmap is initialized (avoids Valgrind errors).
return AllocateEmptyBitmap(length, pool);
} else if (is_fixed_width(type.id())) {
return AllocateBuffer(length * type.byte_width(), pool);
} else {
Expand All @@ -62,7 +63,7 @@ Result<std::shared_ptr<ArrayData>> PreallocateValuesArray(
std::vector<std::shared_ptr<Buffer>> values_data_buffers;
std::shared_ptr<Buffer> validity_buffer = NULLPTR;
if (has_validity_buffer) {
ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateBitmap(length, pool));
ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateEmptyBitmap(length, pool));
}
ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateValuesBuffer(length, *value_type,
pool, data_buffer_size));
Expand Down
51 changes: 30 additions & 21 deletions cpp/src/arrow/compute/kernels/vector_pairwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

#include <iostream>
#include <memory>
#include "arrow/builder.h"

#include "arrow/array/builder_base.h"
#include "arrow/compute/api_vector.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
Expand All @@ -35,9 +36,9 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
#include "arrow/visit_type_inline.h"

namespace arrow::compute::internal {
namespace {

// We reuse the kernel exec function of a scalar binary function to compute pairwise
// results. For example, for pairwise_diff, we reuse subtract's kernel exec.
Expand All @@ -55,21 +56,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const ArraySpan& input,
ArrayData* result) {
// We only compute values in the region where the input-with-offset overlaps
// the original input. The margin where these do not overlap gets filled with null.
auto margin_length = std::min(abs(periods), input.length);
auto computed_length = input.length - margin_length;
auto margin_start = periods > 0 ? 0 : computed_length;
auto computed_start = periods > 0 ? margin_length : 0;
auto left_start = computed_start;
auto right_start = margin_length - computed_start;
// prepare bitmap
bit_util::ClearBitmap(result->buffers[0]->mutable_data(), margin_start, margin_length);
const auto margin_length = std::min(abs(periods), input.length);
const auto computed_length = input.length - margin_length;
const auto computed_start = periods > 0 ? margin_length : 0;
const auto left_start = computed_start;
const auto right_start = margin_length - computed_start;
// prepare null bitmap
int64_t null_count = margin_length;
for (int64_t i = computed_start; i < computed_start + computed_length; i++) {
if (input.IsValid(i) && input.IsValid(i - periods)) {
bit_util::SetBit(result->buffers[0]->mutable_data(), i);
} else {
bit_util::ClearBit(result->buffers[0]->mutable_data(), i);
++null_count;
}
}
result->null_count = null_count;
// prepare input span
ArraySpan left(input);
left.SetSlice(left_start, computed_length);
Expand All @@ -90,9 +91,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const ArraySpan& input,
Status PairwiseExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
const auto& state = checked_cast<const PairwiseState&>(*ctx->state());
auto input = batch[0].array;
RETURN_NOT_OK(PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec, state.periods,
out->array_data_mutable()));
return Status::OK();

// The scalar diff kernel will only write into the non-null output area.
// We must therefore pre-initialize the output, otherwise the left or right
// margin would be left uninitialized.
ARROW_ASSIGN_OR_RAISE(auto builder,
MakeBuilder(out->type()->GetSharedPtr(), ctx->memory_pool()));
// Append nulls rather than empty values, so as to allocate a null bitmap.
RETURN_NOT_OK(builder->AppendNulls(out->length()));
std::shared_ptr<ArrayData> out_data;
RETURN_NOT_OK(builder->FinishInternal(&out_data));
out_data->null_count = kUnknownNullCount;
out->value = std::move(out_data);

return PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec, state.periods,
out->array_data_mutable());
}

const FunctionDoc pairwise_diff_doc(
Expand Down Expand Up @@ -122,19 +135,13 @@ const PairwiseOptions* GetDefaultPairwiseOptions() {
return &kDefaultPairwiseOptions;
}

struct PairwiseKernelData {
InputType input;
OutputType output;
ArrayKernelExec exec;
};

void RegisterPairwiseDiffKernels(std::string_view func_name,
std::string_view base_func_name, const FunctionDoc& doc,
FunctionRegistry* registry) {
VectorKernel kernel;
kernel.can_execute_chunkwise = false;
kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
kernel.mem_allocation = MemAllocation::PREALLOCATE;
kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
kernel.init = OptionsWrapper<PairwiseOptions>::Init;
auto func = std::make_shared<VectorFunction>(std::string(func_name), Arity::Unary(),
doc, GetDefaultPairwiseOptions());
Expand Down Expand Up @@ -174,6 +181,8 @@ void RegisterPairwiseDiffKernels(std::string_view func_name,
DCHECK_OK(registry->AddFunction(std::move(func)));
}

} // namespace

void RegisterVectorPairwise(FunctionRegistry* registry) {
RegisterPairwiseDiffKernels("pairwise_diff", "subtract", pairwise_diff_doc, registry);
RegisterPairwiseDiffKernels("pairwise_diff_checked", "subtract_checked",
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
namespace arrow {
namespace compute {
namespace internal {
namespace {

struct RunEndEncondingState : public KernelState {
explicit RunEndEncondingState(std::shared_ptr<DataType> run_end_type)
Expand Down Expand Up @@ -526,6 +527,8 @@ static const FunctionDoc run_end_decode_doc(
"Decode run-end encoded array",
("Return a decoded version of a run-end encoded input array."), {"array"});

} // namespace

void RegisterVectorRunEndEncode(FunctionRegistry* registry) {
auto function = std::make_shared<VectorFunction>("run_end_encode", Arity::Unary(),
run_end_encode_doc);
Expand Down
6 changes: 6 additions & 0 deletions cpp/valgrind.supp
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,9 @@
...
fun:_dl_allocate_tls
}
{
<OpenSSL>:Conditional jump or move depends on uninitialised value(s)
Memcheck:Cond
...
fun:*gcm_cipher*
}

0 comments on commit 50c2074

Please sign in to comment.