Skip to content

Commit

Permalink
apacheGH-39525: [C++][Parquet] Pass memory pool to decoders (apache#3…
Browse files Browse the repository at this point in the history
…9526)

### Rationale for this change

Memory pools should be plumbed through wherever possible.

### What changes are included in this PR?

Pass the memory pool through to the decoders.

### Are these changes tested?

Not directly; this was caught via some internal fuzz targets.

### Are there any user-facing changes?

No.

* Closes: apache#39525

Authored-by: Micah Kornfield <micahk@google.com>
Signed-off-by: mwish <maplewish117@gmail.com>
  • Loading branch information
emkornfield authored and thisisnic committed Mar 8, 2024
1 parent 3fe6a2f commit a530d99
Showing 1 changed file with 9 additions and 35 deletions.
44 changes: 9 additions & 35 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ class ColumnReaderImplBase {

if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
page->encoding() == Encoding::PLAIN) {
auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_, pool_);
dictionary->SetData(page->num_values(), page->data(), page->size());

// The dictionary is fully decoded during DictionaryDecoder::Init, so the
Expand Down Expand Up @@ -883,47 +883,21 @@ class ColumnReaderImplBase {
current_decoder_ = it->second.get();
} else {
switch (encoding) {
case Encoding::PLAIN: {
auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}
case Encoding::BYTE_STREAM_SPLIT: {
auto decoder = MakeTypedDecoder<DType>(Encoding::BYTE_STREAM_SPLIT, descr_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}
case Encoding::RLE: {
auto decoder = MakeTypedDecoder<DType>(Encoding::RLE, descr_);
case Encoding::PLAIN:
case Encoding::BYTE_STREAM_SPLIT:
case Encoding::RLE:
case Encoding::DELTA_BINARY_PACKED:
case Encoding::DELTA_BYTE_ARRAY:
case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
auto decoder = MakeTypedDecoder<DType>(encoding, descr_, pool_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}

case Encoding::RLE_DICTIONARY:
throw ParquetException("Dictionary page must be before data page.");

case Encoding::DELTA_BINARY_PACKED: {
auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BINARY_PACKED, descr_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}
case Encoding::DELTA_BYTE_ARRAY: {
auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BYTE_ARRAY, descr_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}
case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
auto decoder =
MakeTypedDecoder<DType>(Encoding::DELTA_LENGTH_BYTE_ARRAY, descr_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}

default:
throw ParquetException("Unknown encoding type.");
}
Expand Down

0 comments on commit a530d99

Please sign in to comment.