Skip to content

Commit

Permalink
apacheGH-38304: [C++][Parquet] Fix Valgrind memory leak in arrow-data…
Browse files Browse the repository at this point in the history
…set-file-parquet-encryption-test

If OpenSSL initializes itself from a non-main thread, it can fail deallocating all memory at shutdown.

This is really a benign leak, but we don't want any spurious CI errors.
  • Loading branch information
pitrou committed Oct 17, 2023
1 parent 39298fe commit a49c40f
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 2 deletions.
7 changes: 7 additions & 0 deletions cpp/src/arrow/dataset/file_parquet_encryption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "parquet/arrow/reader.h"
#include "parquet/encryption/crypto_factory.h"
#include "parquet/encryption/encryption.h"
#include "parquet/encryption/encryption_internal.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/test_in_memory_kms.h"

Expand All @@ -58,6 +59,12 @@ class DatasetEncryptionTest : public ::testing::Test {
// partitioning scheme. The function also checks if the written files exist in the file
// system.
static void SetUpTestSuite() {
#ifdef ARROW_VALGRIND
// Not necessary otherwise, but prevents a Valgrind leak by making sure
// OpenSSL initialization is done from the main thread.
::parquet::encryption::EnsureBackendInitialized();
#endif

// Creates a mock file system using the current time point.
EXPECT_OK_AND_ASSIGN(file_system_, fs::internal::MockFileSystem::Make(
std::chrono::system_clock::now(), {}));
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ if(ARROW_HAVE_RUNTIME_AVX2)
endif()

if(PARQUET_REQUIRE_ENCRYPTION)
set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc)
set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc
encryption/openssl_internal.cc)
# Encryption key management
set(PARQUET_SRCS
${PARQUET_SRCS}
Expand Down
13 changes: 12 additions & 1 deletion cpp/src/parquet/encryption/encryption_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

#include "parquet/encryption/encryption_internal.h"

#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/rand.h>
Expand All @@ -27,6 +28,7 @@
#include <string>
#include <vector>

#include "parquet/encryption/openssl_internal.h"
#include "parquet/exception.h"

using parquet::ParquetException;
Expand Down Expand Up @@ -92,6 +94,8 @@ class AesEncryptor::AesEncryptorImpl {

AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int key_len,
bool metadata, bool write_length) {
openssl::EnsureInitialized();

ctx_ = nullptr;

length_buffer_length_ = write_length ? kBufferSizeLength : 0;
Expand Down Expand Up @@ -358,6 +362,8 @@ AesDecryptor::~AesDecryptor() {}

AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, int key_len,
bool metadata, bool contains_length) {
openssl::EnsureInitialized();

ctx_ = nullptr;
length_buffer_length_ = contains_length ? kBufferSizeLength : 0;
ciphertext_size_delta_ = length_buffer_length_ + kNonceLength;
Expand Down Expand Up @@ -646,6 +652,11 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) {
std::memcpy(AAD->data() + AAD->length() - 2, page_ordinal_bytes.data(), 2);
}

void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); }
void RandBytes(unsigned char* buf, int num) {
openssl::EnsureInitialized();
RAND_bytes(buf, num);
}

void EnsureBackendInitialized() { openssl::EnsureInitialized(); }

} // namespace parquet::encryption
8 changes: 8 additions & 0 deletions cpp/src/parquet/encryption/encryption_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,12 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD);
// Wraps OpenSSL RAND_bytes function
void RandBytes(unsigned char* buf, int num);

// Ensure OpenSSL is initialized.
//
// This is only necessary in specific situations since OpenSSL otherwise
// initializes itself automatically. For example, under Valgrind, a memory
// leak will be reported if OpenSSL is initialized for the first time from
// a worker thread; calling this function from the main thread prevents this.
void EnsureBackendInitialized();

} // namespace parquet::encryption
2 changes: 2 additions & 0 deletions cpp/src/parquet/encryption/encryption_internal_nossl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,6 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) {

void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); }

void EnsureBackendInitialized() {}

} // namespace parquet::encryption
34 changes: 34 additions & 0 deletions cpp/src/parquet/encryption/openssl_internal.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "parquet/encryption/openssl_internal.h"

#include <openssl/crypto.h>

#include "parquet/exception.h"

namespace parquet::encryption::openssl {

void EnsureInitialized() {
// Initialize ciphers and random engines
if (!OPENSSL_init_crypto(OPENSSL_INIT_ENGINE_ALL_BUILTIN | OPENSSL_INIT_ADD_ALL_CIPHERS,
NULL)) {
throw ParquetException("OpenSSL initialization failed");
}
}

} // namespace parquet::encryption::openssl
28 changes: 28 additions & 0 deletions cpp/src/parquet/encryption/openssl_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>

namespace parquet::encryption::openssl {

void EnsureInitialized();

} // namespace parquet::encryption::openssl

0 comments on commit a49c40f

Please sign in to comment.