diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index 572f15e100dcb..31e64b594b02a 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -34,6 +34,7 @@ #include "parquet/arrow/reader.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/encryption.h" +#include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/kms_client.h" #include "parquet/encryption/test_in_memory_kms.h" @@ -58,6 +59,12 @@ class DatasetEncryptionTest : public ::testing::Test { // partitioning scheme. The function also checks if the written files exist in the file // system. static void SetUpTestSuite() { +#ifdef ARROW_VALGRIND + // Not necessary otherwise, but prevents a Valgrind leak by making sure + // OpenSSL initialization is done from the main thread. + ::parquet::encryption::EnsureBackendInitialized(); +#endif + // Creates a mock file system using the current time point. EXPECT_OK_AND_ASSIGN(file_system_, fs::internal::MockFileSystem::Make( std::chrono::system_clock::now(), {})); diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 3cdd24dbe9eb9..0d04ec3e306e5 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -234,7 +234,8 @@ if(ARROW_HAVE_RUNTIME_AVX2) endif() if(PARQUET_REQUIRE_ENCRYPTION) - set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc) + set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc + encryption/openssl_internal.cc) # Encryption key management set(PARQUET_SRCS ${PARQUET_SRCS} diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 6e66efeff6326..b1770be53358d 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -16,6 +16,7 @@ // under the License. #include "parquet/encryption/encryption_internal.h" + #include #include #include @@ -27,6 +28,7 @@ #include #include +#include "parquet/encryption/openssl_internal.h" #include "parquet/exception.h" using parquet::ParquetException; @@ -92,6 +94,8 @@ class AesEncryptor::AesEncryptorImpl { AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int key_len, bool metadata, bool write_length) { + openssl::EnsureInitialized(); + ctx_ = nullptr; length_buffer_length_ = write_length ? kBufferSizeLength : 0; @@ -358,6 +362,8 @@ AesDecryptor::~AesDecryptor() {} AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, int key_len, bool metadata, bool contains_length) { + openssl::EnsureInitialized(); + ctx_ = nullptr; length_buffer_length_ = contains_length ? kBufferSizeLength : 0; ciphertext_size_delta_ = length_buffer_length_ + kNonceLength; @@ -646,6 +652,11 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { std::memcpy(AAD->data() + AAD->length() - 2, page_ordinal_bytes.data(), 2); } -void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); } +void RandBytes(unsigned char* buf, int num) { + openssl::EnsureInitialized(); + RAND_bytes(buf, num); +} + +void EnsureBackendInitialized() { openssl::EnsureInitialized(); } } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 77921d8731d25..1bdf47c56f291 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -130,4 +130,12 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD); // Wraps OpenSSL RAND_bytes function void RandBytes(unsigned char* buf, int num); +// Ensure OpenSSL is initialized. +// +// This is only necessary in specific situations since OpenSSL otherwise +// initializes itself automatically. For example, under Valgrind, a memory +// leak will be reported if OpenSSL is initialized for the first time from +// a worker thread; calling this function from the main thread prevents this. +void EnsureBackendInitialized(); + } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index 0241923474de9..ead868643baae 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -114,4 +114,6 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); } +void EnsureBackendInitialized() {} + } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/openssl_internal.cc b/cpp/src/parquet/encryption/openssl_internal.cc new file mode 100644 index 0000000000000..05f2773532353 --- /dev/null +++ b/cpp/src/parquet/encryption/openssl_internal.cc @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "parquet/encryption/openssl_internal.h" + +#include + +#include "parquet/exception.h" + +namespace parquet::encryption::openssl { + +void EnsureInitialized() { + // Initialize ciphers and random engines + if (!OPENSSL_init_crypto(OPENSSL_INIT_ENGINE_ALL_BUILTIN | OPENSSL_INIT_ADD_ALL_CIPHERS, + NULL)) { + throw ParquetException("OpenSSL initialization failed"); + } +} + +} // namespace parquet::encryption::openssl diff --git a/cpp/src/parquet/encryption/openssl_internal.h b/cpp/src/parquet/encryption/openssl_internal.h new file mode 100644 index 0000000000000..096aaf9640463 --- /dev/null +++ b/cpp/src/parquet/encryption/openssl_internal.h @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +namespace parquet::encryption::openssl { + +void EnsureInitialized(); + +} // namespace parquet::encryption::openssl