Skip to content

Commit

Permalink
Merge branch 'apache:main' into apacheGH-35984
Browse files Browse the repository at this point in the history
  • Loading branch information
sgilmore10 committed Jun 8, 2023
2 parents 1743855 + 8b5919d commit 79f5a17
Show file tree
Hide file tree
Showing 129 changed files with 6,250 additions and 883 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ jobs:
/d 1 `
/f
- name: Installed Packages
run: choco list -l
run: choco list
- name: Install Dependencies
run: choco install -y --no-progress openssl
- name: Checkout Arrow
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ swift/Arrow/.build

# Go dependencies
go/vendor
# go debug binaries
__debug_bin

# direnv
.envrc
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ makes it easier for us to process the backlog of submitted Pull Requests.

Any functionality change should have a GitHub issue opened. For minor changes that
affect documentation, you do not need to open up a GitHub issue. Instead you can
prefix the title of your PR with "MINOR: " if meets the following guidelines:
prefix the title of your PR with "MINOR: " if it meets one of the following:

* Grammar, usage and spelling fixes that affect no more than 2 files
* Documentation updates affecting no more than 2 files and not more
Expand Down
3 changes: 2 additions & 1 deletion ci/docker/debian-11-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ RUN apt-get update -y -q && \
libgoogle-glog-dev \
libgrpc++-dev \
liblz4-dev \
libprotobuf-dev \
libprotoc-dev \
libre2-dev \
libsnappy-dev \
libssl-dev \
Expand Down Expand Up @@ -121,5 +123,4 @@ ENV absl_SOURCE=BUNDLED \
GTest_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PATH=/usr/lib/ccache/:$PATH \
Protobuf_SOURCE=BUNDLED \
xsimd_SOURCE=BUNDLED
5 changes: 0 additions & 5 deletions ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,7 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
# provided by the distribution:
# - Abseil is old
# - libc-ares-dev does not install CMake config files
# - flatbuffer is not packaged
# - libgtest-dev only provide sources
# - libprotobuf-dev only provide sources
# ARROW-17051: this build uses static Protobuf, so we must also use
# static Arrow to run Flight/Flight SQL tests
ENV absl_SOURCE=BUNDLED \
ARROW_ACERO=ON \
ARROW_BUILD_STATIC=ON \
Expand Down Expand Up @@ -199,6 +195,5 @@ ENV absl_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
PATH=/usr/lib/ccache/:$PATH \
Protobuf_SOURCE=BUNDLED \
PYTHON=python3 \
xsimd_SOURCE=BUNDLED
6 changes: 4 additions & 2 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1758,8 +1758,10 @@ endmacro()

if(ARROW_WITH_PROTOBUF)
if(ARROW_FLIGHT_SQL)
# Flight SQL uses proto3 optionals, which require 3.15 or later.
set(ARROW_PROTOBUF_REQUIRED_VERSION "3.15.0")
# Flight SQL uses proto3 optionals, which require 3.12 or later.
# 3.12.0-3.14.0: need --experimental_allow_proto3_optional
# 3.15.0-: don't need --experimental_allow_proto3_optional
set(ARROW_PROTOBUF_REQUIRED_VERSION "3.12.0")
elseif(ARROW_SUBSTRAIT)
# Substrait protobuf files use proto3 syntax
set(ARROW_PROTOBUF_REQUIRED_VERSION "3.0.0")
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ class KeyHasher {
size_t index_;
std::vector<col_index_t> indices_;
std::vector<KeyColumnMetadata> metadata_;
const RecordBatch* batch_;
std::atomic<const RecordBatch*> batch_;
std::vector<HashType> hashes_;
LightContext ctx_;
std::vector<KeyColumnArray> column_arrays_;
Expand Down
122 changes: 122 additions & 0 deletions cpp/src/arrow/c/abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,27 @@
// specific language governing permissions and limitations
// under the License.

/// \file abi.h Arrow C Data Interface
///
/// The Arrow C Data interface defines a very small, stable set
/// of C definitions which can be easily copied into any project's
/// source code and vendored to be used for columnar data interchange
/// in the Arrow format. For non-C/C++ languages and runtimes,
/// it should be almost as easy to translate the C definitions into
/// the corresponding C FFI declarations.
///
/// Applications and libraries can therefore work with Arrow memory
/// without necessarily using the Arrow libraries or reinventing
/// the wheel. Developers can choose between tight integration
/// with the Arrow software project or minimal integration with
/// the Arrow format only.

#pragma once

#include <stdint.h>

// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -65,6 +82,61 @@ struct ArrowArray {

#endif // ARROW_C_DATA_INTERFACE

#ifndef ARROW_C_DEVICE_DATA_INTERFACE
#define ARROW_C_DEVICE_DATA_INTERFACE

// Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html

// Device type for the allocated memory; takes one of the
// ARROW_DEVICE_* values defined below.
typedef int32_t ArrowDeviceType;

// CPU device, same as using ArrowArray directly
#define ARROW_DEVICE_CPU 1
// CUDA GPU Device
#define ARROW_DEVICE_CUDA 2
// Pinned CUDA CPU memory allocated by cudaMallocHost
#define ARROW_DEVICE_CUDA_HOST 3
// OpenCL Device
#define ARROW_DEVICE_OPENCL 4
// Vulkan buffer for next-gen graphics
#define ARROW_DEVICE_VULKAN 7
// Metal for Apple GPU
#define ARROW_DEVICE_METAL 8
// Verilog simulator buffer
#define ARROW_DEVICE_VPI 9
// ROCm device for AMD GPUs
#define ARROW_DEVICE_ROCM 10
// Pinned ROCm CPU memory allocated by hipMallocHost
#define ARROW_DEVICE_ROCM_HOST 11
// Reserved for extension devices
#define ARROW_DEVICE_EXT_DEV 12
// CUDA managed/unified memory allocated by cudaMallocManaged
#define ARROW_DEVICE_CUDA_MANAGED 13
// Unified shared memory allocated on a oneAPI non-partitioned device
#define ARROW_DEVICE_ONEAPI 14
// GPU support for next-gen WebGPU standard
#define ARROW_DEVICE_WEBGPU 15
// Qualcomm Hexagon DSP
#define ARROW_DEVICE_HEXAGON 16

// ArrowArray extended with device information, for columnar data
// whose buffers live in non-CPU memory.
struct ArrowDeviceArray {
// The allocated array.
//
// The buffers in the array (along with the buffers of any
// children) are what is allocated on the device.
struct ArrowArray array;
// The device id to identify a specific device
// (meaning depends on device_type, e.g. a GPU ordinal).
int64_t device_id;
// The type of device which can access this memory.
ArrowDeviceType device_type;
// An event-like object to synchronize on if needed.
// NOTE(review): per the spec URL above this is presumably NULL when
// no synchronization is required — confirm against the spec.
void* sync_event;
// Reserved bytes for future expansion.
int64_t reserved[3];
};

#endif  // ARROW_C_DEVICE_DATA_INTERFACE

#ifndef ARROW_C_STREAM_INTERFACE
#define ARROW_C_STREAM_INTERFACE

Expand Down Expand Up @@ -106,6 +178,56 @@ struct ArrowArrayStream {

#endif // ARROW_C_STREAM_INTERFACE

#ifndef ARROW_C_DEVICE_STREAM_INTERFACE
#define ARROW_C_DEVICE_STREAM_INTERFACE

// Equivalent to ArrowArrayStream, but for ArrowDeviceArrays.
//
// This stream is intended to provide a stream of data on a single
// device; if a producer wants data to be produced on multiple devices
// then multiple streams should be provided, one per device.
struct ArrowDeviceArrayStream {
// The device that this stream produces data on.
ArrowDeviceType device_type;

// Callback to get the stream schema
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
// The schema should be accessible via CPU memory.
int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out);

// Callback to get the next array
// (if no error and the array is released, the stream has ended).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowDeviceArray must be released independently from the stream.
int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out);

// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowDeviceArrayStream* self);

// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowDeviceArrayStream* self);

// Opaque producer-specific data.
void* private_data;
};

#endif  // ARROW_C_DEVICE_STREAM_INTERFACE

#ifdef __cplusplus
}
#endif
27 changes: 15 additions & 12 deletions cpp/src/arrow/engine/substrait/extension_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,9 @@ ExtensionIdRegistry::SubstraitAggregateToArrow DecodeBasicAggregate(
return Status::Invalid("Expected aggregate call ", call.id().uri, "#",
call.id().name, " to have at least one argument");
}
case 1: {
default: {
// Handles all arity > 0

std::shared_ptr<compute::FunctionOptions> options = nullptr;
if (arrow_function_name == "stddev" || arrow_function_name == "variance") {
// See the following URL for the spec of stddev and variance:
Expand All @@ -981,21 +983,22 @@ ExtensionIdRegistry::SubstraitAggregateToArrow DecodeBasicAggregate(
}
fixed_arrow_func += arrow_function_name;

ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(0));
const FieldRef* arg_ref = arg.field_ref();
if (!arg_ref) {
return Status::Invalid("Expected an aggregate call ", call.id().uri, "#",
call.id().name, " to have a direct reference");
std::vector<FieldRef> target;
for (int i = 0; i < call.size(); i++) {
ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(i));
const FieldRef* arg_ref = arg.field_ref();
if (!arg_ref) {
return Status::Invalid("Expected an aggregate call ", call.id().uri, "#",
call.id().name, " to have a direct reference");
}
// Copy arg_ref here because field_ref() return const FieldRef*
target.emplace_back(*arg_ref);
}

return compute::Aggregate{std::move(fixed_arrow_func),
options ? std::move(options) : nullptr, *arg_ref, ""};
options ? std::move(options) : nullptr,
std::move(target), ""};
}
default:
break;
}
return Status::NotImplemented(
"Only nullary and unary aggregate functions are currently supported");
};
}

Expand Down
12 changes: 9 additions & 3 deletions cpp/src/arrow/flight/sql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,16 @@ set(FLIGHT_SQL_GENERATED_PROTO_FILES "${CMAKE_CURRENT_BINARY_DIR}/FlightSql.pb.c

set(PROTO_DEPENDS ${FLIGHT_SQL_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF})

set(FLIGHT_SQL_PROTOC_COMMAND
${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_SQL_PROTO_PATH}"
"--cpp_out=dllexport_decl=ARROW_FLIGHT_SQL_EXPORT:${CMAKE_CURRENT_BINARY_DIR}")
if(Protobuf_VERSION VERSION_LESS 3.15)
list(APPEND FLIGHT_SQL_PROTOC_COMMAND "--experimental_allow_proto3_optional")
endif()
list(APPEND FLIGHT_SQL_PROTOC_COMMAND "${FLIGHT_SQL_PROTO}")

add_custom_command(OUTPUT ${FLIGHT_SQL_GENERATED_PROTO_FILES}
COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_SQL_PROTO_PATH}"
"--cpp_out=dllexport_decl=ARROW_FLIGHT_SQL_EXPORT:${CMAKE_CURRENT_BINARY_DIR}"
"${FLIGHT_SQL_PROTO}"
COMMAND ${FLIGHT_SQL_PROTOC_COMMAND}
DEPENDS ${PROTO_DEPENDS})

set_source_files_properties(${FLIGHT_SQL_GENERATED_PROTO_FILES} PROPERTIES GENERATED TRUE)
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/flight/sql/client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,22 @@ namespace {
// Builds a command-type FlightDescriptor from a protobuf message.
//
// The message is wrapped in a google::protobuf::Any (so the receiver can
// recover the concrete command type) and the serialized Any bytes become
// the descriptor's opaque command payload.
//
// NOTE(review): the version guards presumably exist because PackFrom /
// SerializeToString did not report failure via a usable bool return
// before protobuf 3.15, hence the unchecked calls in the #else branches —
// confirm against the protobuf 3.15 release notes.
arrow::Result<FlightDescriptor> GetFlightDescriptorForCommand(
const google::protobuf::Message& command) {
google::protobuf::Any any;
#if PROTOBUF_VERSION >= 3015000
// Packing can fail (e.g. serialization of the inner message fails);
// surface that as a SerializationError instead of sending garbage.
if (!any.PackFrom(command)) {
return Status::SerializationError("Failed to pack ", command.GetTypeName());
}
#else
any.PackFrom(command);
#endif

std::string buf;
#if PROTOBUF_VERSION >= 3015000
if (!any.SerializeToString(&buf)) {
return Status::SerializationError("Failed to serialize ", command.GetTypeName());
}
#else
any.SerializeToString(&buf);
#endif
return FlightDescriptor::Command(buf);
}

Expand All @@ -71,16 +79,24 @@ arrow::Result<std::unique_ptr<SchemaResult>> GetSchemaForCommand(
::arrow::Result<Action> PackAction(const std::string& action_type,
const google::protobuf::Message& message) {
google::protobuf::Any any;
#if PROTOBUF_VERSION >= 3015000
if (!any.PackFrom(message)) {
return Status::SerializationError("Could not pack ", message.GetTypeName(),
" into Any");
}
#else
any.PackFrom(message);
#endif

std::string buffer;
#if PROTOBUF_VERSION >= 3015000
if (!any.SerializeToString(&buffer)) {
return Status::SerializationError("Could not serialize packed ",
message.GetTypeName());
}
#else
any.SerializeToString(&buffer);
#endif

Action action;
action.type = action_type;
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/flight/sql/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -361,14 +361,22 @@ arrow::Result<ActionEndTransactionRequest> ParseActionEndTransactionRequest(

// Serializes a protobuf message into a Flight action Result.
//
// The message is wrapped in a google::protobuf::Any and the serialized
// Any bytes are returned as the Result's body buffer.
//
// NOTE(review): the version guards presumably exist because PackFrom /
// SerializeToString did not report failure via a usable bool return
// before protobuf 3.15, hence the unchecked calls in the #else branches —
// confirm against the protobuf 3.15 release notes.
arrow::Result<Result> PackActionResult(const google::protobuf::Message& message) {
google::protobuf::Any any;
#if PROTOBUF_VERSION >= 3015000
// Report pack failures explicitly rather than returning a corrupt payload.
if (!any.PackFrom(message)) {
return Status::IOError("Failed to pack ", message.GetTypeName());
}
#else
any.PackFrom(message);
#endif

std::string buffer;
#if PROTOBUF_VERSION >= 3015000
if (!any.SerializeToString(&buffer)) {
return Status::IOError("Failed to serialize packed ", message.GetTypeName());
}
#else
any.SerializeToString(&buffer);
#endif
return Result{Buffer::FromString(std::move(buffer))};
}

Expand Down
Loading

0 comments on commit 79f5a17

Please sign in to comment.