Skip to content

Commit

Permalink
apacheGH-33670: [GLib] Add GArrowProjectNodeOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Jan 15, 2023
1 parent 4bb7d94 commit b4999fa
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 1 deletion.
112 changes: 111 additions & 1 deletion c_glib/arrow-glib/compute.cpp
Expand Up @@ -26,6 +26,7 @@
#include <arrow-glib/datum.hpp>
#include <arrow-glib/enums.h>
#include <arrow-glib/error.hpp>
#include <arrow-glib/expression.hpp>
#include <arrow-glib/reader.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/scalar.hpp>
Expand Down Expand Up @@ -109,7 +110,6 @@ namespace {
return
(sort_key.target == other_sort_key.target) &&
(sort_key.order == other_sort_key.order);

}
}

Expand All @@ -136,6 +136,8 @@ G_BEGIN_DECLS
*
* #GArrowSourceNodeOptions is a class to customize a source node.
*
* #GArrowProjectNodeOptions is a class to customize a project node.
*
* #GArrowAggregation is a class to specify how to aggregate.
*
* #GArrowAggregateNodeOptions is a class to customize an aggregate node.
Expand Down Expand Up @@ -1014,6 +1016,81 @@ garrow_source_node_options_new_table(GArrowTable *table)
}


struct GArrowProjectNodeOptionsPrivate {
GList *expressions;
};

G_DEFINE_TYPE_WITH_PRIVATE(GArrowProjectNodeOptions,
garrow_project_node_options,
GARROW_TYPE_EXECUTE_NODE_OPTIONS)

#define GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowProjectNodeOptionsPrivate *>( \
garrow_project_node_options_get_instance_private( \
GARROW_PROJECT_NODE_OPTIONS(object)))

static void
garrow_project_node_options_dispose(GObject *object)
{
auto priv = GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(object);
g_list_free_full(priv->expressions,
static_cast<GDestroyNotify>(g_object_unref));
G_OBJECT_CLASS(garrow_project_node_options_parent_class)->dispose(object);
}

static void
garrow_project_node_options_init(GArrowProjectNodeOptions *object)
{
}

static void
garrow_project_node_options_class_init(GArrowProjectNodeOptionsClass *klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
gobject_class->dispose = garrow_project_node_options_dispose;
}

/**
* garrow_project_node_options_new:
* @expressions: (element-type GArrowExpression):
* A list of #GArrowExpression to be executed.
* @names: (nullable) (array length=n_names):
* A list of output column names of @expressions. If @names is %NULL,
* the string representations of @expressions will be used.
* @n_names: The number of @names.
*
* Returns: A newly created #GArrowProjectNodeOptions.
*
* Since: 11.0.0
*/
GArrowProjectNodeOptions *
garrow_project_node_options_new(GList *expressions,
gchar **names,
gsize n_names)
{
std::vector<arrow::compute::Expression> arrow_expressions;
std::vector<std::string> arrow_names;
for (auto node = expressions; node; node = g_list_next(node)) {
auto expression = GARROW_EXPRESSION(node->data);
arrow_expressions.push_back(*garrow_expression_get_raw(expression));
}
for (gsize i = 0; i < n_names; ++i) {
arrow_names.emplace_back(names[i]);
}
if (!arrow_names.empty()) {
for (size_t i = arrow_names.size(); i < arrow_expressions.size(); ++i) {
arrow_names.push_back(arrow_expressions[i].ToString());
}
}
auto arrow_options =
new arrow::compute::ProjectNodeOptions(arrow_expressions, arrow_names);
auto options = g_object_new(GARROW_TYPE_PROJECT_NODE_OPTIONS,
"options", arrow_options,
NULL);
return GARROW_PROJECT_NODE_OPTIONS(options);
}


typedef struct GArrowAggregationPrivate_ {
gchar *function;
GArrowFunctionOptions *options;
Expand Down Expand Up @@ -1771,6 +1848,39 @@ garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
error);
}

/**
* garrow_execute_plan_build_project_node:
* @plan: A #GArrowExecutePlan.
* @input: A #GArrowExecuteNode.
* @options: A #GArrowProjectNodeOptions.
* @error: (nullable): Return location for a #GError or %NULL.
*
* This is a shortcut of garrow_execute_plan_build_node() for project
* node.
*
* Returns: (transfer full): A newly built and added #GArrowExecuteNode
* for project on success, %NULL on error.
*
* Since: 11.0.0
*/
GArrowExecuteNode *
garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
GArrowExecuteNode *input,
GArrowProjectNodeOptions *options,
GError **error)
{
GList *inputs = nullptr;
inputs = g_list_prepend(inputs, input);
auto node =
garrow_execute_plan_build_node(plan,
"project",
inputs,
GARROW_EXECUTE_NODE_OPTIONS(options),
error);
g_list_free(inputs);
return node;
}

/**
* garrow_execute_plan_build_aggregate_node:
* @plan: A #GArrowExecutePlan.
Expand Down
24 changes: 24 additions & 0 deletions c_glib/arrow-glib/compute.h
Expand Up @@ -156,6 +156,24 @@ GArrowSourceNodeOptions *
garrow_source_node_options_new_table(GArrowTable *table);


#define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions,
garrow_project_node_options,
GARROW,
PROJECT_NODE_OPTIONS,
GArrowExecuteNodeOptions)
struct _GArrowProjectNodeOptionsClass
{
GArrowExecuteNodeOptionsClass parent_class;
};

GARROW_AVAILABLE_IN_11_0
GArrowProjectNodeOptions *
garrow_project_node_options_new(GList *expressions,
gchar **names,
gsize n_names);


#define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowAggregation,
garrow_aggregation,
Expand Down Expand Up @@ -321,6 +339,12 @@ GArrowExecuteNode *
garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
GArrowSourceNodeOptions *options,
GError **error);
GARROW_AVAILABLE_IN_11_0
GArrowExecuteNode *
garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
GArrowExecuteNode *input,
GArrowProjectNodeOptions *options,
GError **error);
GARROW_AVAILABLE_IN_6_0
GArrowExecuteNode *
garrow_execute_plan_build_aggregate_node(GArrowExecutePlan *plan,
Expand Down
83 changes: 83 additions & 0 deletions c_glib/test/test-project-node.rb
@@ -0,0 +1,83 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

class TestProjectNode < Test::Unit::TestCase
include Helper::Buildable

def execute_plan(options)
plan = Arrow::ExecutePlan.new
numbers = build_int8_array([1, 2, 3, 4, 5])
strings = build_string_array(["a", "b", "a", "b", "a"])
table = build_table(number: numbers,
string: strings)
source_node_options = Arrow::SourceNodeOptions.new(table)
source_node = plan.build_source_node(source_node_options)
project_node = plan.build_project_node(source_node, options)
sink_node_options = Arrow::SinkNodeOptions.new
sink_node = plan.build_sink_node(project_node,
sink_node_options)
plan.validate
plan.start
plan.wait
reader = sink_node_options.get_reader(project_node.output_schema)
table = reader.read_all
plan.stop
table
end

def test_expressions
three_scalar = Arrow::Int8Scalar.new(3)
three_datum = Arrow::ScalarDatum.new(three_scalar)
expressions = [
Arrow::FieldExpression.new("number"),
Arrow::CallExpression.new("multiply",
[
Arrow::FieldExpression.new("number"),
Arrow::LiteralExpression.new(three_datum),
]),
]
options = Arrow::ProjectNodeOptions.new(expressions)
assert_equal(build_table("number" => [
build_int8_array([1, 2, 3, 4, 5]),
],
"multiply(number, 3)" => [
build_int8_array([3, 6, 9, 12, 15]),
]),
execute_plan(options))
end

def testnames
three_scalar = Arrow::Int8Scalar.new(3)
three_datum = Arrow::ScalarDatum.new(three_scalar)
expressions = [
Arrow::CallExpression.new("multiply",
[
Arrow::FieldExpression.new("number"),
Arrow::LiteralExpression.new(three_datum),
]),
Arrow::FieldExpression.new("number"),
]
options = Arrow::ProjectNodeOptions.new(expressions, ["number * 3"])
assert_equal(build_table("number * 3" => [
build_int8_array([3, 6, 9, 12, 15]),
],
"number" => [
build_int8_array([1, 2, 3, 4, 5]),
]),
execute_plan(options))
end
end

0 comments on commit b4999fa

Please sign in to comment.