Skip to content

Commit

Permalink
ARROW-13831: [GLib][Ruby] Add support for writing by Arrow Dataset
Browse files Browse the repository at this point in the history
Closes apache#11055 from kou/ruby-table-save-dataset

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
kou authored and ViniciusSouzaRoque committed Oct 20, 2021
1 parent f730909 commit d1bec0a
Show file tree
Hide file tree
Showing 38 changed files with 2,290 additions and 139 deletions.
2 changes: 2 additions & 0 deletions c_glib/arrow-dataset-glib/arrow-dataset-glib.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#include <arrow-dataset-glib/dataset-factory.h>
#include <arrow-dataset-glib/dataset.h>
#include <arrow-dataset-glib/enums.h>
#include <arrow-dataset-glib/file-format.h>
#include <arrow-dataset-glib/fragment.h>
#include <arrow-dataset-glib/partitioning.h>
#include <arrow-dataset-glib/scanner.h>
1 change: 1 addition & 0 deletions c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@
#include <arrow-dataset-glib/dataset.hpp>
#include <arrow-dataset-glib/file-format.hpp>
#include <arrow-dataset-glib/fragment.hpp>
#include <arrow-dataset-glib/partitioning.hpp>
#include <arrow-dataset-glib/scanner.hpp>
68 changes: 68 additions & 0 deletions c_glib/arrow-dataset-glib/dataset-factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <arrow-dataset-glib/dataset-factory.hpp>
#include <arrow-dataset-glib/dataset.hpp>
#include <arrow-dataset-glib/file-format.hpp>
#include <arrow-dataset-glib/partitioning.hpp>

G_BEGIN_DECLS

Expand Down Expand Up @@ -142,13 +143,16 @@ gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory,
typedef struct GADatasetFileSystemDatasetFactoryPrivate_ {
GADatasetFileFormat *format;
GArrowFileSystem *file_system;
GADatasetPartitioning *partitioning;
GList *files;
arrow::dataset::FileSystemFactoryOptions options;
} GADatasetFileSystemDatasetFactoryPrivate;

enum {
PROP_FORMAT = 1,
PROP_FILE_SYSTEM,
PROP_PARTITIONING,
PROP_PARTITION_BASE_DIR,
};

G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetFactory,
Expand All @@ -175,6 +179,11 @@ gadataset_file_system_dataset_factory_dispose(GObject *object)
priv->file_system = NULL;
}

if (priv->partitioning) {
g_object_unref(priv->partitioning);
priv->partitioning = NULL;
}

if (priv->files) {
g_list_free_full(priv->files, g_object_unref);
priv->files = NULL;
Expand Down Expand Up @@ -205,6 +214,29 @@ gadataset_file_system_dataset_factory_set_property(GObject *object,
case PROP_FORMAT:
priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value));
break;
case PROP_PARTITIONING:
{
auto partitioning = g_value_get_object(value);
if (partitioning == priv->partitioning) {
break;
}
auto old_partitioning = priv->partitioning;
if (partitioning) {
g_object_ref(partitioning);
priv->partitioning = GADATASET_PARTITIONING(partitioning);
priv->options.partitioning =
gadataset_partitioning_get_raw(priv->partitioning);
} else {
priv->options.partitioning = arrow::dataset::Partitioning::Default();
}
if (old_partitioning) {
g_object_unref(old_partitioning);
}
}
break;
case PROP_PARTITION_BASE_DIR:
priv->options.partition_base_dir = g_value_get_string(value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
Expand All @@ -226,6 +258,12 @@ gadataset_file_system_dataset_factory_get_property(GObject *object,
case PROP_FILE_SYSTEM:
g_value_set_object(value, priv->file_system);
break;
case PROP_PARTITIONING:
g_value_set_object(value, priv->partitioning);
break;
case PROP_PARTITION_BASE_DIR:
g_value_set_string(value, priv->options.partition_base_dir.c_str());
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
Expand Down Expand Up @@ -279,6 +317,35 @@ gadataset_file_system_dataset_factory_class_init(
GARROW_TYPE_FILE_SYSTEM,
static_cast<GParamFlags>(G_PARAM_READABLE));
g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec);

/**
* GADatasetFileSystemDatasetFactory:partitioning:
*
* Partitioning used by #GADatasetFileSystemDataset.
*
* Since: 6.0.0
*/
spec = g_param_spec_object("partitioning",
"Partitioning",
"Partitioning used by GADatasetFileSystemDataset",
GADATASET_TYPE_PARTITIONING,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec);

/**
* GADatasetFileSystemDatasetFactory:partition-base-dir:
*
* Partition base directory used by #GADatasetFileSystemDataset.
*
* Since: 6.0.0
*/
spec = g_param_spec_string("partition-base-dir",
"Partition base directory",
"Partition base directory "
"used by GADatasetFileSystemDataset",
NULL,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_PARTITION_BASE_DIR, spec);
}

/**
Expand Down Expand Up @@ -454,6 +521,7 @@ gadataset_file_system_dataset_factory_finish(
"dataset", &arrow_dataset,
"file-system", priv->file_system,
"format", priv->format,
"partitioning", priv->partitioning,
NULL));
}

Expand Down

0 comments on commit d1bec0a

Please sign in to comment.