Skip to content

Commit

Permalink
feat!: add as_polars_df and as_polars_lf generic functions (#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi committed Nov 18, 2023
1 parent 86721ea commit 3bcce4d
Show file tree
Hide file tree
Showing 14 changed files with 382 additions and 43 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Collate:
'after-wrappers.R'
'Field.R'
'PTime.R'
'as_polars.R'
'autocompletion.R'
'construction.R'
'convert.R'
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ S3method(as.list,rpolars_raw_list)
S3method(as.matrix,DataFrame)
S3method(as.matrix,LazyFrame)
S3method(as.vector,Series)
S3method(as_polars_df,ArrowTabular)
S3method(as_polars_df,DataFrame)
S3method(as_polars_df,GroupBy)
S3method(as_polars_df,LazyFrame)
S3method(as_polars_df,RecordBatchReader)
S3method(as_polars_df,Series)
S3method(as_polars_df,data.frame)
S3method(as_polars_df,default)
S3method(as_polars_lf,LazyFrame)
S3method(as_polars_lf,default)
S3method(as_polars_series,POSIXlt)
S3method(as_polars_series,default)
S3method(as_polars_series,vctrs_rcrd)
Expand Down Expand Up @@ -161,6 +171,8 @@ S3method(tail,LazyFrame)
S3method(unique,DataFrame)
S3method(unique,LazyFrame)
export(.pr)
export(as_polars_df)
export(as_polars_lf)
export(as_polars_series)
export(knit_print.DataFrame)
export(pl)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
is aimed for r-polars extensions, and will be kept stable as much as possible (#504).
- New functions `pl$min_horizontal()`, `pl$max_horizontal()`, `pl$sum_horizontal()`,
`pl$all_horizontal()`, `pl$any_horizontal()` (#508).
- New generic functions `as_polars_df()` and `as_polars_lf()` to create polars DataFrames
and LazyFrames (#519).

# polars 0.10.1

Expand Down
156 changes: 156 additions & 0 deletions R/as_polars.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' To polars DataFrame
#'
#' [as_polars_df()] is a generic function that converts an R object to a
#' polars DataFrame. It is basically a wrapper for [pl$DataFrame()][pl_DataFrame],
#' but has special implementations for Apache Arrow-based objects such as
#' polars [LazyFrame][LazyFrame_class] and [arrow::Table].
#'
#' For [LazyFrame][LazyFrame_class] objects, this function is a shortcut for
#' [$collect()][LazyFrame_collect] or [$fetch()][LazyFrame_fetch], depending on
#' whether the number of rows to fetch is infinite or not.
#' @rdname as_polars_df
#' @param x Object to convert to a polars DataFrame.
#' @param ... Additional arguments passed to methods.
#' @examplesIf requireNamespace("arrow", quietly = TRUE)
#' at = arrow::as_arrow_table(mtcars)
#'
#' # Convert an arrow Table to a polars LazyFrame
#' lf = as_polars_df(at)$lazy()
#'
#' # Collect all rows
#' as_polars_df(lf)
#'
#' # Fetch 5 rows
#' as_polars_df(lf, 5)
#' @export
as_polars_df = function(x, ...) {
  # S3 generic: the class of `x` selects the method; `...` is handed on to
  # whichever method is dispatched.
  UseMethod("as_polars_df", x)
}


#' @rdname as_polars_df
#' @export
as_polars_df.default = function(x, ...) {
  # Fallback for classes without a dedicated method: coerce to a base
  # data.frame (strings are kept as character), then dispatch again on the
  # result so the data.frame method does the actual conversion.
  coerced = as.data.frame(x, stringsAsFactors = FALSE)
  as_polars_df(coerced, ...)
}


#' @rdname as_polars_df
#' @export
as_polars_df.data.frame = function(x, ...) {
  # Hand the data.frame straight to the polars DataFrame constructor.
  # Arguments arriving through `...` are not used by this method.
  pl$DataFrame(x)
}


#' @rdname as_polars_df
#' @export
as_polars_df.DataFrame = function(x, ...) {
  # Objects dispatched here already carry the DataFrame class, so there is
  # nothing to convert: return the input unchanged and ignore `...`.
  x
}


#' @rdname as_polars_df
#' @export
as_polars_df.GroupBy = function(x, ...) {
  # Delegate to the object's own `to_data_frame()` method and return its
  # result; `...` is not used by this method.
  x$to_data_frame()
}


#' @rdname as_polars_df
#' @export
as_polars_df.Series = function(x, ...) {
  # Pass the Series to the polars DataFrame constructor as its only input.
  # Arguments arriving through `...` are not used by this method.
  pl$DataFrame(x)
}


#' @rdname as_polars_df
#' @param n_rows Number of rows to fetch. Defaults to `Inf`, meaning all rows.
#' @inheritParams LazyFrame_collect
#' @export
as_polars_df.LazyFrame = function(
x,
n_rows = Inf,
...,
type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
simplify_expression = TRUE,
slice_pushdown = TRUE,
comm_subplan_elim = TRUE,
comm_subexpr_elim = TRUE,
streaming = FALSE,
no_optimization = FALSE,
inherit_optimization = FALSE,
collect_in_background = FALSE) {
# Materialize a LazyFrame by delegating to one of its own methods:
# `x$collect` when `n_rows` is infinite, `x$fetch` otherwise. The remaining
# formals are forwarded by name to whichever method is chosen.
#
# capture all args and modify some to match lower level function
# NOTE: this snapshot has to stay the first statement of the body; any local
# variable created before it would be captured and forwarded as well.
args = as.list(environment())
# `as.list()` on an environment skips dot-prefixed names by default, so the
# contents of the dots are stored by hand under the name `...`.
args$... = list(...)

if (is.infinite(args$n_rows)) {
# All rows requested: `n_rows` is removed from the forwarded arguments and
# the collect method is used.
args$n_rows = NULL
.fn = x$collect
} else {
# Finite row count: `n_rows` is kept, `collect_in_background` is removed and
# the fetch method is used.
# NOTE(review): presumably `$fetch()` has no `collect_in_background`
# argument; confirm against its signature.
args$collect_in_background = NULL
.fn = x$fetch
}

# `x` is already bound through `x$collect` / `x$fetch`, so it is not forwarded.
args$x = NULL
# NOTE(review): `check_no_missing_args()` is defined elsewhere; presumably it
# validates `args` against the formals of `.fn` before the call; confirm.
check_no_missing_args(.fn, args)
do.call(.fn, args)
}


#' @rdname as_polars_df
#' @inheritParams pl_from_arrow
#' @export
as_polars_df.ArrowTabular = function(
    x,
    ...,
    rechunk = TRUE,
    schema = NULL,
    schema_overrides = NULL) {
  # Thin adapter around `pl$from_arrow()`: `x` goes in as the first
  # positional argument, anything in `...` follows it, and the three named
  # options are forwarded under their own names.
  pl$from_arrow(
    x, ...,
    rechunk = rechunk, schema = schema, schema_overrides = schema_overrides
  )
}


#' @rdname as_polars_df
#' @export
as_polars_df.RecordBatchReader = as_polars_df.ArrowTabular # same function object, so both classes share one implementation


# TODO: as_polars_df.nanoarrow_array_stream


#' To polars LazyFrame
#'
#' [as_polars_lf()] is a generic function that converts an R object to a
#' polars LazyFrame. It is basically a shortcut for [as_polars_df(x, ...)][as_polars_df] with the
#' [$lazy()][DataFrame_lazy] method.
#' @rdname as_polars_lf
#' @inheritParams as_polars_df
#' @examples
#' as_polars_lf(mtcars)
#' @export
as_polars_lf = function(x, ...) {
  # S3 generic: the class of `x` selects the method; `...` is handed on to
  # whichever method is dispatched.
  UseMethod("as_polars_lf", x)
}


#' @rdname as_polars_lf
#' @export
as_polars_lf.default = function(x, ...) {
  # Fallback: convert through `as_polars_df()` first (all of `...` goes
  # there), then call `$lazy()` on that result.
  eager = as_polars_df(x, ...)
  eager$lazy()
}


#' @rdname as_polars_lf
#' @export
as_polars_lf.LazyFrame = function(x, ...) {
  # Objects dispatched here already carry the LazyFrame class, so there is
  # nothing to convert: return the input unchanged and ignore `...`.
  x
}
8 changes: 7 additions & 1 deletion R/convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' @description import Arrow Table or Array
#' @name pl_from_arrow
#' @param data arrow Table or Array or ChunkedArray
#' @param ... Ignored.
#' @param rechunk bool rewrite in one array per column. Implemented for ChunkedArray;
#' Array is already contiguous. Not implemented for Table.
#' @param schema named list of DataTypes or char vec of names. Same length as arrow table.
Expand All @@ -24,7 +25,12 @@
#' data = arrow::arrow_table(iris),
#' schema = char_schema
#' )
pl$from_arrow = function(data, rechunk = TRUE, schema = NULL, schema_overrides = NULL) {
pl$from_arrow = function(
data,
...,
rechunk = TRUE,
schema = NULL,
schema_overrides = NULL) {
if (!requireNamespace("arrow", quietly = TRUE)) {
stop("in pl$from_arrow: cannot import from arrow without R package arrow installed")
}
Expand Down
3 changes: 3 additions & 0 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ LazyFrame_set_optimization_toggle = function(
#' @description `$collect()` performs the query on the LazyFrame. It returns a
#' DataFrame
#' @inheritParams LazyFrame_set_optimization_toggle
#' @param ... Ignored.
#' @param no_optimization Boolean. Sets the following parameters to `FALSE`:
#' `predicate_pushdown`, `projection_pushdown`, `slice_pushdown`,
#' `comm_subplan_elim`, `comm_subexpr_elim`.
Expand All @@ -364,6 +365,7 @@ LazyFrame_set_optimization_toggle = function(
#' - [`$sink_ipc()`][LazyFrame_sink_ipc()] streams query to an arrow file.

LazyFrame_collect = function(
...,
type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
Expand Down Expand Up @@ -1311,6 +1313,7 @@ LazyFrame_dtypes = method_as_property(function() {
#' fetch(3)
LazyFrame_fetch = function(
n_rows = 500,
...,
type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
Expand Down
3 changes: 3 additions & 0 deletions man/LazyFrame_collect.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/LazyFrame_fetch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3bcce4d

Please sign in to comment.