Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add basic SQL features #457

Merged
merged 8 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Collate:
's3_methods.R'
'series__series.R'
'series__trait.R'
'sql.R'
'translation.R'
'vctrs.R'
'zzz.R'
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ S3method("$",RField)
S3method("$",RNullValues)
S3method("$",RPolarsDataType)
S3method("$",RPolarsErr)
S3method("$",RPolarsSQLContext)
S3method("$",RPolarsStringCacheHolder)
S3method("$",RThreadHandle)
S3method("$",Series)
Expand Down Expand Up @@ -72,6 +73,7 @@ S3method("[[",RField)
S3method("[[",RNullValues)
S3method("[[",RPolarsDataType)
S3method("[[",RPolarsErr)
S3method("[[",RPolarsSQLContext)
S3method("[[",RPolarsStringCacheHolder)
S3method("[[",RThreadHandle)
S3method("[[",Series)
Expand All @@ -88,6 +90,7 @@ S3method(.DollarNames,GroupBy)
S3method(.DollarNames,LazyFrame)
S3method(.DollarNames,RField)
S3method(.DollarNames,RPolarsErr)
S3method(.DollarNames,RPolarsSQLContext)
S3method(.DollarNames,RThreadHandle)
S3method(.DollarNames,Series)
S3method(.DollarNames,Then)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
- `$scan_parquet()` gains an argument `hive_partitioning`.
- `$meta$tree_format()` has a better formatted output.

## What's changed

- New class `RPolarsSQLContext` and its methods to perform SQL queries on DataFrame-like objects.
To use this feature, the Rust library must be built with the `full` feature. (#457)

# polars 0.9.0

## BREAKING CHANGES DUE TO RUST-POLARS UPDATE
Expand Down
3 changes: 2 additions & 1 deletion R/after-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ extendr_method_to_pure_functions = function(env, class_name = NULL) {
.pr$RPolarsErr = extendr_method_to_pure_functions(RPolarsErr)
.pr$RThreadHandle = extendr_method_to_pure_functions(RThreadHandle)
.pr$RPolarsStringCacheHolder = extendr_method_to_pure_functions(RPolarsStringCacheHolder)
.pr$RPolarsSQLContext = extendr_method_to_pure_functions(RPolarsSQLContext)



Expand Down Expand Up @@ -267,7 +268,7 @@ DataType = clone_env_one_level_deep(RPolarsDataType)
pl_class_names = sort(
c(
"LazyFrame", "Series", "LazyGroupBy", "DataType", "Expr", "DataFrame",
"When", "Then", "ChainedWhen", "ChainedThen"
"When", "Then", "ChainedWhen", "ChainedThen", "RPolarsSQLContext"
)
) # TODO discover all public class automatically

Expand Down
18 changes: 18 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,24 @@ Series$from_arrow <- function(name, array) .Call(wrap__Series__from_arrow, name,
#' @export
`[[.Series` <- `$.Series`

# Environment that holds the wrapper functions for RPolarsSQLContext.
RPolarsSQLContext <- new.env(parent = emptyenv())

# Each wrapper below forwards to the matching `wrap__RPolarsSQLContext__*`
# routine through .Call(). Every wrapper except `new` refers to a free
# variable `self`; it is supplied by `$.RPolarsSQLContext` further down.
RPolarsSQLContext$new <- function() .Call(wrap__RPolarsSQLContext__new)

RPolarsSQLContext$execute <- function(query) .Call(wrap__RPolarsSQLContext__execute, self, query)

RPolarsSQLContext$get_tables <- function() .Call(wrap__RPolarsSQLContext__get_tables, self)

RPolarsSQLContext$register <- function(name, lf) .Call(wrap__RPolarsSQLContext__register, self, name, lf)

RPolarsSQLContext$unregister <- function(name) .Call(wrap__RPolarsSQLContext__unregister, self, name)

# Method lookup: fetch `name` from the RPolarsSQLContext environment and set
# the fetched function's environment to this call's frame, so that `self`
# inside the wrapper resolves to the object `$` was called on.
#' @export
`$.RPolarsSQLContext` <- function (self, name) { func <- RPolarsSQLContext[[name]]; environment(func) <- environment(); func }

# `[[` shares the implementation of `$`.
#' @export
`[[.RPolarsSQLContext` <- `$.RPolarsSQLContext`

RPolarsStringCacheHolder <- new.env(parent = emptyenv())

RPolarsStringCacheHolder$hold <- function() .Call(wrap__RPolarsStringCacheHolder__hold)
Expand Down
167 changes: 167 additions & 0 deletions R/sql.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#' @title Run SQL queries against DataFrame/LazyFrame data.
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @description Run SQL queries against DataFrame/LazyFrame data.
#' @details Currently, only available when built with the `full` feature.
#' See [polars_info()] for more information.
#' @name SQLContext_class
#' @keywords SQLContext
#' @examplesIf pl$polars_info()$features$sql
#' lf = pl$LazyFrame(a = 1:3, b = c("x", NA, "z"))
#' res = pl$SQLContext(frame = lf)$execute(
#' "SELECT b, a*2 AS two_a FROM frame WHERE b IS NOT NULL"
#' )
#' res$collect()
RPolarsSQLContext


#' @title auto complete $-access into a polars object
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @description called by the interactive R session internally
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @param x RPolarsSQLContext
#' @param pattern code-stump as string to auto-complete
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @return char vec
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @export
#' @noRd
#' @inherit .DollarNames.DataFrame return
#' @keywords internal
.DollarNames.RPolarsSQLContext = function(x, pattern = "") {
# Delegates to get_method_usages() with the RPolarsSQLContext object and the
# given pattern; `x` itself is not used here.
get_method_usages(RPolarsSQLContext, pattern = pattern)
}


#' Initialise a new SQLContext
#' @name pl_SQLContext
#' @description Create a new SQLContext and register every frame passed in
#' `...`, using each argument name as the table name.
#' @param ... Name-value pairs of [LazyFrame][LazyFrame_class] like objects to register.
#' @return RPolarsSQLContext
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext(mtcars = mtcars)
#' ctx
pl$SQLContext = function(...) {
  ctx = .pr$RPolarsSQLContext$new()
  frames = list(...)
  table_names = names(frames)

  # seq_along() is empty when nothing was passed, so no separate
  # emptiness check is required.
  for (i in seq_along(frames)) {
    unwrap(
      .pr$RPolarsSQLContext$register(ctx, table_names[i], frames[[i]]),
      "in $SQLContext()"
    )
  }

  ctx
}


#' @title Execute a SQL query against the registered data
#' @description Parse the given SQL query and run it against the frames
#' registered in this context.
#' @param query A valid string SQL query.
#' @param eager A logical flag indicating whether to collect the result immediately.
#' If FALSE (default), a [LazyFrame][LazyFrame_class] is returned. If TRUE, a [DataFrame][DataFrame_class] is returned.
#' @return A [LazyFrame][LazyFrame_class] or [DataFrame][DataFrame_class] depending on the value of `eager`.
#' @examplesIf pl$polars_info()$features$sql
#' query = "SELECT * FROM mtcars WHERE cyl = 4"
#' pl$SQLContext(mtcars = mtcars)$execute(query)
#' pl$SQLContext(mtcars = mtcars)$execute(query, eager = TRUE)
SQLContext_execute = function(query, eager = FALSE) {
  result = unwrap(
    .pr$RPolarsSQLContext$execute(self, query),
    "in $execute()"
  )

  # Collect right away only when the caller asked for an eager result.
  if (eager) {
    return(result$collect())
  }

  result
}


#' @title Register a single frame as a table
#' @description Register a single frame as a table, using the given name.
#' @param name A string name to register the frame as.
#' @param frame A [LazyFrame][LazyFrame_class] like object to register.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' ctx$register("mtcars", mtcars)
#'
#' ctx$execute("SELECT * FROM mtcars LIMIT 5")$collect()
SQLContext_register = function(name, frame) {
  unwrap(
    .pr$RPolarsSQLContext$register(self, name, frame),
    "in $register()"
  )

  # Hand the context back invisibly once registration has succeeded.
  invisible(self)
}


#' @title Register multiple frames as tables
#' @description Register multiple frames as tables, using each argument name
#' as the table name.
#' @param ... Name-value pairs of [LazyFrame][LazyFrame_class] like objects to register.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' r_df = mtcars
#' pl_df = pl$DataFrame(mtcars)
#' pl_lf = pl$LazyFrame(mtcars)
#'
#' ctx$register_many(r_df = r_df, pl_df = pl_df, pl_lf = pl_lf)
#'
#' ctx$execute(
#' "SELECT * FROM r_df
#' UNION ALL
#' SELECT * FROM pl_df
#' UNION ALL
#' SELECT * FROM pl_lf"
#' )$collect()
SQLContext_register_many = function(...) {
  frames = list(...)
  table_names = names(frames)

  # seq_along() is empty when nothing was passed, so the loop is simply
  # skipped in that case.
  for (i in seq_along(frames)) {
    unwrap(
      .pr$RPolarsSQLContext$register(self, table_names[i], frames[[i]]),
      "in $register_many()"
    )
  }

  invisible(self)
}


#' @title Unregister tables by name
#' @description Unregister tables by name. The names are processed one at a
#' time, in the order given.
#' @param names A character vector of table names to unregister.
#' @return Returns the [SQLContext_class] object invisibly.
#' @examplesIf pl$polars_info()$features$sql
#' # Initialise a new SQLContext and register the given tables.
#' ctx = pl$SQLContext(x = mtcars, y = mtcars, z = mtcars)
#' ctx$tables()
#'
#' # Unregister some tables.
#' ctx$unregister(c("x", "y"))
#' ctx$tables()
SQLContext_unregister = function(names) {
  for (index in seq_along(names)) {
    # The error context must name this method; it previously said
    # "in $register()", which pointed users at the wrong call.
    .pr$RPolarsSQLContext$unregister(self, names[index]) |>
      unwrap("in $unregister()")
  }
  invisible(self)
}


#' @title List registered tables
#' @description Return the names of the tables currently registered in this
#' context.
#' @return A character vector of the registered table names.
#' @examplesIf pl$polars_info()$features$sql
#' ctx = pl$SQLContext()
#' ctx$tables()
#' ctx$register("df1", mtcars)
#' ctx$tables()
#' ctx$register("df2", mtcars)
#' ctx$tables()
SQLContext_tables = function() {
  unwrap(
    .pr$RPolarsSQLContext$get_tables(self),
    "in $tables()"
  )
}
2 changes: 2 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ replace_private_with_pub_methods(Series, "^Series_")
# RThreadHandle
replace_private_with_pub_methods(RThreadHandle, "^RThreadHandle_")

# SQLContext
replace_private_with_pub_methods(RPolarsSQLContext, "^SQLContext_")



Expand Down
4 changes: 2 additions & 2 deletions docs/make-docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ rd2md = function(src) {
for (i in seq_along(chunks)) {
if (any(grepl("<h3>Usage</h3>", chunks[[i]], fixed = TRUE))) {
# order is important
for (cl in c("DataFrame_", "Series_", "Expr_", "LazyFrame_", "LazyGroupBy_", "GroupBy_", "RField_")) {
for (cl in c("DataFrame_", "Series_", "Expr_", "LazyFrame_", "LazyGroupBy_", "GroupBy_", "RField_", "SQLContext_")) {
chunks[[i]] = gsub(cl, paste0("&lt", sub("_$", "", cl), "&gt$"), chunks[[i]])
}
}
Expand Down Expand Up @@ -136,7 +136,7 @@ make_doc_hierarchy = function() {
"pl", "Series", "DataFrame", "LazyFrame", "GroupBy",
"LazyGroupBy", "ExprList", "ExprBin", "ExprCat", "ExprDT",
"ExprMeta", "ExprName", "ExprStr", "ExprStruct",
"Expr", "IO", "RThreadHandle"
"Expr", "IO", "RThreadHandle", "SQLContext"
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
)
for (cl in classes) {
files = grep(paste0("^", cl, "_"), other, value = TRUE)
Expand Down
22 changes: 22 additions & 0 deletions man/SQLContext_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/SQLContext_execute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/SQLContext_register.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions man/SQLContext_register_many.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading