Skip to content

Commit

Permalink
debug/profile polars (#193)
Browse files Browse the repository at this point in the history
Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
  • Loading branch information
sorhawell and etiennebacher committed May 12, 2023
1 parent 5ae1aef commit 6a3522d
Show file tree
Hide file tree
Showing 13 changed files with 211 additions and 14 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ S3method("$",VecDataFrame)
S3method("$",When)
S3method("$",WhenThen)
S3method("$",WhenThenThen)
S3method("$",pl_polars_env)
S3method("$",private_polars_env)
S3method("$<-",DataFrame)
S3method("$<-",RField)
S3method("&",Expr)
Expand Down
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# polars (development version)
## What's changed
- use `pl$set_polars_options(debug_polars = TRUE)` to profile/debug method-calls of a polars query (#193)
- lazy functions translated: `pl$implode`, `pl$explode`, `pl$unique`, `pl$approx_unique`, `pl$head`, `pl$tail` (#196)
- `pl$list` is deprecated, use `pl$implode` instead (#196)

# polars 0.6.0

## BREAKING CHANGES

- Bump rust-polars from 2023-02-17 unreleased version to 2023-04-20 unreleased version. (#183)
- `top_k`'s `reverse` option is removed. Use the new `bottom_k` method instead.
- The name of the `fmt` argument of some methods (e.g. `parse_date`) has been changed to `format`.
Expand Down
45 changes: 43 additions & 2 deletions R/after-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,42 @@ build_debug_print = FALSE
#' extendr methods into pure functions
#'
#' @param env environment object output from extendr-wrappers.R classes
#' @param class_name optional class string, only used for debug printing
#' Default NULL, will infer class_name automatically
#' @keywords internal
#' @description self is a global of extendr wrapper methods
#' this function copies the function into a new environment and
#' modify formals to have a self argument
#'
#' @return env of pure function calls to rust
#'
extendr_method_to_pure_functions = function(env) {
as.environment(lapply(env,function(f) {
extendr_method_to_pure_functions = function(env, class_name = NULL) {
  # Without an explicit class_name, use the text of the first argument as it
  # was written in the call. sys.call() has to be evaluated in this frame.
  if (is.null(class_name)) {
    class_name = as.character(sys.call()[2])
  }

  # Non-functions pass through untouched; a function that refers to `self`
  # as a global gets `self` prepended to its formals.
  add_self_formal = function(member) {
    if (!is.function(member)) {
      return(member)
    }
    refers_to_self = "self" %in% codetools::findGlobals(member)
    if (refers_to_self) {
      formals(member) <- c(alist(self = ), formals(member))
    }
    member
  }

  # Collect the converted members in a new environment and tag it so that
  # `$` dispatches to the private_polars_env method.
  pure_env = as.environment(lapply(env, add_self_formal))
  class(pure_env) = c(
    "private_polars_env",
    paste0("pr_", class_name),
    "environment"
  )
  pure_env
}



#' Get a private method from a class environment
#' @details When `polars_optenv$debug_polars` is `TRUE`, the time reported by
#' `subtimer_ms()` and the name of the accessed private method are printed
#' before the member is returned.
#' @export
#' @keywords internal
"$.private_polars_env" = function(self, name) {
  # debug mode: report which private member is being accessed
  if (polars_optenv$debug_polars) {
    elapsed = format(subtimer_ms("TIME? "), digits = 4)
    # second class entry is "pr_<class name>"; drop the "pr_" prefix
    class_label = substr(class(self)[2], 4, 99)
    cat("[", elapsed, "ms]\n .pr$", class_label, "$", name, "() -> ", sep = "")
  }
  self[[name]]
}



Expand Down Expand Up @@ -158,6 +179,26 @@ method_as_property = function(f, setter=FALSE) {
#' )
pl = new.env(parent=emptyenv())

class(pl) = c("pl_polars_env", "environment")



#' Get a public function from the pl namespace/env
#' @details When `polars_optenv$debug_polars` is `TRUE`, the time reported by
#' `subtimer_ms()` and the name of the accessed `pl` member are printed
#' before the member is returned.
#' @export
#' @keywords internal
"$.pl_polars_env" = function(self, name) {
  # debug mode: report which public pl member is being accessed
  if (polars_optenv$debug_polars) {
    elapsed = format(subtimer_ms("TIME? "), digits = 4)
    cat("[", elapsed, "ms]\npl$", name, "() -> ", sep = "")
  }
  self[[name]]
}




#remap
DataType = clone_env_one_level_deep(RPolarsDataType)

Expand Down
30 changes: 27 additions & 3 deletions R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ polars_optreq$default_maintain_order = list( # set requirement functions of defa
}
)

#' @rdname polars_options
#' @name debug_polars
#' @details when enabled, any call to a public or private polars method is printed
#' @param debug_polars bool, default = FALSE,
#' set to TRUE to turn on the debug messages
polars_optenv$debug_polars = FALSE # default value: debug printing is off
polars_optreq$debug_polars = list( # requirement functions for this option
  is_bool = function(x) {
    # accept only a single, non-missing logical value
    if (!is.logical(x)) {
      return(FALSE)
    }
    if (length(x) != 1) {
      return(FALSE)
    }
    !anyNA(x)
  }
)



## END OF DEFINED OPTIONS

Expand Down Expand Up @@ -170,7 +182,8 @@ class(pl$options) = c("polars_option_list", "list")
#' @param ... any options to modify
#'
#' @param return_replaced_options return previous state of modified options
#' Convenient for temporarily swapping of options during testing.
#' Convenient for temporarily swapping of options during testing. The immediate
#' return value is invisible.
#'
#' @rdname polars_options
#' @name set_polars_options
Expand Down Expand Up @@ -225,8 +238,8 @@ pl$set_polars_options = function(
polars_optenv[[i]] = opts[[i]]
}

if (return_replaced_options) {
return(replaced_opts_list)
if(return_replaced_options) {
return(invisible(replaced_opts_list))
}

# return current option set invisible
Expand Down Expand Up @@ -269,3 +282,14 @@ pl$get_polars_opt_requirements = function() {
#' @description This environment is used internally for the package to remember
#' what has been going on. Currently only used to throw one-time warnings()
runtime_state = new.env(parent = emptyenv())


# Milliseconds elapsed since the previous call to subtimer_ms(), limited to
# `cap`. The timestamp of the last call is kept in runtime_state$last_subtime;
# when none is stored yet, 0 is used as the previous timestamp.
# If the elapsed time reaches `cap` and `cap_name` is not NULL, `cap_name` is
# returned in place of the number.
subtimer_ms = function(cap_name = NULL, cap = 9999) {
  now = as.numeric(Sys.time())
  previous = runtime_state$last_subtime %||% 0
  runtime_state$last_subtime = now

  elapsed_ms = min((now - previous) * 1000, cap)
  if (is.null(cap_name) || elapsed_ms != cap) {
    return(elapsed_ms)
  }
  cap_name
}


6 changes: 6 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ check_no_missing_args = function(





#' Verify user selected method/attribute exists
#' @description internal function to check method call of env_classes
#'
Expand All @@ -34,6 +36,10 @@ check_no_missing_args = function(
#' @return invisible(NULL)
verify_method_call = function(Class_env,Method_name,call=sys.call(1L),class_name =NULL) {

if(polars_optenv$debug_polars) {
class_name = class_name %||% as.character(as.list(match.call())$Class_env)
cat("[",format(subtimer_ms(),digits = 4),"ms]\n",class_name,"$",Method_name,"() -> ",sep = "")
}
if(!Method_name %in% names(Class_env)) {
class_name = class_name %||% as.character(as.list(match.call())$Class_env)
stop(
Expand Down
40 changes: 40 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,43 @@ cd r-polars
- The `RPOLARS_RUST_SOURCE` environment variable allows **polars** to recover the Cargo cache even if source files have been moved. Replace with your own absolute path to your local clone!
- `filter_rcmdcheck.R` removes known warnings from final check report.
- `unlink("check")` cleans up.

### Misc

If you experience unexpectedly sluggish performance when using polars in a given IDE, we'd like to hear about it. You can activate `pl$set_polars_options(debug_polars = TRUE)` to profile which methods are being touched (not necessarily run) and how fast. Below is an example of good behavior.
```r
#run e.g. an eager query after setting debug_polars = TRUE
pl$DataFrame(iris)$select("Species")
[TIME? ms]
pl$DataFrame() -> [0.73ms]
.pr$DataFrame$new_with_capacity() -> [0.56ms]
.pr$DataFrame$set_column_from_robj() -> [11.04ms]
.pr$DataFrame$set_column_from_robj() -> [0.3309ms]
.pr$DataFrame$set_column_from_robj() -> [0.283ms]
.pr$DataFrame$set_column_from_robj() -> [0.2761ms]
.pr$DataFrame$set_column_from_robj() -> [12.54ms]
DataFrame$select() -> [0.3681ms]
ProtoExprArray$push_back_rexpr() -> [0.21ms]
pl$col() -> [0.1669ms]
.pr$Expr$col() -> [0.212ms]
.pr$DataFrame$select() -> [1.229ms]
DataFrame$print() -> [0.1781ms]
.pr$DataFrame$print() -> shape: (150, 1)
┌───────────┐
│ Species │
│ --- │
│ cat │
╞═══════════╡
│ setosa │
│ setosa │
│ setosa │
│ setosa │
│ … │
│ virginica │
│ virginica │
│ virginica │
│ virginica │
└───────────┘
```
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,3 +338,47 @@ unlink("check",recursive = TRUE, force =TRUE)
with your own absolute path to your local clone!
- `filter_rcmdcheck.R` removes known warnings from final check report.
- `unlink("check")` cleans up.

### Misc

If you experience unexpectedly sluggish performance when using polars in
a given IDE, we’d like to hear about it. You can activate
`pl$set_polars_options(debug_polars = TRUE)` to profile which methods
are being touched (not necessarily run) and how fast. Below is an
example of good behavior.

``` r
#run e.g. an eager query after setting debug_polars = TRUE
pl$DataFrame(iris)$select("Species")

[TIME? ms]
pl$DataFrame() -> [0.73ms]
.pr$DataFrame$new_with_capacity() -> [0.56ms]
.pr$DataFrame$set_column_from_robj() -> [11.04ms]
.pr$DataFrame$set_column_from_robj() -> [0.3309ms]
.pr$DataFrame$set_column_from_robj() -> [0.283ms]
.pr$DataFrame$set_column_from_robj() -> [0.2761ms]
.pr$DataFrame$set_column_from_robj() -> [12.54ms]
DataFrame$select() -> [0.3681ms]
ProtoExprArray$push_back_rexpr() -> [0.21ms]
pl$col() -> [0.1669ms]
.pr$Expr$col() -> [0.212ms]
.pr$DataFrame$select() -> [1.229ms]
DataFrame$print() -> [0.1781ms]
.pr$DataFrame$print() -> shape: (150, 1)
┌───────────┐
│ Species   │
│ ---       │
│ cat       │
╞═══════════╡
│ setosa    │
│ setosa    │
│ setosa    │
│ setosa    │
│ …         │
│ virginica │
│ virginica │
│ virginica │
│ virginica │
└───────────┘
```
15 changes: 15 additions & 0 deletions man/cash-.pl_polars_env.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions man/cash-.private_polars_env.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/extendr_method_to_pure_functions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/nanoarrow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_pl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/polars_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6a3522d

Please sign in to comment.