diff --git a/NAMESPACE b/NAMESPACE index 57e895eb5..47f582288 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,8 @@ S3method("$",VecDataFrame) S3method("$",When) S3method("$",WhenThen) S3method("$",WhenThenThen) +S3method("$",pl_polars_env) +S3method("$",private_polars_env) S3method("$<-",DataFrame) S3method("$<-",RField) S3method("&",Expr) diff --git a/NEWS.md b/NEWS.md index 8224b3e5e..80af3ac68 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,13 @@ # polars (development version) +## What's changed + - use `pl$set_polars_options(debug_polars = TRUE)` to profile/debug method-calls of a polars query (#193) ## What's changed - lazy functions translated: `pl$implode`, `pl$explode`, `pl$unique`, `pl$approx_unique`, `pl$head`, `pl$tail` (#196) - `pl$list` is deprecated, use `pl$implode` instead (#196) # polars 0.6.0 - ## BREAKING CHANGES - - Bump rust-polars from 2023-02-17 unreleased version to 2023-04-20 unreleased version. (#183) - `top_k`'s `reverse` option is removed. Use the new `bottom_k` method instead. - The name of the `fmt` argument of some methods (e.g. `parse_date`) has been changed to `format`. 
diff --git a/R/after-wrappers.R b/R/after-wrappers.R index 3a35a4530..d3f1abdd1 100644 --- a/R/after-wrappers.R +++ b/R/after-wrappers.R @@ -5,6 +5,8 @@ build_debug_print = FALSE #' extendr methods into pure functions #' #' @param env environment object output from extendr-wrappers.R classes +#' @param class_name optional class string, only used for debug printing +#' Default NULL, will infer class_name automatically #' @keywords internal #' @description self is a global of extendr wrapper methods #' this function copies the function into a new environment and @@ -12,14 +14,33 @@ build_debug_print = FALSE #' #' @return env of pure function calls to rust #' -extendr_method_to_pure_functions = function(env) { - as.environment(lapply(env,function(f) { +extendr_method_to_pure_functions = function(env,class_name=NULL) { + if(is.null(class_name)) class_name = as.character(sys.call()[2]) + e = as.environment(lapply(env,function(f) { if(!is.function(f)) return(f) if("self" %in% codetools::findGlobals(f)) { formals(f) <- c(alist(self=),formals(f)) } f })) + class(e) = c("private_polars_env", paste0("pr_",class_name) ,"environment") + e +} + + +#' get private method from Class +#' @details This method if polars_optenv$debug_polars == TRUE will print what methods are called +#' @export +#' @keywords internal +"$.private_polars_env" = function(self, name) { + #print called private class in debug mode + if(polars_optenv$debug_polars) { + cat( + "[",format(subtimer_ms("TIME? 
"),digits = 4),"ms]\n .pr$", + substr(class(self)[2],4,99), "$",name,"() -> ", sep= "" + ) + } + self[[name]] } @@ -158,6 +179,26 @@ method_as_property = function(f, setter=FALSE) { #' ) pl = new.env(parent=emptyenv()) +class(pl) = c("pl_polars_env", "environment") + + + +#' get public function from pl namespace/env +#' @details This method if polars_optenv$debug_polars == TRUE will print what methods are called +#' @export +#' @keywords internal +"$.pl_polars_env" = function(self, name) { + #print called public function in debug mode + if(polars_optenv$debug_polars) { + cat( + "[",format(subtimer_ms("TIME? "),digits = 4),"ms]\npl$",name,"() -> ", sep= "" + ) + } + self[[name]] +} + + + #remap DataType = clone_env_one_level_deep(RPolarsDataType) diff --git a/R/options.R b/R/options.R index 1d55ff0f2..e61bd0e22 100644 --- a/R/options.R +++ b/R/options.R @@ -87,6 +87,18 @@ polars_optreq$default_maintain_order = list( # set requirement functions of defa } ) +#' @rdname polars_options +#' @name debug_polars +#' @details prints any call to public or private polars method +#' @param debug_polars bool, default = FALSE, +#' print each called public or private polars method with timing +polars_optenv$debug_polars = FALSE #set default value +polars_optreq$debug_polars = list( #set requirement functions of default value + is_bool = function (x) { + is.logical(x) && length(x)==1 && !is.na(x) + } +) + ## END OF DEFINED OPTIONS @@ -170,7 +182,8 @@ class(pl$options) = c("polars_option_list", "list") #' @param ... any options to modify #' #' @param return_replaced_options return previous state of modified options -#' Convenient for temporarily swapping of options during testing. +#' Convenient for temporarily swapping of options during testing. The immediate +#' return value is invisible. 
#' #' @rdname polars_options #' @name set_polars_options @@ -225,8 +238,8 @@ pl$set_polars_options = function( polars_optenv[[i]] = opts[[i]] } - if (return_replaced_options) { - return(replaced_opts_list) + if(return_replaced_options) { + return(invisible(replaced_opts_list)) } # return current option set invisible @@ -269,3 +282,14 @@ pl$get_polars_opt_requirements = function() { #' @description This environment is used internally for the package to remember #' what has been going on. Currently only used to throw one-time warnings() runtime_state = new.env(parent = emptyenv()) + + +subtimer_ms = function(cap_name = NULL,cap=9999) { + last = runtime_state$last_subtime %||% 0 + this = as.numeric(Sys.time()) + runtime_state$last_subtime = this + time = min((this - last)*1000, cap) + if(!is.null(cap_name) && time==cap) cap_name else time +} + + diff --git a/R/utils.R b/R/utils.R index e67508312..1b9f6105a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -23,6 +23,8 @@ check_no_missing_args = function( + + #' Verify user selected method/attribute exists #' @description internal function to check method call of env_classes #' @@ -34,6 +36,10 @@ check_no_missing_args = function( #' @return invisible(NULL) verify_method_call = function(Class_env,Method_name,call=sys.call(1L),class_name =NULL) { + if(polars_optenv$debug_polars) { + class_name = class_name %||% as.character(as.list(match.call())$Class_env) + cat("[",format(subtimer_ms(),digits = 4),"ms]\n",class_name,"$",Method_name,"() -> ",sep = "") + } if(!Method_name %in% names(Class_env)) { class_name = class_name %||% as.character(as.list(match.call())$Class_env) stop( diff --git a/README.Rmd b/README.Rmd index 30f176027..24239ac4d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -277,3 +277,43 @@ cd r-polars - The `RPOLARS_RUST_SOURCE` environment variable allows **polars** to recover the Cargo cache even if source files have been moved. Replace with your own absolute path to your local clone! 
- `filter_rcmdcheck.R` removes known warnings from final check report. - `unlink("check")` cleans up. + +### Misc + +If you experience unexpected sluggish performance, when using polars in a given IDE, we'd like to hear about it. You can try to activate `pl$set_polars_options(debug_polars = TRUE)` to profile what methods are being touched (not necessarily run) and how fast. Below is an example of good behavior. + +```r +#run e.g. an eager query after setting debug_polars = TRUE +pl$DataFrame(iris)$select("Species") + +[TIME? ms] +pl$DataFrame() -> [0.73ms] + .pr$DataFrame$new_with_capacity() -> [0.56ms] + .pr$DataFrame$set_column_from_robj() -> [11.04ms] + .pr$DataFrame$set_column_from_robj() -> [0.3309ms] + .pr$DataFrame$set_column_from_robj() -> [0.283ms] + .pr$DataFrame$set_column_from_robj() -> [0.2761ms] + .pr$DataFrame$set_column_from_robj() -> [12.54ms] +DataFrame$select() -> [0.3681ms] +ProtoExprArray$push_back_rexpr() -> [0.21ms] +pl$col() -> [0.1669ms] + .pr$Expr$col() -> [0.212ms] + .pr$DataFrame$select() -> [1.229ms] +DataFrame$print() -> [0.1781ms] + .pr$DataFrame$print() -> shape: (150, 1) +┌───────────┐ +│ Species │ +│ --- │ +│ cat │ +╞═══════════╡ +│ setosa │ +│ setosa │ +│ setosa │ +│ setosa │ +│ … │ +│ virginica │ +│ virginica │ +│ virginica │ +│ virginica │ +└───────────┘ +``` diff --git a/README.md b/README.md index 41a82a3d1..eec55c3bb 100644 --- a/README.md +++ b/README.md @@ -338,3 +338,47 @@ unlink("check",recursive = TRUE, force =TRUE) with your own absolute path to your local clone! - `filter_rcmdcheck.R` removes known warnings from final check report. - `unlink("check")` cleans up. + +### Misc + +If you experience unexpected sluggish performance, when using polars in +a given IDE, we’d like to hear about it. You can try to activate +`pl$set_polars_options(debug_polars = TRUE)` to profile what methods are +being touched (not necessarily run) and how fast. Below is an example of +good behavior. + +``` r +#run e.g. 
an eager query after setting debug_polars = TRUE +pl$DataFrame(iris)$select("Species") + +[TIME? ms] +pl$DataFrame() -> [0.73ms] + .pr$DataFrame$new_with_capacity() -> [0.56ms] + .pr$DataFrame$set_column_from_robj() -> [11.04ms] + .pr$DataFrame$set_column_from_robj() -> [0.3309ms] + .pr$DataFrame$set_column_from_robj() -> [0.283ms] + .pr$DataFrame$set_column_from_robj() -> [0.2761ms] + .pr$DataFrame$set_column_from_robj() -> [12.54ms] +DataFrame$select() -> [0.3681ms] +ProtoExprArray$push_back_rexpr() -> [0.21ms] +pl$col() -> [0.1669ms] + .pr$Expr$col() -> [0.212ms] + .pr$DataFrame$select() -> [1.229ms] +DataFrame$print() -> [0.1781ms] + .pr$DataFrame$print() -> shape: (150, 1) +┌───────────┐ +│ Species │ +│ --- │ +│ cat │ +╞═══════════╡ +│ setosa │ +│ setosa │ +│ setosa │ +│ setosa │ +│ … │ +│ virginica │ +│ virginica │ +│ virginica │ +│ virginica │ +└───────────┘ +``` diff --git a/man/cash-.pl_polars_env.Rd b/man/cash-.pl_polars_env.Rd new file mode 100644 index 000000000..b2f7b498a --- /dev/null +++ b/man/cash-.pl_polars_env.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/after-wrappers.R +\name{$.pl_polars_env} +\alias{$.pl_polars_env} +\title{get public function from pl namespace/env} +\usage{ +\method{$}{pl_polars_env}(self, name) +} +\description{ +get public function from pl namespace/env +} +\details{ +This method if polars_optenv$debug_polars == TRUE will print what methods are called +} +\keyword{internal} diff --git a/man/cash-.private_polars_env.Rd b/man/cash-.private_polars_env.Rd new file mode 100644 index 000000000..3d118952c --- /dev/null +++ b/man/cash-.private_polars_env.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/after-wrappers.R +\name{$.private_polars_env} +\alias{$.private_polars_env} +\title{get private method from Class} +\usage{ +\method{$}{private_polars_env}(self, name) +} +\description{ +get private method from Class +} +\details{ 
+This method if polars_optenv$debug_polars == TRUE will print what methods are called +} +\keyword{internal} diff --git a/man/extendr_method_to_pure_functions.Rd b/man/extendr_method_to_pure_functions.Rd index 5af47df40..4a3c89885 100644 --- a/man/extendr_method_to_pure_functions.Rd +++ b/man/extendr_method_to_pure_functions.Rd @@ -4,10 +4,13 @@ \alias{extendr_method_to_pure_functions} \title{extendr methods into pure functions} \usage{ -extendr_method_to_pure_functions(env) +extendr_method_to_pure_functions(env, class_name = NULL) } \arguments{ \item{env}{environment object output from extendr-wrappers.R classes} + +\item{class_name}{optional class string, only used for debug printing +Default NULL, will infer class_name automatically} } \value{ env of pure function calls to rust diff --git a/man/nanoarrow.Rd b/man/nanoarrow.Rd index e232bf50c..7c8810c4c 100644 --- a/man/nanoarrow.Rd +++ b/man/nanoarrow.Rd @@ -16,13 +16,13 @@ \alias{as_record_batch_reader.DataFrame} \title{polars to nanoarrow and arrow} \usage{ -as_nanoarrow_array_stream.DataFrame(x, ..., schema = NULL) +\method{as_nanoarrow_array_stream}{DataFrame}(x, ..., schema = NULL) -infer_nanoarrow_schema.DataFrame(x, ...) +\method{infer_nanoarrow_schema}{DataFrame}(x, ...) -as_arrow_table.DataFrame(x, ...) +\method{as_arrow_table}{DataFrame}(x, ...) -as_record_batch_reader.DataFrame(x, ..., schema = NULL) +\method{as_record_batch_reader}{DataFrame}(x, ..., schema = NULL) } \arguments{ \item{x}{a polars DataFrame} diff --git a/man/pl_pl.Rd b/man/pl_pl.Rd index b476ff3a3..4cb02b03e 100644 --- a/man/pl_pl.Rd +++ b/man/pl_pl.Rd @@ -6,7 +6,7 @@ \alias{pl} \title{The complete polars public API.} \format{ -An object of class \code{environment} of length 69. +An object of class \code{pl_polars_env} (inherits from \code{environment}) of length 69. 
} \usage{ pl diff --git a/man/polars_options.Rd b/man/polars_options.Rd index 7633110b4..7c5d79902 100644 --- a/man/polars_options.Rd +++ b/man/polars_options.Rd @@ -6,6 +6,7 @@ \alias{no_messages} \alias{do_not_repeat_call} \alias{default_maintain_order} +\alias{debug_polars} \alias{get_polars_options} \alias{polars_options} \alias{pl_options} @@ -35,10 +36,14 @@ turn of messages} \item{default_maintain_orderr}{bool, default = FALSE} +\item{debug_polars}{bool, default = FALSE, +print each called public or private polars method with timing} + \item{...}{any options to modify} \item{return_replaced_options}{return previous state of modified options -Convenient for temporarily swapping of options during testing.} +Convenient for temporarily swapping of options during testing. The immediate +return value is invisible.} } \value{ current settings as list @@ -58,6 +63,8 @@ do not print the call causing the error in error messages sets maintain_order = TRUE as default implicated methods/functions are currently: DataFrame_GroupBy + LazyFrameGroupby. +prints any call to public or private polars method + modifing list takes no effect, pass it to pl$set_polars_options get/set/resest interact with internal env \code{polars:::polars_optenv}