From 9bb47ae596df5a77ad2303cc9e43c118d35cc6cb Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Tue, 11 Apr 2023 22:41:16 -0400 Subject: [PATCH 01/26] DataFrame translations: No arguments --- R/dataframe__frame.R | 63 +++++++++++++++++++++++++++++++++ R/extendr-wrappers.R | 14 ++++++++ R/lazyframe__lazy.R | 63 +++++++++++++++++++++++++++++++++ man/DataFrame_first.Rd | 18 ++++++++++ man/DataFrame_last.Rd | 18 ++++++++++ man/DataFrame_max.Rd | 18 ++++++++++ man/DataFrame_mean.Rd | 18 ++++++++++ man/DataFrame_median.Rd | 18 ++++++++++ man/DataFrame_min.Rd | 18 ++++++++++ man/DataFrame_sum.Rd | 18 ++++++++++ man/LazyFrame_first.Rd | 18 ++++++++++ man/LazyFrame_last.Rd | 18 ++++++++++ man/LazyFrame_max.Rd | 18 ++++++++++ man/LazyFrame_mean.Rd | 18 ++++++++++ man/LazyFrame_median.Rd | 18 ++++++++++ man/LazyFrame_min.Rd | 18 ++++++++++ man/LazyFrame_sum.Rd | 18 ++++++++++ src/rust/src/lazy/dataframe.rs | 28 +++++++++++++++ tests/testthat/test-dataframe.R | 30 ++++++++++++++++ tests/testthat/test-lazy.R | 32 +++++++++++++++++ 20 files changed, 482 insertions(+) create mode 100644 man/DataFrame_first.Rd create mode 100644 man/DataFrame_last.Rd create mode 100644 man/DataFrame_max.Rd create mode 100644 man/DataFrame_mean.Rd create mode 100644 man/DataFrame_median.Rd create mode 100644 man/DataFrame_min.Rd create mode 100644 man/DataFrame_sum.Rd create mode 100644 man/LazyFrame_first.Rd create mode 100644 man/LazyFrame_last.Rd create mode 100644 man/LazyFrame_max.Rd create mode 100644 man/LazyFrame_mean.Rd create mode 100644 man/LazyFrame_median.Rd create mode 100644 man/LazyFrame_min.Rd create mode 100644 man/LazyFrame_sum.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 17538ea63..04c2d4433 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -851,3 +851,66 @@ DataFrame_unnest = function(names = NULL) { + +#' @title First +#' @description Get the first row of the DataFrame. 
+#' @keywords DataFrame +#' @return A new `DataFrame` object with applied filter. +#' @examples pl$DataFrame(mtcars)$first() +DataFrame_first = function() { + self$lazy()$first()$collect() +} + +#' @title Last +#' @description Get the last row of the DataFrame. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied filter. +#' @examples pl$DataFrame(mtcars)$last() +DataFrame_last = function() { + self$lazy()$last()$collect() +} + +#' @title Max +#' @description Aggregate the columns in the DataFrame to their maximum value. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$max() +DataFrame_max = function() { + self$lazy()$max()$collect() +} + +#' @title Mean +#' @description Aggregate the columns in the DataFrame to their mean value. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$mean() +DataFrame_mean = function() { + self$lazy()$mean()$collect() +} + +#' @title Median +#' @description Aggregate the columns in the DataFrame to their median value. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$median() +DataFrame_median = function() { + self$lazy()$median()$collect() +} + +#' @title Min +#' @description Aggregate the columns in the DataFrame to their minimum value. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$min() +DataFrame_min = function() { + self$lazy()$min()$collect() +} + +#' @title Sum +#' @description Aggregate the columns of this DataFrame to their sum values. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied aggregation. 
+#' @examples pl$DataFrame(mtcars)$sum() +DataFrame_sum = function() { + self$lazy()$sum()$collect() +} \ No newline at end of file diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 3a4183d38..7b1855cfc 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -808,6 +808,20 @@ LazyFrame$collect_background <- function() .Call(wrap__LazyFrame__collect_backgr LazyFrame$collect <- function() .Call(wrap__LazyFrame__collect, self) +LazyFrame$first <- function() .Call(wrap__LazyFrame__first, self) + +LazyFrame$last <- function() .Call(wrap__LazyFrame__last, self) + +LazyFrame$max <- function() .Call(wrap__LazyFrame__max, self) + +LazyFrame$mean <- function() .Call(wrap__LazyFrame__mean, self) + +LazyFrame$median <- function() .Call(wrap__LazyFrame__median, self) + +LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self) + +LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self) + LazyFrame$select <- function(exprs) .Call(wrap__LazyFrame__select, self, exprs) LazyFrame$limit <- function(n) .Call(wrap__LazyFrame__limit, self, n) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 8b7ae7ebc..0eeda9c98 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -213,6 +213,69 @@ LazyFrame_limit = function(n) { unwrap(.pr$LazyFrame$limit(self,n)) } +#' @title First +#' @description Get the first row of the DataFrame. +#' @keywords DataFrame +#' @return A new `DataFrame` object with applied filter. +#' @examples pl$DataFrame(mtcars)$lazy()$first()$collect() +LazyFrame_first = function() { + unwrap(.pr$LazyFrame$first(self)) +} + +#' @title Last +#' @description Get the last row of the DataFrame. +#' @keywords LazyFrame +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$lazy()$last()$collect() +LazyFrame_last = function() { + unwrap(.pr$LazyFrame$last(self)) +} + +#' @title Max +#' @description Aggregate the columns in the DataFrame to their maximum value. 
+#' @keywords LazyFrame +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$lazy()$max()$collect() +LazyFrame_max = function() { + unwrap(.pr$LazyFrame$max(self)) +} + +#' @title Mean +#' @description Aggregate the columns in the DataFrame to their mean value. +#' @keywords LazyFrame +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$lazy()$mean()$collect() +LazyFrame_mean = function() { + unwrap(.pr$LazyFrame$mean(self)) +} + +#' @title Median +#' @description Aggregate the columns in the DataFrame to their median value. +#' @keywords LazyFrame +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$lazy()$median()$collect() +LazyFrame_median = function() { + unwrap(.pr$LazyFrame$median(self)) +} + +#' @title Min +#' @description Aggregate the columns in the DataFrame to their minimum value. +#' @keywords LazyFrame +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$lazy()$min()$collect() +LazyFrame_min = function() { + unwrap(.pr$LazyFrame$min(self)) +} + +#' @title Sum +#' @description Aggregate the columns of this DataFrame to their sum values. +#' @keywords LazyFrame +#' @return LazyFrame +#' @examples pl$DataFrame(mtcars)$lazy()$sum()$collect() +LazyFrame_sum = function() { + unwrap(.pr$LazyFrame$sum(self)) +} + #' @title Lazy_groupby #' @description apply groupby on LazyFrame, return LazyGroupBy diff --git a/man/DataFrame_first.Rd b/man/DataFrame_first.Rd new file mode 100644 index 000000000..7f063e765 --- /dev/null +++ b/man/DataFrame_first.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_first} +\alias{DataFrame_first} +\title{First} +\usage{ +DataFrame_first() +} +\value{ +A new \code{DataFrame} object with applied filter. +} +\description{ +Get the first row of the DataFrame. 
+} +\examples{ +pl$DataFrame(mtcars)$first() +} +\keyword{DataFrame} diff --git a/man/DataFrame_last.Rd b/man/DataFrame_last.Rd new file mode 100644 index 000000000..6b2e0503a --- /dev/null +++ b/man/DataFrame_last.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_last} +\alias{DataFrame_last} +\title{Last} +\usage{ +DataFrame_last() +} +\value{ +A new \code{DataFrame} object with applied filter. +} +\description{ +Get the last row of the DataFrame. +} +\examples{ +pl$DataFrame(mtcars)$last() +} +\keyword{DataFrame} diff --git a/man/DataFrame_max.Rd b/man/DataFrame_max.Rd new file mode 100644 index 000000000..5e9145ee0 --- /dev/null +++ b/man/DataFrame_max.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_max} +\alias{DataFrame_max} +\title{Max} +\usage{ +DataFrame_max() +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their maximum value. +} +\examples{ +pl$DataFrame(mtcars)$max() +} +\keyword{DataFrame} diff --git a/man/DataFrame_mean.Rd b/man/DataFrame_mean.Rd new file mode 100644 index 000000000..8361ffb78 --- /dev/null +++ b/man/DataFrame_mean.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_mean} +\alias{DataFrame_mean} +\title{Mean} +\usage{ +DataFrame_mean() +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their mean value. 
+} +\examples{ +pl$DataFrame(mtcars)$mean() +} +\keyword{DataFrame} diff --git a/man/DataFrame_median.Rd b/man/DataFrame_median.Rd new file mode 100644 index 000000000..5dc01f63f --- /dev/null +++ b/man/DataFrame_median.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_median} +\alias{DataFrame_median} +\title{Median} +\usage{ +DataFrame_median() +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their median value. +} +\examples{ +pl$DataFrame(mtcars)$median() +} +\keyword{DataFrame} diff --git a/man/DataFrame_min.Rd b/man/DataFrame_min.Rd new file mode 100644 index 000000000..0c0b42319 --- /dev/null +++ b/man/DataFrame_min.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_min} +\alias{DataFrame_min} +\title{Min} +\usage{ +DataFrame_min() +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their minimum value. +} +\examples{ +pl$DataFrame(mtcars)$min() +} +\keyword{DataFrame} diff --git a/man/DataFrame_sum.Rd b/man/DataFrame_sum.Rd new file mode 100644 index 000000000..7b2f85780 --- /dev/null +++ b/man/DataFrame_sum.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_sum} +\alias{DataFrame_sum} +\title{Sum} +\usage{ +DataFrame_sum() +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns of this DataFrame to their sum values. 
+} +\examples{ +pl$DataFrame(mtcars)$sum() +} +\keyword{DataFrame} diff --git a/man/LazyFrame_first.Rd b/man/LazyFrame_first.Rd new file mode 100644 index 000000000..c3bba4cf2 --- /dev/null +++ b/man/LazyFrame_first.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_first} +\alias{LazyFrame_first} +\title{First} +\usage{ +LazyFrame_first() +} +\value{ +A new \code{DataFrame} object with applied filter. +} +\description{ +Get the first row of the DataFrame. +} +\examples{ +pl$DataFrame(mtcars)$lazy()$first()$collect() +} +\keyword{DataFrame} diff --git a/man/LazyFrame_last.Rd b/man/LazyFrame_last.Rd new file mode 100644 index 000000000..588255aee --- /dev/null +++ b/man/LazyFrame_last.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_last} +\alias{LazyFrame_last} +\title{Last} +\usage{ +LazyFrame_last() +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their maximum value. +} +\examples{ +pl$DataFrame(mtcars)$lazy()$last()$collect() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_max.Rd b/man/LazyFrame_max.Rd new file mode 100644 index 000000000..734817014 --- /dev/null +++ b/man/LazyFrame_max.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_max} +\alias{LazyFrame_max} +\title{Max} +\usage{ +LazyFrame_max() +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their maximum value. 
+} +\examples{ +pl$DataFrame(mtcars)$lazy()$max()$collect() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_mean.Rd b/man/LazyFrame_mean.Rd new file mode 100644 index 000000000..e9eb40dd8 --- /dev/null +++ b/man/LazyFrame_mean.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_mean} +\alias{LazyFrame_mean} +\title{Mean} +\usage{ +LazyFrame_mean() +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their mean value. +} +\examples{ +pl$DataFrame(mtcars)$lazy()$mean()$collect() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_median.Rd b/man/LazyFrame_median.Rd new file mode 100644 index 000000000..115f4e3fe --- /dev/null +++ b/man/LazyFrame_median.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_median} +\alias{LazyFrame_median} +\title{Median} +\usage{ +LazyFrame_median() +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their median value. +} +\examples{ +pl$DataFrame(mtcars)$lazy()$median()$collect() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_min.Rd b/man/LazyFrame_min.Rd new file mode 100644 index 000000000..85ee1a29e --- /dev/null +++ b/man/LazyFrame_min.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_min} +\alias{LazyFrame_min} +\title{Min} +\usage{ +LazyFrame_min() +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns in the DataFrame to their minimum value. 
+} +\examples{ +pl$DataFrame(mtcars)$lazy()$min()$collect() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_sum.Rd b/man/LazyFrame_sum.Rd new file mode 100644 index 000000000..37378591f --- /dev/null +++ b/man/LazyFrame_sum.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_sum} +\alias{LazyFrame_sum} +\title{Sum} +\usage{ +LazyFrame_sum() +} +\value{ +LazyFrame +} +\description{ +Aggregate the columns of this DataFrame to their sum values. +} +\examples{ +pl$DataFrame(mtcars)$sum() +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 127649e26..1a662ff9c 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -49,6 +49,34 @@ impl LazyFrame { }); r_result_list(result) } + + fn first(&self) -> Result { + Ok(LazyFrame(self.0.clone().first())) + } + + fn last(&self) -> Result { + Ok(LazyFrame(self.0.clone().last())) + } + + fn max(&self) -> Result { + Ok(LazyFrame(self.0.clone().max())) + } + + fn mean(&self) -> Result { + Ok(LazyFrame(self.0.clone().mean())) + } + + fn median(&self) -> Result { + Ok(LazyFrame(self.0.clone().median())) + } + + fn min(&self) -> Result { + Ok(LazyFrame(self.0.clone().min())) + } + + fn sum(&self) -> Result { + Ok(LazyFrame(self.0.clone().sum())) + } fn select(&self, exprs: &ProtoExprArray) -> LazyFrame { let exprs: Vec = exprs diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 44b3aab26..16df77d02 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -424,3 +424,33 @@ test_that("to_Struct, unnest, to_frame, as_data_frame", { expect_identical(df$as_data_frame(), df_e) }) + +test_that("methods without arguments", { + a = pl$DataFrame(mtcars)$first()$as_data_frame() + b = data.frame(lapply(mtcars, head, 1)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$last()$as_data_frame() + b = 
data.frame(lapply(mtcars, tail, 1)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$max()$as_data_frame() + b = data.frame(lapply(mtcars, max)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$mean()$as_data_frame() + b = data.frame(lapply(mtcars, mean)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$median()$as_data_frame() + b = data.frame(lapply(mtcars, median)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$min()$as_data_frame() + b = data.frame(lapply(mtcars, min)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$sum()$as_data_frame() + b = data.frame(lapply(mtcars, sum)) + expect_equal(a, b, ignore_attr = TRUE) +}) \ No newline at end of file diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 02e89b993..aa623f0b5 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -100,4 +100,36 @@ test_that("lazy filter", { }) +test_that("methods without arguments", { + a = pl$DataFrame(mtcars)$lazy()$first()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, head, 1)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$last()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, tail, 1)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$max()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, max)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$mean()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, mean)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$median()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, median)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$min()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, min)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$sum()$collect()$as_data_frame() + b 
= data.frame(lapply(mtcars, sum)) + expect_equal(a, b, ignore_attr = TRUE) +}) + + + #TODO complete tests for lazy From a89e355320bc8306acfc4ac1842cad1d6329cddc Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Tue, 11 Apr 2023 23:11:30 -0400 Subject: [PATCH 02/26] translate: DataFrame$reverse() --- R/dataframe__frame.R | 9 +++++++++ R/extendr-wrappers.R | 2 ++ R/lazyframe__lazy.R | 10 ++++++++++ man/DataFrame_reverse.Rd | 18 ++++++++++++++++++ man/LazyFrame_reverse.Rd | 18 ++++++++++++++++++ src/rust/src/lazy/dataframe.rs | 4 ++++ tests/testthat/test-dataframe.R | 5 +++++ tests/testthat/test-lazy.R | 5 +++++ 8 files changed, 71 insertions(+) create mode 100644 man/DataFrame_reverse.Rd create mode 100644 man/LazyFrame_reverse.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 04c2d4433..1011935e8 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -913,4 +913,13 @@ DataFrame_min = function() { #' @examples pl$DataFrame(mtcars)$sum() DataFrame_sum = function() { self$lazy()$sum()$collect() +} + +#' @title Reverse +#' @description Reverse the DataFrame. 
+#' @keywords DataFrame +#' @return DataFrame +#' @examples pl$DataFrame(mtcars)$reverse() +DataFrame_reverse = function() { + self$lazy()$reverse()$collect() } \ No newline at end of file diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 7b1855cfc..56394d0f0 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -822,6 +822,8 @@ LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self) LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self) +LazyFrame$reverse <- function() .Call(wrap__LazyFrame__reverse, self) + LazyFrame$select <- function(exprs) .Call(wrap__LazyFrame__select, self, exprs) LazyFrame$limit <- function(n) .Call(wrap__LazyFrame__limit, self, n) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 0eeda9c98..88948bcc9 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -277,6 +277,16 @@ LazyFrame_sum = function() { } +#' @title Reverse +#' @description Reverse the DataFrame. +#' @keywords LazyFrame +#' @return LazyFrame +#' @examples pl$DataFrame(mtcars)$lazy()$reverse()$collect() +LazyFrame_reverse = function() { + unwrap(.pr$LazyFrame$reverse(self)) +} + + #' @title Lazy_groupby #' @description apply groupby on LazyFrame, return LazyGroupBy #' @keywords LazyFrame diff --git a/man/DataFrame_reverse.Rd b/man/DataFrame_reverse.Rd new file mode 100644 index 000000000..94c644a69 --- /dev/null +++ b/man/DataFrame_reverse.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_reverse} +\alias{DataFrame_reverse} +\title{Reverse} +\usage{ +DataFrame_reverse() +} +\value{ +LazyFrame +} +\description{ +Reverse the DataFrame. 
+} +\examples{ +pl$DataFrame(mtcars)$reverse() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_reverse.Rd b/man/LazyFrame_reverse.Rd new file mode 100644 index 000000000..cc6f011e9 --- /dev/null +++ b/man/LazyFrame_reverse.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_reverse} +\alias{LazyFrame_reverse} +\title{Reverse} +\usage{ +LazyFrame_reverse() +} +\value{ +LazyFrame +} +\description{ +Reverse the DataFrame. +} +\examples{ +pl$DataFrame(mtcars)$reverse() +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 1a662ff9c..1d87103a0 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -78,6 +78,10 @@ impl LazyFrame { Ok(LazyFrame(self.0.clone().sum())) } + fn reverse(&self) -> Result { + Ok(LazyFrame(self.0.clone().reverse())) + } + fn select(&self, exprs: &ProtoExprArray) -> LazyFrame { let exprs: Vec = exprs .0 diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 16df77d02..da8e6dddd 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -453,4 +453,9 @@ test_that("methods without arguments", { a = pl$DataFrame(mtcars)$sum()$as_data_frame() b = data.frame(lapply(mtcars, sum)) expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$reverse()$as_data_frame() + b = mtcars[32:1,] + expect_equal(a, b, ignore_attr = TRUE) + }) \ No newline at end of file diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index aa623f0b5..1918fde78 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -128,6 +128,11 @@ test_that("methods without arguments", { a = pl$DataFrame(mtcars)$lazy()$sum()$collect()$as_data_frame() b = data.frame(lapply(mtcars, sum)) expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$reverse()$collect()$as_data_frame() + b = mtcars[32:1,] + 
expect_equal(a, b, ignore_attr = TRUE) + }) From fa1a91459ecb16771e3ac70c471c04f357160ad0 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 00:04:20 -0400 Subject: [PATCH 03/26] translation: DataFrame$slice() --- R/dataframe__frame.R | 14 ++++++++++++++ R/extendr-wrappers.R | 2 ++ R/lazyframe__lazy.R | 13 +++++++++++++ man/DataFrame_slice.Rd | 24 ++++++++++++++++++++++++ man/LazyFrame_slice.Rd | 24 ++++++++++++++++++++++++ src/rust/src/lazy/dataframe.rs | 5 +++++ tests/testthat/test-dataframe.R | 3 +++ tests/testthat/test-lazy.R | 3 +++ 8 files changed, 88 insertions(+) create mode 100644 man/DataFrame_slice.Rd create mode 100644 man/LazyFrame_slice.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 1011935e8..317773388 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -922,4 +922,18 @@ DataFrame_sum = function() { #' @examples pl$DataFrame(mtcars)$reverse() DataFrame_reverse = function() { self$lazy()$reverse()$collect() +} + + +#' @title Slice +#' @description Get a slice of this DataFrame. 
+#' @keywords DataFrame +#' @return DataFrame +#' @param offset integer +#' @param length integer or NULL +#' @examples +#' pl$DataFrame(mtcars)$slice(2, 4) +#' mtcars[3:6,] +DataFrame_slice = function(offset, length = NULL) { + self$lazy()$slice(offset, length)$collect() } \ No newline at end of file diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 56394d0f0..4075c8d3e 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -824,6 +824,8 @@ LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self) LazyFrame$reverse <- function() .Call(wrap__LazyFrame__reverse, self) +LazyFrame$slice <- function(offset, length) .Call(wrap__LazyFrame__slice, self, offset, length) + LazyFrame$select <- function(exprs) .Call(wrap__LazyFrame__select, self, exprs) LazyFrame$limit <- function(n) .Call(wrap__LazyFrame__limit, self, n) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 88948bcc9..7f49f37fa 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -286,6 +286,19 @@ LazyFrame_reverse = function() { unwrap(.pr$LazyFrame$reverse(self)) } +#' @title Slice +#' @description Get a slice of this DataFrame. 
+#' @keywords LazyFrame +#' @return LazyFrame +#' @param offset integer +#' @param length integer or NULL +#' @examples +#' pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect() +#' mtcars[3:6,] +LazyFrame_slice = function(offset, length = NULL) { + unwrap(.pr$LazyFrame$slice(self, offset, length)) +} + #' @title Lazy_groupby #' @description apply groupby on LazyFrame, return LazyGroupBy diff --git a/man/DataFrame_slice.Rd b/man/DataFrame_slice.Rd new file mode 100644 index 000000000..7e42c1b23 --- /dev/null +++ b/man/DataFrame_slice.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_slice} +\alias{DataFrame_slice} +\title{Slice} +\usage{ +DataFrame_slice(offset, length = NULL) +} +\arguments{ +\item{offset}{integer} + +\item{length}{integer or NULL} +} +\value{ +DataFrame +} +\description{ +Get a slice of this DataFrame. +} +\examples{ +pl$DataFrame(mtcars)$slice(2, 4) +mtcars[3:6,] +} +\keyword{DataFrame} diff --git a/man/LazyFrame_slice.Rd b/man/LazyFrame_slice.Rd new file mode 100644 index 000000000..82fa2fdcc --- /dev/null +++ b/man/LazyFrame_slice.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_slice} +\alias{LazyFrame_slice} +\title{Slice} +\usage{ +LazyFrame_slice(offset, length = NULL) +} +\arguments{ +\item{offset}{integer} + +\item{length}{integer or NULL} +} +\value{ +DataFrame +} +\description{ +Get a slice of this DataFrame. 
+} +\examples{ +pl$DataFrame(mtcars)$slice(2, 4) +mtcars[2:6,] +} +\keyword{DataFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 1d87103a0..76940b38d 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -4,6 +4,7 @@ use crate::rdatatype::new_join_type; use crate::utils::r_result_list; use crate::utils::try_f64_into_u32; use crate::utils::try_f64_into_usize; +use crate::robj_to; use extendr_api::prelude::*; use polars::prelude as pl; @@ -82,6 +83,10 @@ impl LazyFrame { Ok(LazyFrame(self.0.clone().reverse())) } + fn slice(&self, offset: Robj, length: Robj) -> Result { + Ok(LazyFrame(self.0.clone().slice(robj_to!(i64, offset)?, robj_to!(u32, length)?))) + } + fn select(&self, exprs: &ProtoExprArray) -> LazyFrame { let exprs: Vec = exprs .0 diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index da8e6dddd..1d6348427 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -458,4 +458,7 @@ test_that("methods without arguments", { b = mtcars[32:1,] expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$slice(2, 4)$as_data_frame() + b = mtcars[3:6,] + expect_equal(a, b, ignore_attr = TRUE) }) \ No newline at end of file diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 1918fde78..e692e3cad 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -133,6 +133,9 @@ test_that("methods without arguments", { b = mtcars[32:1,] expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect()$as_data_frame() + b = mtcars[3:6,] + expect_equal(a, b, ignore_attr = TRUE) }) From 10b44cdb2be9349c1ce2b468521c3165f32d87c6 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 11 Apr 2023 19:57:47 +0000 Subject: [PATCH 04/26] typo in readme (#102) --- README.Rmd | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/README.Rmd b/README.Rmd index 4e6507108..420ed8807 100644 --- a/README.Rmd +++ b/README.Rmd @@ -170,7 +170,7 @@ Here are the steps required for an example contribution, where we are implementi - Notice we use `Expr_cos = "use_extendr_wrapper"`, it means we're just using unmodified the [extendr auto-generated wrapper](https://github.com/pola-rs/r-polars/blob/c56c49a6fc172685f50c15fffe3d14231297ad97/R/extendr-wrappers.R#L253) - Write a test [here](https://github.com/pola-rs/r-polars/blob/c56c49a6fc172685f50c15fffe3d14231297ad97/tests/testthat/test-expr.R#L1921). - Run `renv::restore()` and resolve all R packages - - Run `extendr::document()` to recompile and confirm the added method functions as intended, e.g. `pl$DataFrame(a=c(0,pi/2,pi,NA_real_))$select(pl$col("a")$cos())` + - Run `rextendr::document()` to recompile and confirm the added method functions as intended, e.g. `pl$DataFrame(a=c(0,pi/2,pi,NA_real_))$select(pl$col("a")$cos())` - Run `devtools::test()`. See below for how to set up your development environment correctly. Note that PRs to **rpolars** will be automatically be built and tested on all diff --git a/README.md b/README.md index 936ed9961..f7bb2bdae 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ expression](https://rpolars.github.io/reference/Expr_cos.html): - Write a test [here](https://github.com/pola-rs/r-polars/blob/c56c49a6fc172685f50c15fffe3d14231297ad97/tests/testthat/test-expr.R#L1921). - Run `renv::restore()` and resolve all R packages -- Run `extendr::document()` to recompile and confirm the added method +- Run `rextendr::document()` to recompile and confirm the added method functions as intended, e.g. `pl$DataFrame(a=c(0,pi/2,pi,NA_real_))$select(pl$col("a")$cos())` - Run `devtools::test()`. 
See below for how to set up your development From 386c0fbfb2af8ecad7b504c6d8db04f7ba735a3e Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 17:13:43 -0400 Subject: [PATCH 05/26] DataFrame$tail() (#103) * DataFrame$tail() * DataFrame_tail PR review --- R/dataframe__frame.R | 13 +++++++++++++ R/extendr-wrappers.R | 2 ++ R/lazyframe__lazy.R | 12 ++++++++++++ man/DataFrame_tail.Rd | 21 +++++++++++++++++++++ man/LazyFrame_tail.Rd | 21 +++++++++++++++++++++ src/rust/src/lazy/dataframe.rs | 4 ++++ tests/testthat/test-dataframe.R | 6 +++++- tests/testthat/test-lazy.R | 5 ++++- 8 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 man/DataFrame_tail.Rd create mode 100644 man/LazyFrame_tail.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 317773388..ce0dc84c8 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -686,6 +686,19 @@ DataFrame_limit = function(n) { } +#' Tail a DataFrame +#' @name DataFrame_tail +#' @description Get the last n rows. +#' @param n positive numeric or integer number not larger than 2^32 +#' +#' @details any number will be converted to u32. 
Negative raises error +#' @keywords DataFrame +#' @return DataFrame +DataFrame_tail = function(n) { + self$lazy()$tail(n)$collect() +} + + #' filter DataFrame #' @aliases DataFrame_filter #' @description DataFrame$filter(bool_expr) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 4075c8d3e..7251b97aa 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -830,6 +830,8 @@ LazyFrame$select <- function(exprs) .Call(wrap__LazyFrame__select, self, exprs) LazyFrame$limit <- function(n) .Call(wrap__LazyFrame__limit, self, n) +LazyFrame$tail <- function(n) .Call(wrap__LazyFrame__tail, self, n) + LazyFrame$filter <- function(expr) .Call(wrap__LazyFrame__filter, self, expr) LazyFrame$groupby <- function(exprs, maintain_order) .Call(wrap__LazyFrame__groupby, self, exprs, maintain_order) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 7f49f37fa..49cff63c6 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -213,6 +213,7 @@ LazyFrame_limit = function(n) { unwrap(.pr$LazyFrame$limit(self,n)) } +<<<<<<< HEAD #' @title First #' @description Get the first row of the DataFrame. #' @keywords DataFrame #' @return A new `DataFrame` object with applied filter. @@ -299,6 +300,17 @@ LazyFrame_slice = function(offset, length = NULL) { unwrap(.pr$LazyFrame$slice(self, offset, length)) } +#' @title Tail +#' @description take last n rows of query +#' @keywords LazyFrame +#' @param n positive numeric or integer number not larger than 2^32 +#' +#' @details any number will be converted to u32. Negative raises error +#' +#' @return A new `LazyFrame` object with applied filter. 
+LazyFrame_tail = function(n) { + unwrap(.pr$LazyFrame$tail(self,n)) +} #' @title Lazy_groupby #' @description apply groupby on LazyFrame, return LazyGroupBy diff --git a/man/DataFrame_tail.Rd b/man/DataFrame_tail.Rd new file mode 100644 index 000000000..1567ba507 --- /dev/null +++ b/man/DataFrame_tail.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_tail} +\alias{DataFrame_tail} +\title{Tail a DataFrame} +\usage{ +DataFrame_tail(n) +} +\arguments{ +\item{n}{positive numeric of integer number not larger than 2^32} +} +\value{ +DataFrame +} +\description{ +Get the last n rows. +} +\details{ +any number will converted to u32. Negative raises error +} +\keyword{DataFrame} diff --git a/man/LazyFrame_tail.Rd b/man/LazyFrame_tail.Rd new file mode 100644 index 000000000..a992538f5 --- /dev/null +++ b/man/LazyFrame_tail.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_tail} +\alias{LazyFrame_tail} +\title{Tail} +\usage{ +LazyFrame_tail(n) +} +\arguments{ +\item{n}{positive numeric or integer number not larger than 2^32} +} +\value{ +A new \code{LazyFrame} object with applied filter. +} +\description{ +take last n rows of query +} +\details{ +any number will converted to u32. 
Negative raises error +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 76940b38d..7c0a4e5b1 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -107,6 +107,10 @@ impl LazyFrame { ) } + fn tail(&self, n: Robj) -> Result { + Ok(LazyFrame(self.0.clone().tail(robj_to!(u32, n)?))) + } + fn filter(&self, expr: &Expr) -> LazyFrame { let new_df = self.clone().0.filter(expr.0.clone()); LazyFrame(new_df) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 1d6348427..9cab6af55 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -461,4 +461,8 @@ test_that("methods without arguments", { a = pl$DataFrame(mtcars)$slice(2, 4)$as_data_frame() b = mtcars[3:6,] expect_equal(a, b, ignore_attr = TRUE) -}) \ No newline at end of file + + a = as.data.frame(pl$DataFrame(mtcars)$tail(6)) + b = tail(mtcars) + expect_equal(a, b, ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index e692e3cad..66135c640 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -136,8 +136,11 @@ test_that("methods without arguments", { a = pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect()$as_data_frame() b = mtcars[3:6,] expect_equal(a, b, ignore_attr = TRUE) -}) + a = pl$DataFrame(mtcars)$lazy()$tail(6)$collect()$as_data_frame() + b = tail(mtcars) + expect_equal(a, b, ignore_attr = TRUE) +}) #TODO complete tests for lazy From 3c43036c3e7c9206c59584bfc5c733c2aafb0cc7 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 20:27:33 -0400 Subject: [PATCH 06/26] merge conflict cruft --- R/lazyframe__lazy.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 49cff63c6..bf049243c 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -213,7 +213,6 @@ LazyFrame_limit = function(n) { unwrap(.pr$LazyFrame$limit(self,n)) } -<<<<<<< 
HEAD #' @title First #' @description Get the first row of the DataFrame. #' @keywords DataFrame From 347d41c2d4ecf4a5d56f86dcb3fa9a28299374ca Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 21:24:01 -0400 Subject: [PATCH 07/26] GroupBy translations --- R/groupby.R | 112 ++++++++++++++++++++++++++++++++++ R/zzz.R | 15 ++--- man/GroupBy_first.Rd | 24 ++++++++ man/GroupBy_last.Rd | 24 ++++++++ man/GroupBy_max.Rd | 24 ++++++++ man/GroupBy_mean.Rd | 24 ++++++++ man/GroupBy_median.Rd | 24 ++++++++ man/GroupBy_min.Rd | 24 ++++++++ man/GroupBy_sum.Rd | 24 ++++++++ tests/testthat/test-groupby.R | 40 ++++++++++++ 10 files changed, 324 insertions(+), 11 deletions(-) create mode 100644 man/GroupBy_first.Rd create mode 100644 man/GroupBy_last.Rd create mode 100644 man/GroupBy_max.Rd create mode 100644 man/GroupBy_mean.Rd create mode 100644 man/GroupBy_median.Rd create mode 100644 man/GroupBy_min.Rd create mode 100644 man/GroupBy_sum.Rd diff --git a/R/groupby.R b/R/groupby.R index 7ebb6359e..500aa38e9 100644 --- a/R/groupby.R +++ b/R/groupby.R @@ -75,6 +75,118 @@ GroupBy_agg = function(...) { } +#' GroupBy First +#' @description Reduce the groups to the first value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$first() +GroupBy_first = function() { + self$agg(pl$all()$first()) +} + +#' GroupBy Last +#' @description Reduce the groups to the last value. 
+#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$last() +GroupBy_last = function() { + self$agg(pl$all()$last()) +} + +#' GroupBy Max +#' @description Reduce the groups to the maximum value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$max() +GroupBy_max = function() { + self$agg(pl$all()$max()) +} + +#' GroupBy Mean +#' @description Reduce the groups to the mean value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$mean() +GroupBy_mean = function() { + self$agg(pl$all()$mean()) +} + +#' GroupBy Median +#' @description Reduce the groups to the median value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$median() +GroupBy_median = function() { + self$agg(pl$all()$median()) +} + +#' GroupBy Min +#' @description Reduce the groups to the minimum value. 
+#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$min() +GroupBy_min = function() { + self$agg(pl$all()$min()) +} + +#' GroupBy Sum +#' @description Reduce the groups to the sum value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$sum() +GroupBy_sum = function() { + self$agg(pl$all()$sum()) +} + #' convert to data.frame #' #' @param ... any opt param passed to R as.data.frame diff --git a/R/zzz.R b/R/zzz.R index a01cc3c6f..87b4bf0d5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -13,22 +13,15 @@ if(build_debug_print) print(paste( # modify these Dataframe methods replace_private_with_pub_methods(DataFrame, "^DataFrame_") - -env = GroupBy -env$agg = GroupBy_agg -env$as_data_frame = GroupBy_as_data_frame -macro_add_syntax_check_to_class("GroupBy") +# GroupBy - is special read header info in groupby.R +replace_private_with_pub_methods(GroupBy, "^GroupBy_") +macro_add_syntax_check_to_class("GroupBy") # not activated automatically as GroupBy is not extendr # LazyFrame replace_private_with_pub_methods(LazyFrame, "^LazyFrame_") # LazyGroupBy -env = LazyGroupBy -env$agg = LazyGroupBy_agg -env$apply = LazyGroupBy_apply -env$head = LazyGroupBy_head -env$tail = LazyGroupBy_tail -rm(env) +replace_private_with_pub_methods(LazyGroupBy, "^LazyGroupBy_") # PolarsBackgroundHandle replace_private_with_pub_methods(PolarsBackgroundHandle, "^PolarsBackgroundHandle_") diff --git a/man/GroupBy_first.Rd b/man/GroupBy_first.Rd new file mode 100644 index 
000000000..dec2feea2 --- /dev/null +++ b/man/GroupBy_first.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_first} +\alias{GroupBy_first} +\title{GroupBy First} +\usage{ +GroupBy_first() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the first value. +} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$first() +} +\keyword{GroupBy} diff --git a/man/GroupBy_last.Rd b/man/GroupBy_last.Rd new file mode 100644 index 000000000..b71e61377 --- /dev/null +++ b/man/GroupBy_last.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_last} +\alias{GroupBy_last} +\title{GroupBy Last} +\usage{ +GroupBy_last() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the last value. +} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$last() +} +\keyword{GroupBy} diff --git a/man/GroupBy_max.Rd b/man/GroupBy_max.Rd new file mode 100644 index 000000000..1d0d6a71e --- /dev/null +++ b/man/GroupBy_max.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_max} +\alias{GroupBy_max} +\title{GroupBy Max} +\usage{ +GroupBy_max() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the maximum value. 
+} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$max() +} +\keyword{GroupBy} diff --git a/man/GroupBy_mean.Rd b/man/GroupBy_mean.Rd new file mode 100644 index 000000000..c8540943d --- /dev/null +++ b/man/GroupBy_mean.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_mean} +\alias{GroupBy_mean} +\title{GroupBy Mean} +\usage{ +GroupBy_mean() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the mean value. +} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$mean() +} +\keyword{GroupBy} diff --git a/man/GroupBy_median.Rd b/man/GroupBy_median.Rd new file mode 100644 index 000000000..bec3f5201 --- /dev/null +++ b/man/GroupBy_median.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_median} +\alias{GroupBy_median} +\title{GroupBy Median} +\usage{ +GroupBy_median() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the median value. 
+} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$median() +} +\keyword{GroupBy} diff --git a/man/GroupBy_min.Rd b/man/GroupBy_min.Rd new file mode 100644 index 000000000..d35e20316 --- /dev/null +++ b/man/GroupBy_min.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_min} +\alias{GroupBy_min} +\title{GroupBy Min} +\usage{ +GroupBy_min() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the minimum value. +} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$min() +} +\keyword{GroupBy} diff --git a/man/GroupBy_sum.Rd b/man/GroupBy_sum.Rd new file mode 100644 index 000000000..f383e2fc9 --- /dev/null +++ b/man/GroupBy_sum.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_sum} +\alias{GroupBy_sum} +\title{GroupBy Sum} +\usage{ +GroupBy_sum() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the sum value. 
+} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$sum() +} +\keyword{GroupBy} diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index 822f51a37..28d4d258a 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -49,3 +49,43 @@ test_that("groupby", { ) }) + + +test_that("methods without arguments", { + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$first()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, head, 1)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$last()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, tail, 1)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$max()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, max)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$mean()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, mean)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$median()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, median)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$min()$as_data_frame() + b = 
as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, min)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$sum()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, sum)))) + b = b[, colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + +}) From a9a0fc6500bddb3c4820f301111cbfc6bd5b3a3e Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 21:35:11 -0400 Subject: [PATCH 08/26] DataFrame$slice() optional arg --- R/lazyframe__lazy.R | 1 + src/rust/src/lazy/dataframe.rs | 5 ++++- tests/testthat/test-dataframe.R | 4 ++++ tests/testthat/test-lazy.R | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index bf049243c..d388c437c 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -294,6 +294,7 @@ LazyFrame_reverse = function() { #' @param length integer or NULL #' @examples #' pl$DataFrame(mtcars)$slice(2, 4) +#' pl$DataFrame(mtcars)$slice(30) #' mtcars[2:6,] LazyFrame_slice = function(offset, length = NULL) { unwrap(.pr$LazyFrame$slice(self, offset, length)) diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 7c0a4e5b1..a8d234a77 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -84,7 +84,10 @@ impl LazyFrame { } fn slice(&self, offset: Robj, length: Robj) -> Result { - Ok(LazyFrame(self.0.clone().slice(robj_to!(i64, offset)?, robj_to!(u32, length)?))) + Ok(LazyFrame(self.0.clone().slice( + robj_to!(i64, offset)?, + robj_to!(Option, u32, length)?.unwrap_or(u32::MAX), + ))) } fn select(&self, exprs: &ProtoExprArray) -> LazyFrame { diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 9cab6af55..77927b45c 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R 
@@ -462,6 +462,10 @@ test_that("methods without arguments", { b = mtcars[3:6,] expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$slice(30)$as_data_frame() + b = tail(mtcars, 2) + expect_equal(a, b, ignore_attr = TRUE) + a = as.data.frame(pl$DataFrame(mtcars)$tail(6)) b = tail(mtcars) expect_equal(a, b, ignore_attr = TRUE) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 66135c640..b3f28b7f6 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -137,6 +137,10 @@ test_that("methods without arguments", { b = mtcars[3:6,] expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$lazy()$slice(30)$collect()$as_data_frame() + b = tail(mtcars, 2) + expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$lazy()$tail(6)$collect()$as_data_frame() b = tail(mtcars) expect_equal(a, b, ignore_attr = TRUE) From 0e25e16fb0d0f0906f804c1e8a9ce99b07548ec6 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 21:46:43 -0400 Subject: [PATCH 09/26] translation DataFrame.var() and .std() --- R/dataframe__frame.R | 20 ++++++++++++++++++++ R/extendr-wrappers.R | 4 ++++ R/groupby.R | 32 ++++++++++++++++++++++++++++++++ R/lazyframe__lazy.R | 20 ++++++++++++++++++++ man/DataFrame_std.Rd | 21 +++++++++++++++++++++ man/DataFrame_var.Rd | 21 +++++++++++++++++++++ man/LazyFrame_slice.Rd | 1 + man/LazyFrame_std.Rd | 21 +++++++++++++++++++++ man/LazyFrame_var.Rd | 21 +++++++++++++++++++++ src/rust/src/lazy/dataframe.rs | 8 ++++++++ tests/testthat/test-dataframe.R | 16 ++++++++++++++++ tests/testthat/test-groupby.R | 10 ++++++++++ tests/testthat/test-lazy.R | 16 ++++++++++++++++ 13 files changed, 211 insertions(+) create mode 100644 man/DataFrame_std.Rd create mode 100644 man/DataFrame_var.Rd create mode 100644 man/LazyFrame_std.Rd create mode 100644 man/LazyFrame_var.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index ce0dc84c8..d93a6fe97 100644 --- a/R/dataframe__frame.R +++ 
b/R/dataframe__frame.R @@ -928,6 +928,26 @@ DataFrame_sum = function() { self$lazy()$sum()$collect() } +#' @title Var +#' @description Aggregate the columns of this DataFrame to their variance values. +#' @keywords DataFrame +#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$var() +DataFrame_var = function(ddof = 1) { + self$lazy()$var(ddof)$collect() +} + +#' @title Std +#' @description Aggregate the columns of this DataFrame to their standard deviation values. +#' @keywords DataFrame +#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A new `DataFrame` object with applied aggregation. +#' @examples pl$DataFrame(mtcars)$std() +DataFrame_std = function(ddof = 1) { + self$lazy()$std(ddof)$collect() +} + #' @title Reverse #' @description Reverse the DataFrame. #' @keywords LazyFrame diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 7251b97aa..7124091ae 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -822,6 +822,10 @@ LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self) LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self) +LazyFrame$var <- function(ddof) .Call(wrap__LazyFrame__var, self, ddof) + +LazyFrame$std <- function(ddof) .Call(wrap__LazyFrame__std, self, ddof) + LazyFrame$reverse <- function() .Call(wrap__LazyFrame__reverse, self) LazyFrame$slice <- function(offset, length) .Call(wrap__LazyFrame__slice, self, offset, length) diff --git a/R/groupby.R b/R/groupby.R index 500aa38e9..6806aa578 100644 --- a/R/groupby.R +++ b/R/groupby.R @@ -187,6 +187,38 @@ GroupBy_sum = function() { self$agg(pl$all()$sum()) } +#' GroupBy Var +#' @description Reduce the groups to the variance value. 
+#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$var() +GroupBy_var = function() { + self$agg(pl$all()$var()) +} + +#' GroupBy Std +#' @description Reduce the groups to the standard deviation value. +#' @return aggregated DataFrame +#' @keywords GroupBy +#' @examples +#' df = pl$DataFrame( +#' a = c(1, 2, 2, 3, 4, 5), +#' b = c(0.5, 0.5, 4, 10, 13, 14), +#' c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), +#' d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +#' ) +#' df$groupby("d", maintain_order=TRUE)$std() +GroupBy_std = function() { + self$agg(pl$all()$std()) +} + #' convert to data.frame #' #' @param ... any opt param passed to R as.data.frame diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d388c437c..9a2f0cc98 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -276,6 +276,26 @@ LazyFrame_sum = function() { unwrap(.pr$LazyFrame$sum(self)) } +#' @title Var +#' @description Aggregate the columns of this LazyFrame to their variance values. +#' @keywords LazyFrame +#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A new `LazyFrame` object with applied aggregation. +#' @examples pl$LazyFrame(mtcars)$var() +LazyFrame_var = function(ddof = 1) { + unwrap(.pr$LazyFrame$var(self, ddof)) +} + +#' @title Std +#' @description Aggregate the columns of this LazyFrame to their standard deviation values. +#' @keywords LazyFrame +#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A new `LazyFrame` object with applied aggregation. 
+#' @examples pl$LazyFrame(mtcars)$std() +LazyFrame_std = function(ddof = 1) { + unwrap(.pr$LazyFrame$std(self, ddof)) +} + #' @title Reverse #' @description Reverse the DataFrame. diff --git a/man/DataFrame_std.Rd b/man/DataFrame_std.Rd new file mode 100644 index 000000000..0d5605edb --- /dev/null +++ b/man/DataFrame_std.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_std} +\alias{DataFrame_std} +\title{Std} +\usage{ +DataFrame_std(ddof = 1) +} +\arguments{ +\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns of this DataFrame to their standard deviation values. +} +\examples{ +pl$DataFrame(mtcars)$std() +} +\keyword{DataFrame} diff --git a/man/DataFrame_var.Rd b/man/DataFrame_var.Rd new file mode 100644 index 000000000..a351c54c4 --- /dev/null +++ b/man/DataFrame_var.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_var} +\alias{DataFrame_var} +\title{Var} +\usage{ +DataFrame_var(ddof = 1) +} +\arguments{ +\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +} +\value{ +A new \code{DataFrame} object with applied aggregation. +} +\description{ +Aggregate the columns of this DataFrame to their variance values. +} +\examples{ +pl$DataFrame(mtcars)$var() +} +\keyword{DataFrame} diff --git a/man/LazyFrame_slice.Rd b/man/LazyFrame_slice.Rd index 82fa2fdcc..0bfd67be8 100644 --- a/man/LazyFrame_slice.Rd +++ b/man/LazyFrame_slice.Rd @@ -19,6 +19,7 @@ Get a slice of this DataFrame. 
} \examples{ pl$DataFrame(mtcars)$slice(2, 4) +pl$DataFrame(mtcars)$slice(30) mtcars[2:6,] } \keyword{DataFrame} diff --git a/man/LazyFrame_std.Rd b/man/LazyFrame_std.Rd new file mode 100644 index 000000000..497a280d2 --- /dev/null +++ b/man/LazyFrame_std.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_std} +\alias{LazyFrame_std} +\title{Std} +\usage{ +LazyFrame_std(ddof = 1) +} +\arguments{ +\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns of this LazyFrame to their standard deviation values. +} +\examples{ +pl$LazyFrame(mtcars)$std() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_var.Rd b/man/LazyFrame_var.Rd new file mode 100644 index 000000000..84b94113b --- /dev/null +++ b/man/LazyFrame_var.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_var} +\alias{LazyFrame_var} +\title{Var} +\usage{ +LazyFrame_var(ddof = 1) +} +\arguments{ +\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +} +\value{ +A new \code{LazyFrame} object with applied aggregation. +} +\description{ +Aggregate the columns of this LazyFrame to their variance values. 
+} +\examples{ +pl$LazyFrame(mtcars)$var() +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index a8d234a77..9b237d863 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -79,6 +79,14 @@ impl LazyFrame { Ok(LazyFrame(self.0.clone().sum())) } + fn var(&self, ddof: u8) -> Result { + Ok(LazyFrame(self.0.clone().var(ddof))) + } + + fn std(&self, ddof: u8) -> Result { + Ok(LazyFrame(self.0.clone().std(ddof))) + } + fn reverse(&self) -> Result { Ok(LazyFrame(self.0.clone().reverse())) } diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 77927b45c..1088a7195 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -454,6 +454,22 @@ test_that("methods without arguments", { b = data.frame(lapply(mtcars, sum)) expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$var()$as_data_frame() + b = data.frame(lapply(mtcars, var)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$var(10)$as_data_frame() + b = data.frame(lapply(mtcars, var)) + expect_true(all(a != b)) + + a = pl$DataFrame(mtcars)$std()$as_data_frame() + b = data.frame(lapply(mtcars, sd)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$std(10)$as_data_frame() + b = data.frame(lapply(mtcars, sd)) + expect_true(all(a != b)) + a = pl$DataFrame(mtcars)$reverse()$as_data_frame() b = mtcars[32:1,] expect_equal(a, b, ignore_attr = TRUE) diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index 28d4d258a..9eb6ca1d6 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -88,4 +88,14 @@ test_that("methods without arguments", { b = b[, colnames(b) != "cyl"] expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$var()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, var)))) + b = b[, 
colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$std()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, sd)))) + b = b[, colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + }) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index b3f28b7f6..4ec5cc75b 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -141,6 +141,22 @@ test_that("methods without arguments", { b = tail(mtcars, 2) expect_equal(a, b, ignore_attr = TRUE) + a = pl$DataFrame(mtcars)$lazy()$var()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, var)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$var(10)$collect()$as_data_frame() + b = data.frame(lapply(mtcars, var)) + expect_true(all(a != b)) + + a = pl$DataFrame(mtcars)$lazy()$std()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, sd)) + expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$lazy()$std(10)$collect()$as_data_frame() + b = data.frame(lapply(mtcars, sd)) + expect_true(all(a != b)) + a = pl$DataFrame(mtcars)$lazy()$tail(6)$collect()$as_data_frame() b = tail(mtcars) expect_equal(a, b, ignore_attr = TRUE) From 21e07331bccbf80ecb9175bd4522e5d27cc2ae75 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Wed, 12 Apr 2023 23:54:49 -0400 Subject: [PATCH 10/26] DataFrame: null_count(), estimated_size() --- R/dataframe__frame.R | 24 ++++++++++++++++++++++++ R/extendr-wrappers.R | 4 ++++ R/groupby.R | 12 ++++++++++++ man/GroupBy_std.Rd | 24 ++++++++++++++++++++++++ man/GroupBy_var.Rd | 24 ++++++++++++++++++++++++ src/rust/src/rdataframe/mod.rs | 8 ++++++++ 6 files changed, 96 insertions(+) create mode 100644 man/GroupBy_std.Rd create mode 100644 man/GroupBy_var.Rd diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index d93a6fe97..7042d6279 100644 --- 
a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -969,4 +969,28 @@ DataFrame_reverse = function() { #' mtcars[2:6,] DataFrame_slice = function(offset, length = NULL) { self$lazy()$slice(offset, length)$collect() +} + + +#' @title Null count +#' @description Create a new DataFrame that shows the null counts per column. +#' @keywords DataFrame +#' @return DataFrame +#' @examples +#' x = mtcars +#' x[1, 2:3] = NA +#' pl$DataFrame(x)$null_count() +DataFrame_null_count <- function() { + .pr$DataFrame$null_count(self) +} + + +#' @title Estimated size +#' @description Return an estimation of the total (heap) allocated size of the DataFrame. +#' @keywords DataFrame +#' @return Bytes +#' @examples +#' pl$DataFrame(mtcars)$estimated_size() +DataFrame_estimated_size <- function() { + .pr$DataFrame$estimated_size(self) } \ No newline at end of file diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 7124091ae..3cc6f5602 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -102,6 +102,10 @@ DataFrame$export_stream <- function(stream_ptr) invisible(.Call(wrap__DataFrame_ DataFrame$from_arrow_record_batches <- function(rbr) .Call(wrap__DataFrame__from_arrow_record_batches, rbr) +DataFrame$estimated_size <- function() .Call(wrap__DataFrame__estimated_size, self) + +DataFrame$null_count <- function() .Call(wrap__DataFrame__null_count, self) + #' @export `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func } diff --git a/R/groupby.R b/R/groupby.R index 6806aa578..1bbe11638 100644 --- a/R/groupby.R +++ b/R/groupby.R @@ -219,6 +219,18 @@ GroupBy_std = function() { self$agg(pl$all()$std()) } +#' @title GroupBy null count +#' @description Create a new DataFrame that shows the null counts per column. 
+#' @keywords DataFrame +#' @return DataFrame +#' @examples +#' x = mtcars +#' x[1:10, 3:5] = NA +#' pl$DataFrame(x)$groupby("cyl")$null_count() +GroupBy_null_count <- function() { + self$agg(pl$all()$null_count()) +} + #' convert to data.frame #' #' @param ... any opt param passed to R as.data.frame diff --git a/man/GroupBy_std.Rd b/man/GroupBy_std.Rd new file mode 100644 index 000000000..289b814cb --- /dev/null +++ b/man/GroupBy_std.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_std} +\alias{GroupBy_std} +\title{GroupBy Std} +\usage{ +GroupBy_std() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the standard deviation value. +} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$std() +} +\keyword{GroupBy} diff --git a/man/GroupBy_var.Rd b/man/GroupBy_var.Rd new file mode 100644 index 000000000..960609fdb --- /dev/null +++ b/man/GroupBy_var.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_var} +\alias{GroupBy_var} +\title{GroupBy Var} +\usage{ +GroupBy_var() +} +\value{ +aggregated DataFrame +} +\description{ +Reduce the groups to the variance value. 
+} +\examples{ +df = pl$DataFrame( + a = c(1, 2, 2, 3, 4, 5), + b = c(0.5, 0.5, 4, 10, 13, 14), + c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE), + d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana") +) +df$groupby("d", maintain_order=TRUE)$var() +} +\keyword{GroupBy} diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index a9dd62e22..ae6fdc6fd 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -317,6 +317,14 @@ impl DataFrame { pub fn from_arrow_record_batches(rbr: Robj) -> Result { Ok(DataFrame(crate::arrow_interop::to_rust::to_rust_df(rbr)?)) } + + pub fn estimated_size(&self) -> Result { + Ok(self.0.clone().estimated_size()) + } + + pub fn null_count(&self) -> Result { + Ok(DataFrame(self.0.clone().null_count())) + } } use crate::utils::wrappers::null_to_opt; impl DataFrame { From da78b0f813d30e03cf3f07f114b676ecd0cb1945 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 00:01:04 -0400 Subject: [PATCH 11/26] docs --- man/DataFrame_estimated_size.Rd | 18 ++++++++++++++++++ man/DataFrame_null_count.Rd | 20 ++++++++++++++++++++ man/GroupBy_null_count.Rd | 20 ++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 man/DataFrame_estimated_size.Rd create mode 100644 man/DataFrame_null_count.Rd create mode 100644 man/GroupBy_null_count.Rd diff --git a/man/DataFrame_estimated_size.Rd b/man/DataFrame_estimated_size.Rd new file mode 100644 index 000000000..458f45be0 --- /dev/null +++ b/man/DataFrame_estimated_size.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_estimated_size} +\alias{DataFrame_estimated_size} +\title{Estimated size} +\usage{ +DataFrame_estimated_size() +} +\value{ +Bytes +} +\description{ +Return an estimation of the total (heap) allocated size of the DataFrame. 
+} +\examples{ +pl$DataFrame(mtcars)$estimated_size() +} +\keyword{DataFrame} diff --git a/man/DataFrame_null_count.Rd b/man/DataFrame_null_count.Rd new file mode 100644 index 000000000..7659cfbeb --- /dev/null +++ b/man/DataFrame_null_count.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_null_count} +\alias{DataFrame_null_count} +\title{Null count} +\usage{ +DataFrame_null_count() +} +\value{ +DataFrame +} +\description{ +Create a new DataFrame that shows the null counts per column. +} +\examples{ +x = mtcars +x[1, 2:3] = NA +pl$DataFrame(x)$null_count() +} +\keyword{DataFrame} diff --git a/man/GroupBy_null_count.Rd b/man/GroupBy_null_count.Rd new file mode 100644 index 000000000..34069e161 --- /dev/null +++ b/man/GroupBy_null_count.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/groupby.R +\name{GroupBy_null_count} +\alias{GroupBy_null_count} +\title{GroupBy null count} +\usage{ +GroupBy_null_count() +} +\value{ +DataFrame +} +\description{ +Create a new DataFrame that shows the null counts per column. 
+} +\examples{ +x = mtcars +x[1:10, 3:5] = NA +pl$DataFrame(x)$groupby("cyl")$null_count() +} +\keyword{DataFrame} From 9f4d84694ea7f2a8728c0e58689b697774a54905 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 08:13:13 -0400 Subject: [PATCH 12/26] try to fix examples --- R/lazyframe__lazy.R | 14 ++++++++------ man/LazyFrame_limit.Rd | 3 +++ man/LazyFrame_reverse.Rd | 2 +- man/LazyFrame_slice.Rd | 4 ++-- man/LazyFrame_std.Rd | 2 +- man/LazyFrame_sum.Rd | 2 +- man/LazyFrame_tail.Rd | 3 +++ man/LazyFrame_var.Rd | 2 +- 8 files changed, 20 insertions(+), 12 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 9a2f0cc98..e9ec7104d 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -207,6 +207,7 @@ LazyFrame_collect_background = function() { #' #' @details any number will converted to u32. Negative raises error #' +#' @examples pl$DataFrame(mtcars)$lazy()$limit(4)$collect() #' @return A new `LazyFrame` object with applied filter. LazyFrame_limit = function(n) { if(!is.numeric(n)) stopf("limit: n must be numeric") @@ -271,7 +272,7 @@ LazyFrame_min = function() { #' @description Aggregate the columns of this DataFrame to their sum values. #' @keywords LazyFrame #' @return LazyFrame -#' @examples pl$DataFrame(mtcars)$sum() +#' @examples pl$DataFrame(mtcars)$lazy()$sum()$collect() LazyFrame_sum = function() { unwrap(.pr$LazyFrame$sum(self)) } @@ -281,7 +282,7 @@ LazyFrame_sum = function() { #' @keywords LazyFrame #' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. #' @return A new `LazyFrame` object with applied aggregation. 
-#' @examples pl$LazyFrame(mtcars)$var() +#' @examples pl$DataFrame(mtcars)$lazy()$var()$collect() LazyFrame_var = function(ddof = 1) { unwrap(.pr$LazyFrame$var(self, ddof)) } @@ -291,7 +292,7 @@ LazyFrame_var = function(ddof = 1) { #' @keywords LazyFrame #' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. #' @return A new `LazyFrame` object with applied aggregation. -#' @examples pl$LazyFrame(mtcars)$std() +#' @examples pl$DataFrame(mtcars)$lazy()$std()$collect() LazyFrame_std = function(ddof = 1) { unwrap(.pr$LazyFrame$std(self, ddof)) } @@ -301,7 +302,7 @@ LazyFrame_std = function(ddof = 1) { #' @description Reverse the DataFrame. #' @keywords LazyFrame #' @return LazyFrame -#' @examples pl$DataFrame(mtcars)$reverse() +#' @examples pl$DataFrame(mtcars)$lazy()$reverse()$collect() LazyFrame_reverse = function() { unwrap(.pr$LazyFrame$reverse(self)) } @@ -313,8 +314,8 @@ LazyFrame_reverse = function() { #' @param offset integer #' @param length integer or NULL #' @examples -#' pl$DataFrame(mtcars)$slice(2, 4) -#' pl$DataFrame(mtcars)$slice(30) +#' pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect() +#' pl$DataFrame(mtcars)$lazy()$slice(30)$collect() #' mtcars[2:6,] LazyFrame_slice = function(offset, length = NULL) { unwrap(.pr$LazyFrame$slice(self, offset, length)) @@ -327,6 +328,7 @@ LazyFrame_slice = function(offset, length = NULL) { #' #' @details any number will converted to u32. Negative raises error #' +#' @examples pl$DataFrame(mtcars)$lazy()$tail(2)$collect() #' @return A new `LazyFrame` object with applied filter. LazyFrame_tail = function(n) { unwrap(.pr$LazyFrame$tail(self,n)) diff --git a/man/LazyFrame_limit.Rd b/man/LazyFrame_limit.Rd index a95703ca7..a014d64ed 100644 --- a/man/LazyFrame_limit.Rd +++ b/man/LazyFrame_limit.Rd @@ -18,4 +18,7 @@ take limit of n rows of query \details{ any number will converted to u32. 
Negative raises error } +\examples{ +pl$DataFrame(mtcars)$lazy()$limit(4)$collect() +} \keyword{LazyFrame} diff --git a/man/LazyFrame_reverse.Rd b/man/LazyFrame_reverse.Rd index cc6f011e9..43194784e 100644 --- a/man/LazyFrame_reverse.Rd +++ b/man/LazyFrame_reverse.Rd @@ -13,6 +13,6 @@ LazyFrame Reverse the DataFrame. } \examples{ -pl$DataFrame(mtcars)$reverse() +pl$DataFrame(mtcars)$lazy()$reverse()$collect() } \keyword{LazyFrame} diff --git a/man/LazyFrame_slice.Rd b/man/LazyFrame_slice.Rd index 0bfd67be8..8fc6f2b3c 100644 --- a/man/LazyFrame_slice.Rd +++ b/man/LazyFrame_slice.Rd @@ -18,8 +18,8 @@ DataFrame Get a slice of this DataFrame. } \examples{ -pl$DataFrame(mtcars)$slice(2, 4) -pl$DataFrame(mtcars)$slice(30) +pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect() +pl$DataFrame(mtcars)$lazy()$slice(30)$collect() mtcars[2:6,] } \keyword{DataFrame} diff --git a/man/LazyFrame_std.Rd b/man/LazyFrame_std.Rd index 497a280d2..034802d51 100644 --- a/man/LazyFrame_std.Rd +++ b/man/LazyFrame_std.Rd @@ -16,6 +16,6 @@ A new \code{LazyFrame} object with applied aggregation. Aggregate the columns of this LazyFrame to their standard deviation values. } \examples{ -pl$LazyFrame(mtcars)$std() +pl$DataFrame(mtcars)$lazy()$std()$collect() } \keyword{LazyFrame} diff --git a/man/LazyFrame_sum.Rd b/man/LazyFrame_sum.Rd index 37378591f..065b90af3 100644 --- a/man/LazyFrame_sum.Rd +++ b/man/LazyFrame_sum.Rd @@ -13,6 +13,6 @@ LazyFrame Aggregate the columns of this DataFrame to their sum values. } \examples{ -pl$DataFrame(mtcars)$sum() +pl$DataFrame(mtcars)$lazy()$sum()$collect() } \keyword{LazyFrame} diff --git a/man/LazyFrame_tail.Rd b/man/LazyFrame_tail.Rd index a992538f5..81dbec31f 100644 --- a/man/LazyFrame_tail.Rd +++ b/man/LazyFrame_tail.Rd @@ -18,4 +18,7 @@ take last n rows of query \details{ any number will converted to u32. 
Negative raises error } +\examples{ +pl$DataFrame(mtcars)$lazy()$tail(2)$collect() +} \keyword{LazyFrame} diff --git a/man/LazyFrame_var.Rd b/man/LazyFrame_var.Rd index 84b94113b..6f3d4bcd3 100644 --- a/man/LazyFrame_var.Rd +++ b/man/LazyFrame_var.Rd @@ -16,6 +16,6 @@ A new \code{LazyFrame} object with applied aggregation. Aggregate the columns of this LazyFrame to their variance values. } \examples{ -pl$LazyFrame(mtcars)$var() +pl$DataFrame(mtcars)$lazy()$var()$collect() } \keyword{LazyFrame} From 3582ca1803d86d45d8412ebca20dd955049081fe Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 09:19:25 -0400 Subject: [PATCH 13/26] patrick --- tests/testthat/test-dataframe.R | 63 ++++++++++++++------------------- tests/testthat/test-lazy.R | 63 ++++++++++++++------------------- 2 files changed, 52 insertions(+), 74 deletions(-) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 9fac2fea2..95cb624e8 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -425,47 +425,36 @@ test_that("to_Struct, unnest, to_frame, as_data_frame", { }) -test_that("methods without arguments", { - a = pl$DataFrame(mtcars)$first()$as_data_frame() - b = data.frame(lapply(mtcars, head, 1)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$last()$as_data_frame() - b = data.frame(lapply(mtcars, tail, 1)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$max()$as_data_frame() - b = data.frame(lapply(mtcars, max)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$mean()$as_data_frame() - b = data.frame(lapply(mtcars, mean)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$median()$as_data_frame() - b = data.frame(lapply(mtcars, median)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$min()$as_data_frame() - b = data.frame(lapply(mtcars, min)) - expect_equal(a, b, ignore_attr = TRUE) - - a = 
pl$DataFrame(mtcars)$sum()$as_data_frame() - b = data.frame(lapply(mtcars, sum)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$var()$as_data_frame() - b = data.frame(lapply(mtcars, var)) - expect_equal(a, b, ignore_attr = TRUE) - +make_cases <- function() { + tibble::tribble( + ~ .test_name, ~ pola, ~ base, + "max", "max", max, + "mean", "mean", mean, + "median", "median", median, + "max", "max", max, + "min", "min", min, + "std", "std", sd, + "sum", "sum", sum, + "var", "var", var, + "first", "first", function(x) head(x, 1), + "last", "last", function(x) tail(x, 1) + ) +} + +with_parameters_test_that( + "simple translations: eager", { + a = pl$DataFrame(mtcars)[[pola]]()$as_data_frame() + b = data.frame(lapply(mtcars, base)) + testthat::expect_equal(a, b, ignore_attr = TRUE) + }, + .cases = make_cases() +) + +test_that("simple translations", { a = pl$DataFrame(mtcars)$var(10)$as_data_frame() b = data.frame(lapply(mtcars, var)) expect_true(all(a != b)) - a = pl$DataFrame(mtcars)$std()$as_data_frame() - b = data.frame(lapply(mtcars, sd)) - expect_equal(a, b, ignore_attr = TRUE) - a = pl$DataFrame(mtcars)$std(10)$as_data_frame() b = data.frame(lapply(mtcars, sd)) expect_true(all(a != b)) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index dc06cab49..357e89f4a 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -100,35 +100,32 @@ test_that("lazy filter", { }) -test_that("methods without arguments", { - a = pl$DataFrame(mtcars)$lazy()$first()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, head, 1)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$lazy()$last()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, tail, 1)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$lazy()$max()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, max)) - expect_equal(a, b, ignore_attr = TRUE) - - a = 
pl$DataFrame(mtcars)$lazy()$mean()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, mean)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$lazy()$median()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, median)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$lazy()$min()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, min)) - expect_equal(a, b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$lazy()$sum()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, sum)) - expect_equal(a, b, ignore_attr = TRUE) - +make_cases <- function() { + tibble::tribble( + ~ .test_name, ~ pola, ~ base, + "max", "max", max, + "mean", "mean", mean, + "median", "median", median, + "max", "max", max, + "min", "min", min, + "std", "std", sd, + "sum", "sum", sum, + "var", "var", var, + "first", "first", function(x) head(x, 1), + "last", "last", function(x) tail(x, 1) + ) +} + +with_parameters_test_that( + "simple translations: lazy", { + a = pl$DataFrame(mtcars)$lazy()[[pola]]()$collect()$as_data_frame() + b = data.frame(lapply(mtcars, base)) + testthat::expect_equal(a, b, ignore_attr = TRUE) + }, + .cases = make_cases() +) + +test_that("simple translations", { a = pl$DataFrame(mtcars)$lazy()$reverse()$collect()$as_data_frame() b = mtcars[32:1,] expect_equal(a, b, ignore_attr = TRUE) @@ -141,18 +138,10 @@ test_that("methods without arguments", { b = tail(mtcars, 2) expect_equal(a, b, ignore_attr = TRUE) - a = pl$DataFrame(mtcars)$lazy()$var()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, var)) - expect_equal(a, b, ignore_attr = TRUE) - a = pl$DataFrame(mtcars)$lazy()$var(10)$collect()$as_data_frame() b = data.frame(lapply(mtcars, var)) expect_true(all(a != b)) - a = pl$DataFrame(mtcars)$lazy()$std()$collect()$as_data_frame() - b = data.frame(lapply(mtcars, sd)) - expect_equal(a, b, ignore_attr = TRUE) - a = pl$DataFrame(mtcars)$lazy()$std(10)$collect()$as_data_frame() b = data.frame(lapply(mtcars, 
sd)) expect_true(all(a != b)) From 5351a368a112e853e03128ba1018552babe18c98 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 09:25:45 -0400 Subject: [PATCH 14/26] patrick 2 --- tests/testthat/test-groupby.R | 73 ++++++++++++----------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index 9eb6ca1d6..2b4442a56 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -51,51 +51,28 @@ test_that("groupby", { }) -test_that("methods without arguments", { - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$first()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, head, 1)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$last()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, tail, 1)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$max()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, max)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$mean()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, mean)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$median()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, median)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = 
pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$min()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, min)))) - b = b[order(b$cyl), colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$sum()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, sum)))) - b = b[, colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$var()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, var)))) - b = b[, colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - - a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$std()$as_data_frame() - b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, sd)))) - b = b[, colnames(b) != "cyl"] - expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) - -}) +make_cases <- function() { + tibble::tribble( + ~ .test_name, ~ pola, ~ base, + "max", "max", max, + "mean", "mean", mean, + "median", "median", median, + "max", "max", max, + "min", "min", min, + "std", "std", sd, + "sum", "sum", sum, + "var", "var", var, + "first", "first", function(x) head(x, 1), + "last", "last", function(x) tail(x, 1) + ) +} + +with_parameters_test_that( + "simple translations: eager", { + a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$first()$as_data_frame() + b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, head, 1)))) + b = b[order(b$cyl), colnames(b) != "cyl"] + expect_equal(a[order(a$cyl), 2:ncol(a)], b, ignore_attr = TRUE) + }, + .cases = make_cases() +) From 4933fcf54f389dc7d1ea49fe609368b3b648f50c Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 09:35:16 -0400 Subject: [PATCH 15/26] patrick 3 --- tests/testthat/test-dataframe.R | 2 +- tests/testthat/test-groupby.R | 2 +- 
tests/testthat/test-lazy.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 95cb624e8..054207bd3 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -441,7 +441,7 @@ make_cases <- function() { ) } -with_parameters_test_that( +patrick::with_parameters_test_that( "simple translations: eager", { a = pl$DataFrame(mtcars)[[pola]]()$as_data_frame() b = data.frame(lapply(mtcars, base)) diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index 2b4442a56..6085b4b98 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -67,7 +67,7 @@ make_cases <- function() { ) } -with_parameters_test_that( +patrick::with_parameters_test_that( "simple translations: eager", { a = pl$DataFrame(mtcars)$groupby(pl$col("cyl"))$first()$as_data_frame() b = as.data.frame(do.call(rbind, by(mtcars, mtcars$cyl, \(x) apply(x, 2, head, 1)))) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 357e89f4a..8e232eb01 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -116,7 +116,7 @@ make_cases <- function() { ) } -with_parameters_test_that( +patrick::with_parameters_test_that( "simple translations: lazy", { a = pl$DataFrame(mtcars)$lazy()[[pola]]()$collect()$as_data_frame() b = data.frame(lapply(mtcars, base)) From 96b98ebf242edf9f8e3b656533b8d845c18f7280 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Thu, 13 Apr 2023 09:49:49 -0400 Subject: [PATCH 16/26] patrick 4 --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 6672a00ec..bf6234f43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,6 +25,7 @@ Suggests: patrick, bit64, knitr, + tibble, rmarkdown Config/testthat/edition: 3 Collate: From d9380646423bbbf02f53a74f297788823c026ad9 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 13 Apr 2023 23:12:57 +0200 Subject: [PATCH 17/26] 
simplify basic LazyFrame methods --- R/extendr-wrappers.R | 9 ++++-- R/lazyframe__lazy.R | 54 ++++++++++++++++------------------ man/LazyFrame_first.Rd | 6 +++- man/LazyFrame_last.Rd | 6 +++- man/LazyFrame_max.Rd | 6 +++- man/LazyFrame_mean.Rd | 6 +++- man/LazyFrame_median.Rd | 6 +++- man/LazyFrame_min.Rd | 6 +++- man/LazyFrame_reverse.Rd | 6 +++- man/LazyFrame_sum.Rd | 5 +++- src/rust/src/lazy/dataframe.rs | 50 +++++++++++++++++-------------- 11 files changed, 100 insertions(+), 60 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 3cc6f5602..8708c4add 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1,4 +1,7 @@ # Generated by extendr: Do not edit by hand + +# nolint start + # # This file was created with the following call: # .Call("wrap__make_rpolars_wrappers", use_symbols = TRUE, package_name = "rpolars") @@ -818,12 +821,12 @@ LazyFrame$last <- function() .Call(wrap__LazyFrame__last, self) LazyFrame$max <- function() .Call(wrap__LazyFrame__max, self) +LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self) + LazyFrame$mean <- function() .Call(wrap__LazyFrame__mean, self) LazyFrame$median <- function() .Call(wrap__LazyFrame__median, self) -LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self) - LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self) LazyFrame$var <- function(ddof) .Call(wrap__LazyFrame__var, self, ddof) @@ -978,3 +981,5 @@ PolarsBackgroundHandle$is_exhausted <- function() .Call(wrap__PolarsBackgroundHa #' @export `[[.PolarsBackgroundHandle` <- `$.PolarsBackgroundHandle` + +# nolint end diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index e9ec7104d..75dba7ca6 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -215,67 +215,67 @@ LazyFrame_limit = function(n) { } #' @title First -#' @description Get the first row of the DataFrame. +#' @description Get the first row of the DataFrame. #' @keywords DataFrame #' @return A new `DataFrame` object with applied filter. 
+#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$first()$collect() -LazyFrame_first = function() { - unwrap(.pr$LazyFrame$first(self)) -} +LazyFrame_first = "use_extendr_wrapper" #' @title Last #' @description Aggregate the columns in the DataFrame to their maximum value. #' @keywords LazyFrame #' @return A new `LazyFrame` object with applied aggregation. +#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$last()$collect() -LazyFrame_last = function() { - unwrap(.pr$LazyFrame$last(self)) -} +LazyFrame_last = "use_extendr_wrapper" #' @title Max #' @description Aggregate the columns in the DataFrame to their maximum value. #' @keywords LazyFrame #' @return A new `LazyFrame` object with applied aggregation. +#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$max()$collect() -LazyFrame_max = function() { - unwrap(.pr$LazyFrame$max(self)) -} +LazyFrame_max = "use_extendr_wrapper" #' @title Mean #' @description Aggregate the columns in the DataFrame to their mean value. #' @keywords LazyFrame #' @return A new `LazyFrame` object with applied aggregation. +#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$mean()$collect() -LazyFrame_mean = function() { - unwrap(.pr$LazyFrame$mean(self)) -} +LazyFrame_mean = "use_extendr_wrapper" #' @title Median #' @description Aggregate the columns in the DataFrame to their median value. #' @keywords LazyFrame #' @return A new `LazyFrame` object with applied aggregation. +#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$median()$collect() -LazyFrame_median = function() { - unwrap(.pr$LazyFrame$median(self)) -} +LazyFrame_median = "use_extendr_wrapper" #' @title Min #' @description Aggregate the columns in the DataFrame to their minimum value. #' @keywords LazyFrame #' @return A new `LazyFrame` object with applied aggregation. 
+#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$min()$collect() -LazyFrame_min = function() { - unwrap(.pr$LazyFrame$min(self)) -} +LazyFrame_min = "use_extendr_wrapper" #' @title Sum #' @description Aggregate the columns of this DataFrame to their sum values. #' @keywords LazyFrame #' @return LazyFrame +#' @docType NULL +#' @format function #' @examples pl$DataFrame(mtcars)$lazy()$sum()$collect() -LazyFrame_sum = function() { - unwrap(.pr$LazyFrame$sum(self)) -} +LazyFrame_sum = "use_extendr_wrapper" #' @title Var #' @description Aggregate the columns of this LazyFrame to their variance values. @@ -284,7 +284,7 @@ LazyFrame_sum = function() { #' @return A new `LazyFrame` object with applied aggregation. #' @examples pl$DataFrame(mtcars)$lazy()$var()$collect() LazyFrame_var = function(ddof = 1) { - unwrap(.pr$LazyFrame$var(self, ddof)) + .pr$LazyFrame$var(self, ddof) } #' @title Std @@ -294,7 +294,7 @@ LazyFrame_var = function(ddof = 1) { #' @return A new `LazyFrame` object with applied aggregation. #' @examples pl$DataFrame(mtcars)$lazy()$std()$collect() LazyFrame_std = function(ddof = 1) { - unwrap(.pr$LazyFrame$std(self, ddof)) + .pr$LazyFrame$std(self, ddof) } @@ -303,9 +303,7 @@ LazyFrame_std = function(ddof = 1) { #' @keywords LazyFrame #' @return LazyFrame #' @examples pl$DataFrame(mtcars)$lazy()$reverse()$collect() -LazyFrame_reverse = function() { - unwrap(.pr$LazyFrame$reverse(self)) -} +LazyFrame_reverse = "use_extendr_wrapper" #' @title Slice #' @description Get a slice of this DataFrame. 
@@ -313,7 +311,7 @@ LazyFrame_reverse = function() { #' @return DataFrame #' @param offset integer #' @param length integer or NULL -#' @examples +#' @examples #' pl$DataFrame(mtcars)$lazy()$slice(2, 4)$collect() #' pl$DataFrame(mtcars)$lazy()$slice(30)$collect() #' mtcars[2:6,] diff --git a/man/LazyFrame_first.Rd b/man/LazyFrame_first.Rd index c3bba4cf2..6e61bfe93 100644 --- a/man/LazyFrame_first.Rd +++ b/man/LazyFrame_first.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_first} \alias{LazyFrame_first} \title{First} +\format{ +function +} \usage{ -LazyFrame_first() +LazyFrame_first } \value{ A new \code{DataFrame} object with applied filter. diff --git a/man/LazyFrame_last.Rd b/man/LazyFrame_last.Rd index 588255aee..8307f2824 100644 --- a/man/LazyFrame_last.Rd +++ b/man/LazyFrame_last.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_last} \alias{LazyFrame_last} \title{Last} +\format{ +function +} \usage{ -LazyFrame_last() +LazyFrame_last } \value{ A new \code{LazyFrame} object with applied aggregation. diff --git a/man/LazyFrame_max.Rd b/man/LazyFrame_max.Rd index 734817014..2dac2826f 100644 --- a/man/LazyFrame_max.Rd +++ b/man/LazyFrame_max.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_max} \alias{LazyFrame_max} \title{Max} +\format{ +function +} \usage{ -LazyFrame_max() +LazyFrame_max } \value{ A new \code{LazyFrame} object with applied aggregation. 
diff --git a/man/LazyFrame_mean.Rd b/man/LazyFrame_mean.Rd index e9eb40dd8..a0b97f2bf 100644 --- a/man/LazyFrame_mean.Rd +++ b/man/LazyFrame_mean.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_mean} \alias{LazyFrame_mean} \title{Mean} +\format{ +function +} \usage{ -LazyFrame_mean() +LazyFrame_mean } \value{ A new \code{LazyFrame} object with applied aggregation. diff --git a/man/LazyFrame_median.Rd b/man/LazyFrame_median.Rd index 115f4e3fe..09f01b465 100644 --- a/man/LazyFrame_median.Rd +++ b/man/LazyFrame_median.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_median} \alias{LazyFrame_median} \title{Median} +\format{ +function +} \usage{ -LazyFrame_median() +LazyFrame_median } \value{ A new \code{LazyFrame} object with applied aggregation. diff --git a/man/LazyFrame_min.Rd b/man/LazyFrame_min.Rd index 85ee1a29e..c89ccb649 100644 --- a/man/LazyFrame_min.Rd +++ b/man/LazyFrame_min.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_min} \alias{LazyFrame_min} \title{Min} +\format{ +function +} \usage{ -LazyFrame_min() +LazyFrame_min } \value{ A new \code{LazyFrame} object with applied aggregation. diff --git a/man/LazyFrame_reverse.Rd b/man/LazyFrame_reverse.Rd index 43194784e..9a68ecf08 100644 --- a/man/LazyFrame_reverse.Rd +++ b/man/LazyFrame_reverse.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R +\docType{data} \name{LazyFrame_reverse} \alias{LazyFrame_reverse} \title{Reverse} +\format{ +An object of class \code{character} of length 1. 
+} \usage{ -LazyFrame_reverse() +LazyFrame_reverse } \value{ LazyFrame diff --git a/man/LazyFrame_sum.Rd b/man/LazyFrame_sum.Rd index 065b90af3..7b3d9c73b 100644 --- a/man/LazyFrame_sum.Rd +++ b/man/LazyFrame_sum.Rd @@ -3,8 +3,11 @@ \name{LazyFrame_sum} \alias{LazyFrame_sum} \title{Sum} +\format{ +function +} \usage{ -LazyFrame_sum() +LazyFrame_sum } \value{ LazyFrame diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 9b237d863..0531d1d6b 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -1,10 +1,10 @@ use crate::concurrent::{handle_thread_r_requests, PolarsBackgroundHandle}; use crate::lazy::dsl::*; use crate::rdatatype::new_join_type; +use crate::robj_to; use crate::utils::r_result_list; use crate::utils::try_f64_into_u32; use crate::utils::try_f64_into_usize; -use crate::robj_to; use extendr_api::prelude::*; use polars::prelude as pl; @@ -14,6 +14,12 @@ use std::result::Result; #[derive(Clone)] pub struct LazyFrame(pub pl::LazyFrame); +impl From for LazyFrame { + fn from(item: pl::LazyFrame) -> Self { + LazyFrame(item) + } +} + #[extendr] impl LazyFrame { fn print(&self) -> Self { @@ -50,45 +56,45 @@ impl LazyFrame { }); r_result_list(result) } - - fn first(&self) -> Result { - Ok(LazyFrame(self.0.clone().first())) + + fn first(&self) -> Self { + self.0.clone().first().into() } - fn last(&self) -> Result { - Ok(LazyFrame(self.0.clone().last())) + fn last(&self) -> Self { + self.0.clone().last().into() } - fn max(&self) -> Result { - Ok(LazyFrame(self.0.clone().max())) + fn max(&self) -> Self { + self.0.clone().max().into() } - fn mean(&self) -> Result { - Ok(LazyFrame(self.0.clone().mean())) + fn min(&self) -> Self { + self.0.clone().min().into() } - fn median(&self) -> Result { - Ok(LazyFrame(self.0.clone().median())) + fn mean(&self) -> Self { + self.0.clone().mean().into() } - fn min(&self) -> Result { - Ok(LazyFrame(self.0.clone().min())) + fn median(&self) -> Self { + 
self.0.clone().median().into() } - fn sum(&self) -> Result { - Ok(LazyFrame(self.0.clone().sum())) + fn sum(&self) -> Self { + self.0.clone().sum().into() } - fn var(&self, ddof: u8) -> Result { - Ok(LazyFrame(self.0.clone().var(ddof))) + fn var(&self, ddof: u8) -> Self { + self.0.clone().var(ddof).into() } - fn std(&self, ddof: u8) -> Result { - Ok(LazyFrame(self.0.clone().std(ddof))) + fn std(&self, ddof: u8) -> Self { + self.0.clone().std(ddof).into() } - fn reverse(&self) -> Result { - Ok(LazyFrame(self.0.clone().reverse())) + fn reverse(&self) -> Self { + self.0.clone().reverse().into() } fn slice(&self, offset: Robj, length: Robj) -> Result { From d9865716ad79f6d6b9e64d0033f2f45ab50ca125 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 13 Apr 2023 23:16:23 +0200 Subject: [PATCH 18/26] roxydocs for previous commit --- man/LazyFrame_first.Rd | 1 - man/LazyFrame_last.Rd | 1 - man/LazyFrame_max.Rd | 1 - man/LazyFrame_mean.Rd | 1 - man/LazyFrame_median.Rd | 1 - man/LazyFrame_min.Rd | 1 - 6 files changed, 6 deletions(-) diff --git a/man/LazyFrame_first.Rd b/man/LazyFrame_first.Rd index 6e61bfe93..475be556e 100644 --- a/man/LazyFrame_first.Rd +++ b/man/LazyFrame_first.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} \name{LazyFrame_first} \alias{LazyFrame_first} \title{First} diff --git a/man/LazyFrame_last.Rd b/man/LazyFrame_last.Rd index 8307f2824..76e797984 100644 --- a/man/LazyFrame_last.Rd +++ b/man/LazyFrame_last.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} \name{LazyFrame_last} \alias{LazyFrame_last} \title{Last} diff --git a/man/LazyFrame_max.Rd b/man/LazyFrame_max.Rd index 2dac2826f..7424c50d4 100644 --- a/man/LazyFrame_max.Rd +++ b/man/LazyFrame_max.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} 
\name{LazyFrame_max} \alias{LazyFrame_max} \title{Max} diff --git a/man/LazyFrame_mean.Rd b/man/LazyFrame_mean.Rd index a0b97f2bf..f477a2bc8 100644 --- a/man/LazyFrame_mean.Rd +++ b/man/LazyFrame_mean.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} \name{LazyFrame_mean} \alias{LazyFrame_mean} \title{Mean} diff --git a/man/LazyFrame_median.Rd b/man/LazyFrame_median.Rd index 09f01b465..4f25eac37 100644 --- a/man/LazyFrame_median.Rd +++ b/man/LazyFrame_median.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} \name{LazyFrame_median} \alias{LazyFrame_median} \title{Median} diff --git a/man/LazyFrame_min.Rd b/man/LazyFrame_min.Rd index c89ccb649..56fd28527 100644 --- a/man/LazyFrame_min.Rd +++ b/man/LazyFrame_min.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazyframe__lazy.R -\docType{data} \name{LazyFrame_min} \alias{LazyFrame_min} \title{Min} From 960e0ed08db83b954cd67f36a49c743ffa0a259b Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 13 Apr 2023 23:20:17 +0200 Subject: [PATCH 19/26] add newline eof --- R/dataframe__frame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 7042d6279..a2cb54a56 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -993,4 +993,4 @@ DataFrame_null_count <- function() { #' pl$DataFrame(mtcars)$estimated_size() DataFrame_estimated_size <- function() { .pr$DataFrame$estimated_size(self) -} \ No newline at end of file +} From 943171ded80c32ef60c78c3b197c6f0c97836855 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 13 Apr 2023 23:52:40 +0200 Subject: [PATCH 20/26] simplify DataFrame .null_count .estimated_size --- R/dataframe__frame.R | 22 +++++++++++----------- man/DataFrame_estimated_size.Rd | 5 ++++- man/DataFrame_null_count.Rd | 5 ++++- 
src/rust/src/rdataframe/mod.rs | 16 +++++++++++----- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index a2cb54a56..f152a8d57 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -866,7 +866,7 @@ DataFrame_unnest = function(names = NULL) { #' @title First -#' @description Get the first row of the DataFrame. +#' @description Get the first row of the DataFrame. #' @keywords DataFrame #' @return A new `DataFrame` object with applied filter. #' @examples pl$DataFrame(mtcars)$first() @@ -875,7 +875,7 @@ DataFrame_first = function() { } #' @title Last -#' @description Get the last row of the DataFrame. +#' @description Get the last row of the DataFrame. #' @keywords DataFrame #' @return A new `DataFrame` object with applied filter. #' @examples pl$DataFrame(mtcars)$last() @@ -964,7 +964,7 @@ DataFrame_reverse = function() { #' @return LazyFrame #' @param offset integer #' @param length integer or NULL -#' @examples +#' @examples #' pl$DataFrame(mtcars)$slice(2, 4) #' mtcars[2:6,] DataFrame_slice = function(offset, length = NULL) { @@ -976,21 +976,21 @@ DataFrame_slice = function(offset, length = NULL) { #' @description Create a new DataFrame that shows the null counts per column. #' @keywords DataFrame #' @return DataFrame -#' @examples +#' @docType NULL +#' @format function +#' @examples #' x = mtcars #' x[1, 2:3] = NA #' pl$DataFrame(x)$null_count() -DataFrame_null_count <- function() { - .pr$DataFrame$null_count(self) -} +DataFrame_null_count = "use_extendr_wrapper" #' @title Estimated size #' @description Return an estimation of the total (heap) allocated size of the DataFrame. 
#' @keywords DataFrame #' @return Bytes -#' @examples +#' @docType NULL +#' @format function +#' @examples #' pl$DataFrame(mtcars)$estimated_size() -DataFrame_estimated_size <- function() { - .pr$DataFrame$estimated_size(self) -} +DataFrame_estimated_size = "use_extendr_wrapper" diff --git a/man/DataFrame_estimated_size.Rd b/man/DataFrame_estimated_size.Rd index 458f45be0..3b8a7d483 100644 --- a/man/DataFrame_estimated_size.Rd +++ b/man/DataFrame_estimated_size.Rd @@ -3,8 +3,11 @@ \name{DataFrame_estimated_size} \alias{DataFrame_estimated_size} \title{Estimated size} +\format{ +function +} \usage{ -DataFrame_estimated_size() +DataFrame_estimated_size } \value{ Bytes diff --git a/man/DataFrame_null_count.Rd b/man/DataFrame_null_count.Rd index 7659cfbeb..e3c124396 100644 --- a/man/DataFrame_null_count.Rd +++ b/man/DataFrame_null_count.Rd @@ -3,8 +3,11 @@ \name{DataFrame_null_count} \alias{DataFrame_null_count} \title{Null count} +\format{ +function +} \usage{ -DataFrame_null_count() +DataFrame_null_count } \value{ DataFrame diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index ae6fdc6fd..7e2de91b4 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -71,6 +71,12 @@ impl Iterator for OwnedDataFrameIterator { #[derive(Debug, Clone)] pub struct DataFrame(pub pl::DataFrame); +impl From for DataFrame { + fn from(item: pl::DataFrame) -> Self { + DataFrame(item) + } +} + #[extendr] impl DataFrame { pub fn shape(&self) -> Robj { @@ -317,13 +323,13 @@ impl DataFrame { pub fn from_arrow_record_batches(rbr: Robj) -> Result { Ok(DataFrame(crate::arrow_interop::to_rust::to_rust_df(rbr)?)) } - - pub fn estimated_size(&self) -> Result { - Ok(self.0.clone().estimated_size()) + + pub fn estimated_size(&self) -> f64 { + self.0.clone().estimated_size() as f64 } - pub fn null_count(&self) -> Result { - Ok(DataFrame(self.0.clone().null_count())) + pub fn null_count(&self) -> Self { + self.0.clone().null_count().into() } } use 
crate::utils::wrappers::null_to_opt; From 0b0fc2607d9fc6ded660e1e851d537b075d522c2 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 08:49:33 -0400 Subject: [PATCH 21/26] test .estimated_size() --- tests/testthat/test-dataframe.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 054207bd3..e19d36d9d 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -470,6 +470,9 @@ test_that("simple translations", { a = pl$DataFrame(mtcars)$slice(30)$as_data_frame() b = tail(mtcars, 2) expect_equal(a, b, ignore_attr = TRUE) + + a = pl$DataFrame(mtcars)$estimated_size() + expect_equal(a, 2816, tolerance = .1) }) From 76b696406d3edf36fbd957488b7c781843af3f86 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 09:00:24 -0400 Subject: [PATCH 22/26] NEWS --- NEWS.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS.md b/NEWS.md index 8bb604438..a275ca09f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,13 @@ ## What's changed - Revamped docs that includes a new introductory vignette (#81 @grantmcdermott) +- Misc documentation improvements +- New methods implemented for DataFrame, LazyFrame, and GroupBy objects: min, max, mean, median, sum, std, var, first, last, head, tail, reverse, slice, null_count, estimated_size (#105 @vincentarelbundock) + +## New Contributors + +- @grantmcdermott made their first contribution in #81 +- @vincentarelbundock made their first contribution in #105 # rpolars v0.4.6 From 93af6d63560e8de2f3abec15272d7df6961a0e0b Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 09:07:18 -0400 Subject: [PATCH 23/26] Revert "NEWS" This reverts commit 76b696406d3edf36fbd957488b7c781843af3f86. 
--- NEWS.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index a275ca09f..8bb604438 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,13 +3,6 @@ ## What's changed - Revamped docs that includes a new introductory vignette (#81 @grantmcdermott) -- Misc documentation improvements -- New methods implemented for DataFrame, LazyFrame, and GroupBy objects: min, max, mean, median, sum, std, var, first, last, head, tail, reverse, slice, null_count, estimated_size (#105 @vincentarelbundock) - -## New Contributors - -- @grantmcdermott made their first contribution in #81 -- @vincentarelbundock made their first contribution in #105 # rpolars v0.4.6 From 6c11a636f088c0ca52b0cddf21f9c2c5ba3c90e7 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 09:10:54 -0400 Subject: [PATCH 24/26] NEWS 2 --- NEWS.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8bb604438..84b60298c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,20 @@ -# rpolars v0.4.6.9000 (development version) +# polars (development version) + +## BREAKING CHANGE + +- The package name was changed from `rpolars` to `polars`. (#84) + +## What's changed + +- New methods implemented for DataFrame, LazyFrame, and GroupBy objects: min, max, mean, median, sum, std, var, first, last, head, tail, reverse, slice, null_count, estimated_size (#105 @vincentarelbundock) + + +# rpolars v0.4.7 ## What's changed - Revamped docs that includes a new introductory vignette (#81 @grantmcdermott) +- Misc documentation improvements # rpolars v0.4.6 From 8ce30ff02e23fa38b4ea7da3660cd6854bb4d32c Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 09:14:01 -0400 Subject: [PATCH 25/26] Revert "NEWS 2" This reverts commit 6c11a636f088c0ca52b0cddf21f9c2c5ba3c90e7. 
--- NEWS.md | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index 84b60298c..8bb604438 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,20 +1,8 @@ -# polars (development version) - -## BREAKING CHANGE - -- The package name was changed from `rpolars` to `polars`. (#84) - -## What's changed - -- New methods implemented for DataFrame, LazyFrame, and GroupBy objects: min, max, mean, median, sum, std, var, first, last, head, tail, reverse, slice, null_count, estimated_size (#105 @vincentarelbundock) - - -# rpolars v0.4.7 +# rpolars v0.4.6.9000 (development version) ## What's changed - Revamped docs that includes a new introductory vignette (#81 @grantmcdermott) -- Misc documentation improvements # rpolars v0.4.6 From 512e8065b582a3f89ba284385c058d1ef9e77bf4 Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Fri, 14 Apr 2023 11:24:51 -0400 Subject: [PATCH 26/26] test .null_count() on DataFrame and GroupBy --- tests/testthat/test-dataframe.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index e19d36d9d..687822ce5 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -476,6 +476,21 @@ test_that("simple translations", { }) +test_that("null_count 64bit", { + skip_if_not_installed("bit64") + suppressPackageStartupMessages(library("bit64", quietly = TRUE)) + tmp = mtcars + tmp[1:2, 1:2] = NA + tmp[5, 3] = NA + a = pl$DataFrame(tmp)$null_count()$as_data_frame() + a = sapply(a, as.integer) + b = sapply(tmp, function(x) sum(is.na(x))) + expect_equal(a, b) + + a = pl$DataFrame(tmp)$groupby("vs")$null_count()$as_data_frame() + expect_equal(dim(a), c(2, 11)) +}) + test_that("tail", { a = as.data.frame(pl$DataFrame(mtcars)$tail(6)) b = tail(mtcars)