From 1000df8ef39ea6a38d16e6be52c5462eaf8abd94 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:09:13 +0100 Subject: [PATCH 1/2] init --- NEWS.md | 1 + R/datatype.R | 20 ++++++++++++++++++++ R/extendr-wrappers.R | 2 +- man/DataType_Duration.Rd | 27 +++++++++++++++++++++++++++ man/pl_pl.Rd | 2 +- src/rust/src/rdatatype.rs | 6 ++++-- tests/testthat/test-datatype.R | 15 +++++++++++++++ 7 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 man/DataType_Duration.Rd diff --git a/NEWS.md b/NEWS.md index 391598f9d..d31562bda 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,7 @@ graphviz dot syntax (#928). - Argument `ambiguous` can now take the value `"null"` to convert ambigous datetimes to null values (#937). +- Export the `Duration` datatype (#955). ## Polars R Package 0.15.1 diff --git a/R/datatype.R b/R/datatype.R index efdc320ca..cc37fec07 100644 --- a/R/datatype.R +++ b/R/datatype.R @@ -146,6 +146,7 @@ DataType_constructors = function() { Array = DataType_Array, Categorical = DataType_Categorical, Datetime = DataType_Datetime, + Duration = DataType_Duration, List = DataType_List, Struct = DataType_Struct ) @@ -185,6 +186,25 @@ DataType_Datetime = function(time_unit = "us", time_zone = NULL) { unwrap(.pr$DataType$new_datetime(time_unit, time_zone)) } +#' Data type representing a time duration +#' +#' @inheritParams DataType_Datetime +#' +#' @return Duration DataType +#' +#' @examples +#' test = pl$DataFrame( +#' a = 1:2, +#' b = c("a", "b"), +#' c = pl$duration(weeks = c(1, 2), days = c(0, 2)) +#' ) +#' +#' # select all columns of type "duration" +#' test$select(pl$col(pl$Duration())) +DataType_Duration = function(time_unit = "us") { + unwrap(.pr$DataType$new_duration(time_unit)) +} + #' Create Struct DataType #' #' Struct DataType Constructor diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 4ca637b41..34135c42a 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -256,7 +256,7 @@ RPolarsDataType$new_categorical <- function(ordering) .Call(wrap__RPolarsDataTyp RPolarsDataType$new_datetime <- function(tu, tz) .Call(wrap__RPolarsDataType__new_datetime, tu, tz) -RPolarsDataType$new_duration <- function() .Call(wrap__RPolarsDataType__new_duration) +RPolarsDataType$new_duration <- function(tu) .Call(wrap__RPolarsDataType__new_duration, tu) RPolarsDataType$new_list <- function(inner) .Call(wrap__RPolarsDataType__new_list, inner) diff --git a/man/DataType_Duration.Rd b/man/DataType_Duration.Rd new file mode 100644 index 000000000..bf3b19451 --- /dev/null +++ b/man/DataType_Duration.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datatype.R +\name{DataType_Duration} +\alias{DataType_Duration} +\title{Data type representing a time duration} +\usage{ +DataType_Duration(time_unit = "us") +} +\arguments{ +\item{time_unit}{Unit of time. One of \code{"ms"}, \code{"us"} (default) or \code{"ns"}.} +} +\value{ +Duration DataType +} +\description{ +Data type representing a time duration +} +\examples{ +test = pl$DataFrame( + a = 1:2, + b = c("a", "b"), + c = pl$duration(weeks = c(1, 2), days = c(0, 2)) +) + +# select all columns of type "duration" +test$select(pl$col(pl$Duration())) +} diff --git a/man/pl_pl.Rd b/man/pl_pl.Rd index ed7dda13a..a3ffcfd66 100644 --- a/man/pl_pl.Rd +++ b/man/pl_pl.Rd @@ -6,7 +6,7 @@ \alias{pl} \title{The complete polars public API.} \format{ -An object of class \code{pl_polars_env} (inherits from \code{environment}) of length 98. +An object of class \code{pl_polars_env} (inherits from \code{environment}) of length 99. } \usage{ pl diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 9b8e2b80d..70d583505 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -96,8 +96,10 @@ impl RPolarsDataType { .map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, null_to_opt(tz)))) } - pub fn new_duration() -> RPolarsDataType { - todo!("duration not implemented") + pub fn new_duration(tu: Robj) -> RResult { + Ok(RPolarsDataType(pl::DataType::Duration(robj_to!( + timeunit, tu + )?))) } pub fn new_list(inner: &RPolarsDataType) -> RPolarsDataType { diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R index fec6c6fe3..4b73dd0e8 100644 --- a/tests/testthat/test-datatype.R +++ b/tests/testthat/test-datatype.R @@ -141,3 +141,18 @@ test_that("is_polars_dtype works", { expect_false(is_polars_dtype(pl$Unknown)) expect_true(is_polars_dtype(pl$Unknown, include_unknown = TRUE)) }) + +test_that("pl$Duration", { + test = pl$DataFrame( + a = 1:2, + b = c("a", "b"), + c = pl$duration(weeks = c(1, 2), days = c(0, 2)) + ) + + # cannot test conversion of duration from polars to R yet, only thing we can + # test is that selection on this dtype is correct + expect_equal( + test$select(pl$col(pl$Duration()))$width, + 1 + ) +}) From 81bd377c29f2ec8c37aa9c93c7bc8e1c876fff00 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:09:32 +0100 Subject: [PATCH 2/2] snapshot --- tests/testthat/_snaps/after-wrappers.md | 93 +++++++++++++------------ 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index c20fcf90c..c262f2031 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -6,52 +6,53 @@ [1] "Array" "Binary" [3] "Boolean" "Categorical" [5] "DataFrame" "Date" - [7] "Datetime" "Field" - [9] "Float32" "Float64" - [11] "Int16" "Int32" - [13] "Int64" "Int8" - [15] "LazyFrame" "List" - [17] "Null" "PTime" - [19] "SQLContext" "Series" - [21] "String" "Struct" - [23] "Time" "UInt16" - [25] "UInt32" "UInt64" - [27] "UInt8" "Unknown" - [29] "Utf8" "all" - [31] "all_horizontal" "any_horizontal" - [33] "approx_n_unique" "arg_where" - [35] "class_names" "coalesce" - [37] "col" "concat" - [39] "concat_list" "concat_str" - [41] "corr" "count" - [43] "cov" "date" - [45] "date_range" "datetime" - [47] "disable_string_cache" "dtypes" - [49] "duration" "element" - [51] "enable_string_cache" "first" - [53] "fold" "from_epoch" - [55] "get_global_rpool_cap" "head" - [57] "implode" "is_schema" - [59] "last" "len" - [61] "lit" "max" - [63] "max_horizontal" "mean" - [65] "median" "mem_address" - [67] "min" "min_horizontal" - [69] "n_unique" "numeric_dtypes" - [71] "raw_list" "read_csv" - [73] "read_ndjson" "read_parquet" - [75] "reduce" "rolling_corr" - [77] "rolling_cov" "same_outer_dt" - [79] "scan_csv" "scan_ipc" - [81] "scan_ndjson" "scan_parquet" - [83] "select" "set_global_rpool_cap" - [85] "show_all_public_functions" "show_all_public_methods" - [87] "std" "struct" - [89] "sum" "sum_horizontal" - [91] "tail" "thread_pool_size" - [93] "threadpool_size" "time" - [95] "using_string_cache" "var" - [97] "when" "with_string_cache" + [7] "Datetime" "Duration" + [9] "Field" "Float32" + [11] "Float64" "Int16" + [13] "Int32" "Int64" + [15] "Int8" "LazyFrame" + [17] "List" "Null" + [19] "PTime" "SQLContext" + [21] "Series" "String" + [23] "Struct" "Time" + [25] "UInt16" "UInt32" + [27] "UInt64" "UInt8" + [29] "Unknown" "Utf8" + [31] "all" "all_horizontal" + [33] "any_horizontal" "approx_n_unique" + [35] "arg_where" "class_names" + [37] "coalesce" "col" + [39] "concat" "concat_list" + [41] "concat_str" "corr" + [43] "count" "cov" + [45] "date" "date_range" + [47] "datetime" "disable_string_cache" + [49] "dtypes" "duration" + [51] "element" "enable_string_cache" + [53] "first" "fold" + [55] "from_epoch" "get_global_rpool_cap" + [57] "head" "implode" + [59] "is_schema" "last" + [61] "len" "lit" + [63] "max" "max_horizontal" + [65] "mean" "median" + [67] "mem_address" "min" + [69] "min_horizontal" "n_unique" + [71] "numeric_dtypes" "raw_list" + [73] "read_csv" "read_ndjson" + [75] "read_parquet" "reduce" + [77] "rolling_corr" "rolling_cov" + [79] "same_outer_dt" "scan_csv" + [81] "scan_ipc" "scan_ndjson" + [83] "scan_parquet" "select" + [85] "set_global_rpool_cap" "show_all_public_functions" + [87] "show_all_public_methods" "std" + [89] "struct" "sum" + [91] "sum_horizontal" "tail" + [93] "thread_pool_size" "threadpool_size" + [95] "time" "using_string_cache" + [97] "var" "when" + [99] "with_string_cache" ---