diff --git a/NEWS.md b/NEWS.md index 391598f9d..2b1f78492 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ ### Breaking changes due to Rust-polars update - rust-polars is updated to 0.38.3 (#937). - - New argument `non_existent` in `$replace_time_zone()` to specify what should + - New argument `non_existent` in `$dt$replace_time_zone()` to specify what should happen when a datetime doesn't exist. - In rolling aggregation functions (such as `$rolling_mean()`), the default value of argument `closed` now is `NULL`. Using `closed` with a fixed @@ -20,6 +20,10 @@ - In `pl$read_*` and `pl$scan_*` functions, the first argument is now `source`. - In `$write_*` functions, the first argument is now `file`. - In `$sink_*` functions, the first argument is now `path`. + - In `$dt$convert_time_zone()` and `$dt$replace_time_zone()`, the `tz` argument + is renamed to `time_zone` (#944). + - In `$dt$replace_time_zone()`, all arguments except `time_zone` must be named + arguments (#944). - The argument `columns` in `$drop()` is removed. `$drop()` now accepts several character scalars, such as `$drop("a", "b", "c")` (#912). - In `pl$col()`, the `name` argument is removed, and the `...` argument no longer diff --git a/R/as_polars.R b/R/as_polars.R index 7c5b9ca1c..4ecacae14 100644 --- a/R/as_polars.R +++ b/R/as_polars.R @@ -498,16 +498,16 @@ as_polars_series.clock_sys_time = function(x, name = NULL, ...) { #' @rdname as_polars_series #' @export as_polars_series.clock_zoned_time = function(x, name = NULL, ...) { - tz = clock::zoned_time_zone(x) + time_zone = clock::zoned_time_zone(x) - if (isTRUE(tz == "")) { + if (isTRUE(time_zone == "")) { # https://github.com/r-lib/clock/issues/366 - tz = Sys.timezone() + time_zone = Sys.timezone() } - if (!isTRUE(tz %in% base::OlsonNames())) { + if (!isTRUE(time_zone %in% base::OlsonNames())) { sprintf( "The time zone '%s' is not supported in polars. See `base::OlsonNames()` for supported time zones.", - tz + time_zone ) |> Err_plain() |> unwrap("in as_polars_series():") @@ -517,5 +517,5 @@ as_polars_series.clock_zoned_time = function(x, name = NULL, ...) { clock::as_naive_time(x), name = name, ... - )$dt$replace_time_zone(tz) + )$dt$replace_time_zone(time_zone) } diff --git a/R/expr__datetime.R b/R/expr__datetime.R index e9695b03f..0ae3b6b87 100644 --- a/R/expr__datetime.R +++ b/R/expr__datetime.R @@ -641,68 +641,31 @@ ExprDT_cast_time_unit = function(tu = c("ns", "us", "ms")) { unwrap() } -#' With Time Zone -#' @description Set time zone for a Series of type Datetime. -#' Use to change time zone annotation, but keep the corresponding global timepoint. +#' Convert to given time zone for an expression of type Datetime. #' -#' @param tz String time zone from base::OlsonNames() +#' If converting from a time-zone-naive datetime, +#' then conversion will happen as if converting from UTC, +#' regardless of your system’s time zone. +#' @param time_zone String time zone from [base::OlsonNames()] #' @return Expr of i64 -#' @keywords ExprDT -#' @details corresponds to in R manually modifying the tzone attribute of POSIXt objects -#' @aliases (Expr)$dt$convert_time_zone #' @examples #' df = pl$DataFrame( -#' london_timezone = pl$date_range( +#' date = pl$date_range( #' as.POSIXct("2020-03-01", tz = "UTC"), -#' as.POSIXct("2020-07-01", tz = "UTC"), -#' "1mo", -#' time_zone = "UTC" -#' )$dt$convert_time_zone("Europe/London") +#' as.POSIXct("2020-05-01", tz = "UTC"), +#' "1mo" +#' ) #' ) #' #' df$select( -#' "london_timezone", -#' London_to_Amsterdam = pl$col( -#' "london_timezone" -#' )$dt$replace_time_zone("Europe/Amsterdam") +#' "date", +#' London = pl$col("date")$dt$convert_time_zone("Europe/London") #' ) -#' -#' # You can use `ambiguous` to deal with ambiguous datetimes: -#' dates = c( -#' "2018-10-28 01:30", -#' "2018-10-28 02:00", -#' "2018-10-28 02:30", -#' "2018-10-28 02:00" -#' ) -#' -#' df = pl$DataFrame( -#' ts = pl$Series(dates)$str$strptime(pl$Datetime("us"), "%F %H:%M"), -#' ambiguous = c("earliest", "earliest", "latest", "latest") -#' ) -#' -#' df$with_columns( -#' ts_localized = pl$col("ts")$dt$replace_time_zone( -#' "Europe/Brussels", -#' ambiguous = pl$col("ambiguous") -#' ) -#' ) -#' -#' # Polars Datetime type without a time zone will be converted to R -#' # with respect to the session time zone. If ambiguous times are present -#' # an error will be raised. It is recommended to add a time zone before -#' # converting to R. -#' s_without_tz = pl$Series(dates)$str$strptime(pl$Datetime("us"), "%F %H:%M") -#' s_without_tz -#' -#' s_with_tz = s_without_tz$dt$replace_time_zone("UTC") -#' s_with_tz -#' -#' as.vector(s_with_tz) -ExprDT_convert_time_zone = function(tz) { - check_tz_to_result(tz) |> - map(\(valid_tz) .pr$Expr$dt_convert_time_zone(self, valid_tz)) |> +ExprDT_convert_time_zone = function(time_zone) { + check_tz_to_result(time_zone) |> + and_then(\(valid_tz) .pr$Expr$dt_convert_time_zone(self, valid_tz)) |> map_err(\(err) paste("in dt$convert_time_zone:", err)) |> - unwrap() + unwrap("in $convert_time_zone():") } #' Replace time zone @@ -712,7 +675,8 @@ ExprDT_convert_time_zone = function(tz) { #' underlying timestamp. Use to correct a wrong time zone annotation. This will #' change the corresponding global timepoint. #' -#' @param tz NULL or string time zone from [base::OlsonNames()] +#' @param time_zone `NULL` or string time zone from [base::OlsonNames()] +#' @param ... Ignored. #' @param ambiguous Determine how to deal with ambiguous datetimes: #' * `"raise"` (default): throw an error #' * `"earliest"`: use the earliest datetime @@ -725,29 +689,43 @@ ExprDT_convert_time_zone = function(tz) { #' @keywords ExprDT #' @aliases (Expr)$dt$replace_time_zone #' @examples -#' df_1 = pl$DataFrame(x = as.POSIXct("2009-08-07 00:00:01", tz = "America/New_York")) +#' df1 = pl$DataFrame( +#' london_timezone = pl$date_range( +#' as.POSIXct("2020-03-01", tz = "UTC"), +#' as.POSIXct("2020-07-01", tz = "UTC"), +#' "1mo" +#' )$dt$convert_time_zone("Europe/London") +#' ) #' -#' df_1$with_columns( -#' pl$col("x")$dt$replace_time_zone("UTC")$alias("utc"), -#' pl$col("x")$dt$replace_time_zone("Europe/Amsterdam")$alias("cest") +#' df1$select( +#' "london_timezone", +#' London_to_Amsterdam = pl$col("london_timezone")$dt$replace_time_zone("Europe/Amsterdam") #' ) #' -#' # You can use ambiguous to deal with ambiguous datetimes -#' df_2 = pl$DataFrame( -#' x = seq( -#' as.POSIXct("2018-10-28 01:30", tz = "UTC"), -#' as.POSIXct("2018-10-28 02:30", tz = "UTC"), -#' by = "30 min" -#' ) +#' # You can use `ambiguous` to deal with ambiguous datetimes: +#' dates = c( +#' "2018-10-28 01:30", +#' "2018-10-28 02:00", +#' "2018-10-28 02:30", +#' "2018-10-28 02:00" +#' ) +#' df2 = pl$DataFrame( +#' ts = as_polars_series(dates)$str$strptime(pl$Datetime("us")), +#' ambiguous = c("earliest", "earliest", "latest", "latest") #' ) #' -#' df_2$with_columns( -#' pl$col("x")$dt$replace_time_zone("Europe/Brussels", "earliest")$alias("earliest"), -#' pl$col("x")$dt$replace_time_zone("Europe/Brussels", "latest")$alias("latest"), -#' pl$col("x")$dt$replace_time_zone("Europe/Brussels", "null")$alias("null") +#' df2$with_columns( +#' ts_localized = pl$col("ts")$dt$replace_time_zone( +#' "Europe/Brussels", +#' ambiguous = pl$col("ambiguous") +#' ) #' ) -ExprDT_replace_time_zone = function(tz, ambiguous = "raise", non_existent = "raise") { - check_tz_to_result(tz) |> +ExprDT_replace_time_zone = function( + time_zone, + ..., + ambiguous = "raise", + non_existent = "raise") { + check_tz_to_result(time_zone) |> and_then(\(valid_tz) { .pr$Expr$dt_replace_time_zone(self, valid_tz, ambiguous, non_existent) }) |> diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 4ca637b41..c4b666128 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -778,9 +778,9 @@ RPolarsExpr$dt_with_time_unit <- function(tu) .Call(wrap__RPolarsExpr__dt_with_t RPolarsExpr$dt_cast_time_unit <- function(tu) .Call(wrap__RPolarsExpr__dt_cast_time_unit, self, tu) -RPolarsExpr$dt_convert_time_zone <- function(tz) .Call(wrap__RPolarsExpr__dt_convert_time_zone, self, tz) +RPolarsExpr$dt_convert_time_zone <- function(time_zone) .Call(wrap__RPolarsExpr__dt_convert_time_zone, self, time_zone) -RPolarsExpr$dt_replace_time_zone <- function(tz, ambiguous, non_existent) .Call(wrap__RPolarsExpr__dt_replace_time_zone, self, tz, ambiguous, non_existent) +RPolarsExpr$dt_replace_time_zone <- function(time_zone, ambiguous, non_existent) .Call(wrap__RPolarsExpr__dt_replace_time_zone, self, time_zone, ambiguous, non_existent) RPolarsExpr$dt_total_days <- function() .Call(wrap__RPolarsExpr__dt_total_days, self) diff --git a/man/ExprDT_convert_time_zone.Rd b/man/ExprDT_convert_time_zone.Rd index 27fae6770..3914e8690 100644 --- a/man/ExprDT_convert_time_zone.Rd +++ b/man/ExprDT_convert_time_zone.Rd @@ -2,71 +2,32 @@ % Please edit documentation in R/expr__datetime.R \name{ExprDT_convert_time_zone} \alias{ExprDT_convert_time_zone} -\alias{(Expr)$dt$convert_time_zone} -\title{With Time Zone} +\title{Convert to given time zone for an expression of type Datetime.} \usage{ -ExprDT_convert_time_zone(tz) +ExprDT_convert_time_zone(time_zone) } \arguments{ -\item{tz}{String time zone from base::OlsonNames()} +\item{time_zone}{String time zone from \code{\link[base:timezones]{base::OlsonNames()}}} } \value{ Expr of i64 } \description{ -Set time zone for a Series of type Datetime. -Use to change time zone annotation, but keep the corresponding global timepoint. -} -\details{ -corresponds to in R manually modifying the tzone attribute of POSIXt objects +If converting from a time-zone-naive datetime, +then conversion will happen as if converting from UTC, +regardless of your system’s time zone. } \examples{ df = pl$DataFrame( - london_timezone = pl$date_range( + date = pl$date_range( as.POSIXct("2020-03-01", tz = "UTC"), - as.POSIXct("2020-07-01", tz = "UTC"), - "1mo", - time_zone = "UTC" - )$dt$convert_time_zone("Europe/London") + as.POSIXct("2020-05-01", tz = "UTC"), + "1mo" + ) ) df$select( - "london_timezone", - London_to_Amsterdam = pl$col( - "london_timezone" - )$dt$replace_time_zone("Europe/Amsterdam") -) - -# You can use `ambiguous` to deal with ambiguous datetimes: -dates = c( - "2018-10-28 01:30", - "2018-10-28 02:00", - "2018-10-28 02:30", - "2018-10-28 02:00" -) - -df = pl$DataFrame( - ts = pl$Series(dates)$str$strptime(pl$Datetime("us"), "\%F \%H:\%M"), - ambiguous = c("earliest", "earliest", "latest", "latest") + "date", + London = pl$col("date")$dt$convert_time_zone("Europe/London") ) - -df$with_columns( - ts_localized = pl$col("ts")$dt$replace_time_zone( - "Europe/Brussels", - ambiguous = pl$col("ambiguous") - ) -) - -# Polars Datetime type without a time zone will be converted to R -# with respect to the session time zone. If ambiguous times are present -# an error will be raised. It is recommended to add a time zone before -# converting to R. -s_without_tz = pl$Series(dates)$str$strptime(pl$Datetime("us"), "\%F \%H:\%M") -s_without_tz - -s_with_tz = s_without_tz$dt$replace_time_zone("UTC") -s_with_tz - -as.vector(s_with_tz) } -\keyword{ExprDT} diff --git a/man/ExprDT_replace_time_zone.Rd b/man/ExprDT_replace_time_zone.Rd index 489749c7a..449ac3fd4 100644 --- a/man/ExprDT_replace_time_zone.Rd +++ b/man/ExprDT_replace_time_zone.Rd @@ -5,10 +5,17 @@ \alias{(Expr)$dt$replace_time_zone} \title{Replace time zone} \usage{ -ExprDT_replace_time_zone(tz, ambiguous = "raise", non_existent = "raise") +ExprDT_replace_time_zone( + time_zone, + ..., + ambiguous = "raise", + non_existent = "raise" +) } \arguments{ -\item{tz}{NULL or string time zone from \code{\link[base:timezones]{base::OlsonNames()}}} +\item{time_zone}{\code{NULL} or string time zone from \code{\link[base:timezones]{base::OlsonNames()}}} + +\item{...}{Ignored.} \item{ambiguous}{Determine how to deal with ambiguous datetimes: \itemize{ @@ -34,26 +41,36 @@ underlying timestamp. Use to correct a wrong time zone annotation. This will change the corresponding global timepoint. } \examples{ -df_1 = pl$DataFrame(x = as.POSIXct("2009-08-07 00:00:01", tz = "America/New_York")) +df1 = pl$DataFrame( + london_timezone = pl$date_range( + as.POSIXct("2020-03-01", tz = "UTC"), + as.POSIXct("2020-07-01", tz = "UTC"), + "1mo" + )$dt$convert_time_zone("Europe/London") +) -df_1$with_columns( - pl$col("x")$dt$replace_time_zone("UTC")$alias("utc"), - pl$col("x")$dt$replace_time_zone("Europe/Amsterdam")$alias("cest") +df1$select( + "london_timezone", + London_to_Amsterdam = pl$col("london_timezone")$dt$replace_time_zone("Europe/Amsterdam") ) -# You can use ambiguous to deal with ambiguous datetimes -df_2 = pl$DataFrame( - x = seq( - as.POSIXct("2018-10-28 01:30", tz = "UTC"), - as.POSIXct("2018-10-28 02:30", tz = "UTC"), - by = "30 min" - ) +# You can use `ambiguous` to deal with ambiguous datetimes: +dates = c( + "2018-10-28 01:30", + "2018-10-28 02:00", + "2018-10-28 02:30", + "2018-10-28 02:00" +) +df2 = pl$DataFrame( + ts = as_polars_series(dates)$str$strptime(pl$Datetime("us")), + ambiguous = c("earliest", "earliest", "latest", "latest") ) -df_2$with_columns( - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "earliest")$alias("earliest"), - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "latest")$alias("latest"), - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "null")$alias("null") +df2$with_columns( + ts_localized = pl$col("ts")$dt$replace_time_zone( + "Europe/Brussels", + ambiguous = pl$col("ambiguous") + ) ) } \keyword{ExprDT} diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 95a51ccee..573e6e0f1 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1440,18 +1440,24 @@ impl RPolarsExpr { )) } - pub fn dt_convert_time_zone(&self, tz: String) -> Self { - self.0.clone().dt().convert_time_zone(tz).into() + pub fn dt_convert_time_zone(&self, time_zone: Robj) -> RResult { + Ok(RPolarsExpr( + self.0 + .clone() + .dt() + .convert_time_zone(robj_to!(String, time_zone)?) + .into(), + )) } pub fn dt_replace_time_zone( &self, - tz: Nullable, + time_zone: Nullable, ambiguous: Robj, non_existent: Robj, ) -> RResult { Ok(RPolarsExpr(self.0.clone().dt().replace_time_zone( - tz.into_option(), + time_zone.into_option(), robj_to!(PLExpr, ambiguous)?, robj_to!(NonExistent, non_existent)?, ))) diff --git a/tests/testthat/test-expr_datetime.R b/tests/testthat/test-expr_datetime.R index a9dc52cc8..75f26b668 100644 --- a/tests/testthat/test-expr_datetime.R +++ b/tests/testthat/test-expr_datetime.R @@ -697,9 +697,9 @@ test_that("replace_time_zone for ambiguous time", { x = seq(as.POSIXct("2018-10-28 01:30", tz = "UTC"), as.POSIXct("2018-10-28 02:30", tz = "UTC"), by = "30 min") pl_out = pl$DataFrame(x = x)$with_columns( - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "earliest")$alias("earliest"), - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "latest")$alias("latest"), - pl$col("x")$dt$replace_time_zone("Europe/Brussels", "null")$alias("null") + pl$col("x")$dt$replace_time_zone("Europe/Brussels", ambiguous = "earliest")$alias("earliest"), + pl$col("x")$dt$replace_time_zone("Europe/Brussels", ambiguous = "latest")$alias("latest"), + pl$col("x")$dt$replace_time_zone("Europe/Brussels", ambiguous = "null")$alias("null") )$to_data_frame() lubridate_out = data.frame(