From 7c42cc12e653573680d889c5ed5459e8fe80d44c Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 8 Apr 2021 10:17:08 -0400 Subject: [PATCH 1/2] Add parsing tests related to parsing into less precise containers --- tests/testthat/test-date.R | 4 +-- .../testthat/test-gregorian-year-month-day.R | 18 +++++++++++ tests/testthat/test-naive-time.R | 31 +++++++++++++++++++ tests/testthat/test-zoned-time.R | 28 +++++++++++++++++ 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R index c58d62a7..083b2fd8 100644 --- a/tests/testthat/test-date.R +++ b/tests/testthat/test-date.R @@ -255,9 +255,7 @@ test_that("`%z` and `%Z` commands are ignored", { ) }) -# TODO: We probably don't want this: -# https://github.com/HowardHinnant/date/issues/657 -test_that("parsing into a less precise time point rounds rather than floors", { +test_that("parsing into a date if you requested to parse time components rounds the time (#207)", { expect_identical( date_parse("2019-12-31 11:59:59", format = "%Y-%m-%d %H:%M:%S"), as.Date("2019-12-31") diff --git a/tests/testthat/test-gregorian-year-month-day.R b/tests/testthat/test-gregorian-year-month-day.R index 3aff3943..c3a52334 100644 --- a/tests/testthat/test-gregorian-year-month-day.R +++ b/tests/testthat/test-gregorian-year-month-day.R @@ -312,6 +312,24 @@ test_that("parsing NA returns NA", { ) }) +test_that("parsing doesn't round parsed components more precise than the resulting container (#207)", { + # With year-month-day, only the year/month/day components are extracted at the end, + # the hour component isn't touched + expect_identical( + year_month_day_parse("2019-12-31 12", format = "%Y-%m-%d %H", precision = "day"), + year_month_day(2019, 12, 31) + ) +}) + +test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", { + # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up immediately + # after 
parsing the `%S` command, not at the very end + expect_identical( + year_month_day_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%7S", precision = "millisecond"), + year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond") + ) +}) + # ------------------------------------------------------------------------------ # calendar_group() diff --git a/tests/testthat/test-naive-time.R b/tests/testthat/test-naive-time.R index f55ad1f6..8275cbfb 100644 --- a/tests/testthat/test-naive-time.R +++ b/tests/testthat/test-naive-time.R @@ -272,6 +272,37 @@ test_that("%Z is completely ignored", { ) }) +test_that("parsing rounds parsed components more precise than the resulting container (#207)", { + expect_identical( + naive_time_parse("2019-12-31 11", format = "%Y-%m-%d %H", precision = "day"), + as_naive_time(year_month_day(2019, 12, 31)) + ) + expect_identical( + naive_time_parse("2019-12-31 12", format = "%Y-%m-%d %H", precision = "day"), + as_naive_time(year_month_day(2020, 1, 1)) + ) + + # If you don't try and parse them, it won't round + expect_identical( + naive_time_parse("2019-12-31 12", format = "%Y-%m-%d", precision = "day"), + as_naive_time(year_month_day(2019, 12, 31)) + ) +}) + +test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", { + # Default N for milliseconds is 6, so `%6S` (2 for seconds, 1 for decimal, 3 for subseconds) + expect_identical( + naive_time_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%S", precision = "millisecond"), + as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 123, subsecond_precision = "millisecond")) + ) + + # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up + expect_identical( + naive_time_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%7S", precision = "millisecond"), + as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond")) + ) +}) + # 
------------------------------------------------------------------------------ # format() diff --git a/tests/testthat/test-zoned-time.R b/tests/testthat/test-zoned-time.R index 0af313f1..cc0a035c 100644 --- a/tests/testthat/test-zoned-time.R +++ b/tests/testthat/test-zoned-time.R @@ -230,6 +230,34 @@ test_that("`x` is translated to UTF-8", { ) }) +test_that("leftover subseconds result in a parse failure", { + x <- "2019-01-01 01:01:01.1238-05:00[America/New_York]" + + # This is fine + expect_identical( + zoned_time_parse_complete(x, precision = "microsecond"), + as_zoned_time(as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 123800, subsecond_precision = "microsecond")), "America/New_York") + ) + + # This defaults to `%6S`, which parses `01.123` then stops, + # leaving an `8` for `%z` to parse, resulting in a failure. Because everything + # fails, we get a UTC time zone. + expect_identical( + expect_warning(zoned_time_parse_complete(x, precision = "millisecond"), class = "clock_warning_parse_failures"), + as_zoned_time(naive_seconds(NA) + duration_milliseconds(NA), zone = "UTC") + ) +}) + +test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", { + x <- "2019-01-01 01:01:01.1238-05:00[America/New_York]" + + # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up + expect_identical( + zoned_time_parse_complete(x, precision = "millisecond", format = "%Y-%m-%d %H:%M:%7S%Ez[%Z]"), + as_zoned_time(as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond")), "America/New_York") + ) +}) + # ------------------------------------------------------------------------------ # zoned_time_parse_abbrev() From 2986f8a36eb4734bade899be75b55a70e060c91f Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 8 Apr 2021 10:41:59 -0400 Subject: [PATCH 2/2] Document rounding process when parsing --- R/date.R | 27 +++++++++++++++++++++++++++ R/posixt.R | 28 
++++++++++++++++++++++++++++ man/date-time-parse.Rd | 28 ++++++++++++++++++++++++++++ man/date_parse.Rd | 27 +++++++++++++++++++++++++++ 4 files changed, 110 insertions(+) diff --git a/R/date.R b/R/date.R index d68892be..c067c91d 100644 --- a/R/date.R +++ b/R/date.R @@ -908,6 +908,10 @@ date_set_zone.Date <- function(x, zone) { #' _`date_parse()` ignores both the `%z` and `%Z` commands,_ as clock treats #' Date as a _naive_ type, with a yet-to-be-specified time zone. #' +#' If parsing a string with sub-daily components, such as hours, minutes or +#' seconds, note that the conversion to Date will round those components to +#' the nearest day. See the examples for a way to control this. +#' #' @inheritParams zoned-parsing #' #' @return A Date. @@ -931,6 +935,29 @@ date_set_zone.Date <- function(x, zone) { #' # A neat feature of `date_parse()` is the ability to parse #' # the ISO year-week-day format #' date_parse("2020-W01-2", format = "%G-W%V-%u") +#' +#' # --------------------------------------------------------------------------- +#' # Rounding of sub-daily components +#' +#' # Note that the time components of a string will be rounded to the +#' # nearest day if you try and parse them +#' x <- c("2019-01-01 11", "2019-01-01 12") +#' +#' # Hour 12 rounds up to the next day +#' date_parse(x, format = "%Y-%m-%d %H") +#' +#' # If you don't like this, one option is to just not parse the time component +#' date_parse(x, format = "%Y-%m-%d") +#' +#' # A more general option is to parse the full string as a naive-time, +#' # then round manually +#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H", precision = "hour") +#' nt +#' +#' nt <- time_point_floor(nt, "day") +#' nt +#' +#' as.Date(nt) date_parse <- function(x, ..., format = NULL, locale = clock_locale()) { x <- naive_time_parse(x, ..., format = format, precision = "day", locale = locale) as.Date(x) diff --git a/R/posixt.R b/R/posixt.R index ab2167a5..5fc73246 100644 --- a/R/posixt.R +++ b/R/posixt.R @@ 
-1052,6 +1052,10 @@ date_set_zone.POSIXt <- function(x, zone) { #' `NA`s, or completely fails to parse, then no time zone will be able to be #' determined. In that case, the result will use `"UTC"`. #' +#' If manually parsing sub-second components, be aware that they will be +#' automatically rounded to the nearest second when converting them to POSIXct. +#' See the examples for a way to control this. +#' #' @inheritParams zoned-parsing #' @inheritParams as-zoned-time-naive-time #' @@ -1105,6 +1109,30 @@ date_set_zone.POSIXt <- function(x, zone) { #' "1970-10-25 01:00:00 EST" #' ) #' date_time_parse_abbrev(abbrev_times, "America/New_York") +#' +#' # --------------------------------------------------------------------------- +#' # Rounding of sub-second components +#' +#' # Generally, if you have a string with sub-second components, they will +#' # be ignored when parsing into a date-time +#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7") +#' +#' date_time_parse(x, "America/New_York") +#' +#' # If you manually try and parse those sub-second components with `%4S` to +#' # read the 2 seconds, 1 decimal point, and 1 fractional component, the +#' # fractional component will be rounded to the nearest second +#' date_time_parse(x, "America/New_York", format = "%Y-%m-%d %H:%M:%4S") +#' +#' # If you don't like this, parse the full string as a naive-time, +#' # then round manually and convert to a POSIXct +#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H:%M:%S", precision = "millisecond") +#' nt +#' +#' nt <- time_point_floor(nt, "second") +#' nt +#' +#' as.POSIXct(nt, "America/New_York") NULL #' @rdname date-time-parse diff --git a/man/date-time-parse.Rd b/man/date-time-parse.Rd index 99198371..94b84559 100644 --- a/man/date-time-parse.Rd +++ b/man/date-time-parse.Rd @@ -314,6 +314,10 @@ The default \code{format} used is \code{"\%Y-\%m-\%d \%H:\%M:\%S \%Z"}. 
If \code{date_time_parse_complete()} is given input that is length zero, all \code{NA}s, or completely fails to parse, then no time zone will be able to be determined. In that case, the result will use \code{"UTC"}. + +If manually parsing sub-second components, be aware that they will be +automatically rounded to the nearest second when converting them to POSIXct. +See the examples for a way to control this. } \examples{ # Parse with a known `zone`, even though that information isn't in the string @@ -361,4 +365,28 @@ abbrev_times <- c( "1970-10-25 01:00:00 EST" ) date_time_parse_abbrev(abbrev_times, "America/New_York") + +# --------------------------------------------------------------------------- +# Rounding of sub-second components + +# Generally, if you have a string with sub-second components, they will +# be ignored when parsing into a date-time +x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7") + +date_time_parse(x, "America/New_York") + +# If you manually try and parse those sub-second components with `\%4S` to +# read the 2 seconds, 1 decimal point, and 1 fractional component, the +# fractional component will be rounded to the nearest second +date_time_parse(x, "America/New_York", format = "\%Y-\%m-\%d \%H:\%M:\%4S") + +# If you don't like this, parse the full string as a naive-time, +# then round manually and convert to a POSIXct +nt <- naive_time_parse(x, format = "\%Y-\%m-\%d \%H:\%M:\%S", precision = "millisecond") +nt + +nt <- time_point_floor(nt, "second") +nt + +as.POSIXct(nt, "America/New_York") } diff --git a/man/date_parse.Rd b/man/date_parse.Rd index 0007351b..bc0069ab 100644 --- a/man/date_parse.Rd +++ b/man/date_parse.Rd @@ -187,6 +187,10 @@ The default \code{format} used is \code{"\%Y-\%m-\%d"}. \details{ \emph{\code{date_parse()} ignores both the \verb{\%z} and \verb{\%Z} commands,} as clock treats Date as a \emph{naive} type, with a yet-to-be-specified time zone. 
+ +If parsing a string with sub-daily components, such as hours, minutes or +seconds, note that the conversion to Date will round those components to +the nearest day. See the examples for a way to control this. } \examples{ date_parse("2020-01-01") @@ -206,4 +210,27 @@ date_parse( # A neat feature of `date_parse()` is the ability to parse # the ISO year-week-day format date_parse("2020-W01-2", format = "\%G-W\%V-\%u") + +# --------------------------------------------------------------------------- +# Rounding of sub-daily components + +# Note that the time components of a string will be rounded to the +# nearest day if you try and parse them +x <- c("2019-01-01 11", "2019-01-01 12") + +# Hour 12 rounds up to the next day +date_parse(x, format = "\%Y-\%m-\%d \%H") + +# If you don't like this, one option is to just not parse the time component +date_parse(x, format = "\%Y-\%m-\%d") + +# A more general option is to parse the full string as a naive-time, +# then round manually +nt <- naive_time_parse(x, format = "\%Y-\%m-\%d \%H", precision = "hour") +nt + +nt <- time_point_floor(nt, "day") +nt + +as.Date(nt) }