r-lib · DavisVaughan · May 10, 2021 · May 10, 2021 · May 10, 2021 · May 10, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,33 @@
 # clock (development version)
 
+* Parsing into a date-time type that is coarser than the original string is now
+  considered ambiguous and undefined behavior. For example, parsing a string
+  with fractional seconds using `date_time_parse(x)` or
+  `naive_time_parse(x, precision = "second")` is no longer considered correct.
+  Instead, if you only require second precision from such a string, parse the
+  full string, with fractional seconds, into a clock type that can handle them,
+  then round to seconds using whatever rounding convention is required for your
+  use case, such as `time_point_floor()` (#230).
+
+  For example:
+
+  ```
+  x <- c("2019-01-01 00:00:59.123", "2019-01-01 00:00:59.556")
+
+  x <- naive_time_parse(x, precision = "millisecond")
+  x
+  #> <time_point<naive><millisecond>[2]>
+  #> [1] "2019-01-01 00:00:59.123" "2019-01-01 00:00:59.556"
+
+  x <- time_point_round(x, "second")
+  x
+  #> <time_point<naive><second>[2]>
+  #> [1] "2019-01-01 00:00:59" "2019-01-01 00:01:00"
+
+  as_date_time(x, "America/New_York")
+  #> [1] "2019-01-01 00:00:59 EST" "2019-01-01 00:01:00 EST"
+  ```
+
 # clock 0.3.0
 
 * New `date_seq()` for generating date and date-time sequences (#218).

diff --git a/R/date.R b/R/date.R
@@ -983,9 +983,11 @@ date_set_zone.Date <- function(x, zone) {
 #' _`date_parse()` ignores both the `%z` and `%Z` commands,_ as clock treats
 #' Date as a _naive_ type, with a yet-to-be-specified time zone.
 #'
-#' If parsing a string with sub-daily components, such as hours, minutes or
-#' seconds, note that the conversion to Date will round those components to
-#' the nearest day. See the examples for a way to control this.
+#' Parsing strings with sub-daily components, such as hours, minutes, or
+#' seconds, should be done with [date_time_parse()]. If you only need the date
+#' components, round the result to day precision, and then use [as_date()].
+#' Attempting to directly parse a sub-daily string into a Date is ambiguous and
+#' undefined, and is unlikely to work as you might expect.
 #'
 #' @inheritParams zoned-parsing
 #'
@@ -1012,27 +1014,21 @@ date_set_zone.Date <- function(x, zone) {
 #' date_parse("2020-W01-2", format = "%G-W%V-%u")
 #'
 #' # ---------------------------------------------------------------------------
-#' # Rounding of sub-daily components
+#' # Sub-daily components
 #'
-#' # Note that rounding a string with time components will round them to the
-#' # nearest day if you try and parse them
+#' # If you have strings with sub-daily components, but only require the date,
+#' # first parse them as date-times to fully capture the sub-daily components,
+#' # then round using whatever convention is required for your use case before
+#' # converting to Date.
 #' x <- c("2019-01-01 11", "2019-01-01 12")
 #'
-#' # Hour 12 rounds up to the next day
-#' date_parse(x, format = "%Y-%m-%d %H")
-#'
-#' # If you don't like this, one option is to just not parse the time component
-#' date_parse(x, format = "%Y-%m-%d")
-#'
-#' # A more general option is to parse the full string as a naive-time,
-#' # then round manually
-#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H", precision = "hour")
-#' nt
+#' x <- date_time_parse(x, zone = "UTC", format = "%Y-%m-%d %H")
+#' x
 #'
-#' nt <- time_point_floor(nt, "day")
-#' nt
+#' date_floor(x, "day")
+#' date_round(x, "day")
 #'
-#' as.Date(nt)
+#' as_date(date_round(x, "day"))
 date_parse <- function(x, ..., format = NULL, locale = clock_locale()) {
   x <- naive_time_parse(x, ..., format = format, precision = "day", locale = locale)
   as.Date(x)

diff --git a/R/gregorian-year-month-day.R b/R/gregorian-year-month-day.R
@@ -171,6 +171,8 @@ vec_ptype_abbr.clock_year_month_day <- function(x, ...) {
 #' @details
 #' `year_month_day_parse()` completely ignores the `%z` and `%Z` commands.
 #'
+#' @inheritSection zoned-parsing Full Precision Parsing
+#'
 #' @inheritParams zoned-parsing
 #'
 #' @param x `[character]`

diff --git a/R/naive-time.R b/R/naive-time.R
@@ -61,6 +61,8 @@ is_naive_time <- function(x) {
 #' If your date-time strings contain a UTC offset, but not a full time zone
 #' name, use [sys_time_parse()].
 #'
+#' @inheritSection zoned-parsing Full Precision Parsing
+#'
 #' @inheritParams sys_time_parse
 #'
 #' @return A naive-time.
@@ -80,6 +82,29 @@ is_naive_time <- function(x) {
 #'   "2020-01-01 -4000 America/New_York",
 #'   format = "%Y-%m-%d %z %Z"
 #' )
+#'
+#' # ---------------------------------------------------------------------------
+#' # Fractional seconds and POSIXct
+#'
+#' # If you have a string with fractional seconds and want to convert it to
+#' # a POSIXct, remember that clock treats POSIXct as a second precision type.
+#' # Ideally, you'd use a clock type that can support fractional seconds, but
+#' # if you really want to parse it into a POSIXct, the correct way to do so
+#' # is to parse the full fractional time point with the correct `precision`,
+#' # then round to seconds using whatever convention you require, and finally
+#' # convert that to POSIXct.
+#' x <- c("2020-01-01 00:00:00.123", "2020-01-01 00:00:00.555")
+#'
+#' # First, parse string with full precision
+#' x <- naive_time_parse(x, precision = "millisecond")
+#' x
+#'
+#' # Then round to seconds with a floor, ceiling, or round-to-nearest
+#' time_point_floor(x, "second")
+#' time_point_round(x, "second")
+#'
+#' # Finally, convert to POSIXct
+#' as_date_time(time_point_round(x, "second"), zone = "UTC")
 naive_time_parse <- function(x,
                              ...,
                              format = NULL,

diff --git a/R/posixt.R b/R/posixt.R
@@ -1164,9 +1164,16 @@ date_set_zone.POSIXt <- function(x, zone) {
 #' `NA`s, or completely fails to parse, then no time zone will be able to be
 #' determined. In that case, the result will use `"UTC"`.
 #'
-#' If manually parsing sub-second components, be aware that they will be
-#' automatically rounded to the nearest second when converting them to POSIXct.
-#' See the examples for a way to control this.
+#' If you have strings with sub-second components, then these date-time parsers
+#' are not appropriate for you. Remember that clock treats POSIXct as a second
+#' precision type, so parsing a string with fractional seconds directly into a
+#' POSIXct is ambiguous and undefined. Instead, fully parse the string,
+#' including its fractional seconds, into a clock type that can handle it, such
+#' as a naive-time with [naive_time_parse()], then round to seconds with
+#' whatever rounding convention is appropriate for your use case, such as
+#' [time_point_floor()], and finally convert that to POSIXct with
+#' [as_date_time()]. This gives you complete control over how the fractional
+#' seconds are handled when converting to POSIXct.
 #'
 #' @inheritParams zoned-parsing
 #' @inheritParams as-zoned-time-naive-time
@@ -1223,28 +1230,21 @@ date_set_zone.POSIXt <- function(x, zone) {
 #' date_time_parse_abbrev(abbrev_times, "America/New_York")
 #'
 #' # ---------------------------------------------------------------------------
-#' # Rounding of sub-second components
+#' # Sub-second components
 #'
-#' # Generally, if you have a string with sub-second components, they will
-#' # be ignored when parsing into a date-time
-#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7")
+#' # If you have strings with sub-second components, but only require up to
+#' # seconds, first parse them into a clock type that can handle sub-seconds to
+#' # fully capture that information, then round using whatever convention is
+#' # required for your use case before converting to a date-time.
+#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.78")
 #'
-#' date_time_parse(x, "America/New_York")
-#'
-#' # If you manually try and parse those sub-second components with `%4S` to
-#' # read the 2 seconds, 1 decimal point, and 1 fractional component, the
-#' # fractional component will be rounded to the nearest second
-#' date_time_parse(x, "America/New_York", format = "%Y-%m-%d %H:%M:%4S")
-#'
-#' # If you don't like this, parse the full string as a naive-time,
-#' # then round manually and convert to a POSIXct
-#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H:%M:%S", precision = "millisecond")
-#' nt
+#' x <- naive_time_parse(x, precision = "millisecond")
+#' x
 #'
-#' nt <- time_point_floor(nt, "second")
-#' nt
+#' time_point_floor(x, "second")
+#' time_point_round(x, "second")
 #'
-#' as.POSIXct(nt, "America/New_York")
+#' as_date_time(time_point_round(x, "second"), "America/New_York")
 NULL
 
 #' @rdname date-time-parse

diff --git a/R/sys-time.R b/R/sys-time.R
@@ -69,6 +69,8 @@ is_sys_time <- function(x) {
 #' consider using [naive_time_parse()], since the resulting naive-time doesn't
 #' come with an assumption of a UTC time zone.
 #'
+#' @inheritSection zoned-parsing Full Precision Parsing
+#'
 #' @inheritParams zoned-parsing
 #'
 #' @param precision `[character(1)]`

diff --git a/R/zoned-time.R b/R/zoned-time.R
@@ -300,6 +300,16 @@ zoned_time_format <- function(print_zone_name) {
 #' know the time zone that the date-times are supposed to be in, you can convert
 #' to a zoned-time with [as_zoned_time()].
 #'
+#' @section Full Precision Parsing:
+#'
+#' It is highly recommended to parse all of the information in the date-time
+#' string into a type at least as precise as the string. For example, if your
+#' string has fractional seconds, but you only require seconds, specify a
+#' sub-second `precision`, then round to seconds manually using whatever
+#' convention is appropriate for your use case. Parsing such a string directly
+#' into a second precision result is ambiguous and undefined, and is unlikely to
+#' work as you might expect.
+#'
 #' @inheritParams ellipsis::dots_empty
 #'
 #' @param x `[character]`
@@ -434,12 +444,17 @@ zoned_time_format <- function(print_zone_name) {
 #'   characters to read. If not specified, the default is `2`. Leading zeroes
 #'   are permitted but not required.
 #'
-#'   - `%S`: The seconds as a decimal number. The modified command `%NS` where
-#'   `N` is a positive decimal integer specifies the maximum number of
-#'   characters to read. If not specified, the default is determined by the
-#'   precision that you are parsing at. If encountered, the `locale`
-#'   determines the decimal point character. Leading zeroes are permitted but
-#'   not required.
+#'   - `%S`: The seconds as a decimal number. Leading zeroes are permitted but
+#'   not required. If encountered, the `locale` determines the decimal point
+#'   character. Generally, the maximum number of characters to read is
+#'   determined by the precision that you are parsing at. For example, a
+#'   precision of `"second"` would read a maximum of 2 characters, while a
+#'   precision of `"millisecond"` would read a maximum of 6 (2 for the values
+#'   before the decimal point, 1 for the decimal point, and 3 for the values
+#'   after it). The modified command `%NS`, where `N` is a positive decimal
+#'   integer, can be used to exactly specify the maximum number of characters to
+#'   read. This is only useful if you happen to have seconds with more than 1
+#'   leading zero.
 #'
 #'   - `%p`: The `locale`'s equivalent of the AM/PM designations associated with
 #'   a 12-hour clock. The command `%I` must precede `%p` in the format string.

diff --git a/man/date-time-parse.Rd b/man/date-time-parse.Rd