diff --git a/NEWS.md b/NEWS.md index ea29d8d1..9856a92f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,33 @@ # clock (development version) +* Parsing into a date-time type that is coarser than the original string is now + considered ambiguous and undefined behavior. For example, parsing a string + with fractional seconds using `date_time_parse(x)` or + `naive_time_parse(x, precision = "second")` is no longer considered correct. + Instead, if you only require second precision from such a string, parse the + full string, with fractional seconds, into a clock type that can handle them, + then round to seconds using whatever rounding convention is required for your + use case, such as `time_point_floor()` (#230). + + For example: + + ``` + x <- c("2019-01-01 00:00:59.123", "2019-01-01 00:00:59.556") + + x <- naive_time_parse(x, precision = "millisecond") + x + #> <time_point<naive><millisecond>[2]> + #> [1] "2019-01-01 00:00:59.123" "2019-01-01 00:00:59.556" + + x <- time_point_round(x, "second") + x + #> <time_point<naive><second>[2]> + #> [1] "2019-01-01 00:00:59" "2019-01-01 00:01:00" + + as_date_time(x, "America/New_York") + #> [1] "2019-01-01 00:00:59 EST" "2019-01-01 00:01:00 EST" + ``` + # clock 0.3.0 * New `date_seq()` for generating date and date-time sequences (#218). diff --git a/R/date.R b/R/date.R index 7c017e25..4529382d 100644 --- a/R/date.R +++ b/R/date.R @@ -983,9 +983,11 @@ date_set_zone.Date <- function(x, zone) { #' _`date_parse()` ignores both the `%z` and `%Z` commands,_ as clock treats #' Date as a _naive_ type, with a yet-to-be-specified time zone. #' -#' If parsing a string with sub-daily components, such as hours, minutes or -#' seconds, note that the conversion to Date will round those components to -#' the nearest day. See the examples for a way to control this. +#' Parsing strings with sub-daily components, such as hours, minutes, or +#' seconds, should be done with [date_time_parse()]. If you only need the date +#' components, round the result to day precision, and then use [as_date()]. 
+#' Attempting to directly parse a sub-daily string into a Date is ambiguous and +#' undefined, and is unlikely to work as you might expect. #' #' @inheritParams zoned-parsing #' @@ -1012,27 +1014,21 @@ date_set_zone.Date <- function(x, zone) { #' date_parse("2020-W01-2", format = "%G-W%V-%u") #' #' # --------------------------------------------------------------------------- -#' # Rounding of sub-daily components +#' # Sub-daily components #' -#' # Note that rounding a string with time components will round them to the -#' # nearest day if you try and parse them +#' # If you have a string with sub-daily components, but only require the date, +#' # first parse them as date-times to fully parse the sub-daily components, +#' # then round using whatever convention is required for your use case before +#' # converting to date. #' x <- c("2019-01-01 11", "2019-01-01 12") #' -#' # Hour 12 rounds up to the next day -#' date_parse(x, format = "%Y-%m-%d %H") -#' -#' # If you don't like this, one option is to just not parse the time component -#' date_parse(x, format = "%Y-%m-%d") -#' -#' # A more general option is to parse the full string as a naive-time, -#' # then round manually -#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H", precision = "hour") -#' nt +#' x <- date_time_parse(x, zone = "UTC", format = "%Y-%m-%d %H") +#' x #' -#' nt <- time_point_floor(nt, "day") -#' nt +#' date_floor(x, "day") +#' date_round(x, "day") #' -#' as.Date(nt) +#' as_date(date_round(x, "day")) date_parse <- function(x, ..., format = NULL, locale = clock_locale()) { x <- naive_time_parse(x, ..., format = format, precision = "day", locale = locale) as.Date(x) diff --git a/R/gregorian-year-month-day.R b/R/gregorian-year-month-day.R index feb5fc93..2d84f496 100644 --- a/R/gregorian-year-month-day.R +++ b/R/gregorian-year-month-day.R @@ -171,6 +171,8 @@ vec_ptype_abbr.clock_year_month_day <- function(x, ...) 
{ #' @details #' `year_month_day_parse()` completely ignores the `%z` and `%Z` commands. #' +#' @inheritSection zoned-parsing Full Precision Parsing +#' #' @inheritParams zoned-parsing #' #' @param x `[character]` diff --git a/R/naive-time.R b/R/naive-time.R index cc291cbf..4f9b7778 100644 --- a/R/naive-time.R +++ b/R/naive-time.R @@ -61,6 +61,8 @@ is_naive_time <- function(x) { #' If your date-time strings contain a UTC offset, but not a full time zone #' name, use [sys_time_parse()]. #' +#' @inheritSection zoned-parsing Full Precision Parsing +#' #' @inheritParams sys_time_parse #' #' @return A naive-time. @@ -80,6 +82,29 @@ is_naive_time <- function(x) { #' "2020-01-01 -4000 America/New_York", #' format = "%Y-%m-%d %z %Z" #' ) +#' +#' # --------------------------------------------------------------------------- +#' # Fractional seconds and POSIXct +#' +#' # If you have a string with fractional seconds and want to convert it to +#' # a POSIXct, remember that clock treats POSIXct as a second precision type. +#' # Ideally, you'd use a clock type that can support fractional seconds, but +#' # if you really want to parse it into a POSIXct, the correct way to do so +#' # is to parse the full fractional time point with the correct `precision`, +#' # then round to seconds using whatever convention you require, and finally +#' # convert that to POSIXct. 
+#' x <- c("2020-01-01 00:00:00.123", "2020-01-01 00:00:00.555") +#' +#' # First, parse string with full precision +#' x <- naive_time_parse(x, precision = "millisecond") +#' x +#' +#' # Then round to second with a floor, ceiling, or round to nearest +#' time_point_floor(x, "second") +#' time_point_round(x, "second") +#' +#' # Finally, convert to POSIXct +#' as_date_time(time_point_round(x, "second"), zone = "UTC") naive_time_parse <- function(x, ..., format = NULL, diff --git a/R/posixt.R b/R/posixt.R index a972d358..5aab120a 100644 --- a/R/posixt.R +++ b/R/posixt.R @@ -1164,9 +1164,16 @@ date_set_zone.POSIXt <- function(x, zone) { #' `NA`s, or completely fails to parse, then no time zone will be able to be #' determined. In that case, the result will use `"UTC"`. #' -#' If manually parsing sub-second components, be aware that they will be -#' automatically rounded to the nearest second when converting them to POSIXct. -#' See the examples for a way to control this. +#' If you have strings with sub-second components, then these date-time parsers +#' are not appropriate for you. Remember that clock treats POSIXct as a second +#' precision type, so parsing a string with fractional seconds directly into a +#' POSIXct is ambiguous and undefined. Instead, fully parse the string, +#' including its fractional seconds, into a clock type that can handle it, such +#' as a naive-time with [naive_time_parse()], then round to seconds with +#' whatever rounding convention is appropriate for your use case, such as +#' [time_point_floor()], and finally convert that to POSIXct with +#' [as_date_time()]. This gives you complete control over how the fractional +#' seconds are handled when converting to POSIXct. 
#' #' @inheritParams zoned-parsing #' @inheritParams as-zoned-time-naive-time @@ -1223,28 +1230,21 @@ date_set_zone.POSIXt <- function(x, zone) { #' date_time_parse_abbrev(abbrev_times, "America/New_York") #' #' # --------------------------------------------------------------------------- -#' # Rounding of sub-second components +#' # Sub-second components #' -#' # Generally, if you have a string with sub-second components, they will -#' # be ignored when parsing into a date-time -#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7") +#' # If you have a string with sub-second components, but only require up to +#' # seconds, first parse them into a clock type that can handle sub-seconds to +#' # fully capture that information, then round using whatever convention is +#' # required for your use case before converting to a date-time. +#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.78") #' -#' date_time_parse(x, "America/New_York") -#' -#' # If you manually try and parse those sub-second components with `%4S` to -#' # read the 2 seconds, 1 decimal point, and 1 fractional component, the -#' # fractional component will be rounded to the nearest second -#' date_time_parse(x, "America/New_York", format = "%Y-%m-%d %H:%M:%4S") -#' -#' # If you don't like this, parse the full string as a naive-time, -#' # then round manually and convert to a POSIXct -#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H:%M:%S", precision = "millisecond") -#' nt +#' x <- naive_time_parse(x, precision = "millisecond") +#' x #' -#' nt <- time_point_floor(nt, "second") -#' nt +#' time_point_floor(x, "second") +#' time_point_round(x, "second") #' -#' as.POSIXct(nt, "America/New_York") +#' as_date_time(time_point_round(x, "second"), "America/New_York") NULL #' @rdname date-time-parse diff --git a/R/sys-time.R b/R/sys-time.R index 6eb4d536..5500e6e3 100644 --- a/R/sys-time.R +++ b/R/sys-time.R @@ -69,6 +69,8 @@ is_sys_time <- function(x) { #' consider using [naive_time_parse()], since 
the resulting naive-time doesn't #' come with an assumption of a UTC time zone. #' +#' @inheritSection zoned-parsing Full Precision Parsing +#' #' @inheritParams zoned-parsing #' #' @param precision `[character(1)]` diff --git a/R/zoned-time.R b/R/zoned-time.R index a615606d..9ba59ffc 100644 --- a/R/zoned-time.R +++ b/R/zoned-time.R @@ -300,6 +300,16 @@ zoned_time_format <- function(print_zone_name) { #' know the time zone that the date-times are supposed to be in, you can convert #' to a zoned-time with [as_zoned_time()]. #' +#' @section Full Precision Parsing: +#' +#' It is highly recommended to parse all of the information in the date-time +#' string into a type at least as precise as the string. For example, if your +#' string has fractional seconds, but you only require seconds, specify a +#' sub-second `precision`, then round to seconds manually using whatever +#' convention is appropriate for your use case. Parsing such a string directly +#' into a second precision result is ambiguous and undefined, and is unlikely to +#' work as you might expect. +#' #' @inheritParams ellipsis::dots_empty #' #' @param x `[character]` @@ -434,12 +444,17 @@ zoned_time_format <- function(print_zone_name) { #' characters to read. If not specified, the default is `2`. Leading zeroes #' are permitted but not required. #' -#' - `%S`: The seconds as a decimal number. The modified command `%NS` where -#' `N` is a positive decimal integer specifies the maximum number of -#' characters to read. If not specified, the default is determined by the -#' precision that you are parsing at. If encountered, the `locale` -#' determines the decimal point character. Leading zeroes are permitted but -#' not required. +#' - `%S`: The seconds as a decimal number. Leading zeroes are permitted but +#' not required. If encountered, the `locale` determines the decimal point +#' character. Generally, the maximum number of characters to read is +#' determined by the precision that you are parsing at. 
For example, a +#' precision of `"second"` would read a maximum of 2 characters, while a +#' precision of `"millisecond"` would read a maximum of 6 (2 for the values +#' before the decimal point, 1 for the decimal point, and 3 for the values +#' after it). The modified command `%NS`, where `N` is a positive decimal +#' integer, can be used to exactly specify the maximum number of characters to +#' read. This is only useful if you happen to have seconds with more than 1 +#' leading zero. #' #' - `%p`: The `locale`'s equivalent of the AM/PM designations associated with #' a 12-hour clock. The command `%I` must precede `%p` in the format string. diff --git a/man/date-time-parse.Rd b/man/date-time-parse.Rd index 94b84559..48416a42 100644 --- a/man/date-time-parse.Rd +++ b/man/date-time-parse.Rd @@ -152,12 +152,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). 
The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. @@ -315,9 +320,16 @@ If \code{date_time_parse_complete()} is given input that is length zero, all \code{NA}s, or completely fails to parse, then no time zone will be able to be determined. In that case, the result will use \code{"UTC"}. -If manually parsing sub-second components, be aware that they will be -automatically rounded to the nearest second when converting them to POSIXct. -See the examples for a way to control this. +If you have strings with sub-second components, then these date-time parsers +are not appropriate for you. Remember that clock treats POSIXct as a second +precision type, so parsing a string with fractional seconds directly into a +POSIXct is ambiguous and undefined. Instead, fully parse the string, +including its fractional seconds, into a clock type that can handle it, such +as a naive-time with \code{\link[=naive_time_parse]{naive_time_parse()}}, then round to seconds with +whatever rounding convention is appropriate for your use case, such as +\code{\link[=time_point_floor]{time_point_floor()}}, and finally convert that to POSIXct with +\code{\link[=as_date_time]{as_date_time()}}. This gives you complete control over how the fractional +seconds are handled when converting to POSIXct. 
} \examples{ # Parse with a known `zone`, even though that information isn't in the string @@ -367,26 +379,19 @@ abbrev_times <- c( date_time_parse_abbrev(abbrev_times, "America/New_York") # --------------------------------------------------------------------------- -# Rounding of sub-second components - -# Generally, if you have a string with sub-second components, they will -# be ignored when parsing into a date-time -x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7") - -date_time_parse(x, "America/New_York") +# Sub-second components -# If you manually try and parse those sub-second components with `\%4S` to -# read the 2 seconds, 1 decimal point, and 1 fractional component, the -# fractional component will be rounded to the nearest second -date_time_parse(x, "America/New_York", format = "\%Y-\%m-\%d \%H:\%M:\%4S") +# If you have a string with sub-second components, but only require up to +# seconds, first parse them into a clock type that can handle sub-seconds to +# fully capture that information, then round using whatever convention is +# required for your use case before converting to a date-time. +x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.78") -# If you don't like this, parse the full string as a naive-time, -# then round manually and convert to a POSIXct -nt <- naive_time_parse(x, format = "\%Y-\%m-\%d \%H:\%M:\%S", precision = "millisecond") -nt +x <- naive_time_parse(x, precision = "millisecond") +x -nt <- time_point_floor(nt, "second") -nt +time_point_floor(x, "second") +time_point_round(x, "second") -as.POSIXct(nt, "America/New_York") +as_date_time(time_point_round(x, "second"), "America/New_York") } diff --git a/man/date_parse.Rd b/man/date_parse.Rd index bc0069ab..6600dc9f 100644 --- a/man/date_parse.Rd +++ b/man/date_parse.Rd @@ -133,12 +133,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. 
Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. @@ -188,9 +193,11 @@ The default \code{format} used is \code{"\%Y-\%m-\%d"}. \emph{\code{date_parse()} ignores both the \verb{\%z} and \verb{\%Z} commands,} as clock treats Date as a \emph{naive} type, with a yet-to-be-specified time zone. -If parsing a string with sub-daily components, such as hours, minutes or -seconds, note that the conversion to Date will round those components to -the nearest day. See the examples for a way to control this. 
+Parsing strings with sub-daily components, such as hours, minutes, or +seconds, should be done with \code{\link[=date_time_parse]{date_time_parse()}}. If you only need the date +components, round the result to day precision, and then use \code{\link[=as_date]{as_date()}}. +Attempting to directly parse a sub-daily string into a Date is ambiguous and +undefined, and is unlikely to work as you might expect. } \examples{ date_parse("2020-01-01") @@ -212,25 +219,19 @@ date_parse( date_parse("2020-W01-2", format = "\%G-W\%V-\%u") # --------------------------------------------------------------------------- -# Rounding of sub-daily components +# Sub-daily components -# Note that rounding a string with time components will round them to the -# nearest day if you try and parse them +# If you have a string with sub-daily components, but only require the date, +# first parse them as date-times to fully parse the sub-daily components, +# then round using whatever convention is required for your use case before +# converting to date. x <- c("2019-01-01 11", "2019-01-01 12") -# Hour 12 rounds up to the next day -date_parse(x, format = "\%Y-\%m-\%d \%H") - -# If you don't like this, one option is to just not parse the time component -date_parse(x, format = "\%Y-\%m-\%d") - -# A more general option is to parse the full string as a naive-time, -# then round manually -nt <- naive_time_parse(x, format = "\%Y-\%m-\%d \%H", precision = "hour") -nt +x <- date_time_parse(x, zone = "UTC", format = "\%Y-\%m-\%d \%H") +x -nt <- time_point_floor(nt, "day") -nt +date_floor(x, "day") +date_round(x, "day") -as.Date(nt) +as_date(date_round(x, "day")) } diff --git a/man/naive_time_parse.Rd b/man/naive_time_parse.Rd index 83d541f3..af40d972 100644 --- a/man/naive_time_parse.Rd +++ b/man/naive_time_parse.Rd @@ -139,12 +139,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. 
Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. @@ -222,6 +227,18 @@ If your date-time strings contain a full time zone name and a UTC offset, use If your date-time strings contain a UTC offset, but not a full time zone name, use \code{\link[=sys_time_parse]{sys_time_parse()}}. } +\section{Full Precision Parsing}{ + + +It is highly recommended to parse all of the information in the date-time +string into a type at least as precise as the string. 
For example, if your +string has fractional seconds, but you only require seconds, specify a +sub-second \code{precision}, then round to seconds manually using whatever +convention is appropriate for your use case. Parsing such a string directly +into a second precision result is ambiguous and undefined, and is unlikely to +work as you might expect. +} + \examples{ naive_time_parse("2020-01-01 05:06:07") @@ -236,4 +253,27 @@ naive_time_parse( "2020-01-01 -4000 America/New_York", format = "\%Y-\%m-\%d \%z \%Z" ) + +# --------------------------------------------------------------------------- +# Fractional seconds and POSIXct + +# If you have a string with fractional seconds and want to convert it to +# a POSIXct, remember that clock treats POSIXct as a second precision type. +# Ideally, you'd use a clock type that can support fractional seconds, but +# if you really want to parse it into a POSIXct, the correct way to do so +# is to parse the full fractional time point with the correct `precision`, +# then round to seconds using whatever convention you require, and finally +# convert that to POSIXct. +x <- c("2020-01-01 00:00:00.123", "2020-01-01 00:00:00.555") + +# First, parse string with full precision +x <- naive_time_parse(x, precision = "millisecond") +x + +# Then round to second with a floor, ceiling, or round to nearest +time_point_floor(x, "second") +time_point_round(x, "second") + +# Finally, convert to POSIXct +as_date_time(time_point_round(x, "second"), zone = "UTC") } diff --git a/man/sys_time_parse.Rd b/man/sys_time_parse.Rd index 491b7ed4..c9f2a274 100644 --- a/man/sys_time_parse.Rd +++ b/man/sys_time_parse.Rd @@ -139,12 +139,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. 
The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. @@ -230,6 +235,18 @@ If your date-time strings don't contain an offset from UTC, you might consider using \code{\link[=naive_time_parse]{naive_time_parse()}}, since the resulting naive-time doesn't come with an assumption of a UTC time zone. } +\section{Full Precision Parsing}{ + + +It is highly recommended to parse all of the information in the date-time +string into a type at least as precise as the string. For example, if your +string has fractional seconds, but you only require seconds, specify a +sub-second \code{precision}, then round to seconds manually using whatever +convention is appropriate for your use case. 
Parsing such a string directly +into a second precision result is ambiguous and undefined, and is unlikely to +work as you might expect. +} + \examples{ sys_time_parse("2020-01-01 05:06:07") diff --git a/man/year_month_day_parse.Rd b/man/year_month_day_parse.Rd index 7737e5dd..c39e670e 100644 --- a/man/year_month_day_parse.Rd +++ b/man/year_month_day_parse.Rd @@ -139,12 +139,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. 
@@ -213,6 +218,18 @@ The default options assume \code{x} should be parsed at day precision, using a \details{ \code{year_month_day_parse()} completely ignores the \verb{\%z} and \verb{\%Z} commands. } +\section{Full Precision Parsing}{ + + +It is highly recommended to parse all of the information in the date-time +string into a type at least as precise as the string. For example, if your +string has fractional seconds, but you only require seconds, specify a +sub-second \code{precision}, then round to seconds manually using whatever +convention is appropriate for your use case. Parsing such a string directly +into a second precision result is ambiguous and undefined, and is unlikely to +work as you might expect. +} + \examples{ x <- "2019-01-01" diff --git a/man/zoned-parsing.Rd b/man/zoned-parsing.Rd index da0db5b5..517c5857 100644 --- a/man/zoned-parsing.Rd +++ b/man/zoned-parsing.Rd @@ -150,12 +150,17 @@ are permitted but not required. \code{N} is a positive decimal integer specifies the maximum number of characters to read. If not specified, the default is \code{2}. Leading zeroes are permitted but not required. -\item \verb{\%S}: The seconds as a decimal number. The modified command \verb{\%NS} where -\code{N} is a positive decimal integer specifies the maximum number of -characters to read. If not specified, the default is determined by the -precision that you are parsing at. If encountered, the \code{locale} -determines the decimal point character. Leading zeroes are permitted but -not required. +\item \verb{\%S}: The seconds as a decimal number. Leading zeroes are permitted but +not required. If encountered, the \code{locale} determines the decimal point +character. Generally, the maximum number of characters to read is +determined by the precision that you are parsing at. 
For example, a +precision of \code{"second"} would read a maximum of 2 characters, while a +precision of \code{"millisecond"} would read a maximum of 6 (2 for the values +before the decimal point, 1 for the decimal point, and 3 for the values +after it). The modified command \verb{\%NS}, where \code{N} is a positive decimal +integer, can be used to exactly specify the maximum number of characters to +read. This is only useful if you happen to have seconds with more than 1 +leading zero. \item \verb{\%p}: The \code{locale}'s equivalent of the AM/PM designations associated with a 12-hour clock. The command \verb{\%I} must precede \verb{\%p} in the format string. \item \verb{\%R}: Equivalent to \verb{\%H:\%M}. @@ -263,6 +268,18 @@ zone name, you might need to use \code{\link[=naive_time_parse]{naive_time_parse know the time zone that the date-times are supposed to be in, you can convert to a zoned-time with \code{\link[=as_zoned_time]{as_zoned_time()}}. } +\section{Full Precision Parsing}{ + + +It is highly recommended to parse all of the information in the date-time +string into a type at least as precise as the string. For example, if your +string has fractional seconds, but you only require seconds, specify a +sub-second \code{precision}, then round to seconds manually using whatever +convention is appropriate for your use case. Parsing such a string directly +into a second precision result is ambiguous and undefined, and is unlikely to +work as you might expect. 
+} + \examples{ library(magrittr) diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R index a8989615..e5b37fd5 100644 --- a/tests/testthat/test-date.R +++ b/tests/testthat/test-date.R @@ -290,16 +290,16 @@ test_that("formatting Dates with `%z` or `%Z` returns NA with a warning", { test_that("`%z` and `%Z` commands are ignored", { expect_identical( - date_parse("2019-12-31 11:59:59-0500", format = "%Y-%m-%d %H:%M:%S%z"), + date_parse("2019-12-31 -0500", format = "%Y-%m-%d %z"), as.Date("2019-12-31") ) expect_identical( - date_parse("2019-12-31 11:59:59[America/New_York]", format = "%Y-%m-%d %H:%M:%S[%Z]"), + date_parse("2019-12-31 America/New_York", format = "%Y-%m-%d %Z"), as.Date("2019-12-31") ) }) -test_that("parsing into a date if you requested to parse time components rounds the time (#207)", { +test_that("parsing into a date if you requested to parse time components rounds the time (#207) (#230) (undocumented)", { expect_identical( date_parse("2019-12-31 11:59:59", format = "%Y-%m-%d %H:%M:%S"), as.Date("2019-12-31") @@ -310,6 +310,16 @@ test_that("parsing into a date if you requested to parse time components rounds ) }) +test_that("parsing fails when undocumented rounding behavior would result in invalid 60 second component (#230) (undocumented)", { + expect_identical( + expect_warning( + date_parse("2019-01-01 01:01:59.550", format = "%Y-%m-%d %H:%M:%6S"), + class = "clock_warning_parse_failures" + ), + new_date(NA_real_) + ) +}) + test_that("failure to parse throws a warning", { expect_warning(date_parse("foo"), class = "clock_warning_parse_failures") expect_snapshot(date_parse("foo")) diff --git a/tests/testthat/test-naive-time.R b/tests/testthat/test-naive-time.R index 8275cbfb..3634f15f 100644 --- a/tests/testthat/test-naive-time.R +++ b/tests/testthat/test-naive-time.R @@ -152,21 +152,19 @@ test_that("can parse subsecond precision", { ) }) -test_that("parsing to a lower precision ignores higher precision info", { +test_that("parsing 
works if `precision` uses a default that doesn't attempt to capture all the info", { + # Uses %Y-%m-%d x <- "2019-01-01 01:00:00" - y <- "2019-01-01 01:00:00.12345" - expect_identical( naive_time_parse(x, precision = "day"), as_naive_time(year_month_day(2019, 1, 1)) ) + + # Uses %Y-%m-%d %H:%M + x <- "2019-01-01 01:00:59" expect_identical( - naive_time_parse(y, precision = "second"), - as_naive_time(year_month_day(2019, 1, 1, 1, 0, 0)) - ) - expect_identical( - naive_time_parse(y, precision = "millisecond"), - as_naive_time(year_month_day(2019, 1, 1, 1, 0, 0, 123, subsecond_precision = "millisecond")) + naive_time_parse(x, precision = "minute"), + as_naive_time(year_month_day(2019, 1, 1, 1, 0)) ) }) @@ -272,7 +270,7 @@ test_that("%Z is completely ignored", { ) }) -test_that("parsing rounds parsed components more precise than the resulting container (#207)", { +test_that("parsing rounds parsed components more precise than the resulting container (#207) (#230) (undocumented)", { expect_identical( naive_time_parse("2019-12-31 11", format = "%Y-%m-%d %H", precision = "day"), as_naive_time(year_month_day(2019, 12, 31)) @@ -289,7 +287,7 @@ test_that("parsing rounds parsed components more precise than the resulting cont ) }) -test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", { +test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207) (#230) (undocumented)", { # Default N for milliseconds is 6, so `%6S` (2 hour seconds, 1 for decimal, 3 for subseconds) expect_identical( naive_time_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%S", precision = "millisecond"), @@ -303,6 +301,16 @@ test_that("parsing rounds parsed subsecond components more precise than the resu ) }) +test_that("parsing fails when undocumented rounding behavior would result in invalid 60 second component (#230) (undocumented)", { + expect_identical( + expect_warning( + naive_time_parse("2019-01-01 
01:01:59.550", format = "%Y-%m-%d %H:%M:%6S", precision = "second"), + class = "clock_warning_parse_failures" + ), + as_naive_time(year_month_day(NA, NA, NA, NA, NA, NA)) + ) +}) + # ------------------------------------------------------------------------------ # format() diff --git a/tests/testthat/test-zoned-time.R b/tests/testthat/test-zoned-time.R index cc0a035c..3b344e23 100644 --- a/tests/testthat/test-zoned-time.R +++ b/tests/testthat/test-zoned-time.R @@ -248,7 +248,7 @@ test_that("leftover subseconds result in a parse failure", { ) }) -test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", { +test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207) (#230) (undocumented)", { x <- "2019-01-01 01:01:01.1238-05:00[America/New_York]" # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up @@ -258,6 +258,20 @@ test_that("parsing rounds parsed subsecond components more precise than the resu ) }) +test_that("parsing fails when undocumented rounding behavior would result in invalid 60 second component (#230) (undocumented)", { + x <- "2019-01-01 01:01:59.550-05:00[America/New_York]" + + # Requesting `%6S` parses the full `59.550`, which is immediately rounded to `60` which looks invalid. + # The correct way to do this is to parse the milliseconds, then round. + expect_identical( + expect_warning( + zoned_time_parse_complete(x, precision = "second", format = "%Y-%m-%d %H:%M:%6S%Ez[%Z]"), + class = "clock_warning_parse_failures" + ), + as_zoned_time(as_naive_time(year_month_day(NA, NA, NA, NA, NA, NA)), zone = "UTC") + ) +}) + # ------------------------------------------------------------------------------ # zoned_time_parse_abbrev()