Skip to content

Commit

Permalink
Document when period.apply() is called with mean
Browse files Browse the repository at this point in the history
Add message to calls to period.apply() and its apply.*ly() convenience
functions, and add note section to documentation.

Fixes #124.
  • Loading branch information
joshuaulrich committed Aug 2, 2023
1 parent fffe583 commit c4452e8
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 23 deletions.
42 changes: 42 additions & 0 deletions R/period.apply.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,36 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

.mean_by_column_message <-
function(caller)
{
if (getOption("xts.message.period.apply.mean", TRUE)) {
message("NOTE: `", caller, "(..., FUN = mean)` operates by column, unlike other math\n ",
"functions (e.g. median, sum, var, sd). Please use `FUN = colMeans` instead,\n ",
"and use `FUN = function(x) mean(x)` to take the mean of all columns. Set\n ",
"`options(xts.message.period.apply.mean = FALSE)` to suppress this message.")
}

# changing this behavior will break code in the following dependencies:
#
# ATAforecasting/R/ATA_Find_Multi_Freq.R
# bidask/R/utils.R
# dsa/R/HelperFunctions.R # {.tomonth}
# RavenR/inst/doc/Introduction_to_RavenR.R
# RavenR/inst/doc/Introduction_to_RavenR.Rmd
# RavenR/R/rvn_apply_wyearly.R
# RavenR/R/rvn_monthly_vbias.R
# rts/man/apply.monthly.Rd
# rts/man/period.apply.Rd
# RWDataPlyr/R/xts_helperFunctions.R
}

`period.apply` <-
function(x, INDEX, FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("period.apply")
}
x <- try.xts(x, error = FALSE)
FUN <- match.fun(FUN)

Expand Down Expand Up @@ -74,33 +101,48 @@ function (x, INDEX, FUN, ...)
`apply.daily` <-
function(x,FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("apply.daily")
}
ep <- endpoints(x,'days')
period.apply(x,ep,FUN, ...)
}
`apply.weekly` <-
function(x,FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("apply.weekly")
}
ep <- endpoints(x,'weeks')
period.apply(x,ep,FUN, ...)
}

`apply.monthly` <-
function(x,FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("apply.monthly")
}
ep <- endpoints(x,'months')
period.apply(x,ep,FUN, ...)
}

`apply.quarterly` <-
function(x,FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("apply.quarterly")
}
ep <- endpoints(x,'quarters')
period.apply(x,ep,FUN, ...)
}

`apply.yearly` <-
function(x,FUN, ...)
{
if (deparse(substitute(FUN)) == "mean") {
.mean_by_column_message("apply.yearly")
}
ep <- endpoints(x,'years')
period.apply(x,ep,FUN, ...)
}
Expand Down
39 changes: 23 additions & 16 deletions inst/tinytest/test-period.apply.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,59 +9,59 @@
info_msg <- "test.duplicate_INDEX"
x <- .xts(1:10, 1:10)
ep <- c(0, 2, 4, 6, 8, 10)
nodup <- period.apply(x, ep, mean)
dup <- period.apply(x, c(ep, 10), mean)
nodup <- period.apply(x, ep, sum)
dup <- period.apply(x, c(ep, 10), sum)
expect_identical(nodup, dup, info = info_msg)

info_msg <- "test.duplicate_INDEX_vector"
x <- 1:10
ep <- c(0, 2, 4, 6, 8, 10)
nodup <- period.apply(x, ep, mean)
dup <- period.apply(x, c(ep, 10), mean)
nodup <- period.apply(x, ep, sum)
dup <- period.apply(x, c(ep, 10), sum)
expect_identical(nodup, dup, info = info_msg)

info_msg <- "test.unsorted_INDEX"
x <- .xts(1:10, 1:10)
ep.s <- c(2, 4, 6, 8)
ep.u <- sample(ep.s)
s <- period.apply(x, c(0, ep.s, 10), mean)
u <- period.apply(x, c(0, ep.u, 10), mean)
s <- period.apply(x, c(0, ep.s, 10), sum)
u <- period.apply(x, c(0, ep.u, 10), sum)
expect_identical(s, u, info = info_msg)

info_msg <- "test.unsorted_INDEX_vector"
x <- 1:10
ep.s <- c(2, 4, 6, 8)
ep.u <- sample(ep.s)
s <- period.apply(x, c(0, ep.s, 10), mean)
u <- period.apply(x, c(0, ep.u, 10), mean)
s <- period.apply(x, c(0, ep.s, 10), sum)
u <- period.apply(x, c(0, ep.u, 10), sum)
expect_identical(s, u, info = info_msg)

info_msg <- "test.INDEX_starts_with_zero"
x <- .xts(1:10, 1:10)
ep <- c(2, 4, 6, 8, 10)
a <- period.apply(x, ep, mean)
z <- period.apply(x, c(0, ep), mean)
a <- period.apply(x, ep, sum)
z <- period.apply(x, c(0, ep), sum)
expect_identical(a, z, info = info_msg)

info_msg <- "test.INDEX_starts_with_zero_vector"
x <- 1:10
ep <- c(2, 4, 6, 8, 10)
a <- period.apply(x, ep, mean)
z <- period.apply(x, c(0, ep), mean)
a <- period.apply(x, ep, sum)
z <- period.apply(x, c(0, ep), sum)
expect_identical(a, z, info = info_msg)

info_msg <- "test.INDEX_ends_with_lengthX"
x <- .xts(1:10, 1:10)
ep <- c(0, 2, 4, 6, 8)
a <- period.apply(x, ep, mean)
z <- period.apply(x, c(ep, 10), mean)
a <- period.apply(x, ep, sum)
z <- period.apply(x, c(ep, 10), sum)
expect_identical(a, z, info = info_msg)

info_msg <- "test.INDEX_ends_with_lengthX_vector"
x <- 1:10
ep <- c(0, 2, 4, 6, 8)
a <- period.apply(x, ep, mean)
z <- period.apply(x, c(ep, 10), mean)
a <- period.apply(x, ep, sum)
z <- period.apply(x, c(ep, 10), sum)
expect_identical(a, z, info = info_msg)

# check specific period.* functions
Expand Down Expand Up @@ -118,3 +118,10 @@ x <- xts(rnorm(10000), i - 10000:1 * 60)
d <- to.daily(x)
dateseq <- seq(as.Date("2013-01-29"), as.Date("2013-02-05"), "day")
expect_equivalent(index(d), dateseq, info = info_msg)

# message for FUN = mean
expect_message(period.apply(x, e, mean), pattern = "period\\.apply\\(..., FUN = mean\\)")
expect_message(apply.daily(x, mean), pattern = "apply\\.daily\\(..., FUN = mean\\)")
expect_message(apply.monthly(x, mean), pattern = "apply\\.monthly\\(..., FUN = mean\\)")
expect_message(apply.quarterly(x, mean), pattern = "apply\\.quarterly\\(..., FUN = mean\\)")
expect_message(apply.yearly(x, mean), pattern = "apply\\.yearly\\(..., FUN = mean\\)")
19 changes: 18 additions & 1 deletion man/apply.monthly.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,23 @@ Essentially a wrapper to the \pkg{xts} functions
\code{endpoints} and \code{period.apply}, mainly as
a convenience.
}
\note{
When \code{FUN = mean} the results will contain one column for every column
in the input, which is different from other math functions (e.g. \code{median},
\code{sum}, \code{prod}, \code{sd}, etc.).

\code{FUN = mean} works by column because the default method \code{stats::mean}
used to work by column for matrices and data.frames. R Core changed the
behavior of \code{mean} to always return one column in order to be consistent
with the other math functions. This broke some \pkg{xts} dependencies and
\code{mean.xts} was created to maintain the original behavior.

Using \code{FUN = mean} will print a message that describes this inconsistency.
To avoid the message and confusion, use \code{FUN = colMeans} to calculate
means by column and use \code{FUN = function(x) mean} to calculate one mean
for all the data. Set \code{options(xts.message.period.apply.mean = FALSE)}
to suppress this message.
}
\value{
A vector of results produced by \code{FUN}, corresponding
to the appropriate periods.
Expand All @@ -47,7 +64,7 @@ xts.ts <- xts(rnorm(231),as.Date(13514:13744,origin="1970-01-01"))
start(xts.ts)
end(xts.ts)

apply.monthly(xts.ts,mean)
apply.monthly(xts.ts,colMeans)
apply.monthly(xts.ts,function(x) var(x))
}
\keyword{ utilities }
21 changes: 19 additions & 2 deletions man/period.apply.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,23 @@ intervals. For example, when \code{INDEX} is the result of a call to
0, and ends with \code{NROW(x)}. All those conditions are true of vectors
returned by \code{endpoints()}.
}
\note{
When \code{FUN = mean} the results will contain one column for every column
in the input, which is different from other math functions (e.g. \code{median},
\code{sum}, \code{prod}, \code{sd}, etc.).
\code{FUN = mean} works by column because the default method \code{stats::mean}
used to work by column for matrices and data.frames. R Core changed the
behavior of \code{mean} to always return one column in order to be consistent
with the other math functions. This broke some \pkg{xts} dependencies and
\code{mean.xts} was created to maintain the original behavior.
Using \code{FUN = mean} will print a message that describes this inconsistency.
To avoid the message and confusion, use \code{FUN = colMeans} to calculate
means by column and use \code{FUN = function(x) mean} to calculate one mean
for all the data. Set \code{options(xts.message.period.apply.mean = FALSE)}
to suppress this message.
}
\value{
An object with \code{length(INDEX) - 1} observations (assuming \code{INDEX}
starts with 0 and ends with \code{NROW(x)}).
Expand All @@ -39,8 +56,8 @@ starts with 0 and ends with \code{NROW(x)}).
\examples{
zoo.data <- zoo(rnorm(31)+10,as.Date(13514:13744,origin="1970-01-01"))
ep <- endpoints(zoo.data,'weeks')
period.apply(zoo.data, INDEX=ep, FUN=function(x) mean(x))
period.apply(zoo.data, INDEX=ep, FUN=mean) #same
period.apply(zoo.data, INDEX=ep, FUN=function(x) colMeans(x))
period.apply(zoo.data, INDEX=ep, FUN=colMeans) #same
period.apply(letters,c(0,5,7,26), paste0)
}
Expand Down
8 changes: 4 additions & 4 deletions vignettes/xts-faq.Rnw
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ You can use \code{apply.daily}, or \code{period.apply} more generally:
<<>>=
sample.xts <- xts(1:50, seq(as.POSIXct("1970-01-01"),
as.POSIXct("1970-01-03")-1, length=50))
apply.daily(sample.xts, mean)
period.apply(sample.xts, endpoints(sample.xts, "days"), mean)
period.apply(sample.xts, endpoints(sample.xts, "hours", 6), mean)
apply.daily(sample.xts, colMeans)
period.apply(sample.xts, endpoints(sample.xts, "days"), colMeans)
period.apply(sample.xts, endpoints(sample.xts, "hours", 6), colMeans)
@

\q{How can I process daily data for a specific time subset?}
Expand All @@ -183,7 +183,7 @@ First use time-of-day subsetting to extract the time range you want to work on (
the leading \code{"T"} and leading zeros are required for each time in the range:
\code{"T06:00"}), then use \code{apply.daily} to apply your function to the subset:
<<eval=FALSE>>=
apply.daily(sample.xts['T06:00/T17:00',], mean)
apply.daily(sample.xts['T06:00/T17:00',], colMeans)
@

\q{How can I analyze my irregular data in regular blocks, adding observations
Expand Down

0 comments on commit c4452e8

Please sign in to comment.