Permalink
Browse files

Add chaining functions

  • Loading branch information...
hadley committed Oct 9, 2013
1 parent 77227fc commit 57f76ed560ac52590f7ded3d013db8fb19f7d627
Showing with 189 additions and 0 deletions.
  1. +1 −0 DESCRIPTION
  2. +3 −0 NAMESPACE
  3. +96 −0 R/chain.r
  4. +89 −0 man/chain.Rd
View
@@ -76,3 +76,4 @@ Collate:
'zzz.r'
'query-bq.r'
'src-bigquery.r'
+ 'chain.r'
View
@@ -196,11 +196,14 @@ S3method(ungroup,tbl_dt)
S3method(ungroup,tbl_sql)
S3method(unique,sql)
S3method(update,tbl_sql)
+export("%.%")
export(.datatable.aware)
export(anti_join)
export(arrange)
export(as.tbl)
export(build_sql)
+export(chain)
+export(chain_q)
export(collapse)
export(collect)
export(compute)
View
@@ -0,0 +1,96 @@
+#' Chain together multiple operations.
+#'
+#' The downside of the functional nature of dplyr is that when you combine
+#' multiple data manipulation operations, you have to read from the inside
+#' out and the arguments may be very distant to the function call. These
+#' functions providing an alternative way of calling dplyr (and other data
+#' manipulation) functions that you read can from left to right.
+#'
+#' The functions work via simple substitution so that \code{chain(x, f(y))} or
+#' \code{x %.% f(y)} is translated into \code{f(x, y)}.
+#'
+#' @param x,y A dataset and function to apply to it
+#' @param ...,calls A sequence of data transformations, starting with a dataset.
+#' The first argument of each call should be omitted - the value of the
+#' previous step will be substituted in automatically. Use \code{chain} and
+#' \code{...} when working interactive; use \code{chain_q} and \code{calls}
+#' when calling from another function.
+#' @param env Environment in which to evaluation expressions. In ordinary
+#' operation you should not need to set this parameter.
+#' @export
+#' @examples
+#' # If you're performing many operations you can either do step by step
+#' a1 <- group_by(hflights, Year, Month, DayofMonth)
+#' a2 <- select(a1, Year:DayofMonth, ArrDelay, DepDelay)
+#' a3 <- summarise(a2,
+#' arr = mean(ArrDelay, na.rm = TRUE),
+#' dep = mean(DepDelay, na.rm = TRUE))
+#' a4 <- filter(a3, arr > 30 | dep > 30)
+#'
+#' # If you don't want to save the intermediate results, you need to
+#' # wrap the functions:
+#' filter(
+#' summarise(
+#' select(
+#' group_by(hflights, Year, Month, DayofMonth),
+#' Year:DayofMonth, ArrDelay, DepDelay
+#' ),
+#' arr = mean(ArrDelay, na.rm = TRUE),
+#' dep = mean(DepDelay, na.rm = TRUE)
+#' ),
+#' arr > 30 | dep > 30
+#' )
+#'
+#' # This is difficult to read because the order of the operations is from
+#' # inside to out, and the arguments are a long way away from the function.
+#' # Alternatively you can use chain or %.% to sequence the operations
+#' # linearly:
+#'
+#' hflights %.%
+#' group_by(Year, Month, DayofMonth) %.%
+#' select(Year:DayofMonth, ArrDelay, DepDelay) %.%
+#' summarise(
+#' arr = mean(ArrDelay, na.rm = TRUE),
+#' dep = mean(DepDelay, na.rm = TRUE)
+#' ) %.%
+#' filter(a3, arr > 30 | dep > 30)
+#'
+#' chain(
+#' hflights,
+#' group_by(Year, Month, DayofMonth),
+#' select(Year:DayofMonth, ArrDelay, DepDelay),
+#' summarise(
+#' arr = mean(ArrDelay, na.rm = TRUE),
+#' dep = mean(DepDelay, na.rm = TRUE)
+#' ),
+#' filter(a3, arr > 30 | dep > 30)
+#' )
+chain <- function(..., env = parent.frame()) {
+ chain_q(dots(...), env = env)
+}
+
+#' @export
+#' @rdname chain
+chain_q <- function(calls, env = parent.frame()) {
+ if (length(calls) == 0) return()
+ if (length(calls) == 1) return(eval(calls[[1]], env))
+
+ # New environemnt for evalution - inherits from parent frame, and
+ # contains unusually named (to avoid conflicts) variable to represent
+ # result of previous computation
+ e <- new.env(parent = env)
+ e$`__prev` <- eval(calls[[1]], env)
+
+ for(call in calls[-1]) {
+ new_call <- as.call(c(call[[1]], quote(`__prev`), as.list(call[-1])))
+ e$`__prev` <- eval(new_call, e)
+ }
+
+ e$`__prev`
+}
+
+#' @export
+#' @rdname chain
+"%.%" <- function(x, y) {
+ chain_q(list(substitute(x), substitute(y)), env = parent.frame())
+}
View
@@ -0,0 +1,89 @@
+\name{chain}
+\alias{\%.\%}
+\alias{chain}
+\alias{chain_q}
+\title{Chain together multiple operations.}
+\usage{
+ chain(..., env = parent.frame())
+
+ chain_q(calls, env = parent.frame())
+
+ %.%(x, y)
+}
+\arguments{
+ \item{x,y}{A dataset and function to apply to it}
+
+ \item{...,calls}{A sequence of data transformations,
+ starting with a dataset. The first argument of each call
+ should be omitted - the value of the previous step will
+ be substituted in automatically. Use \code{chain} and
+ \code{...} when working interactive; use \code{chain_q}
+ and \code{calls} when calling from another function.}
+
+ \item{env}{Environment in which to evaluation
+ expressions. In ordinary operation you should not need to
+ set this parameter.}
+}
+\description{
+ The downside of the functional nature of dplyr is that
+ when you combine multiple data manipulation operations,
+ you have to read from the inside out and the arguments
+ may be very distant to the function call. These functions
+ providing an alternative way of calling dplyr (and other
+ data manipulation) functions that you read can from left
+ to right.
+}
+\details{
+ The functions work via simple substitution so that
+ \code{chain(x, f(y))} or \code{x %.% f(y)} is translated
+ into \code{f(x, y)}.
+}
+\examples{
+# If you're performing many operations you can either do step by step
+a1 <- group_by(hflights, Year, Month, DayofMonth)
+a2 <- select(a1, Year:DayofMonth, ArrDelay, DepDelay)
+a3 <- summarise(a2,
+ arr = mean(ArrDelay, na.rm = TRUE),
+ dep = mean(DepDelay, na.rm = TRUE))
+a4 <- filter(a3, arr > 30 | dep > 30)
+
+# If you don't want to save the intermediate results, you need to
+# wrap the functions:
+filter(
+ summarise(
+ select(
+ group_by(hflights, Year, Month, DayofMonth),
+ Year:DayofMonth, ArrDelay, DepDelay
+ ),
+ arr = mean(ArrDelay, na.rm = TRUE),
+ dep = mean(DepDelay, na.rm = TRUE)
+ ),
+ arr > 30 | dep > 30
+)
+
+# This is difficult to read because the order of the operations is from
+# inside to out, and the arguments are a long way away from the function.
+# Alternatively you can use chain or \%.\% to sequence the operations
+# linearly:
+
+hflights \%.\%
+ group_by(Year, Month, DayofMonth) \%.\%
+ select(Year:DayofMonth, ArrDelay, DepDelay) \%.\%
+ summarise(
+ arr = mean(ArrDelay, na.rm = TRUE),
+ dep = mean(DepDelay, na.rm = TRUE)
+ ) \%.\%
+ filter(a3, arr > 30 | dep > 30)
+
+chain(
+ hflights,
+ group_by(Year, Month, DayofMonth),
+ select(Year:DayofMonth, ArrDelay, DepDelay),
+ summarise(
+ arr = mean(ArrDelay, na.rm = TRUE),
+ dep = mean(DepDelay, na.rm = TRUE)
+ ),
+ filter(a3, arr > 30 | dep > 30)
+)
+}
+

0 comments on commit 57f76ed

Please sign in to comment.