Skip to content

Commit

Permalink
Move Lahman and hflights back to suggests.
Browse files Browse the repository at this point in the history
Closes #508
  • Loading branch information
hadley committed Aug 27, 2014
1 parent fd71751 commit 1d8b2e2
Show file tree
Hide file tree
Showing 35 changed files with 72 additions and 65 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ Imports:
utils,
R6,
Rcpp,
magrittr,
Lahman,
hflights
magrittr
Suggests:
RSQLite,
RSQLite.extfuns,
Expand All @@ -31,7 +29,9 @@ Suggests:
knitr,
microbenchmark,
ggplot2,
mgcv
mgcv,
Lahman,
hflights
VignetteBuilder: knitr
LazyData: yes
LinkingTo: Rcpp (>= 0.11.1),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dplyr 0.2.0.99

* Lahman and hflights are (once again) suggested packages. This means
many demos will not work unless you explicitly install them with
`install.packages(c("Lahman", "hflights"))` (#508).

* Switched from RC to R6.

* Change first argument name of `group_by()` to `.data` so you can create
Expand Down
6 changes: 3 additions & 3 deletions R/bench-compare.r
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
#' @seealso \code{\link{src_local}} for working with local data
#' @examples
#' \donttest{
#' if (require("microbenchmark")) {
#' lahman_local <- lahman_srcs("df", "dt", "cpp")
#' if (require("microbenchmark") && has_lahman()) {
#' lahman_local <- lahman_srcs("df", "dt")
#' teams <- lapply(lahman_local, function(x) x %>% tbl("Teams"))
#'
#' compare_tbls(teams, function(x) x %>% filter(yearID == 2010))
Expand All @@ -38,7 +38,7 @@
#' # You can also supply arbitrary additional arguments to bench_tbls
#' # if there are other operations you'd like to compare.
#' bench_tbls(teams, function(x) x %>% filter(yearID == 2010),
#' base = subset(Teams, yearID == 2010))
#' base = subset(Lahman::Teams, yearID == 2010))
#'
#' # A more complicated example using multiple tables
#' setup <- function(src) {
Expand Down
3 changes: 2 additions & 1 deletion R/chain.r
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' @export
#' @examples
#' # If you're performing many operations you can either do step by step
#' data("hflights", package = "hflights")
#' if (require("hflights")) {
#' a1 <- group_by(hflights, Year, Month, DayofMonth)
#' a2 <- select(a1, Year:DayofMonth, ArrDelay, DepDelay)
#' a3 <- summarise(a2,
Expand Down Expand Up @@ -61,6 +61,7 @@
#' dep = mean(DepDelay, na.rm = TRUE)
#' ) %>%
#' filter(arr > 30 | dep > 30)
#' }
chain <- function(..., env = parent.frame()) {
# Defunct 0.3. Remove in 0.4
stop("Chain is defunct Please use %>%", call. = FALSE)
Expand Down
3 changes: 3 additions & 0 deletions R/data-lahman.r
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ cache_lahman <- function(type, ...) {
#' @rdname lahman
#' @export
has_lahman <- function(type, ...) {
  # Lahman is an optional package: without it there is nothing to check,
  # so report FALSE instead of raising an error.
  lahman_installed <- requireNamespace("Lahman", quietly = TRUE)
  if (!lahman_installed) {
    return(FALSE)
  }

  # Called without `type`: the caller only asks whether the package itself
  # can be loaded.
  if (missing(type)) {
    return(TRUE)
  }

  # Otherwise try to build the requested source, forwarding `...`.
  # NOTE(review): presumably `succeeds()` yields TRUE when the expression
  # evaluates without error -- confirm against its definition.
  succeeds(lahman_src(type, ...), quiet = TRUE)
}

Expand Down
2 changes: 0 additions & 2 deletions R/data-temp.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#'
#' temp_srcs(local)
#' temp_srcs(db)
#'
#' mtcars_tbls <- temp_tbls(local, mtcars)
#' }
temp_srcs <- function(..., quiet = NULL) {
load_srcs(temp_src, c(...), quiet = quiet)
Expand Down
3 changes: 2 additions & 1 deletion R/do.r
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@
#' compare <- models %>% do(aov = anova(.$mod_linear, .$mod_quad))
#' # compare %>% summarise(p.value = aov$`Pr(>F)`)
#'
#' if (require("hflights")) {
#' # You can use it to do any arbitrary computation, like fitting a linear
#' # model. Let's explore how carrier departure delays vary over the time
#' data("hflights", package = "hflights")
#' carriers <- group_by(hflights, UniqueCarrier)
#' group_size(carriers)
#'
Expand All @@ -74,6 +74,7 @@
#' library(mgcv)
#' by_dest %>% do(smooth = gam(ArrDelay ~ s(DepTime) + Month, data = .))
#' }
#' }
do <- function(.data, ...) {
  # S3 generic: dispatch to the `do` method for the class of `.data`.
  UseMethod("do")
}

#' @export
Expand Down
5 changes: 2 additions & 3 deletions R/group-size.r
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#' Calculate group sizes.
#'
#'
#'
#' @param x a grouped tbl
#' @export
#' @examples
#' data("hflights", package = "hflights")
#' if (require("hflights")) {
#'
#' by_day <- hflights %>% group_by(Year, Month, DayofMonth)
#' n_groups(by_day)
Expand All @@ -14,6 +12,7 @@
#' by_dest <- hflights %>% group_by(Dest)
#' n_groups(by_dest)
#' group_size(by_dest)
#' }
group_size <- function(x) {
  # S3 generic: dispatch to the `group_size` method for the class of `x`.
  UseMethod("group_size")
}

#' @export
Expand Down
3 changes: 1 addition & 2 deletions R/grouped-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#' @param vars a list of quoted variables.
#' @export
#' @examples
#' if (require("data.table")) {
#' data("hflights", package = "hflights")
#' if (require("data.table") && require("hflights")) {
#' hflights_dt <- tbl_dt(hflights)
#' group_size(group_by(hflights_dt, Year, Month, DayofMonth))
#' group_size(group_by(hflights_dt, Dest))
Expand Down
5 changes: 2 additions & 3 deletions R/join-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
#' \code{copy} is \code{TRUE}, \code{y} will be converted into a data frame
#' @param ... included for compatibility with the generic; otherwise ignored.
#' @examples
#' data("Batting", package = "Lahman")
#' data("Master", package = "Lahman")
#'
#' if (require("Lahman")) {
#' batting_df <- tbl_df(Batting)
#' person_df <- tbl_df(Master)
#'
Expand All @@ -32,6 +30,7 @@
#' anti_join(batting_df, person_df)
#' # or people who didn't bat
#' anti_join(person_df, batting_df)
#' }
#' @name join.tbl_df
NULL

Expand Down
5 changes: 1 addition & 4 deletions R/join-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
#' @param x,y tbls to join
#' @param ... Included for compatibility with generic; otherwise ignored.
#' @examples
#' if (require("RSQLite") && require("RSQLite.extfuns")) {
#' data("Batting", package = "Lahman")
#' data("Master", package = "Lahman")
#'
#' if (require("data.table") && require("Lahman")) {
#' batting_dt <- tbl_dt(Batting)
#' person_dt <- tbl_dt(Master)
#'
Expand Down
3 changes: 1 addition & 2 deletions R/manip-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
#' @param .env The environment in which to evaluate arguments not included
#' in the data. The default should suffice for ordinary usage.
#' @examples
#' if (require("data.table")) {
#' if (require("data.table") && require("hflights")) {
#' # If you start with a data table, you end up with a data table
#' data("hflights", package = "hflights")
#' hflights <- as.data.table(hflights)
#' filter(hflights, Month == 1, DayofMonth == 1, Dest == "DFW")
#' head(select(hflights, Year:DayOfWeek))
Expand Down
3 changes: 1 addition & 2 deletions R/manip-grouped-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
#' @param inplace if \code{FALSE} (the default) the data frame will be copied
#' prior to modification to avoid changes propagating via reference.
#' @examples
#' if (require("data.table")) {
#' data("hflights", package = "hflights")
#' if (require("data.table") && require("hflights")) {
#' hflights2 <- tbl_dt(hflights)
#' by_dest <- group_by(hflights2, Dest)
#'
Expand Down
3 changes: 2 additions & 1 deletion R/partial-eval.r
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' @export
#' @keywords internal
#' @examples
#' data("Batting", package = "Lahman")
#' if (require("Lahman")) {
#' bdf <- tbl_df(Batting)
#' partial_eval(quote(year > 1980), bdf)
#'
Expand All @@ -48,6 +48,7 @@
#' partial_eval(quote(1 + 2 * 3))
#' x <- 1
#' partial_eval(quote(x ^ y))
#' }
partial_eval <- function(call, tbl = NULL, env = parent.frame()) {
if (is.atomic(call)) return(call)

Expand Down
7 changes: 5 additions & 2 deletions R/src-local.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
#' @keywords internal
#' @export
#' @examples
#' if (require("Lahman")) {
#' src_dt("Lahman")
#' src_df("Lahman")
#'
#' batting_df <- tbl(src_df("Lahman"), "Batting")
#'
#' if (require("data.table")) {
#' batting_dt <- tbl(src_dt("Lahman"), "Batting")
#' src_df("Lahman")
#' batting_dt <- tbl(src_dt("Lahman"), "Batting")
#' }
#' }
src_local <- function(tbl, pkg = NULL, env = NULL) {
if (!xor(is.null(pkg), is.null(env))) {
Expand Down
3 changes: 2 additions & 1 deletion R/tally.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param sort if \code{TRUE} will sort output in descending order of \code{n}
#' @export
#' @examples
#' library(Lahman)
#' if (require("Lahman")) {
#' batting_tbl <- tbl_df(Batting)
#' tally(group_by(batting_tbl, yearID))
#' tally(group_by(batting_tbl, yearID), sort = TRUE)
Expand All @@ -23,6 +23,7 @@
#'
#' # This looks a little nicer if you use the infix %>% operator
#' batting_tbl %>% group_by(playerID) %>% tally(sort = TRUE)
#' }
tally <- function(x, wt, sort = FALSE) {
if (missing(wt)) {
if ("n" %in% names(x)) {
Expand Down
4 changes: 2 additions & 2 deletions R/tbl-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' ds
#' as.data.frame(ds)
#'
#' data("Batting", package = "Lahman")
#' if (require("Lahman")) {
#' batting <- tbl_df(Batting)
#' dim(batting)
#' colnames(batting)
Expand Down Expand Up @@ -43,7 +43,6 @@
#' # mutate(stints, cumsum(stints))
#'
#' # Joins ---------------------------------------------------------------------
#' data("Master", "HallOfFame", package = "Lahman")
#' player_info <- select(tbl_df(Master), playerID, hofID, birthYear)
#' hof <- select(filter(tbl_df(HallOfFame), inducted == "Y"),
#' hofID, votedBy, category)
Expand All @@ -56,6 +55,7 @@
#' semi_join(player_info, hof)
#' # Find players not in hof
#' anti_join(player_info, hof)
#' }
tbl_df <- function(data) {
assert_that(is.data.frame(data))
tbl_df_impl(data)
Expand Down
3 changes: 2 additions & 1 deletion R/top-n.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @export
#' @examples
#' # Find 10 players with most games
#' data("Batting", package = "Lahman")
#' if (require("Lahman")) {
#' players <- group_by(tbl_df(Batting), playerID)
#' games <- tally(players, G)
#' top_n(games, 10, n)
Expand All @@ -26,6 +26,7 @@
#'
#' # Find year with most games for each player
#' tbl_df(Batting) %>% group_by(playerID) %>% top_n(1, G)
#' }
top_n <- function(x, n, wt) {
if (missing(wt)) {
vars <- tbl_vars(x)
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ You can install:
devtools::install_github("hadley/dplyr")
```

You'll probably also want to install the data packages used in most examples: `install.packages(c("hflights", "Lahman"))`.
To get started, read the notes below, then read the intro vignette: `vignette("introduction", package = "dplyr")`. To make the most of dplyr, I also recommend that you familiarise yourself with the principles of [tidy data](http://vita.had.co.nz/papers/tidy-data.html): this will help you get your data into a form that works well with dplyr, ggplot2 and R's many modelling functions.

If you encounter a clear bug, please file a minimal reproducible example on [github](https://github.com/hadley/dplyr/issues). For questions and other discussion, please use the [manipulatr mailing list](https://groups.google.com/group/manipulatr).
Expand Down
6 changes: 3 additions & 3 deletions man/bench_compare.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ multiple sources.
}
\examples{
\donttest{
if (require("microbenchmark")) {
lahman_local <- lahman_srcs("df", "dt", "cpp")
if (require("microbenchmark") && has_lahman()) {
lahman_local <- lahman_srcs("df", "dt")
teams <- lapply(lahman_local, function(x) x \%>\% tbl("Teams"))

compare_tbls(teams, function(x) x \%>\% filter(yearID == 2010))
Expand All @@ -57,7 +57,7 @@ bench_tbls(teams, function(x) x \%>\% filter(yearID == 2010))
# You can also supply arbitrary additional arguments to bench_tbls
# if there are other operations you'd like to compare.
bench_tbls(teams, function(x) x \%>\% filter(yearID == 2010),
base = subset(Teams, yearID == 2010))
base = subset(Lahman::Teams, yearID == 2010))

# A more complicated example using multiple tables
setup <- function(src) {
Expand Down
3 changes: 2 additions & 1 deletion man/chain.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ standardised and \code{\%.\%} is much more popular.
}
\examples{
# If you're performing many operations you can either do step by step
data("hflights", package = "hflights")
if (require("hflights")) {
a1 <- group_by(hflights, Year, Month, DayofMonth)
a2 <- select(a1, Year:DayofMonth, ArrDelay, DepDelay)
a3 <- summarise(a2,
Expand Down Expand Up @@ -82,4 +82,5 @@ hflights \%>\%
) \%>\%
filter(arr > 30 | dep > 30)
}
}

3 changes: 2 additions & 1 deletion man/do.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ models
compare <- models \%>\% do(aov = anova(.$mod_linear, .$mod_quad))
# compare \%>\% summarise(p.value = aov$`Pr(>F)`)

if (require("hflights")) {
# You can use it to do any arbitrary computation, like fitting a linear
# model. Let's explore how carrier departure delays vary over the time
data("hflights", package = "hflights")
carriers <- group_by(hflights, UniqueCarrier)
group_size(carriers)

Expand All @@ -93,4 +93,5 @@ library(mgcv)
by_dest \%>\% do(smooth = gam(ArrDelay ~ s(DepTime) + Month, data = .))
}
}
}

3 changes: 2 additions & 1 deletion man/group_size.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ n_groups(x)
Calculate group sizes.
}
\examples{
data("hflights", package = "hflights")
if (require("hflights")) {

by_day <- hflights \%>\% group_by(Year, Month, DayofMonth)
n_groups(by_day)
Expand All @@ -25,4 +25,5 @@ by_dest <- hflights \%>\% group_by(Dest)
n_groups(by_dest)
group_size(by_dest)
}
}

3 changes: 1 addition & 2 deletions man/grouped_dt.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ method on a data table or tbl: this will take care of capturing
the unevaluated expressions for you.
}
\examples{
if (require("data.table")) {
data("hflights", package = "hflights")
if (require("data.table") && require("hflights")) {
hflights_dt <- tbl_dt(hflights)
group_size(group_by(hflights_dt, Year, Month, DayofMonth))
group_size(group_by(hflights_dt, Dest))
Expand Down
5 changes: 2 additions & 3 deletions man/join.tbl_df.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ See \code{\link{join}} for a description of the general purpose of the
functions.
}
\examples{
data("Batting", package = "Lahman")
data("Master", package = "Lahman")
if (require("Lahman")) {
batting_df <- tbl_df(Batting)
person_df <- tbl_df(Master)
Expand All @@ -54,4 +52,5 @@ anti_join(batting_df, person_df)
# or people who didn't bat
anti_join(person_df, batting_df)
}
}

5 changes: 1 addition & 4 deletions man/join.tbl_dt.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@ See \code{\link{join}} for a description of the general purpose of the
functions.
}
\examples{
if (require("RSQLite") && require("RSQLite.extfuns")) {
data("Batting", package = "Lahman")
data("Master", package = "Lahman")
if (require("data.table") && require("Lahman")) {
batting_dt <- tbl_dt(Batting)
person_dt <- tbl_dt(Master)
Expand Down
Loading

0 comments on commit 1d8b2e2

Please sign in to comment.