diff --git a/_bookdown.yml b/_bookdown.yml index bd89fd1b..7ecbb1e8 100644 --- a/_bookdown.yml +++ b/_bookdown.yml @@ -40,7 +40,6 @@ rmd_files: - "rmarkdown-workflow.Rmd" - "appendixes.Rmd" - - "references.Rmd" before_chapter_script: "_common.R" book_filename: "r4ds-solutions" diff --git a/appendixes.Rmd b/appendixes.Rmd index ac0dd46c..342873c5 100644 --- a/appendixes.Rmd +++ b/appendixes.Rmd @@ -1,64 +1,3 @@ # (PART) Appendixes {-} -# Functions for Primary and Foreign Keys {-} - -The chapter [Relational data](#relational-data) describes primary key and foreign key relations. -However, the dplyr package does not provide functions to check for these. - -The `any_missing()` function is a helper function that will be used later. -```{r} -any_missing <- function(x) any(is.na(x)) -``` - -The `is_primary_key()` function checks whether variables are -```{r} -#' Check whether variables are a primary key -#' -#' Check whether a set of variables is a primary key for a data frame. -#' Unlike SQL databases, R data frames do not enforce a primary key -#' constraint. This function checks whether a set of variables uniquely -#' identify a row. -#' -#' @param tbl A tbl. -#' @param ... One or more unquoted expressions separated by commas. -#' You can treat variable names like they are positions. This uses -#' The same semantics as [dplyr::select()]. -#' @return A logical vector of length one that is `TRUE` if the -#' variables are primary key, and `FALSE` otherwise. -is_primary_key <- function(tbl, ...) { - variables <- quos(...) 
- # no elements can be missing - has_nulls <- summarise_at(tbl, vars(UQS(variables)), any_missing) - if (any(as.logical(has_nulls))) { - return(FALSE) - } - nrow(distinct(tbl, !!!variables)) == nrow(tbl) -} - -foo <- tribble( - ~a, ~b, ~c, - 1, NA, 1, - 2, 2, 1, - 3, 3, 3 -) - -is_key(foo, a) -is_key(foo, b) -is_key(foo, c) - -is_key(foo, 1:3) -``` - -The `is_foreign_key()` function checks whether -```{r is_foreign_key} -# check that columns in y are are foreign key of x -is_foreign_key <- function(x, y, by = NULL) { - # check that y is a primary key - if (!rlang::eval_tidy(quo(is_primary_key(y, !!!by)))) { - return(FALSE) - } - # check that all x are found in y - !nrow(anti_join(x, y, by = by)) -} - -``` +# References {-} diff --git a/bin/keys_example.R b/bin/keys_example.R new file mode 100644 index 00000000..da2d39df --- /dev/null +++ b/bin/keys_example.R @@ -0,0 +1,48 @@ +any_missing <- function(x) any(is.na(x)) + +#' Check whether variables are a primary key +#' +#' Check whether a set of variables is a primary key for a data frame. +#' Unlike SQL databases, R data frames do not enforce a primary key +#' constraint. This function checks whether a set of variables uniquely +#' identify a row. +#' +#' @param tbl A tbl. +#' @param ... One or more unquoted expressions separated by commas. +#' You can treat variable names like they are positions. This uses +#' the same semantics as [dplyr::select()]. +#' @return A logical vector of length one that is `TRUE` if the +#' variables are a primary key, and `FALSE` otherwise. +is_primary_key <- function(tbl, ...) { + variables <- quos(...) 
+ # no elements can be missing + has_nulls <- summarise_at(tbl, vars(!!!variables), any_missing) + if (any(as.logical(has_nulls))) { + return(FALSE) + } + nrow(distinct(tbl, !!!variables)) == nrow(tbl) +} + +foo <- tribble( + ~a, ~b, ~c, + 1, NA, 1, + 2, 2, 1, + 3, 3, 3 +) + +is_primary_key(foo, a) +is_primary_key(foo, b) +is_primary_key(foo, c) + +is_primary_key(foo, 1:3) + +# check that the `by` columns of x are a foreign key referencing y +is_foreign_key <- function(x, y, by = NULL) { + # check that the `by` columns (as symbols, not strings) are a primary key of y + if (!rlang::eval_tidy(quo(is_primary_key(y, !!!rlang::syms(unname(by)))))) { + return(FALSE) + } + # check that all x are found in y + !nrow(anti_join(x, y, by = by)) +} + diff --git a/references.Rmd b/references.Rmd deleted file mode 100644 index b553324e..00000000 --- a/references.Rmd +++ /dev/null @@ -1 +0,0 @@ -# References {-}