Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
49 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,64 +1,3 @@ | ||
# (PART) Appendixes {-} | ||
|
||
# Functions for Primary and Foreign Keys {-} | ||
|
||
The chapter [Relational data](#relational-data) describes primary key and foreign key relations. | ||
However, the dplyr package does not provide functions to check for these. | ||
|
||
The `any_missing()` function is a helper function that will be used later. | ||
```{r} | ||
# Report whether `x` contains at least one missing value.
any_missing <- function(x) {
  missing_flags <- is.na(x)
  any(missing_flags)
}
``` | ||
|
||
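The `is_primary_key()` function checks whether a set of variables is a primary key for a data frame, that is, whether they uniquely identify each row and contain no missing values. | ||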
```{r} | ||
#' Check whether variables are a primary key
#'
#' Check whether a set of variables is a primary key for a data frame.
#' Unlike SQL databases, R data frames do not enforce a primary key
#' constraint. This function checks whether a set of variables contains
#' no missing values and uniquely identifies each row.
#'
#' @param tbl A tbl.
#' @param ... One or more unquoted expressions separated by commas.
#'   You can treat variable names like they are positions. This uses
#'   the same semantics as [dplyr::select()].
#' @return A logical vector of length one that is `TRUE` if the
#'   variables are a primary key, and `FALSE` otherwise.
is_primary_key <- function(tbl, ...) {
  # Resolve the selection once, so that the missing-value check and the
  # uniqueness check look at exactly the same columns. Splicing the
  # expressions into distinct() instead would evaluate them as data
  # expressions rather than as a column selection.
  key_cols <- dplyr::select(tbl, ...)
  # no elements can be missing
  has_missing <- vapply(key_cols, any_missing, logical(1))
  if (any(has_missing)) {
    return(FALSE)
  }
  # the key is unique when dropping duplicate rows removes nothing
  nrow(dplyr::distinct(key_cols)) == nrow(key_cols)
}
# Example data: `a` is complete and unique, `b` contains a missing value,
# and `c` contains a duplicated value.
foo <- tribble(
  ~a, ~b, ~c,
  1, NA, 1,
  2, 2, 1,
  3, 3, 3
)

# Check each column on its own as a candidate key.
is_primary_key(foo, a)
is_primary_key(foo, b)
is_primary_key(foo, c)

# Check all three columns together, selected by position.
is_primary_key(foo, 1:3)
``` | ||
|
||
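The `is_foreign_key()` function checks whether the `by` columns of `x` are a foreign key referencing `y`: the matching columns of `y` must be a primary key, and every row of `x` must have a match in `y`. | ||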
```{r is_foreign_key} | ||
#' Check whether columns of `x` are a foreign key referencing `y`
#'
#' The `by` columns of `x` are a foreign key of `y` when the matching
#' columns of `y` are a primary key and every row of `x` has a match in `y`.
#'
#' @param x A tbl holding the referencing (foreign key) columns.
#' @param y A tbl holding the referenced (primary key) columns.
#' @param by A character vector of column names to join by, as accepted by
#'   [dplyr::anti_join()]. For a named vector such as `c("a" = "b")`, the
#'   names are columns of `x` and the values are columns of `y`. If `NULL`
#'   (the default), the columns common to `x` and `y` are used.
#' @return A logical vector of length one.
is_foreign_key <- function(x, y, by = NULL) {
  if (is.null(by)) {
    by <- intersect(names(x), names(y))
    if (length(by) == 0L) {
      stop(
        "`by` must be supplied when `x` and `y` have no common variables.",
        call. = FALSE
      )
    }
  }
  # check that y is a primary key
  # The key columns of `y` are the *values* of `by`. They are spliced as
  # symbols because symbols are valid both as column selections and as data
  # expressions, whereas a string handed to a data-masking verb is treated
  # as a constant value rather than a column name.
  y_key <- rlang::syms(unname(by))
  if (!rlang::eval_tidy(rlang::quo(is_primary_key(y, !!!y_key)))) {
    return(FALSE)
  }
  # check that all x are found in y
  nrow(dplyr::anti_join(x, y, by = by)) == 0L
}
``` | ||
# References {-} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Report whether `x` contains at least one missing value.
any_missing <- function(x) {
  missing_flags <- is.na(x)
  any(missing_flags)
}
|
||
#' Check whether variables are a primary key
#'
#' Check whether a set of variables is a primary key for a data frame.
#' Unlike SQL databases, R data frames do not enforce a primary key
#' constraint. This function checks whether a set of variables contains
#' no missing values and uniquely identifies each row.
#'
#' @param tbl A tbl.
#' @param ... One or more unquoted expressions separated by commas.
#'   You can treat variable names like they are positions. This uses
#'   the same semantics as [dplyr::select()].
#' @return A logical vector of length one that is `TRUE` if the
#'   variables are a primary key, and `FALSE` otherwise.
is_primary_key <- function(tbl, ...) {
  # Resolve the selection once, so that the missing-value check and the
  # uniqueness check look at exactly the same columns. Splicing the
  # expressions into distinct() instead would evaluate them as data
  # expressions rather than as a column selection.
  key_cols <- dplyr::select(tbl, ...)
  # no elements can be missing
  has_missing <- vapply(key_cols, any_missing, logical(1))
  if (any(has_missing)) {
    return(FALSE)
  }
  # the key is unique when dropping duplicate rows removes nothing
  nrow(dplyr::distinct(key_cols)) == nrow(key_cols)
}
|
||
# Example data: `a` is complete and unique, `b` contains a missing value,
# and `c` contains a duplicated value.
foo <- tribble(
  ~a, ~b, ~c,
  1, NA, 1,
  2, 2, 1,
  3, 3, 3
)

# Check each column on its own as a candidate key.
is_primary_key(foo, a)
is_primary_key(foo, b)
is_primary_key(foo, c)

# Check all three columns together, selected by position.
is_primary_key(foo, 1:3)
|
||
#' Check whether columns of `x` are a foreign key referencing `y`
#'
#' The `by` columns of `x` are a foreign key of `y` when the matching
#' columns of `y` are a primary key and every row of `x` has a match in `y`.
#'
#' @param x A tbl holding the referencing (foreign key) columns.
#' @param y A tbl holding the referenced (primary key) columns.
#' @param by A character vector of column names to join by, as accepted by
#'   [dplyr::anti_join()]. For a named vector such as `c("a" = "b")`, the
#'   names are columns of `x` and the values are columns of `y`. If `NULL`
#'   (the default), the columns common to `x` and `y` are used.
#' @return A logical vector of length one.
is_foreign_key <- function(x, y, by = NULL) {
  if (is.null(by)) {
    by <- intersect(names(x), names(y))
    if (length(by) == 0L) {
      stop(
        "`by` must be supplied when `x` and `y` have no common variables.",
        call. = FALSE
      )
    }
  }
  # check that y is a primary key
  # The key columns of `y` are the *values* of `by`. They are spliced as
  # symbols because symbols are valid both as column selections and as data
  # expressions, whereas a string handed to a data-masking verb is treated
  # as a constant value rather than a column name.
  y_key <- rlang::syms(unname(by))
  if (!rlang::eval_tidy(rlang::quo(is_primary_key(y, !!!y_key)))) {
    return(FALSE)
  }
  # check that all x are found in y
  nrow(dplyr::anti_join(x, y, by = by)) == 0L
}
|
This file was deleted.
Oops, something went wrong.