Skip to content

Commit

Permalink
work on array/matrix handling (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
leeper committed May 8, 2017
1 parent 8b7f835 commit 8847a31
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 32 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
2.1.0 - Thomas J. Leeper

- Added `unf.array()` method. (#18)
- Added `sort` argument to `unf()`; set `sort = FALSE` to skip sorting the column UNF hashes so that column order is respected. (#18)

2.0.5 - Thomas J. Leeper

- Exported new function `unf_equal()` to better comply with R CMD check.
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: UNF
Version: 2.0.6
Version: 2.1.0
Title: Tools for Creating Universal Numeric Fingerprints for Data
Date: 2016-08-09
Date: 2017-05-08
Authors@R: c(person("Thomas", "Leeper", role = c("aut","cre"),
email = "thosjleeper@gmail.com"),
person("Micah", "Altman", role = c("aut")))
Expand All @@ -23,4 +23,4 @@ License: GPL-2
URL: https://github.com/leeper/UNF
BugReports: https://github.com/leeper/UNF/issues
VignetteBuilder: knitr
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ S3method(as.unfvector,ts)
S3method(print,UNF)
S3method(print,UNFtest)
S3method(print,unfvector)
S3method(unf,array)
S3method(unf,data.frame)
S3method(unf,default)
S3method(unf,list)
Expand Down
34 changes: 23 additions & 11 deletions R/unf.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' @description UNF is a cryptographic hash or signature that can be used to uniquely identify (a version of) a dataset, or a subset thereof.
#' @param x For \code{unf}, a vector, matrix, dataframe, or list; for \code{unf3}, \code{unf4}, \code{unf5}, a vector. If \code{x} is a dataframe or list with one variable or one vector element, respectively, \code{unf} returns the UNF for the single vector (which is consistent with the Dataverse implementation but ambiguous in the UNF standard). For algorithm versions < 5, all non-numeric vectors are treated as character.
#' @param version Version of the UNF algorithm. Allowed values are 3, 4, 4.1, 5, and 6. Always use the same version of the algorithm to check a UNF. Default for \code{unf} is 6 and default for \code{unf4} is 4 (but can also be set to 4.1, which is identical except for using SHA256 instead of MD5).
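#' @param sort A logical indicating whether to sort the column-level (variable) UNF hashes of a matrix or data frame before computing the final UNF hash. The default is \code{TRUE}, which makes the result independent of column order. If \code{FALSE}, column order is respected when calculating the final UNF hash. This can be useful for distinguishing two matrices that contain the same columns in a different order.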
#' @param digits The number of significant digits for rounding for numeric values. Default is 7L. Must be between 1 and 15.
#' @param characters The number of characters for truncation. Default is 128L. Must be greater than 1.
#' @param truncation The number of bits to truncate the UNF signature to. Default is 128L. Must be one of: 128,192,196,256.
Expand Down Expand Up @@ -125,32 +126,38 @@ unf.default <- function(x, version = 6, ...) {
}

#' @export
unf.data.frame <- function(x, version = 6, sort = TRUE, ...){
    # S3 method: computes a UNF for a data frame by hashing each column with the
    # version-specific routine and then hashing the vector of column hashes.
    #
    # x:       a data frame. If it has exactly one column, that column is
    #          passed to unf() directly (`sort` is irrelevant in that case).
    # version: UNF algorithm version; must be 3, 4, 4.1, 5, or 6.
    # sort:    if TRUE (default), the column hashes are sorted, with LC_COLLATE
    #          temporarily set to "C", before the final hash is computed; any
    #          other value leaves the hashes in column order.
    # ...:     forwarded to unf3()/unf4()/unf5()/unf6().
    #
    # Returns the object produced by the version-specific routine, with the
    # per-column hashes stored in `$variables` and the `sort` argument recorded
    # in the "sort" attribute. Stops with an error for an unrecognized version.
    if (length(x) == 1) {
        return(unf(x[[1]], version = version, ...))
    }
    if (isTRUE(sort)) {
        # `sort` is shadowed here by the logical argument, so the base function
        # must be referenced explicitly; `sort_fun <- sort` would bind TRUE and
        # the later call would fail with "could not find function".
        sort_fun <- base::sort
        locale <- Sys.getlocale(category = "LC_COLLATE")
        Sys.setlocale(category = "LC_COLLATE", "C")
        # add = TRUE so this restore never clobbers another exit handler
        on.exit(Sys.setlocale(category = "LC_COLLATE", locale), add = TRUE)
    } else {
        sort_fun <- identity
    }
    # Select the hashing routine once; it is used for the columns and again for
    # the vector of column hashes.
    if (version == 3) {
        hash_fun <- function(v) unf3(v, ...)
    } else if (version == 4) {
        hash_fun <- function(v) unf4(v, ...)
    } else if (version == 4.1) {
        hash_fun <- function(v) unf4(v, version = 4.1, ...)
    } else if (version == 5) {
        hash_fun <- function(v) unf5(v, ...)
    } else if (version == 6) {
        hash_fun <- function(v) unf6(v, ...)
    } else {
        stop("Unrecognized UNF version: must be 3, 4, 4.1, 5, or 6.")
    }
    vars <- sapply(x, function(i) hash_fun(i)$unf)
    out <- hash_fun(sort_fun(vars))
    out$variables <- vars
    attr(out, "sort") <- sort
    out
}

Expand All @@ -160,8 +167,13 @@ unf.list <- function(x, version = 6, ...) {
}

#' @export
unf.matrix <- function(x, version = 6, sort = TRUE, ...) {
    # S3 method for matrices: converts the matrix to a data frame and
    # re-dispatches unf() on it, forwarding `version`, `sort`, and `...`.
    # With sort = FALSE the final hash respects column order.
    unf(as.data.frame(x), version = version, sort = sort, ...)
}

#' @export
unf.array <- function(x, version = 6, sort = TRUE, ...) {
    # S3 method for arrays: coerce the array to a data frame, then hand it
    # back to unf() together with the caller's `version`, `sort`, and `...`.
    as_frame <- as.data.frame(x)
    unf(as_frame, version = version, sort = sort, ...)
}

#' @export
Expand Down
9 changes: 4 additions & 5 deletions man/UNF-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions man/as.unfvector.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions man/equal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions man/signifz.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/unf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8847a31

Please sign in to comment.