Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/sdcTools/sdcMicro
Browse files Browse the repository at this point in the history
  • Loading branch information
alexkowa committed Feb 4, 2020
2 parents 0c932f7 + f92a69b commit efa8b02
Show file tree
Hide file tree
Showing 31 changed files with 1,148 additions and 978 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: sdcMicro
Type: Package
Title: Statistical Disclosure Control Methods for Anonymization of Data and
Risk Estimation
Version: 5.4.2
Version: 5.5.0
Authors@R: c(
person("Matthias", "Templ", email="matthias.templ@gmail.com", role = c("aut", "cre"), comment=c(ORCID="0000-0002-8638-5276")),
person("Bernhard", "Meindl", email = "Bernhard.Meindl@statistik.gv.at", role = c("aut")),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ import(shinyBS)
import(tools)
import(xtable)
importFrom(DT,datatable)
importFrom(MASS,loglm)
importFrom(MASS,mvrnorm)
importFrom(VIM,gowerD)
importFrom(data.table,data.table)
importFrom(data.table,is.data.table)
importFrom(graphics,axis)
importFrom(graphics,box)
Expand Down
6 changes: 6 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 5.5.1
- bug in modRisk when the levels of a variable in the formulaM is greater than 10 (solved). Thanks to Ying Chen for the solution.

# 5.5.0
- gcc problems solved

# 5.4.1
- nothing new, but shiny did no longer export a specific function, so we resolved a warning

Expand Down
4 changes: 4 additions & 0 deletions R/modRisk.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
#' @return Two global risk measures and some model output given the specified model. If this method
#' is applied to an \code{\link{sdcMicroObj-class}}-object, the slot 'risk' in the object ist updated
#' with the result of the model-based risk-calculation.
#' @importFrom data.table data.table
#' @importFrom MASS loglm
#' @author Matthias Templ, Marius Totter, Bernhard Meindl
#' @seealso \code{\link{loglm}}, \code{\link{measure_risk}}
#' @references Skinner, C.J. and Holmes, D.J. (1998) \emph{Estimating the
Expand Down Expand Up @@ -241,6 +243,8 @@ definition=function(obj, method="default", weights, formulaM, bound=Inf) {
colnames(lambda)[ncol(lambda)] <- "Fk"
lambda <- data.table(lambda, key=vars)
#lambda <- lambda[,lapply(.SD, as.numeric)]
# suggestion by Ying Chen:
lambda <- data.table(lambda, key = vars)
for(v in vars){
if(class(x[[v]])!=class(lambda[[v]])){
if(is.character(x[[v]])){
Expand Down
4 changes: 3 additions & 1 deletion R/pram.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' automatically for an object of class \code{\link{sdcMicroObj-class}}. One can also specify
#' an integer vector or factor that specifies that desired groups. This vector must match the dimension
#' of the input data set, however. For a possible use case, have a look at the examples.
#' @param ... further input, currently ignored.
## @param ... further input, currently ignored.
#' @param pd minimum diagonal entries for the generated transition matrix P.
#' Either a vector of length 1 (which is recycled) or a vector of the same length as
#' the number of variables that should be postrandomized. It is also possible to set \code{pd}
Expand Down Expand Up @@ -56,6 +56,7 @@
#' @note Deprecated method 'pram_strata'is no longer available
#' in sdcMicro > 4.5.0
#' @examples
#' \dontrun{
#' data(testdata)
#'
#' ## application on a factor-variable
Expand Down Expand Up @@ -138,6 +139,7 @@
#' print(sdc, type="pram")
#' # we can also have a look at the transitions
#' get.sdcMicroObj(sdc, "pram")$transitions
#' }

pram <- function(obj, variables=NULL, strata_variables=NULL, pd=0.8, alpha=0.5) {
pramX(obj=obj, variables=variables, strata_variables=strata_variables, pd=pd, alpha=alpha)
Expand Down
148 changes: 103 additions & 45 deletions R/rankSwap.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
#' Rank swapping sorts the values of one numeric variable by their numerical
#' values (ranking). The restricted range is determined by the rank of two
#' swapped values, which cannot differ, by definition, by more than \eqn{P}{P}
#' percent of the total number of observations. Only positive P, R0 and K0 are
#' used and only one of it must be supplied.
#' percent of the total number of observations. Only positive P, R0 and K0 are
#' used and only one of it must be supplied. If none is supplied, sdcMicro sets
#' parameter eqn{R0} to 0.95 internally.
#'
#' @name rankSwap
#' @docType methods
Expand Down Expand Up @@ -51,99 +52,156 @@
#' provided by the World Bank to the PARIS21 Secretariat at the Organisation
#' for Economic Co-operation and Development (OECD). This work builds on
#' previous work which is elsewhere acknowledged.
#' @references
#' @references
#' Moore, Jr.R. (1996) Controlled data-swapping techniques for
#' masking public use microdata, U.S. Bureau of the Census \emph{Statistical
#' Research Division Report Series}, RR 96-04.
#'
#'
#' Kowarik, A. and Templ, M. and Meindl, B. and Fonteneau, F. and Prantner, B.:
#' \emph{Testing of IHSN Cpp Code and Inclusion of New Methods into sdcMicro},
#' in: Lecture Notes in Computer Science, J. Domingo-Ferrer, I. Tinnirello
#' (editors.); Springer, Berlin, 2012, ISBN: 978-3-642-33626-3, pp. 63-77.
#' (editors.); Springer, Berlin, 2012, ISBN: 978-3-642-33626-3, pp. 63-77.
#' \doi{10.1007/978-3-642-33627-0_6}
#'
#'
#' @export
#' @examples
#' data(testdata2)
#' data_swap <- rankSwap(testdata2,variables=c("age","income","expend","savings"))
#' \dontrun{
#' data_swap <- rankSwap(
#' obj = testdata2,
#' variables = c("age", "income", "expend", "savings")
#' )
#'
#' ## for objects of class sdcMicro:
#' data(testdata2)
#' sdc <- createSdcObj(testdata2,
#' keyVars=c('urbrur','roof','walls','water','electcon','relat','sex'),
#' numVars=c('expend','income','savings'), w='sampling_weight')
#' sdc <- createSdcObj(
#' dat = testdata2,
#' keyVars = c("urbrur", "roof", "walls", "water", "electcon", "relat", "sex"),
#' numVars = c("expend", "income", "savings"),
#' w = "sampling_weight")
#' sdc <- rankSwap(sdc)
rankSwap <- function(obj, variables=NULL, TopPercent=5, BottomPercent=5,
K0=NULL, R0=0.95, P=NULL, missing=NA, seed=NULL) {
TFpar <- c(!is.null(P),!is.null(R0),!is.null(K0))
if(sum(TFpar)>1){
stop("Only one of the parameters P, R0 and K0 should be provided.")
#' }
rankSwap <- function(obj, variables = NULL, TopPercent = 5, BottomPercent = 5,
K0 = NULL, R0 = NULL, P = NULL, missing = NA, seed = NULL) {

TFpar <- c(!is.null(P), !is.null(R0), !is.null(K0))
if (sum(TFpar) > 1) {
stop("Only one of the parameters P, R0 and K0 should be provided.", call. = FALSE)
}

if (sum(TFpar) == 0) {
R0 <- 0.95
message("setting parameter R0 = 0.95 as no inputs have been specified.")
}

rankSwapX(obj=obj, variables=variables, TopPercent=TopPercent, BottomPercent=BottomPercent, K0=K0, R0=R0, P=P, missing=missing, seed=seed)
rankSwapX(
obj = obj,
variables = variables,
TopPercent = TopPercent,
BottomPercent = BottomPercent,
K0 = K0,
R0 = R0,
P = P,
missing = missing,
seed = seed
)
}
setGeneric("rankSwapX", function(obj, variables=NULL, TopPercent=5, BottomPercent=5,
K0=-1, R0=0.95, P=0, missing=NA, seed=NULL) {

setGeneric("rankSwapX", function(obj, variables = NULL, TopPercent = 5, BottomPercent = 5,
K0 = -1, R0 = 0.95, P = 0, missing = NA, seed = NULL) {
standardGeneric("rankSwapX")
})

setMethod(f="rankSwapX", signature=c("sdcMicroObj"),
definition=function(obj, variables=NULL, TopPercent=5, BottomPercent=5, K0=NULL,
R0=0.95, P=NULL, missing=NA, seed=NULL) {

manipData <- get.sdcMicroObj(obj, type="manipNumVars")
setMethod(f = "rankSwapX", signature = c("sdcMicroObj"),
definition = function(obj,
variables = NULL,
TopPercent = 5,
BottomPercent = 5,
K0 = NULL,
R0 = 0.95,
P = NULL,
missing = NA,
seed = NULL) {
manipData <- get.sdcMicroObj(obj, type = "manipNumVars")

if ( is.null(variables) ) {
variables <- colnames(manipData)
}
if (is.null(variables)) {
variables <- colnames(manipData)
}

res <- rankSwap(manipData, variables=variables, TopPercent=TopPercent,
BottomPercent=BottomPercent, K0=K0, R0=R0, P=P, missing=missing, seed=seed)
res <- rankSwap(
obj = manipData,
variables = variables,
TopPercent = TopPercent,
BottomPercent = BottomPercent,
K0 = K0,
R0 = R0,
P = P,
missing = missing,
seed = seed
)

obj <- nextSdcObj(obj)
obj <- set.sdcMicroObj(obj, type="manipNumVars", input=list(res))
obj <- dRisk(obj)
obj <- dUtility(obj)
obj
})
obj <- nextSdcObj(obj)
obj <- set.sdcMicroObj(obj, type = "manipNumVars", input = list(res))
obj <- dRisk(obj)
obj <- dUtility(obj)
obj
}
)

setMethod(f="rankSwapX", signature=c("data.frame"),
definition=function(obj, variables=NULL, TopPercent=5, BottomPercent=5, K0=-1,
R0=0.95, P=0, missing=NA, seed=NULL) {
setMethod(
f = "rankSwapX",
signature = c("data.frame"),
definition = function(obj,
variables = NULL,
TopPercent = 5,
BottomPercent = 5,
K0 = -1,
R0 = 0.95,
P = 0,
missing = NA,
seed = NULL) {

# by default, all variables will be used
if (is.null(variables)) {
variables <- colnames(obj)
}
dataX <- obj[, variables, drop=FALSE]
dataX <- obj[, variables, drop = FALSE]
dataX <- as.matrix(dataX)

if (!all(apply(dataX, 2, is.numeric))) {
dataX <- apply(dataX, 2, as.numeric)
}

data2 <- dataX
data2[, ] <- NA

index_missing <- is.na(dataX)
miss_val <- ifelse(is.na(missing), min(dataX, na.rm=TRUE)-1, missing)
miss_val <- ifelse(is.na(missing), min(dataX, na.rm = TRUE) - 1, missing)
if (sum(index_missing) > 0) {
dataX[index_missing] <- miss_val
}

seed <- ifelse(is.null(seed), -1L, as.integer(seed))
if(is.null(K0)){
if (is.null(K0)) {
K0 <- -1
}
if(is.null(R0)){
if (is.null(R0)) {
R0 <- -1
}
if(is.null(P)){
if (is.null(P)) {
P <- -1
}
dat <- .Call("RankSwap", dataX, data2, miss_val, TopPercent, BottomPercent, K0, R0, P, seed)$Res
dat <- .Call("RankSwap",
dataX,
data2,
miss_val,
TopPercent,
BottomPercent,
K0,
R0,
P,
seed)$Res
if (sum(index_missing) > 0 & is.na(missing)) {
dat[dat==miss_val] <- NA
dat[dat == miss_val] <- NA
}
obj[, variables] <- dat
invisible(obj)
Expand Down
5 changes: 3 additions & 2 deletions R/sdcMicro-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@
#' ## do not use the mu-Argus test data set (free1)
#' # since the numerical variables are (probably) faked.
#' data(Tarragona)
#' \dontrun{
#' Tarragona1 <- rankSwap(Tarragona, P = 10, K0 = NULL, R0 = NULL)
#' }
#'
#' ## Microaggregation:
#' m1 <- microaggregation(Tarragona, method="onedims", aggr=3)
Expand Down Expand Up @@ -158,7 +160,7 @@
#' pairs(dataGen(mtcars[,4:6],n=200))
#'
#' ## PRAM
#'
#' \dontrun{
#' set.seed(123)
#' x <- factor(sample(1:4, 250, replace=TRUE))
#' pr1 <- pram(x)
Expand All @@ -171,7 +173,6 @@
#' marstat <- as.factor(free1[,"MARSTAT"])
#' marstatPramed <- pram(marstat)
#' summary(marstatPramed)
#' \dontrun{
#' # FOR OBJECTS OF CLASS sdcMicro
#' data(testdata)
#' sdc <- createSdcObj(testdata,
Expand Down
4 changes: 2 additions & 2 deletions inst/templates/report-template.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ cat(paste0("- **householdID**: *", paste(impVars$hhId, collapse="* | *"),"*\n"))
cat(paste0("- **strataVariable(s)**: *", paste(impVars$strataVars, collapse="* | *"),"*\n"))
#sensVar <- get.reportObj(repObj, "sensiblecn")
if ( !is.null(impVars$sensibleVar ) ) {
if (!is.null(impVars$sensibleVar)) {
cat(paste0("- **Sensitive variable(s) for l-diversity**: "))
cat(paste0("*",paste(impVar$sensibleVar, collapse="* | *"),"*"))
cat(paste0("*", paste(impVars$sensibleVar, collapse = "* | *"), "*"))
}
cat("\n\n")
```
Expand Down
9 changes: 7 additions & 2 deletions man/LLmodGlobalRisk.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 11 additions & 3 deletions man/LocalRecProg.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/addNoise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 9 additions & 3 deletions man/extractManipData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/kAnon_violations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit efa8b02

Please sign in to comment.