diff --git a/DESCRIPTION b/DESCRIPTION index b37ffac..3daa825 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: fastLink Type: Package Title: Fast Probabilistic Record Linkage with Missing Data -Version: 0.4.1 -Date: 2018-05-16 +Version: 0.5.0 +Date: 2018-11-01 Authors@R: c( person("Ted", "Enamorado", email = "fastlinkr@gmail.com", role = c("aut", "cre")), person("Ben", "Fifield", email = "fastlinkr@gmail.com", role = c("aut")), diff --git a/R/emlinkMARmov.R b/R/emlinkMARmov.R index 189c419..0451d86 100644 --- a/R/emlinkMARmov.R +++ b/R/emlinkMARmov.R @@ -81,6 +81,11 @@ emlinkMARmov <- function(patterns, nobs.a, nobs.b, ## The p.m, p.gamma.k.m, p.gamma.k.u, p.gamma.k.m, p.gamma.k.m, p.gamma.k.m, that ## maximize the observed data log-likelihood of the agreement patterns + ## Edge case + if(is.null(nrow(patterns))){ + patterns <- as.data.frame(t(as.matrix(patterns))) + } + ## Number of fields nfeatures <- ncol(patterns) - 1 @@ -421,6 +426,11 @@ emlinkRS <- function(patterns.out, em.out, nobs.a, nobs.b){ stop("Your `em.out` object is not a valid emlinkMARmov object.") } options(digits = 16) + + ## Edge case + if(is.null(nrow(patterns.out))){ + patterns.out <- as.data.frame(t(as.matrix(patterns.out))) + } nfeatures <- ncol(patterns.out) - 1 gamma.j.k <- as.matrix(patterns.out[, 1:nfeatures]) N <- nrow(gamma.j.k) diff --git a/R/emlinklog.R b/R/emlinklog.R index acb8527..2b8791c 100644 --- a/R/emlinklog.R +++ b/R/emlinklog.R @@ -70,7 +70,11 @@ emlinklog <- function(patterns, nobs.a, nobs.b, ## Returns: ## The p.m, p.gamma.k.m, p.gamma.k.u, p.gamma.k.m, p.gamma.k.m, p.gamma.k.m, that ## maximize the observed data log-likelihood of the agreement patterns - + + ## Edge case + if(is.null(nrow(patterns))){ + patterns <- as.data.frame(t(as.matrix(patterns))) + } ## Number of fields nfeatures <- ncol(patterns) - 1 diff --git a/R/fastLink-package.R b/R/fastLink-package.R index 508dc0b..9a52b9d 100644 --- a/R/fastLink-package.R +++ b/R/fastLink-package.R @@ -1,6 +1,6 @@ #' Fast Probabilistic Record Linkage with Missing Data #' -#' \code{fastLink} implements methods developed by Enamorado, Fifield, and Imai (2017) +#' \code{fastLink} implements methods developed by Enamorado, Fifield, and Imai (2018) #' ''Using a Probabilistic Model to Assist Merging of Large-scale Administrative Records'', #' to probabilistically merge large datasets using the Fellegi-Sunter model #' while allowing for missing data and the inclusion of auxiliary information. @@ -8,8 +8,8 @@ #' the Fellegi-Sunter model, using the Expectation-Maximization Algorithm. In addition, #' tools for conducting and summarizing data merges are included. #' -#' \tabular{ll}{ Package: \tab fastLink\cr Type: \tab Package\cr Version: \tab 0.4.1-\cr -#' Date: \tab 2018-05-16\cr License: \tab GPL (>= 3)\cr } +#' \tabular{ll}{ Package: \tab fastLink\cr Type: \tab Package\cr Version: \tab 0.5.0-\cr +#' Date: \tab 2018-11-01\cr License: \tab GPL (>= 3)\cr } #' #' @name fastLink-package #' @useDynLib fastLink, .registration = TRUE @@ -18,8 +18,8 @@ #' @author Ted Enamorado \email{fastlinkr@@gmail.com}, Ben Fifield \email{fastlinkr@@gmail.com}, and Kosuke Imai \email{kimai@@princeton.edu} #' #' Maintainer: Ted Enamorado \email{fastlinkr@@gmail.com} -#' @references Enamorado, Ted, Ben Fifield and Kosuke Imai. (2017) "Using a Probabilistic Model to Assist Merging of -#' Large-scale Administrative Records." Working Paper. Available at \url{http://imai.princeton.edu/research/linkage.html}. +#' @references Enamorado, Ted, Ben Fifield and Kosuke Imai. (2018) "Using a Probabilistic Model to Assist Merging of +#' Large-scale Administrative Records." Forthcoming, American Political Science Review. Available at \url{http://imai.princeton.edu/research/linkage.html}. #' @keywords package #' @import Matrix data.table #' @importFrom Rcpp evalCpp diff --git a/R/gammaCK2par.R b/R/gammaCK2par.R index 7193325..155e9a4 100644 --- a/R/gammaCK2par.R +++ b/R/gammaCK2par.R @@ -43,10 +43,10 @@ gammaCK2par <- function(matAp, matBp, n.cores = NULL, cut.a = 0.92, method = "jw matBp[matBp == ""] <- NA if(sum(is.na(matAp)) == length(matAp) | length(unique(matAp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp) | length(unique(matBp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } if(!(method %in% c("jw", "jaro", "lv"))){ diff --git a/R/gammaCKpar.R b/R/gammaCKpar.R index bf641e1..5482080 100644 --- a/R/gammaCKpar.R +++ b/R/gammaCKpar.R @@ -45,10 +45,10 @@ gammaCKpar <- function(matAp, matBp, n.cores = NULL, cut.a = 0.92, cut.p = 0.88, matBp[matBp == ""] <- NA if(sum(is.na(matAp)) == length(matAp) | length(unique(matAp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp) | length(unique(matBp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } if(!(method %in% c("jw", "jaro", "lv"))){ diff --git a/R/gammaKpar.R b/R/gammaKpar.R index 0872940..d2a22c5 100644 --- a/R/gammaKpar.R +++ b/R/gammaKpar.R @@ -48,17 +48,17 @@ gammaKpar <- function(matAp, matBp, gender = FALSE, n.cores = NULL) { if(!gender){ if(sum(is.na(matAp)) == length(matAp) | length(unique(matAp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp) | length(unique(matBp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } }else{ if(sum(is.na(matAp)) == length(matAp)){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp)){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } } diff --git a/R/gammaNUMCK2par.R b/R/gammaNUMCK2par.R index 10ebd7c..c7ef505 100644 --- a/R/gammaNUMCK2par.R +++ b/R/gammaNUMCK2par.R @@ -40,10 +40,10 @@ gammaNUMCK2par <- function(matAp, matBp, n.cores = NULL, cut.a = 1) { matBp[matBp == ""] <- NA if(sum(is.na(matAp)) == length(matAp) | length(unique(matAp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp) | length(unique(matBp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } if(is.null(n.cores)) { diff --git a/R/gammaNUMCKpar.R b/R/gammaNUMCKpar.R index fe42bde..df0cbe1 100644 --- a/R/gammaNUMCKpar.R +++ b/R/gammaNUMCKpar.R @@ -35,10 +35,10 @@ gammaNUMCKpar <- function(matAp, matBp, n.cores = NULL, cut.a = 1, cut.p = 2) { matBp[matBp == ""] <- NA if(sum(is.na(matAp)) == length(matAp) | length(unique(matAp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset A.\n") } if(sum(is.na(matBp)) == length(matBp) | length(unique(matBp)) == 1){ - cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.") + cat("WARNING: You have no variation in this variable, or all observations are missing in dataset B.\n") } if(is.null(n.cores)) {