Commit
Merge pull request #20 from lukesonnet/lukesonnet/tests
Add tests; make some minor code changes
lukesonnet committed Feb 10, 2018
2 parents 4f5f346 + 9b18aad commit 37f2384
Showing 17 changed files with 297 additions and 87 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -4,3 +4,4 @@ src/deprecated
^\.Rproj\.user$
^\.travis\.yml$
^appveyor\.yml$
^tests/testthat/test_krlogit_fns\.R$
37 changes: 34 additions & 3 deletions .travis.yml
@@ -1,6 +1,37 @@
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r

language: R
sudo: false
cache: packages
warnings_are_errors: false
warnings_are_errors: false

matrix:
include:
- os: linux
r: release

- os: linux
r: oldrel
after_success:
- echo skipping source packaging on linux/oldrel

- os: linux
r: devel
after_success:
- echo skipping source packaging on linux/devel

- os: osx
r: release
if: branch = master

- os: osx
osx_image: xcode6.4
r: oldrel
if: branch = master

r_packages:
- numDeriv

r_github_packages:
- r-lib/covr

after_success:
- test $TRAVIS_OS_NAME == "linux" && Rscript -e 'covr::coveralls()'
13 changes: 7 additions & 6 deletions DESCRIPTION
@@ -1,11 +1,11 @@
Package: KRLS2
Type: Package
Title: Kernel-based Regularized Least squares (KRLS)
Version: 0.4
Date: 2016-05-15
Title: Kernel-based Regularized Least squares
Version: 1.1.0
Date: 2018-02-08
Author: Jens Hainmueller (Stanford) Chad Hazlett (UCLA) Luke Sonnet (UCLA)
Maintainer: Jens Hainmueller <jhain@stanford.edu>
Description: Package implements Kernel-based Regularized Least Squares (KRLS), a
Description: Implements Kernel-based Regularized Least Squares (KRLS), a
machine learning method to fit multidimensional functions y=f(x) for regression
and classification problems without relying on linearity or additivity
assumptions. KRLS finds the best fitting function by minimizing the squared loss
@@ -17,6 +17,7 @@ Imports:
LinkingTo: Rcpp, RcppArmadillo
License: GPL (>= 2)
Suggests:
lattice,
testthat
testthat,
lattice
URL: https://www.r-project.org, https://www.stanford.edu/~jhain/
RoxygenNote: 6.0.1
14 changes: 13 additions & 1 deletion NAMESPACE
@@ -37,4 +37,16 @@ export(summary.krls2)
export(trace_mat)
import(RSpectra)
importFrom(Rcpp,sourceCpp)
useDynLib(KRLS2)
importFrom(grDevices,devAskNewPage)
importFrom(graphics,arrows)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(stats,as.formula)
importFrom(stats,optim)
importFrom(stats,optimize)
importFrom(stats,predict)
importFrom(stats,pt)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,var)
useDynLib(KRLS2, .registration = TRUE)
32 changes: 16 additions & 16 deletions R/RcppExports.R
@@ -3,81 +3,81 @@

#' @export
mult_diag <- function(x, d) {
.Call('_KRLS2_mult_diag', PACKAGE = 'KRLS2', x, d)
.Call(`_KRLS2_mult_diag`, x, d)
}

#' @export
trace_mat <- function(x) {
.Call('_KRLS2_trace_mat', PACKAGE = 'KRLS2', x)
.Call(`_KRLS2_trace_mat`, x)
}

#' @export
krls_gr_trunc <- function(U, D, y, w, fitted, dhat, lambda) {
.Call('_KRLS2_krls_gr_trunc', PACKAGE = 'KRLS2', U, D, y, w, fitted, dhat, lambda)
.Call(`_KRLS2_krls_gr_trunc`, U, D, y, w, fitted, dhat, lambda)
}

#' @export
krls_hess_trunc_inv <- function(U, D, w, lambda) {
.Call('_KRLS2_krls_hess_trunc_inv', PACKAGE = 'KRLS2', U, D, w, lambda)
.Call(`_KRLS2_krls_hess_trunc_inv`, U, D, w, lambda)
}

#' @export
krlogit_fn_trunc <- function(par, U, D, y, w, lambda) {
.Call('_KRLS2_krlogit_fn_trunc', PACKAGE = 'KRLS2', par, U, D, y, w, lambda)
.Call(`_KRLS2_krlogit_fn_trunc`, par, U, D, y, w, lambda)
}

#' @export
krlogit_gr_trunc <- function(par, U, D, y, w, lambda) {
.Call('_KRLS2_krlogit_gr_trunc', PACKAGE = 'KRLS2', par, U, D, y, w, lambda)
.Call(`_KRLS2_krlogit_gr_trunc`, par, U, D, y, w, lambda)
}

#' @export
partial_logit <- function(K, coef, beta0) {
.Call('_KRLS2_partial_logit', PACKAGE = 'KRLS2', K, coef, beta0)
.Call(`_KRLS2_partial_logit`, K, coef, beta0)
}

#' @export
krlogit_hess_trunc_inv <- function(par, U, D, y, w, lambda) {
.Call('_KRLS2_krlogit_hess_trunc_inv', PACKAGE = 'KRLS2', par, U, D, y, w, lambda)
.Call(`_KRLS2_krlogit_hess_trunc_inv`, par, U, D, y, w, lambda)
}

#' @export
euc_dist <- function(x1, x2) {
.Call('_KRLS2_euc_dist', PACKAGE = 'KRLS2', x1, x2)
.Call(`_KRLS2_euc_dist`, x1, x2)
}

#' @export
kern_gauss_1d <- function(x1, x2, b) {
.Call('_KRLS2_kern_gauss_1d', PACKAGE = 'KRLS2', x1, x2, b)
.Call(`_KRLS2_kern_gauss_1d`, x1, x2, b)
}

#' @export
kern_gauss <- function(x, b) {
.Call('_KRLS2_kern_gauss', PACKAGE = 'KRLS2', x, b)
.Call(`_KRLS2_kern_gauss`, x, b)
}

#' @export
new_gauss_kern <- function(newx, oldx, b) {
.Call('_KRLS2_new_gauss_kern', PACKAGE = 'KRLS2', newx, oldx, b)
.Call(`_KRLS2_new_gauss_kern`, newx, oldx, b)
}

#' @export
solve_for_d_ls <- function(y, U, D, lambda) {
.Call('_KRLS2_solve_for_d_ls', PACKAGE = 'KRLS2', y, U, D, lambda)
.Call(`_KRLS2_solve_for_d_ls`, y, U, D, lambda)
}

#' @export
solve_for_d_ls_w <- function(y, U, D, w, lambda) {
.Call('_KRLS2_solve_for_d_ls_w', PACKAGE = 'KRLS2', y, U, D, w, lambda)
.Call(`_KRLS2_solve_for_d_ls_w`, y, U, D, w, lambda)
}

#' @export
pwmfx <- function(k, x, coefhat, vcovc, p, b) {
.Call('_KRLS2_pwmfx', PACKAGE = 'KRLS2', k, x, coefhat, vcovc, p, b)
.Call(`_KRLS2_pwmfx`, k, x, coefhat, vcovc, p, b)
}

#' @export
pwmfx_novar <- function(k, x, coefhat, p, b) {
.Call('_KRLS2_pwmfx_novar', PACKAGE = 'KRLS2', k, x, coefhat, p, b)
.Call(`_KRLS2_pwmfx_novar`, k, x, coefhat, p, b)
}
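Note: the wrappers above drop the string-based .Call('_KRLS2_...', PACKAGE = 'KRLS2', ...) form in favor of registered native symbols, matching the useDynLib(KRLS2, .registration = TRUE) line added to NAMESPACE. A minimal usage sketch follows (not part of the commit; the semantics of trace_mat() and mult_diag() are assumed from their names):

    library(KRLS2)

    set.seed(1)
    X <- matrix(rnorm(9), nrow = 3)

    # The exported wrappers are called exactly as before; only the native
    # symbol lookup changed (registered routine instead of string + PACKAGE=).
    trace_mat(X)                  # assumed: returns sum(diag(X))
    mult_diag(X, d = c(1, 2, 3))  # assumed: returns X %*% diag(c(1, 2, 3))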

32 changes: 18 additions & 14 deletions R/inference.R
@@ -99,24 +99,28 @@ inference.krls2 <- function(obj,
if(is.null(clusters)) {
score <- matrix(nrow = n, ncol = length(obj$dhat))
for(i in 1:n) {
score[i, ] = krls_gr_trunc(obj$U[i, , drop = F],
obj$D,
y[i],
obj$w[i],
yfitted[i],
obj$dhat,
obj$lambda/n)
score[i, ] <- krls_gr_trunc(
obj$U[i, , drop = F],
obj$D,
y[i],
obj$w[i],
yfitted[i],
obj$dhat,
obj$lambda / n
)
}
} else {
score <- matrix(nrow = length(clusters), ncol = length(obj$dhat))
for(j in 1:length(clusters)){
score[j, ] = krls_gr_trunc(obj$U[clusters[[j]], , drop = F],
obj$D,
y[clusters[[j]]],
obj$w[clusters[[j]]],
yfitted[clusters[[j]]],
obj$dhat,
length(clusters[[j]]) * obj$lambda/n)
score[j, ] <- krls_gr_trunc(
obj$U[clusters[[j]], , drop = F],
obj$D,
y[clusters[[j]]],
obj$w[clusters[[j]]],
yfitted[clusters[[j]]],
obj$dhat,
length(clusters[[j]]) * obj$lambda / n
)
}

}
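Note: the rewritten loop above fills one row of score per cluster by evaluating krls_gr_trunc on that cluster's rows, the usual building block for a clustered sandwich variance. A schematic sketch of how such a score matrix is commonly combined with an inverse Hessian (an illustration of the general construction, not the package's downstream code):

    # Illustrative only: with one row of `score` per cluster and an inverse
    # Hessian `invhessian`, a sandwich-style covariance is
    #   invhessian %*% crossprod(score) %*% invhessian
    # (a small-sample cluster correction may also be applied).
    sandwich_vcov <- function(invhessian, score) {
      meat <- crossprod(score)   # t(score) %*% score, summed over clusters
      invhessian %*% meat %*% invhessian
    }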
2 changes: 1 addition & 1 deletion R/kernels.R
@@ -29,7 +29,7 @@ generateK <- function(X,
U <- truncDat$Utrunc
D <- truncDat$eigvals
} else {
eigobj <- eigen(K)
eigobj <- eigen(K, symmetric = T)
eigvaliszero <- eigobj$values == 0
if(any(eigvaliszero)) {

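Note: passing symmetric = TRUE tells eigen() that the kernel matrix is symmetric by construction, so it skips the numerical symmetry check and uses the symmetric solver, returning real eigenvalues in decreasing order. A small illustration (not part of the commit):

    # Kernel matrices are symmetric by construction, so the symmetric solver
    # is both safer and faster here.
    K <- matrix(c(2, 1,
                  1, 2), nrow = 2)
    eigen(K, symmetric = TRUE)$values  # 3 1, guaranteed real and sorted
    # Without symmetric = TRUE, eigen() tests symmetry numerically and may
    # fall back to the general solver (complex-typed values) if K is only
    # symmetric up to rounding error.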
25 changes: 16 additions & 9 deletions R/krls2.R
@@ -25,9 +25,11 @@
#' via the summary() function. See summary.krls2().
#'
#' @import RSpectra
#' @useDynLib KRLS2
#' @useDynLib KRLS2, .registration = TRUE
#' @importFrom Rcpp sourceCpp

#' @importFrom grDevices devAskNewPage
#' @importFrom graphics arrows par plot
#' @importFrom stats as.formula optim optimize predict pt quantile sd var

#############
# Functions #
Expand All @@ -54,11 +56,13 @@
#' @param U Positive scalar that determines the upper bound of the search window for the leave-one-out optimization to find \eqn{\lambda}{lambda} with least squares loss. Default is \code{NULL} which means that the upper bound is found by using an algorithm outlined in \code{\link{lambdaline}}. Ignored with logistic loss.
#' @param tol Positive scalar that determines the tolerance used in the optimization routine used to find \eqn{\lambda}{lambda} with least squares loss. Default is \code{NULL} which means that convergence is achieved when the difference in the sum of squared leave-one-out errors between the \var{i} and the \var{i+1} iteration is less than \var{N * 10^-3}. Ignored with logistic loss.
#' @param truncate A boolean that defaults to \code{FALSE}. If \code{TRUE} truncates the kernel matrix, keeping as many eigenvectors as needed so that 1-\code{epsilon} of the total variance in the kernel matrix is retained. Alternatively, you can simply specify \code{epsilon} and truncation will be used.
#' @param epsilon Scalar between 0 and 1 that determines the total variance that can be lost in truncation. If not NULL, truncation is automatically set to TRUE.
#' @param epsilon Scalar between 0 and 1 that determines the total variance that can be lost in truncation. If not NULL, truncation is automatically set to TRUE. If \code{truncate == TRUE}, default is 0.001.
#' @param lastkeeper Number of columns of \code{U} to keep when \code{truncate == TRUE}. Overrides \code{epsilon}.
#' @param con A list of control arguments passed to optimization for the numerical optimization of the kernel regularized logistic loss function.
#' @param returnopt A boolean that defaults to \code{FALSE}. If \code{TRUE}, returns the result of the \code{optim} method called to optimize the kernel regularized logistic loss function.
#' @param returnopt A boolean that defaults to \code{FALSE}. If \code{TRUE}, returns the result of the \code{optim} method called to optimize the kernel regularized logistic loss function. Returns \code{NULL} with leastsquares loss.
#' @param printlevel A number that is either 0 (default), 1, or 2. 0 Has minimal printing, 1 prints out most diagnostics, and 2 prints out most diagnostics including \code{optim} diagnostics for each fold in the cross-validation selection of hyperparameters.
#' @param warn A number that sets your \code{warn} option. We default to 1 so that warnings print as they occur. You can change this to 2 if you want all warnings to be errors, to 0 if you want all warnings to wait until the top-level call is finished, or to a negative number to ignore them.
#' @param sigma DEPRECATED. Users should now use \code{b}, included for backwards compatibility.
#' @details
#' \code{krls} implements the Kernel-based Regularized Least Squares (KRLS) estimator as described in Hainmueller and Hazlett (2014). Please consult this reference for any details.

@@ -121,7 +125,7 @@ krls <- function(# Data arguments
lastkeeper = NULL,
# Optimization arguments
con = list(maxit=500),
returnopt = TRUE,
returnopt = FALSE,
printlevel = 0,
warn = 1,
sigma = NULL, # to provide legacy support for old code,
@@ -370,17 +374,19 @@ krls <- function(# Data arguments

coefhat <- UDinv %*% out$dhat

opt <- NULL
if (loss == "leastsquares") {

yfitted <- Kdat$K %*% coefhat
yfitted <- yfitted * y.init.sd + y.init.mean

} else {

opt <- if(returnopt) out$opt else NULL

yfitted <- logistic(K=Kdat$K, coeff=coefhat, beta0 = out$beta0hat)

if (returnopt) {
opt <- out$opt
}
}

z <- list(K = Kdat$K,
Expand All @@ -398,7 +404,8 @@ krls <- function(# Data arguments
b = b,
lambda = lambda,
kernel = whichkernel,
loss = loss
loss = loss,
opt = opt
)

class(z) <- "krls2"
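Note: with the changes above, returnopt now defaults to FALSE and the fitted object always carries an opt element (NULL unless logistic loss is used with returnopt = TRUE). A minimal usage sketch (illustrative; the argument names X, y, and loss are assumptions based on the package documentation, and the data values are made up):

    library(KRLS2)

    set.seed(2)
    X <- matrix(rnorm(200), ncol = 2)
    y <- rbinom(100, 1, plogis(X[, 1]))

    # Setting epsilon switches truncation on automatically; returnopt = TRUE
    # keeps the optim() result (logistic loss only) in fit$opt.
    fit <- krls(X = X, y = y, loss = "logistic", epsilon = 0.001,
                returnopt = TRUE)
    str(fit$opt)   # NULL with least-squares loss or returnopt = FALSE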