-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added convergence rate estimator from Politis et al. 1999
- Loading branch information
Showing
7 changed files
with
309 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# convergence rate of estimators | ||
|
||
|
||
#' Empirical convergence rate of a KL divergence estimator | ||
#' | ||
#' Subsampling-based confidence intervals computed by `kld_ci_subsampling()` | ||
#' require the convergence rate of the KL divergence estimator as an input. The | ||
#' default rate of `0.5` assumes that the variance term dominates the bias term. | ||
#' For high-dimensional problems, depending on the data, the convergence rate | ||
#' might be lower. This function allows to empirically derive the convergence | ||
#' rate. | ||
#' | ||
#' Note: Currently, only the one-sample version is implemented. | ||
#' | ||
#' @inheritParams kld_est | ||
#' @param estimator A KL divergence estimator. | ||
#' @param n.sizes Number of different subsample sizes to use (default: `4`). | ||
#' @param spacing.factor Multiplicative factor controlling the spacing of sample | ||
#' sizes (default: `1.5`). | ||
#' @param typical.subsample A function that produces a typical subsample size, | ||
#' used as the geometric mean of subsample sizes (default: `sqrt(n)`). | ||
#' @param b Subsample sizes to be used. The default `NULL` means `b` is computed | ||
#' from `n.sizes`, `spacing.factor` and `typical.subsample`. If a non-`NULL` | ||
#' value for `b` is given, inputs `n.sizes`, `spacing.factor` and | ||
#' `typical.subsample` are ignored. | ||
#' @param B Number of subsamples to draw per subsample size. | ||
#' @param plot A boolean (default: `FALSE`) controlling whether to produce a | ||
#' diagnostic plot visualizing the fit. | ||
#' @returns A scalar, the parameter \eqn{\beta} in the empirical convergence | ||
#' rate \eqn{n^-\beta} of the `estimator` to the true KL divergence. | ||
#' It can be used in the `convergence.rate` argument of `kld_ci_subsampling()` | ||
#' as `convergence.rate = function(n) n^beta`. | ||
#' | ||
#' References: | ||
#' | ||
#' Politis, Romano and Wolf, "Subsampling", Chapter 8 (1999), for theory. | ||
#' | ||
#' The implementation has been adapted from lecture notes by C. J. Geyer, | ||
#' https://www.stat.umn.edu/geyer/5601/notes/sub.pdf | ||
#' | ||
#' @export | ||
convergence_rate <- function(estimator, X, Y = NULL, q = NULL, | ||
n.sizes = 4, spacing.factor = 1.5, | ||
typical.subsample = function(n) sqrt(n), | ||
b = NULL, B = 500L, plot = FALSE) { | ||
|
||
two.sample <- is_two_sample(Y, q) | ||
|
||
# important dimensions for X and Y | ||
if (is.vector(X)) X <- as.matrix(X) | ||
n <- nrow(X) | ||
|
||
# determine subsample sizes from input parameters | ||
if (is.null(b)) { | ||
bmin <- typical.subsample(n) / sqrt((n.sizes-1)*spacing.factor) | ||
b <- floor(bmin * spacing.factor^(0:(n.sizes-1))) | ||
} | ||
|
||
if (two.sample) { | ||
stop("Two-sample version not implemented yet.") | ||
|
||
} else { | ||
|
||
theta.hat <- estimator(X, q = q) | ||
|
||
theta.star <- matrix(NA, B, length(b)) | ||
for (i in 1:B) { | ||
X.star <- X | ||
# backwards nested subsampling | ||
for (j in length(b):1) { | ||
X.star <- sample(X.star, b[j], replace = FALSE) | ||
theta.star[i, j] <- estimator(X.star, q = q) | ||
} | ||
} | ||
|
||
zmat <- theta.star - theta.hat | ||
|
||
# calculate quantile differences | ||
l_probs <- seq(0.05, 0.45, by = 0.05) | ||
u_probs <- seq(0.55, 0.95, by = 0.05) | ||
lqmat <- log(apply(zmat, MARGIN = 2, FUN = function(x) quantile(x, u_probs)) | ||
- apply(zmat, MARGIN = 2, FUN = function(x) quantile(x, l_probs))) | ||
dimnames(lqmat) <- list(NULL,b) | ||
|
||
y <- colMeans(lqmat) | ||
beta <- -cov(y, log(b)) / var(log(b)) | ||
} | ||
|
||
if (plot) { | ||
inter <- mean(y) + beta * mean(log(b)) | ||
plot(rep(b, each = nrow(lqmat)), as.vector(lqmat), | ||
xlab = "Subsample size", | ||
ylab = "log(high quantile - low quantile)", | ||
main = paste0("Empirical convergence rate (beta = ",signif(beta,3),")"), | ||
log = "x") | ||
lines(b, inter-beta*log(b), col = "red") | ||
} | ||
beta | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
--- | ||
title: "Empirical convergence rate of KL divergence estimators" | ||
author: "Niklas Hartung" | ||
date: "`r Sys.Date()`" | ||
output: html_document | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
knitr::opts_chunk$set(echo = TRUE) | ||
``` | ||
|
||
## Topic | ||
|
||
The subsampling method, used to generate confidence intervals for a KL divergence | ||
estimate, requires the convergence rate of the estimator as an input. | ||
|
||
According to literature (Noh 2014, Singh 2016, Zhao 2020), the 1-NN estimator has | ||
a variance of O(1/n) and a dimension-dependent bias. The exact shape of the bias | ||
depends on the smoothness assumptions for the distribution. As a default in the | ||
subsampling method, a $\sqrt{n}$ convergence rate is assumed, which implies that | ||
the bias term is not of larger order than the variance. | ||
|
||
As an alternative to using such a theoretically derived convergence rate, it can | ||
be determined empirically using the method described in Politis et al. (1999). | ||
|
||
For now, only the one-sample variant is implemented. | ||
|
||
### Preliminaries | ||
|
||
Load libraries | ||
|
||
```{r} | ||
library(kldest) | ||
library(ggplot2) | ||
``` | ||
|
||
For reproducibility | ||
|
||
```{r} | ||
set.seed(0) | ||
``` | ||
|
||
### Simulation parameters | ||
|
||
Global parameters | ||
|
||
```{r} | ||
n <- 1000L # sample size | ||
n.sizes <- 5L # number of different subsample sizes | ||
B <- 1000L # number of subsamples to simulate per subsample size | ||
``` | ||
|
||
Estimators to be evaluated. Currently, the bias-reduced algorithm cannot be used | ||
since it relies on two samples, for which function `convergence_rate` is not | ||
implemented yet. | ||
|
||
```{r} | ||
estimators <- list( | ||
"nn1" = kld_est_nn#, | ||
# "brnn1" = function(...) kld_est_brnn(..., warn.max.k = FALSE) | ||
) | ||
nestim <- length(estimators) | ||
``` | ||
|
||
Different distributions $p$ and $q$: | ||
|
||
```{r} | ||
scenarios <- list( | ||
"2-D, independent" = list(mu1 = c(2,0), | ||
sigma1 = diag(c(1,100)), | ||
mu2 = c(0,10), | ||
sigma2 = diag(c(1,100))), | ||
"2-D, corr. diff." = list(mu1 = c(0,0), | ||
sigma1 = constDiagMatrix(dim = 2, diag = 3, offDiag = 1), | ||
mu2 = c(0,0), | ||
sigma2 = constDiagMatrix(dim = 2, diag = 3, offDiag = -1)), | ||
"10-D, corr. diff." = list(mu1 = rep(0,10), | ||
sigma1 = constDiagMatrix(dim = 10, diag = 1, offDiag = 0.999), | ||
mu2 = rep(0,10), | ||
sigma2 = diag(10)) | ||
) | ||
``` | ||
|
||
Compute true KL-D in each scenario (for completeness, not really needed here) | ||
|
||
```{r} | ||
(kld_true <- vapply(scenarios, function(x) do.call(kld_gaussian,x), 1)) | ||
``` | ||
|
||
## Simulation study: empirical convergence rate | ||
|
||
```{r} | ||
rates <- matrix(nrow = length(scenarios), | ||
ncol = nestim, | ||
dimnames = list(names(scenarios), names(estimators))) | ||
for (i in seq_along(scenarios)) { | ||
p <- scenarios[[i]] | ||
X <- MASS::mvrnorm(n, mu = p$mu1, Sigma = p$sigma1) | ||
q <- function(x) mvdnorm(x, mu = p$mu2, Sigma = p$sigma2) | ||
# Y <- MASS::mvrnorm(n, mu = p$mu2, Sigma = p$sigma2) # not supported yet... | ||
for (j in 1:nestim) { | ||
rates[i,j] <- convergence_rate(estimators[[j]], X = X, q = q, | ||
n.sizes = n.sizes, B = B, plot = TRUE) | ||
} | ||
} | ||
``` | ||
|
||
$\Rightarrow$ even in high-dimensional examples, the convergence rate is | ||
approximately as expected from the variance term. | ||
|
||
|