In [1]:
/**
 * Compute an estimator using bootstrapping.
 * We use resampling with replacement.
 */

%use s2
import java.util.*

/**
 * Test of resampling series.
 *
 * Artifically construct a dependent sequence (consisting of 0 or 1) by
 * retaining the last value with probability *q* while changing the last value
 * with probability (1-q).
 *
 * The simple bootstrapping method [CaseResamplingReplacement] will severely overestimate
 * the occurrences of certain pattern, while block bootstrapping method [BlockBootstrap]
 * gives a good estimation of the occurrences in the original sample. 
 * All estimators over estimate.
 */

// generate a random set of samples
val N = 10000 // number of samples
val q = 0.70 // the probability of retaining last value
val uniformRNG = UniformRNG() // a uniform distribution RNG
uniformRNG.seed(1234567890L)
val rlg: RandomNumberGenerator = Ziggurat2000Exp() // an exponential distribution RNG
rlg.seed(1234567890L)

// generating the random samples
val mean = Mean()
val sample = DoubleArray(N)
sample[0] = if (uniformRNG.nextDouble() > 0.5) 1.0 else 0.0
for (i in 1 until N) {
    sample[i] = if (uniformRNG.nextDouble() < q) sample[i - 1] else 1 - sample[i - 1]
    mean.addData(sample[i])
}

// the pattern to match
val pattern = doubleArrayOf(1.0, 0.0, 1.0, 0.0, 1.0)

// number of bootstrap samples
val B = 10000

// count the number patterns found in the original samples
val countInSample = match(sample, pattern)

// count the number patterns found in the bootstrapped samples generated using CaseResamplingReplacement
val simpleBoot = CaseResamplingReplacement(sample, uniformRNG)
val countInSimpleBootstrap = Mean()

// count the number patterns found in the bootstrapped samples generated using PattonPolitisWhite2009ForObject.Type.STATIONARY
val stationaryBlock = PattonPolitisWhite2009(sample, PattonPolitisWhite2009ForObject.Type.STATIONARY, uniformRNG, rlg)
val countInStationaryBlockBootstrap = Mean()

// count the number patterns found in the bootstrapped samples generated using PattonPolitisWhite2009ForObject.Type.CIRCULAR
val circularBlock = PattonPolitisWhite2009(sample, PattonPolitisWhite2009ForObject.Type.CIRCULAR, uniformRNG, rlg)
val countInCircularBlockBootstrap = Mean()

// counting the number of patterns found
for (i in 0 until B) {
    countInSimpleBootstrap.addData(match(simpleBoot.newResample(), pattern))
    countInStationaryBlockBootstrap.addData(match(stationaryBlock.newResample(), pattern))
    countInCircularBlockBootstrap.addData(match(circularBlock.newResample(), pattern))
}

println("matched patterns in sample: $countInSample")
println("matched patterns in simple bootstrap: %f".format(countInSimpleBootstrap.value())) // way off
println("matched patterns in stationary block bootstrap: %f".format(countInStationaryBlockBootstrap.value())) // close enough
println("matched patterns in circular block bootstrap: %f".format(countInCircularBlockBootstrap.value())) // closest

fun match(seq: DoubleArray, pattern: DoubleArray): Double {
    var count = 0.0
    for (i in 0 until seq.size - pattern.size) {
        if (seq[i] == pattern[0]) {
            val trunc = Arrays.copyOfRange(seq, i, i + pattern.size)
            if (DoubleUtils.equal(trunc, pattern, 1e-7)) {
                count++
            }
        }
    }
    return count
}

matched patterns in sample: 39.0
matched patterns in simple bootstrap: 316.876100
matched patterns in stationary block bootstrap: 45.085600
matched patterns in circular block bootstrap: 43.989500
