# Frequency Coverage Simulation
This notebook runs a simulation to test the frequency coverage performance of Gaussian process
reference priors.

For details, see Section 5.1 of [1] and Section 5 of [2].

1: *Ren Cuirong, Sun Dongchu, He Chong.* Objective Bayesian analysis for a spatial
model with nugget effects // Journal of Statistical Planning and Inference.
2012. 142, 7. 1933–1946.

2: *Berger James O, Oliveira Victor De, Sansó Bruno.* Objective Bayesian Analysis
of Spatially Correlated Data // Journal of the American Statistical Association.
2001. 96, 456. 1361–1374.

In [1]:
import numpy as np
import scipy
from bbai.gp import BayesianGaussianProcessRegression, Power1CovarianceFunction
import matplotlib.pyplot as plt
import seaborn

# Seed NumPy's global random generator so the simulated draws are reproducible.
np.random.seed(0)

## Generate Data Set

In [2]:
# Number of grid points per axis; the simulated data set has N * N locations.
N = 10

def make_location_matrix():
    """Return the (N*N, 2) array of lattice locations on the unit square.

    The points form an N-by-N grid with spacing 1 / (N - 1). Rows are ordered
    so that the first coordinate varies slowest and the second fastest.
    """
    spacing = 1.0 / (N - 1)
    ticks = np.arange(N) * spacing
    locations = np.zeros((N * N, 2))
    # Each first-coordinate value is held for N consecutive rows ...
    locations[:, 0] = np.repeat(ticks, N)
    # ... while the second coordinate cycles through every tick.
    locations[:, 1] = np.tile(ticks, N)
    return locations

def make_covariance_matrix(Z, theta, eta):
    """Build the exponential covariance matrix with a nugget on the diagonal.

    Entry (i, j) is ``exp(-||Z[i] - Z[j]|| / theta)``, and ``eta`` is added to
    every diagonal entry.

    The matrix size is taken from ``Z`` itself rather than from the global
    grid size, so the function works for any number of locations.

    Args:
        Z: 2-D array of locations, one row per point.
        theta: Length parameter dividing the pairwise Euclidean distances.
        eta: Value added to each diagonal entry.

    Returns:
        Square array with one row and one column per row of ``Z``.
    """
    Z = np.asarray(Z, dtype=float)
    # Broadcasting yields every pairwise difference at once, replacing the
    # element-by-element Python double loop.
    diffs = Z[:, np.newaxis, :] - Z[np.newaxis, :, :]
    dists = np.sqrt(np.sum(diffs * diffs, axis=-1))
    res = np.exp(-dists / theta)
    res[np.diag_indices_from(res)] += eta
    return res

def make_dataset_p1(theta, eta):
    """Assemble the p = 1 data set: locations, a ones-only design, covariance.

    Returns a tuple ``(Z, X, K)`` where ``Z`` comes from
    ``make_location_matrix``, ``X`` is an (N*N, 1) column of ones, and ``K``
    is ``make_covariance_matrix(Z, theta, eta)``.
    """
    locations = make_location_matrix()
    covariance = make_covariance_matrix(locations, theta, eta)
    intercept_only = np.ones((N * N, 1))
    return locations, intercept_only, covariance

def make_design_matrix_p6(Z):
    """Return the six-column quadratic design matrix for 2-D locations.

    For each row ``(x, y)`` of ``Z`` the output row is
    ``[1, x, y, x*x, x*y, y*y]``.

    The number of rows is taken from ``Z`` itself rather than from the global
    grid size, so any number of locations is accepted.

    Args:
        Z: 2-D array whose first two columns are the x and y coordinates.

    Returns:
        Float array with one row per row of ``Z`` and six columns.
    """
    Z = np.asarray(Z, dtype=float)
    # Slicing with a length-1 range keeps each coordinate as a column vector.
    x = Z[:, 0:1]
    y = Z[:, 1:2]
    ones = np.ones_like(x)
    return np.hstack((ones, x, y, x * x, x * y, y * y))

def make_dataset_p6(theta, eta):
    """Assemble the p = 6 data set: locations, quadratic design, covariance.

    Returns a tuple ``(Z, X, K)`` where ``Z`` comes from
    ``make_location_matrix``, ``X`` is ``make_design_matrix_p6(Z)``, and ``K``
    is ``make_covariance_matrix(Z, theta, eta)``.
    """
    locations = make_location_matrix()
    covariance = make_covariance_matrix(locations, theta, eta)
    design = make_design_matrix_p6(locations)
    return locations, design, covariance

def make_target_vector(K):
    """Draw one zero-mean multivariate normal sample with covariance ``K``.

    The draw uses NumPy's global random state.
    """
    dimension = K.shape[0]
    zero_mean = np.zeros(dimension)
    return np.random.multivariate_normal(zero_mean, K)

## Run simulation

In [3]:
def run_simulation(Z, X, K, theta, eta, beta):
    """Estimate how often the fitted marginals cover the true parameters.

    Repeats 200 times: draw a target vector with covariance ``K``, add the
    mean ``X @ beta``, fit the model, and evaluate each fitted marginal's CDF
    at the true value (``theta``, ``eta``, 1.0 for sigma2, and each entry of
    ``beta``). A parameter counts as covered when that CDF value lies strictly
    between 0.025 and 0.975. The coverage fractions are printed.

    Args:
        Z: Location matrix passed to ``model.fit``.
        X: Design matrix passed to ``model.fit`` and used to form the mean.
        K: Covariance matrix used to draw the simulated targets.
        theta: True length value checked against ``marginal_length_``.
        eta: True noise-ratio value checked against ``marginal_noise_ratio_``.
        beta: True regression coefficients.

    Returns:
        Tuple ``(theta_cdfs, eta_cdfs, sigma2_cdfs)`` of per-draw CDF values.
    """
    n_draws = 200
    lower, upper = 0.025, 0.975
    true_sigma2 = 1.0
    n_coefs = len(beta)

    def inside(prob):
        # True when the CDF value falls strictly inside the central band.
        return lower < prob < upper

    theta_hits = 0
    eta_hits = 0
    sigma2_hits = 0
    beta_hits = np.zeros(n_coefs)
    theta_cdfs, eta_cdfs, sigma2_cdfs = [], [], []

    # The regression mean is identical for every draw, so compute it once.
    trend = np.dot(X, beta)
    model = BayesianGaussianProcessRegression(kernel=Power1CovarianceFunction())

    for _ in range(n_draws):
        y = make_target_vector(K) + trend
        model.fit(Z, y, X)

        prob = model.marginal_length_.cdf(theta)
        if inside(prob):
            theta_hits += 1
        theta_cdfs.append(prob)

        prob = model.marginal_noise_ratio_.cdf(eta)
        if inside(prob):
            eta_hits += 1
        eta_cdfs.append(prob)

        prob = model.marginal_sigma2_signal_.cdf(true_sigma2)
        if inside(prob):
            sigma2_hits += 1
        sigma2_cdfs.append(prob)

        for k, coef in enumerate(beta):
            if inside(model.marginal_regressors_[k].cdf(coef)):
                beta_hits[k] += 1

    print('*************************')
    print('theta =', theta)
    print('eta =', eta)
    print('sigma2 =', true_sigma2)
    print('theta_coverage: ', theta_hits / n_draws)
    print('eta_coverage: ', eta_hits / n_draws)
    print('sigma2_coverage: ', sigma2_hits / n_draws)
    for k in range(n_coefs):
        print('beta_coverage_%d:%f' % (k, beta_hits[k] / n_draws))
    return theta_cdfs, eta_cdfs, sigma2_cdfs

## Run simulations with p = 1

In [4]:
# True regression coefficient for the single-column (p = 1) design.
beta = [1.0]
# Sweep every (eta, theta) combination. run_simulation prints the coverage
# rates for each setting; the CDF lists it returns are discarded here.
for eta in [0.01, 0.05, 0.1, 0.2]:
    for theta in [0.2, 0.5, 1.0]:
        Z, X, K = make_dataset_p1(theta, eta)
        run_simulation(Z, X, K, theta, eta, beta)

*************************
theta = 0.2
eta = 0.01
sigma2 = 1.0
theta_coverage:  0.945
eta_coverage:  0.885
sigma2_coverage:  0.99
beta_coverage_0:1.000000
*************************
theta = 0.5
eta = 0.01
sigma2 = 1.0
theta_coverage:  0.985
eta_coverage:  0.98
sigma2_coverage:  0.995
beta_coverage_0:0.990000
*************************
theta = 1.0
eta = 0.01
sigma2 = 1.0
theta_coverage:  0.995
eta_coverage:  0.995
sigma2_coverage:  0.98
beta_coverage_0:0.965000
*************************
theta = 0.2
eta = 0.05
sigma2 = 1.0
theta_coverage:  0.95
eta_coverage:  1.0
sigma2_coverage:  0.975
beta_coverage_0:0.995000
*************************
theta = 0.5
eta = 0.05
sigma2 = 1.0
theta_coverage:  0.99
eta_coverage:  0.995
sigma2_coverage:  0.985
beta_coverage_0:0.995000
*************************
theta = 1.0
eta = 0.05
sigma2 = 1.0
theta_coverage:  1.0
eta_coverage:  1.0
sigma2_coverage:  0.985
beta_coverage_0:0.945000
*************************
theta = 0.2
eta = 0.1
sigma2 = 1.0
theta_coverage:  0.9

## Run simulations with p = 6

In [5]:
# True regression coefficients, one per column of the six-column design.
beta = [0.15, -0.65, -0.1, 0.9, -1.0, 1.2]
# Sweep every (eta, theta) combination. run_simulation prints the coverage
# rates for each setting; the CDF lists it returns are discarded here.
for eta in [0.01, 0.05, 0.1, 0.2]:
    for theta in [0.2, 0.5, 1.0]:
        Z, X, K = make_dataset_p6(theta, eta)
        run_simulation(Z, X, K, theta, eta, beta)

*************************
theta = 0.2
eta = 0.01
sigma2 = 1.0
theta_coverage:  0.995
eta_coverage:  0.865
sigma2_coverage:  0.995
beta_coverage_0:1.000000
beta_coverage_1:0.945000
beta_coverage_2:0.990000
beta_coverage_3:0.915000
beta_coverage_4:0.970000
beta_coverage_5:0.935000
*************************
theta = 0.5
eta = 0.01
sigma2 = 1.0
theta_coverage:  1.0
eta_coverage:  0.95
sigma2_coverage:  0.975
beta_coverage_0:0.925000
beta_coverage_1:0.895000
beta_coverage_2:0.885000
beta_coverage_3:0.900000
beta_coverage_4:0.860000
beta_coverage_5:0.900000
*************************
theta = 1.0
eta = 0.01
sigma2 = 1.0
theta_coverage:  0.96
eta_coverage:  0.915
sigma2_coverage:  0.835
beta_coverage_0:0.765000
beta_coverage_1:0.870000
beta_coverage_2:0.850000
beta_coverage_3:0.835000
beta_coverage_4:0.890000
beta_coverage_5:0.840000
*************************
theta = 0.2
eta = 0.05
sigma2 = 1.0
theta_coverage:  1.0
eta_coverage:  1.0
sigma2_coverage:  1.0
beta_coverage_0:0.960000
beta_coverage_1