## Simulation 1: Generate $X \sim f_X$ and obtain $Y = f(X)$, add contaminant $\epsilon$.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import scipy.stats as st
import scipy.special as sp
import random

from fastkde import fastKDE

In [2]:
def _holdout_entropies(fit_half, eval_half):
    """Return ``(h_x, h_y)``: negative mean log marginal density per column.

    For each of the two columns, ``fastKDE.pdf_at_points`` is called with the
    column of ``fit_half`` as ``var1`` and the matching column of
    ``eval_half`` as ``list_of_points`` (presumably: density fitted on the
    former and evaluated at the latter -- confirm against the fastKDE docs).
    Rows where either density is not strictly positive are removed from both
    averages so the logarithm stays finite.
    """
    dens_x = fastKDE.pdf_at_points(var1 = fit_half[:,0], list_of_points = list(eval_half[:,0]))
    dens_y = fastKDE.pdf_at_points(var1 = fit_half[:,1], list_of_points = list(eval_half[:,1]))
    keep = np.logical_and(dens_x > 0, dens_y > 0)
    return -np.mean(np.log(dens_x[keep])), -np.mean(np.log(dens_y[keep]))


def estimator(x, alpha = 0.05):
    """Cross-fitted estimate of the marginal entropy difference, with bounds.

    Parameters
    ----------
    x : numpy.ndarray
        Two-column array; column 0 is treated as X and column 1 as Y.  When
        the number of rows is odd the last row is discarded so the data can
        be split into two equal halves.
    alpha : float
        The bounds use the standard normal quantile at ``1 - alpha/2``.

    Returns
    -------
    list
        ``[h_x, h_y, delta_lcb, delta, delta_ucb]`` where ``h_x`` and ``h_y``
        are the averages of the two per-half entropy estimates,
        ``delta = h_x - h_y`` and ``delta_lcb`` / ``delta_ucb`` are
        ``delta`` minus / plus the normal-quantile half width.
    """
    # np.split(x, 2) needs an even number of rows.
    if x.shape[0] % 2:
        x = x[:-1]

    estim, inf = np.split(x, 2)

    # Each half plays both roles once; the two estimates are then averaged.
    h_x1, h_y1 = _holdout_entropies(estim, inf)
    h_x2, h_y2 = _holdout_entropies(inf, estim)
    h_x = (h_x1 + h_x2)/2
    h_y = (h_y1 + h_y2)/2
    delta = h_x - h_y

    # Spread of delta: variance of (log f_X - log f_Y) computed from the
    # densities returned for the full (trimmed) sample.
    full_x = fastKDE.pdf_at_points(var1 = x[:,0])
    full_y = fastKDE.pdf_at_points(var1 = x[:,1])
    select = np.logical_and(full_x > 0, full_y > 0)
    covar = np.cov(np.log(full_x[select]), np.log(full_y[select]))
    delta_sd = np.sqrt(covar[0,0] + covar[1,1] - 2*covar[0,1])

    # NOTE(review): len(select) is the length of the boolean mask (every row),
    # not the number of rows that passed the positivity filter -- confirm
    # that this, and the division by 2, is the intended effective sample size.
    half_width = st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select)/2)

    return [h_x, h_y, delta - half_width, delta, delta + half_width]

## Case 1: $X \sim U(0, 1); Y = X^{1/3}$

In [3]:
def data_gen_1(n = 1000, eps = 0):
    """Simulate Case 1: X ~ U(0, 1), Y = X**(1/3), with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default reproduces the old behaviour.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    y = np.power(x, 1/3)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [4]:
# Case 1 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0, 0.1, 0.15, 0.20, 0.25]:
    np.random.seed(1234)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim1_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_1(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')

## Case 2: $X \sim U(0, 1); Y = X^{1/2}$

In [5]:
def data_gen_2(n = 1000, eps = 0):
    """Simulate Case 2: X ~ U(0, 1), Y = X**(1/2), with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default reproduces the old behaviour.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    y = np.power(x, 1/2)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [14]:
# Case 2 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0.10]:
    np.random.seed(1234)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim2_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_2(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')

## Case 3: $X \sim U(0, 1); Y = X^{2}$

In [7]:
def data_gen_3(n = 1000, eps = 0):
    """Simulate Case 3: X ~ U(0, 1), Y = X**2, with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default reproduces the old behaviour.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    y = np.power(x, 2)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [13]:
# Case 3 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0.10]:
    np.random.seed(1234)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim3_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_3(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')

## Case 4: $X \sim U(0, 1); Y = X^{3}$

In [11]:
def data_gen_4(n = 1000, eps = 0):
    """Simulate Case 4: X ~ U(0, 1), Y = X**3, with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default reproduces the old behaviour.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    y = np.power(x, 3)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [12]:
# Case 4 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0, 0.05, 0.10, 0.15, 0.20]:
    np.random.seed(1234)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim4_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_4(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')

## Case 5: $X \sim U(0, 1); Y = \exp(X)$

In [16]:
def data_gen_5(n = 1000, eps = 0):
    """Simulate Case 5: X ~ U(0, 1), Y = exp(X), with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default reproduces the old behaviour.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    y = np.exp(x)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [17]:
# Case 5 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0, 0.05, 0.10, 0.15, 0.20]:
    np.random.seed(1234)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim5_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_5(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')

## Case 6: $X \sim U(0, 1); Y = \sin(\pi X/2)$

In [18]:
def data_gen_6(n = 1000, eps = 0):
    """Simulate Case 6: X ~ U(0, 1), Y = sin(pi*X/2), with Gaussian contamination.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to draw.  Previously ignored (the sample size
        was hard-coded to 1000); the default keeps the old sample size.
        With fewer than two distinct draws the min-max rescaling divides by
        zero.
    eps : float
        Standard deviation of the zero-mean normal noise added to the
        rescaled y values; ``0`` adds no noise.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)``: column 0 is x and column 1 is y.
    """
    x = np.random.uniform(low = 0, high = 1, size = n)
    # The case heading specifies sin(pi*X/2); the previous code used the
    # literal 11 in place of pi, which is not monotone on [0, 1].
    y = np.sin(np.pi*x/2)
    # Min-max rescale both variables to [0, 1]; the noise is added to y
    # after rescaling, so noisy y values can fall outside that interval.
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    y = (y - np.min(y))/(np.max(y) - np.min(y)) + np.random.normal(loc=0, scale = eps, size = n)
    return np.column_stack((x, y))

In [21]:
# Case 6 driver: for each noise level run 250 replicates and write the point
# estimate of delta (element 3 of estimator's return value) to a CSV file.
for eps in [0, 0.05, 0.10, 0.15, 0.20]:
    np.random.seed(4321)  # same RNG starting state for every noise level
    niter = 250
    noise_sd = np.round(eps, 2)
    filename = '/home/soumikp/2023_bka/code/simulation_coverage/' + 'sim6_' + str(noise_sd) + '_vals.csv'
    op = np.empty(niter)  # every slot is overwritten below
    for i in range(niter):
        sample = data_gen_6(n = 1000, eps = noise_sd)
        op[i] = estimator(sample)[3]
    np.savetxt(filename, op, delimiter=',', header = 'C')