## Simulation 2: We investigate the behaviour of $C_{X \rightarrow Y}$ for different bivariate structures.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import scipy.stats as st
import scipy.special as sp
import random
import openturns as ot

from fastkde import fastKDE

In [2]:
def _cross_entropies(fit, score):
    """Return plug-in entropies ``(h_x, h_y)`` from KDEs fitted on ``fit`` and scored on ``score``."""
    dens_x = fastKDE.pdf_at_points(var1 = fit[:,0], list_of_points = list(score[:,0]))
    dens_y = fastKDE.pdf_at_points(var1 = fit[:,1], list_of_points = list(score[:,1]))
    ## keep only points where both estimated densities are strictly positive,
    ## so that both logarithms are finite
    keep = np.logical_and(dens_x > 0, dens_y > 0)
    return -np.mean(np.log(dens_x[keep])), -np.mean(np.log(dens_y[keep]))


def estimator(x, alpha = 0.05):
    """Cross-fitted estimate of the marginal entropy difference with a normal CI.

    The sample is split into two halves.  Marginal kernel density estimates
    fitted on one half are evaluated on the other half (and vice versa); the
    resulting plug-in entropies ``-mean(log density)`` are averaged over the
    two folds.  The reported contrast is ``delta = h_x - h_y``.

    Parameters
    ----------
    x : array-like with at least two columns
        Column 0 holds the X observations and column 1 the Y observations.
        If the number of rows is odd, the last row is dropped so the two
        halves have equal size.
    alpha : float, default 0.05
        The interval uses the standard normal quantile at ``1 - alpha/2``.

    Returns
    -------
    list
        ``[h_x, h_y, delta_lcb, delta, delta_ucb]``.

    Raises
    ------
    ValueError
        If ``x`` is not two-dimensional with at least two columns.
    """
    x = np.asarray(x)
    if x.ndim != 2 or x.shape[1] < 2:
        raise ValueError("x must be a 2-D array with at least two columns")

    if(x.shape[0]%2!=0):
        x = x[:-1]

    estim, inf = np.split(x, 2) #split data up into two halves

    ## cross fitting: each half is used once for fitting and once for scoring
    h_x1, h_y1 = _cross_entropies(estim, inf)
    h_x2, h_y2 = _cross_entropies(inf, estim)
    h_x = (h_x1 + h_x2)/2
    h_y = (h_y1 + h_y2)/2
    delta = (h_x - h_y)

    ## variance estimation on the full sample; without list_of_points the
    ## densities are evaluated at the sample's own points (see fastKDE docs)
    dens_x = fastKDE.pdf_at_points(var1 = x[:,0])
    dens_y = fastKDE.pdf_at_points(var1 = x[:,1])
    keep = np.logical_and(dens_x > 0, dens_y > 0)

    ## Var(log f_x - log f_y) computed directly on the differences; this equals
    ## cov[0,0] + cov[1,1] - 2*cov[0,1] but cannot round to a negative number
    log_ratio = np.log(dens_x[keep]) - np.log(dens_y[keep])
    delta_sd = np.sqrt(np.var(log_ratio, ddof = 1))

    ## number of points that actually entered the variance estimate; the
    ## length of the mask would also count the discarded points
    n_used = np.count_nonzero(keep)
    half_width = st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(n_used/2)

    delta_lcb = delta - half_width
    delta_ucb = delta + half_width

    return ([h_x, h_y, delta_lcb, delta, delta_ucb])

## case 1A: normal vs normal

In [3]:
def data_gen_1a(n, sx, sy):
    """Draw ``n`` (x, y) pairs with normal margins joined by a Gaussian copula.

    Parameters
    ----------
    n : int
        Number of pairs to sample.
    sx, sy : float
        Second argument of ``ot.Normal`` for the x and y margins
        (both margins have first argument 0).

    Returns
    -------
    numpy.ndarray
        The x draws and the y draws stacked as columns, one row per pair.
    """
    rho = 0.25  ## off-diagonal entry of the copula correlation matrix
    corr = ot.CorrelationMatrix(2)
    corr[0, 1] = rho
    corr[1, 0] = rho

    margins = [ot.Normal(0, sx), ot.Normal(0, sy)]
    joint = ot.ComposedDistribution(margins, ot.NormalCopula(corr))

    draws = joint.getSample(n)
    return np.column_stack((draws[:,0], draws[:,1]))

In [4]:
## Monte Carlo study for case 1A over the 5 x 5 integer grid of (sx, sy).
## Output columns: sx, sy, mean of delta, 2.5% quantile, 97.5% quantile.
niter = 200  ## replications per grid point
sx_grid = range(1, 6, 1)
sy_grid = range(1, 6, 1)
param = np.array([(a, b) for a in sx_grid for b in sy_grid])
output_final = np.empty([param.shape[0], 5])
for k, (sx, sy) in enumerate(param):
    op = np.zeros(niter)
    for i in range(niter):
        sample = data_gen_1a(n = 500, sx = float(sx), sy = float(sy))
        op[i] = estimator(sample)[3]  ## entry 3 of the returned list is delta
    lower, upper = np.quantile(op, [0.025, 0.975])
    output_final[k,:] = [sx, sy, op.mean(), lower, upper]
np.savetxt('/home/soumikp/2023_bka/output/sim3_nor_nor.csv', output_final, delimiter = ",")




## case 1B: normal vs exponential

In [5]:
def data_gen_1b(n, sx, sy):
    """Draw ``n`` (x, y) pairs: normal x, exponential y, Gaussian copula.

    Parameters
    ----------
    n : int
        Number of pairs to sample.
    sx : float
        Second argument of ``ot.Normal`` for the x margin (first argument 0).
    sy : float
        First argument of ``ot.Exponential`` for the y margin, so that the
        y margin has mean ``1/sy``; the second argument is 0.

    Returns
    -------
    numpy.ndarray
        The x draws and the y draws stacked as columns, one row per pair.
    """
    rho = 0.25  ## off-diagonal entry of the copula correlation matrix
    corr = ot.CorrelationMatrix(2)
    corr[0, 1] = rho
    corr[1, 0] = rho

    margin_x = ot.Normal(0, sx)
    margin_y = ot.Exponential(sy, 0)  ## mean = 1/sy
    joint = ot.ComposedDistribution([margin_x, margin_y], ot.NormalCopula(corr))

    draws = joint.getSample(n)
    return np.column_stack((draws[:,0], draws[:,1]))

In [6]:
## Monte Carlo study for case 1B: sx in 1..5, sy on 5 evenly spaced values in [0.1, 0.7].
## Output columns: sx, sy, mean of delta, 2.5% quantile, 97.5% quantile.
niter = 200  ## replications per grid point
sx_grid = range(1, 6, 1)
sy_grid = np.linspace(0.1, 0.70, num = 5)
param = np.array([(a, b) for a in sx_grid for b in sy_grid])
output_final = np.empty([param.shape[0], 5])
for k, (sx, sy) in enumerate(param):
    op = np.zeros(niter)
    for i in range(niter):
        sample = data_gen_1b(n = 500, sx = float(sx), sy = float(sy))
        op[i] = estimator(sample)[3]  ## entry 3 of the returned list is delta
    lower, upper = np.quantile(op, [0.025, 0.975])
    output_final[k,:] = [sx, sy, op.mean(), lower, upper]
np.savetxt('/home/soumikp/2023_bka/output/sim3_nor_exp.csv', output_final, delimiter = ",")

## case 1C: normal vs lognormal

In [7]:
def data_gen_1c(n, sx, sy):
    """Draw ``n`` (x, y) pairs: normal x, log-normal y, Gaussian copula.

    Parameters
    ----------
    n : int
        Number of pairs to sample.
    sx : float
        Second argument of ``ot.Normal`` for the x margin (first argument 0).
    sy : float
        Second argument of ``ot.LogNormal`` for the y margin; the first and
        third arguments are both 0.

    Returns
    -------
    numpy.ndarray
        The x draws and the y draws stacked as columns, one row per pair.
    """
    rho = 0.25  ## off-diagonal entry of the copula correlation matrix
    corr = ot.CorrelationMatrix(2)
    corr[0, 1] = rho
    corr[1, 0] = rho

    margin_x = ot.Normal(0, sx)
    ## OpenTURNS signature is LogNormal(muLog, sigmaLog, gamma): sy is the
    ## log-scale standard deviation, so the mean is exp(sy**2 / 2), not 1/sy
    margin_y = ot.LogNormal(0, sy, 0)
    joint = ot.ComposedDistribution([margin_x, margin_y], ot.NormalCopula(corr))

    draws = joint.getSample(n)
    return np.column_stack((draws[:,0], draws[:,1]))

In [8]:
## Monte Carlo study for case 1C: sx in 1..5, sy on 5 evenly spaced values in [1, 2.5].
## Output columns: sx, sy, mean of delta, 2.5% quantile, 97.5% quantile.
niter = 200  ## replications per grid point
sx_grid = range(1, 6, 1)
sy_grid = np.linspace(1, 2.5, num = 5)
param = np.array([(a, b) for a in sx_grid for b in sy_grid])
output_final = np.empty([param.shape[0], 5])
for k, (sx, sy) in enumerate(param):
    op = np.zeros(niter)
    for i in range(niter):
        sample = data_gen_1c(n = 500, sx = float(sx), sy = float(sy))
        op[i] = estimator(sample)[3]  ## entry 3 of the returned list is delta
    lower, upper = np.quantile(op, [0.025, 0.975])
    output_final[k,:] = [sx, sy, op.mean(), lower, upper]
np.savetxt('/home/soumikp/2023_bka/output/sim3_nor_log.csv', output_final, delimiter = ",")

## case 1D: Exponential vs Lognormal

In [9]:
def data_gen_1d(n, sx, sy):
    """Draw ``n`` (x, y) pairs: exponential x, log-normal y, Gaussian copula.

    Parameters
    ----------
    n : int
        Number of pairs to sample.
    sx : float
        First argument of ``ot.Exponential`` for the x margin; the second
        argument is 0.
    sy : float
        Second argument of ``ot.LogNormal`` for the y margin; the first and
        third arguments are both 0.

    Returns
    -------
    numpy.ndarray
        The x draws and the y draws stacked as columns, one row per pair.
    """
    rho = 0.25  ## off-diagonal entry of the copula correlation matrix
    corr = ot.CorrelationMatrix(2)
    corr[0, 1] = rho
    corr[1, 0] = rho

    margin_x = ot.Exponential(sx, 0)
    ## OpenTURNS signature is LogNormal(muLog, sigmaLog, gamma): sy is the
    ## log-scale standard deviation, so the mean is exp(sy**2 / 2), not 1/sy
    margin_y = ot.LogNormal(0, sy, 0)
    joint = ot.ComposedDistribution([margin_x, margin_y], ot.NormalCopula(corr))

    draws = joint.getSample(n)
    return np.column_stack((draws[:,0], draws[:,1]))

In [10]:
## Monte Carlo study for case 1D: sx on 5 evenly spaced values in [0.3, 0.7],
## sy on 5 evenly spaced values in [1, 2.5].
## Output columns: sx, sy, mean of delta, 2.5% quantile, 97.5% quantile.
niter = 200  ## replications per grid point
sx_grid = np.linspace(0.3, 0.70, num = 5)
sy_grid = np.linspace(1, 2.5, num = 5)
param = np.array([(a, b) for a in sx_grid for b in sy_grid])
output_final = np.empty([param.shape[0], 5])
for k, (sx, sy) in enumerate(param):
    op = np.zeros(niter)
    for i in range(niter):
        sample = data_gen_1d(n = 500, sx = float(sx), sy = float(sy))
        op[i] = estimator(sample)[3]  ## entry 3 of the returned list is delta
    lower, upper = np.quantile(op, [0.025, 0.975])
    output_final[k,:] = [sx, sy, op.mean(), lower, upper]
np.savetxt('/home/soumikp/2023_bka/output/sim3_exp_log.csv', output_final, delimiter = ",")