In [7]:
# Importing necessary libraries
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.stats import wasserstein_distance

# Defining the provided functions and parameters

# Defining the weight function
def omega(t, x, c1, c2):
    sigma = math.sqrt(np.var(X))
    h1 = (c1/math.sqrt(12))*(T ** (-(1/5)))
    h2 = c2*sigma*(T ** (-(1/5)))
    numerator = K_h1(t/T - t / T, h1) * np.prod(K_h2(x - X[t-1], h2))
    denominator = np.sum([K_h1(s/T - s / T, h1) * np.prod(K_h2(x - X[s-1], h2)) for s in range(1, T+1)])
    return numerator / denominator

# Defining the empirical conditional distribution function estimator
def ecdf(x, v):
    weights = np.array([omega(t, x, c1, c2) for t in range(1, T+1)])
    indicator = (Y <= v).astype(int)
    return np.sum(weights * indicator)

# Define uniform kernels for K1
def K_h1(z, h1):
    return 0.5 if abs(z/h1) <= 1 else 0

# Define Gaussian kernels for K2
def K_h2(z, h2):
    return np.exp(-z**2 / (2 * h2**2))

# Generate locally stationary data
np.random.seed(42)
# Define the time-varying parameter
def time_varying_param(t):
    phi = 0.2 + 0.3 * np.sin(0.1 * t/T)
    return phi

# Simulate the time-varying AR(1) model
def simulate_tv_ar(T, param_func):
    epsilon = np.random.normal(0, 1, T)
    y = np.zeros(T)
    
    for t in range(1, T):
        phi = param_func(t)
        y[t] = phi * y[t-1] + epsilon[t]
    
    return y

# Parameters
T = 100  # Length of time series
d = 1  # Number of covariates
c1 = 5
c2 = 1.68
x_test = np.array([0.5])

c1_values = [1, 2, 3, 4, 5]
c2_values = [0.5, 1, 1.5, 2, 2.5]

# Running the replication loop
c1_c2_W1 = {}  # Store average W1 for each pair of c1 and c2
c1_c2_W1_individual = {}  # Store individual W1 for each pair of c1 and c2

for c1 in c1_values:
    for c2 in c2_values:
        W1_temp_list = []  # Store W1 for each replication
        for rep in range(2):
            # Generate locally stationary data
            np.random.seed(42+rep)  # Use different seed for each replication
            Y = simulate_tv_ar(T, time_varying_param)
            X = np.array([Y[t-2] for t in range(1, T+1)])
            
            v_values = np.linspace(Y.min(), Y.max(), T)  # Update v_values based on new Y
            
            # Estimated F for each v
            F_values = [ecdf(x_test, v) for v in v_values]
            
            # True CDF
            data_sorted = np.sort(Y)
            True_CDF = np.arange(1, len(data_sorted) + 1) / len(data_sorted)
            
            # Compute the 1-Wasserstein distance
            W1 = wasserstein_distance(True_CDF, F_values)
            W1_temp_list.append(W1)
            
        # Storing individual and average 1-Wasserstein distances for this pair of c1 and c2
        c1_c2_W1[(c1, c2)] = np.mean(W1_temp_list)
        c1_c2_W1_individual[(c1, c2)] = W1_temp_list

print("Individual 1-Wasserstein Distances for each (c1, c2): ", c1_c2_W1_individual)
print("Average 1-Wasserstein Distance for each (c1, c2): ", c1_c2_W1)


Individual 1-Wasserstein Distances for each (c1, c2):  {(1, 0.5): [0.11601204027113031, 0.06393420759976075], (1, 1): [0.08680082817602684, 0.07385995948221255], (1, 1.5): [0.07572412017885607, 0.07354779476037898], (1, 2): [0.07601122727119483, 0.07498042375761527], (1, 2.5): [0.07703946325729044, 0.07689096356329775], (2, 0.5): [0.11601204027113031, 0.06393420759976075], (2, 1): [0.08680082817602684, 0.07385995948221255], (2, 1.5): [0.07572412017885607, 0.07354779476037898], (2, 2): [0.07601122727119483, 0.07498042375761527], (2, 2.5): [0.07703946325729044, 0.07689096356329775], (3, 0.5): [0.11601204027113031, 0.06393420759976075], (3, 1): [0.08680082817602684, 0.07385995948221255], (3, 1.5): [0.07572412017885607, 0.07354779476037898], (3, 2): [0.07601122727119483, 0.07498042375761527], (3, 2.5): [0.07703946325729044, 0.07689096356329775], (4, 0.5): [0.11601204027113031, 0.06393420759976075], (4, 1): [0.08680082817602684, 0.07385995948221255], (4, 1.5): [0.07572412017885607, 0.073547