In [1]:
"""This module implements the Fréchet regression method from Petersen and Müller 2019.

The method is implemented in the context of estimation in the space of distribution
functions. Also, a simulation study is designed to compare with the methods outlined in
Petersen and Müller 2016.

"""

'This module implements the Fréchet regression method from Petersen and Müller 2019.\n\nThe method is implemented in the context of estimation in the space of distribution\nfunctions. Also, a simulation study is designed to compare with the methods outlined in\nPetersen and Müller 2016.\n\n'

In [2]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from frechet_fda.config import SRC
from frechet_fda.tools.data_generation_tools import (
    gen_params_regression,
    gen_predictor_values_regression,
    gen_sample_points_from_qfs,
    gen_y_qf,
    make_estimated_pdf,
)
from frechet_fda.tools.frechet_tools import ise_wasserstein, solve_frechet_qp
from frechet_fda.tools.function_tools import (
    get_optimal_range,
    inverse_log_qd_transform,
    log_qd_transform,
    mean_func,
)

ModuleNotFoundError: No module named 'frechet_fda'

In [None]:
# --- Data-generating process -------------------------------------------------
N = 200  # sample size handed to the predictor generator
seed = 28071995  # seed handed to the data-generation helpers
predictor_bounds = (-1, 1)
mu_params_dict = dict(mu0=0, beta=3, v1=0.25)
sigma_params_dict = dict(sigma0=3, gamma=0.5, v2=2)

# --- Evaluation grids --------------------------------------------------------
grid_size = 500  # number of points of the u grid
grid_to_predict_size = 50  # number of predictor values to predict at
# delta > 0 would keep the u grid bounded away from zero and one
delta = 0
u = np.linspace(start=delta, stop=1 - delta, num=grid_size)

In [None]:
# Generate the predictor sample first, then the (mu, sigma) parameters for it;
# both generators receive the same seed.
predictor_vals = gen_predictor_values_regression(N, predictor_bounds, seed)
regression_params = gen_params_regression(
    mu_params_dict, sigma_params_dict, predictor_vals, seed,
)
mus, sigmas = regression_params
# Cell output: shapes of the three generated objects
(predictor_vals.shape, mus.shape, sigmas.shape)

In [None]:
# Equispaced grid of predictor values at which the conditional quantile functions
# are estimated. It spans predictor_bounds, so it stays in sync with the interval
# handed to the predictor generator instead of repeating the literal bounds.
x_grid = np.linspace(*predictor_bounds, grid_to_predict_size)
x_grid.shape

In [None]:
# One quantile function per (mu, sigma) pair, evaluated on the grid u; drop_inf()
# is applied to each of them before they are stored.
qfs = [raw_qf.drop_inf() for raw_qf in gen_y_qf(mus, sigmas, u)]
qfs[0].y.shape

In [None]:
# Indices of the two quantile functions that are plotted against each other
first_qf_to_compare = 0
second_qf_to_compare = 199

# Legend entries report the rounded parameters of the respective distribution
label_first, label_second = (
    f"Mu = {round(mus[idx], 2)}, Sigma = {round(sigmas[idx], 2)}"
    for idx in (first_qf_to_compare, second_qf_to_compare)
)
qfs[first_qf_to_compare].compare(
    qfs[second_qf_to_compare],
    label_self=label_first,
    label_other=label_second,
)

In [None]:
# Plot the densities of a few randomly selected generated distributions.
# The generator is seeded so that a fresh "Restart & Run All" always shows the
# same curves. Indices are sampled instead of the objects themselves, so NumPy
# never has to coerce the list of function objects into an array.
n_densities_to_plot = min(6, len(qfs))
density_rng = np.random.default_rng(seed)
chosen_indices = density_rng.choice(
    len(qfs), size=n_densities_to_plot, replace=False,
)
some_qfs = [qfs[idx] for idx in chosen_indices]

fig, ax = plt.subplots()
for qf in some_qfs:
    # Density = derivative of the inverted quantile function
    pdf = qf.invert().differentiate()
    ax.plot(pdf.x, pdf.y)
ax.set_xlabel("y")
ax.set_ylabel("Density")
ax.set_title("Densities of randomly selected generated distributions")
ax.grid()
plt.show()

In [None]:
# Estimate one conditional quantile function for every value in x_grid from the
# observed (predictor, quantile function) pairs
estimates = solve_frechet_qp(
    quantile_functions=qfs,
    x_observed=predictor_vals,
    xs_to_predict=x_grid,
)

In [None]:
# Draw the density implied by every estimated quantile function (inverse, then
# derivative) into one figure
for estimated_qf in estimates:
    implied_pdf = estimated_qf.invert().differentiate()
    plt.plot(implied_pdf.x, implied_pdf.y)

In [None]:
# Matrix of estimated quantile function values: row k holds estimates[k].y
z_values = np.zeros((len(x_grid), len(estimates[0].x)))
for row_idx in range(len(estimates)):
    z_values[row_idx, :] = estimates[row_idx].y

# Coordinates of the surface: grid of the first estimate against x_grid
X, Y = np.meshgrid(estimates[0].x, x_grid)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")
ax.plot_surface(X, Y, z_values, cmap="viridis", alpha=0.9)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=245)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)
plt.tight_layout()
plt.show()

In [None]:
# Quantile functions generated at the prediction grid x_grid (rather than at the
# observed predictor values), with the same parameter dictionaries and seed
mus_to_compare, sigmas_to_compare = gen_params_regression(
    mu_params_dict, sigma_params_dict, x_grid, seed,
)
qfs_of_interest = [
    grid_qf.drop_inf()
    for grid_qf in gen_y_qf(mus_to_compare, sigmas_to_compare, u)
]

# Matrix of their values: row k holds qfs_of_interest[k].y
z_values_qfs_subset = np.zeros((len(x_grid), len(qfs_of_interest[0].x)))
for row_idx in range(len(qfs_of_interest)):
    z_values_qfs_subset[row_idx, :] = qfs_of_interest[row_idx].y

# Coordinates of the surface
X, Y = np.meshgrid(qfs_of_interest[0].x, x_grid)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Surface of the generated quantile functions
ax.plot_surface(X, Y, z_values_qfs_subset, cmap="magma", alpha=0.9)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=240)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)

plt.tight_layout()
plt.show()

In [None]:
# Surface of all generated quantile functions (one row per observation), not only
# of those at the prediction grid.
# NOTE(review): rows are drawn in the order of predictor_vals; the mesh is only
# well-formed if those values are sorted - confirm against the generator.
z_values_qfs = np.zeros((len(predictor_vals), len(qfs[0].x)))
for row_idx in range(len(qfs)):
    z_values_qfs[row_idx, :] = qfs[row_idx].y

# Coordinates of the surface
X, Y = np.meshgrid(qfs[0].x, predictor_vals)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Surface of the generated quantile functions
ax.plot_surface(X, Y, z_values_qfs, cmap="magma", alpha=0.7)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=240)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)

plt.tight_layout()
plt.show()

In [None]:
# Overlay two surfaces on common coordinates: z_values (viridis) and
# z_values_qfs_subset (magma).
# NOTE(review): both matrices are drawn on the grid of qfs_of_interest[0]; this
# presumes z_values was built on the same grid - confirm.
X, Y = np.meshgrid(qfs_of_interest[0].x, x_grid)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")
surfaces_to_draw = (
    (z_values, "viridis"),
    (z_values_qfs_subset, "magma"),
)
for surface_values, colormap in surfaces_to_draw:
    ax.plot_surface(X, Y, surface_values, cmap=colormap, alpha=0.7)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=190)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)

plt.tight_layout()
plt.show()

### Compute Integrated Squared Error

In [None]:
# Second name for the estimates computed by solve_frechet_qp; the error
# computation in this section refers to them as m_hat.
m_hat = estimates

In [None]:
# Parameters as linear functions of the grid points, built from the intercept and
# slope entries of the parameter dictionaries (the "v1"/"v2" entries are not used
# here), and the quantile functions generated from them
true_mus = mu_params_dict["mu0"] + x_grid * mu_params_dict["beta"]
true_sigmas = sigma_params_dict["sigma0"] + x_grid * sigma_params_dict["gamma"]
true_m = [true_qf.drop_inf() for true_qf in gen_y_qf(true_mus, true_sigmas, u)]

In [None]:
# Pairwise differences between estimated and true quantile functions; strict=True
# makes zip raise if the two lists differ in length.
# NOTE(review): `differences` is not read again in the visible notebook - confirm
# whether this cell is still needed.
differences = [hat - true for hat, true in zip(m_hat, true_m, strict=True)]

In [None]:
# Error of the Fréchet regression estimates against the true quantile functions
# over x_grid (displayed as cell output). already_qf=True flags that both inputs
# are quantile functions - presumably no conversion is done inside; confirm.
ise_wasserstein(m_hat, true_m, x_grid, already_qf=True)

## Functional Regression with Transformation method

In [None]:
# Sanity check: differentiate and re-integrate a quantile function and plot the
# result against the original to see whether the round trip changes anything.
# NOTE(review): the + 0.1 presumably only offsets the reference curve so that the
# two lines do not lie on top of each other - confirm.
qfs[199].drop_inf().differentiate().integrate().compare(qfs[199].drop_inf() + 0.1)

In [None]:
# Densities of the observed distributions (inverse of the quantile function,
# then derivative)
pdfs = []
for observed_qf in qfs:
    pdfs.append(observed_qf.drop_inf().invert().differentiate())

# Log quantile density transforms plus the start values returned alongside them,
# the latter stored as a float64 array
lqdfs, raw_start_vals = log_qd_transform(pdfs, different_supports=True)
start_vals = np.array(raw_start_vals, dtype=np.float64)

In [None]:
# Design matrix: a column of ones (intercept) next to the predictor column
intercept_column = np.ones_like(predictor_vals)
predictor_matrix = np.vstack((intercept_column, predictor_vals)).T
gram_matrix = predictor_matrix.T @ predictor_matrix
# Cell output: shape of the design matrix and of the inverted Gram matrix
(predictor_matrix.shape, np.linalg.inv(gram_matrix).shape)

In [None]:
# Ordinary least squares, (X'X)^{-1} X' y, with the log quantile densities as y
design_transposed = predictor_matrix.T
ols_projection = np.linalg.inv(design_transposed @ predictor_matrix) @ design_transposed
log_betahat = ols_projection @ lqdfs

# First entry: intercept function, second entry: slope function
log_betahat0, log_betahat1 = log_betahat[0], log_betahat[1]
log_betahat0.compare(log_betahat1, label_self="beta0", label_other="beta1")

In [None]:
# Compare the intercept function beta0 with the mean of the log quantile
# densities.
# NOTE(review): an OLS intercept equals the mean of the responses only when the
# predictor mean is zero, so the curves should agree approximately rather than
# exactly. The + 0.05 presumably shifts beta0 so that both curves stay visible -
# confirm.
mean_func(lqdfs).compare(
    log_betahat0 + 0.05, label_self="Mean lqdf", label_other="Beta_0",
)

In [None]:
# Fitted log quantile densities at every grid point: intercept + slope * x
lqdf_hat = x_grid * log_betahat1 + log_betahat0

# np.interp does not check that its xp argument is increasing and returns
# meaningless values otherwise, so order the observations by predictor value
# before interpolating the start values onto the grid.
predictor_order = np.argsort(predictor_vals, kind="stable")
interpolated_start_vals = np.interp(
    x_grid,
    np.asarray(predictor_vals)[predictor_order],
    np.asarray(start_vals)[predictor_order],
)
pdf_hat = inverse_log_qd_transform(lqdf_hat, interpolated_start_vals)

# Observed vs. fitted log quantile densities.
# NOTE(review): lqdfs is indexed by observation while lqdf_hat is indexed by grid
# point, so equal indices need not belong to comparable predictor values -
# confirm that these pairs are the intended ones.
lqdfs[0].compare(lqdf_hat[0], label_self="Lqdf 0", label_other="Estimated Lqdf 0")
lqdfs[49].compare(lqdf_hat[49], label_self="Lqdf 49", label_other="Estimated Lqdf 49")

# True vs. fitted densities at grid points 0 and 49 (both lists are indexed by
# grid point); the legend now names what is actually plotted.
true_m[0].invert().differentiate().compare(
    pdf_hat[0], label_self="True pdf 0", label_other="Estimated pdf 0",
)
true_m[49].invert().differentiate().compare(
    pdf_hat[49], label_self="True pdf 49", label_other="Estimated pdf 49",
)

In [None]:
# Turn the fitted densities back into quantile functions (integrate, then invert)
# and compute their error against the true quantile functions
qf_hat = []
for fitted_pdf in pdf_hat:
    qf_hat.append(fitted_pdf.integrate().invert())
ise_wasserstein(qf_hat, true_m, x_grid, already_qf=True)

In [None]:
# Compare an observed density / quantile function with the fitted one that has
# the same list index.
# NOTE(review): pdfs and qfs are built from the observed sample, pdf_hat from
# x_grid; the same index therefore need not refer to the same predictor value -
# confirm that this pairing is intended.
distribution_of_interest = 20
pdfs[distribution_of_interest].compare(pdf_hat[distribution_of_interest])
qfs[distribution_of_interest].compare(
    pdf_hat[distribution_of_interest].integrate().invert(),
)

## For demonstration of estimation effects

In [None]:
# Every 10th fitted log quantile density; the legend shows the position within
# this thinned selection, not the original index
thinned_lqdfs = lqdf_hat[::10]
for position, fitted_lqd in enumerate(thinned_lqdfs):
    plt.plot(fitted_lqd.x, fitted_lqd.y, label=position)
plt.legend()
plt.show()

In [None]:
# Every 20th fitted quantile function together with every 20th true one; legend
# numbers are positions within the thinned selections
for position, fitted_qf in enumerate(qf_hat[::20]):
    plt.plot(fitted_qf.x, fitted_qf.y, label=position)
for position, true_qf in enumerate(true_m[::20]):
    plt.plot(true_qf.x, true_qf.y, label=f"True {position}")
plt.legend()
plt.show()

In [None]:
# Every 50th observed density; the legend shows the position within this thinned
# selection
thinned_pdfs = pdfs[::50]
for position, observed_pdf in enumerate(thinned_pdfs):
    plt.plot(observed_pdf.x, observed_pdf.y, label=position)
plt.legend()
plt.show()

In [None]:
# Every 10th fitted density of the transformation method; the legend shows the
# position within this thinned selection
thinned_pdf_hats = pdf_hat[::10]
for position, fitted_pdf in enumerate(thinned_pdf_hats):
    plt.plot(fitted_pdf.x, fitted_pdf.y, label=position)
plt.legend()
plt.show()

In [None]:
# Densities implied by every 10th estimate in `estimates` (inverse, then
# derivative); the legend shows the position within this thinned selection
for position, estimated_qf in enumerate(estimates[::10]):
    implied_pdf = estimated_qf.invert().differentiate()
    plt.plot(implied_pdf.x, implied_pdf.y, label=position)
plt.legend()
plt.show()

In [None]:
# Quantile functions implied by the fitted densities of the transformation
# method, and the matrix of their values (row k holds qf_hats_of_interest[k].y)
qf_hats_of_interest = []
for fitted_pdf in pdf_hat:
    qf_hats_of_interest.append(fitted_pdf.integrate().invert().drop_inf())
z_values_pdf_hats = np.zeros((len(x_grid), len(qf_hats_of_interest[0].x)))
for row_idx in range(len(qf_hats_of_interest)):
    z_values_pdf_hats[row_idx, :] = qf_hats_of_interest[row_idx].y

# Common coordinates for both surfaces
X, Y = np.meshgrid(qf_hats_of_interest[0].x, x_grid)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")
# z_values (viridis) against the transformation-method fits (magma)
surfaces_to_draw = (
    (z_values, "viridis"),
    (z_values_pdf_hats, "magma"),
)
for surface_values, colormap in surfaces_to_draw:
    ax.plot_surface(X, Y, surface_values, cmap=colormap, alpha=0.7)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=245)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)

plt.tight_layout()
plt.show()

In [None]:
# Common coordinates for both surfaces
X, Y = np.meshgrid(qf_hats_of_interest[0].x, x_grid)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")
# Generated quantile functions at the grid (viridis) against the functional
# regression estimates (magma)
surfaces_to_draw = (
    (z_values_qfs_subset, "viridis"),
    (z_values_pdf_hats, "magma"),
)
for surface_values, colormap in surfaces_to_draw:
    ax.plot_surface(X, Y, surface_values, cmap=colormap, alpha=0.7)

# Viewing angle chosen for better visibility
ax.view_init(elev=15, azim=245)

ax.set(xlabel="u", ylabel="Predictor x")
ax.set_zlabel("Q(u | x)", rotation=0)

plt.tight_layout()
plt.show()

# Simulation

In [None]:
# Number of simulations
m = 500
# Number of samples of densities to generate
sample_sizes = [50, 100, 200]
# Number of sample points to generate from each density for density estimation step
points_number = 100
# Which kernel to use in density estimation
which_kernel = "std_normal"
# Set interval within which to estimate densities, approx of -inf and inf
denstimation_limits = (-30, 30)
# Fineness of grids to evaluate functions
grid_size = 500
delta = 0  # can start bounded away from zero and one
u = np.linspace(delta, 1 - delta, grid_size)

# Parameters for stochastic process that generates conditional distributions
predictor_bounds = (-1, 1)
mu_params_dict = {"mu0": 0, "beta": 3, "v1": 0.25}
sigma_params_dict = {"sigma0": 3, "gamma": 0.5, "v2": 2}

# Choose for which x_vals to predict conditional quantiles (predicting at
# every x from predictor_vals would be very computationally expensive).
# The grid spans predictor_bounds so that the two cannot drift apart.
grid_to_predict_size = 50
x_grid = np.linspace(*predictor_bounds, grid_to_predict_size)

# Calculate true stochastic process to compare later against estimates
true_mus = mu_params_dict["mu0"] + mu_params_dict["beta"] * x_grid
true_sigmas = sigma_params_dict["sigma0"] + sigma_params_dict["gamma"] * x_grid
true_m = gen_y_qf(true_mus, true_sigmas, u)
true_m = [qf.drop_inf() for qf in true_m]

# For storing simulation results: one row per simulation run, one column per
# sample size. The arrays start out as NaN instead of uninitialised memory, so an
# interrupted or partial run shows up as NaN in the summaries rather than as
# arbitrary numbers.
results_shape = (m, len(sample_sizes))
stored_ise_frechet = np.full(results_shape, np.nan)
stored_ise_func_reg = np.full(results_shape, np.nan)
stored_ise_frechet_denstimation = np.full(results_shape, np.nan)
stored_ise_func_reg_denstimation = np.full(results_shape, np.nan)

In [None]:
def _transformation_method_qfs(pdfs_observed, x_observed, xs_to_predict):
    """Predict quantile functions with the log quantile density transformation.

    The densities are mapped to log quantile densities, these are regressed on an
    intercept and the predictor by ordinary least squares, and the fitted
    functions at ``xs_to_predict`` are transformed back.

    Args:
        pdfs_observed: Density objects, one per entry of ``x_observed``.
        x_observed: Observed predictor values (used as regressor column).
        xs_to_predict: Predictor values at which to predict.

    Returns:
        list: One quantile function per fitted density.

    """
    lqdfs_observed, start_vals_observed = log_qd_transform(
        pdfs_observed, different_supports=True,
    )
    start_vals_observed = np.array(start_vals_observed, dtype=np.float64)

    # Calculate effect of x on lqdf: (X'X)^{-1} X' y
    design = np.array((np.ones_like(x_observed), x_observed)).transpose()
    betahat = (
        np.linalg.inv(design.transpose() @ design)
        @ design.transpose()
        @ lqdfs_observed
    )

    # Estimated lqdfs and their inverse transforms
    fitted_lqdfs = xs_to_predict * betahat[1] + betahat[0]
    # np.interp needs increasing x-coordinates and does not check for them, so
    # sort the observations by predictor value first
    order = np.argsort(x_observed, kind="stable")
    fitted_start_vals = np.interp(
        xs_to_predict,
        np.asarray(x_observed)[order],
        start_vals_observed[order],
    )
    fitted_pdfs = inverse_log_qd_transform(fitted_lqdfs, fitted_start_vals)
    return [fitted_pdf.integrate().invert() for fitted_pdf in fitted_pdfs]


total_iterations = m * len(sample_sizes)  # Total iterations
# The context manager closes the progress bar even if an iteration raises
with tqdm(total=total_iterations, desc="Simulation") as progress_bar:
    for i in range(m):
        # Try different sample sizes
        for j, n in enumerate(sample_sizes):
            # j index only used for storing ISE values below!
            # Set parameters. The seed equals 10 * i + j, which is unique per
            # (i, j) as long as there are at most ten sample sizes.
            seed_num = int(str(i) + str(j))
            predictor_vals = gen_predictor_values_regression(
                n, predictor_bounds, seed_num,
            )
            mus, sigmas = gen_params_regression(
                mu_params_dict, sigma_params_dict, predictor_vals, seed_num,
            )

            # Generate conditional distributions
            qfs = gen_y_qf(mus, sigmas, u)
            qfs = [qf.drop_inf() for qf in qfs]
            pdfs = [qf.invert().differentiate() for qf in qfs]

            ### Densities observed directly
            # Fréchet regression: conditional quantile function given x
            m_hat = solve_frechet_qp(
                xs_to_predict=x_grid,
                x_observed=predictor_vals,
                quantile_functions=qfs,
            )
            stored_ise_frechet[i, j] = ise_wasserstein(
                m_hat, true_m, x_grid, already_qf=True,
            )

            # Transformation method
            qf_hat = _transformation_method_qfs(pdfs, predictor_vals, x_grid)
            stored_ise_func_reg[i, j] = ise_wasserstein(
                qf_hat, true_m, x_grid, already_qf=True,
            )

            ### With density estimation
            sample_points = gen_sample_points_from_qfs(qfs, points_number, seed_num)

            # Rule-of-Thumb bandwidth (Li and Racine 2007, p. 66). The rate is
            # governed by the number of points each density is estimated from
            # (points_number), not by the number of densities n.
            bandwidth = (
                1.06 * np.std(sample_points, axis=0) * (points_number ** (-0.2))
            )
            temp_estimated_pdfs = make_estimated_pdf(
                sample_points=sample_points,
                a=denstimation_limits[0] * np.ones(n),
                b=denstimation_limits[1] * np.ones(n),
                kern=which_kernel,
                grid_size=grid_size,
                bandwidth=bandwidth,
                bias_corrected=False,
            )
            # Get actual support
            new_ranges = get_optimal_range(temp_estimated_pdfs)
            # Generate numerically stable objects, within individual ranges
            estimated_pdfs = make_estimated_pdf(
                sample_points=sample_points,
                a=new_ranges[:, 0],
                b=new_ranges[:, 1],
                kern=which_kernel,
                grid_size=grid_size,
                bandwidth=bandwidth,
                bias_corrected=False,
            )
            estimated_qfs = [pdf.integrate().invert() for pdf in estimated_pdfs]

            # Fréchet regression on the estimated quantile functions
            m_hat = solve_frechet_qp(
                xs_to_predict=x_grid,
                x_observed=predictor_vals,
                quantile_functions=estimated_qfs,
            )
            stored_ise_frechet_denstimation[i, j] = ise_wasserstein(
                m_hat, true_m, x_grid, already_qf=True,
            )

            # Transformation method on the estimated densities
            qf_hat = _transformation_method_qfs(
                estimated_pdfs, predictor_vals, x_grid,
            )
            stored_ise_func_reg_denstimation[i, j] = ise_wasserstein(
                qf_hat, true_m, x_grid, already_qf=True,
            )

            # Advance the progress bar by one (i, j) combination
            progress_bar.update()

## Save results if large simulation was run

In [None]:
# Only results of large simulations (more than 100 runs) are written to disk
if m > 100:
    # Create a dictionary to hold all arrays
    data_dict = {
        "stored_ise_frechet": stored_ise_frechet,
        "stored_ise_func_reg": stored_ise_func_reg,
        "stored_ise_frechet_denstimation": stored_ise_frechet_denstimation,
        "stored_ise_func_reg_denstimation": stored_ise_func_reg_denstimation,
    }

    # Save the dictionary using pickle
    file_path_pickle = SRC / "sim_results" / "stored_ise_arrays.pkl"
    # Create the target folder if needed, so that hours of simulation are not
    # lost to a FileNotFoundError (assumes SRC is a pathlib.Path, as the "/"
    # operator above suggests)
    file_path_pickle.parent.mkdir(parents=True, exist_ok=True)

    with open(file_path_pickle, "wb") as handle:
        pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Column-wise mean of each results array, in the order: Fréchet, functional
# regression, Fréchet with density estimation, functional regression with
# density estimation
tuple(
    ise_array.mean(axis=0)
    for ise_array in (
        stored_ise_frechet,
        stored_ise_func_reg,
        stored_ise_frechet_denstimation,
        stored_ise_func_reg_denstimation,
    )
)

In [None]:
# ISE of the Fréchet regression estimates, one box per column of the results
# array. Tick labels are derived from sample_sizes instead of being repeated as
# literals, and the title names the method so the figure can be told apart from
# the other box plots.
fig, ax = plt.subplots()
ax.boxplot(stored_ise_frechet)
ax.set_xticklabels([str(size) for size in sample_sizes])

ax.set_title("ISE Across Sample Sizes: Fréchet Regression, Densities Observed")
ax.set_xlabel("Sample Size")
ax.set_ylabel("Integrated Squared Error (ISE)")

plt.show()

In [None]:
# ISE of the transformation-method (functional regression) estimates, one box per
# column of the results array. Tick labels are derived from sample_sizes instead
# of being repeated as literals, and the title names the method so the figure can
# be told apart from the other box plots.
fig, ax = plt.subplots()
ax.boxplot(stored_ise_func_reg)
ax.set_xticklabels([str(size) for size in sample_sizes])

ax.set_title("ISE Across Sample Sizes: Functional Regression, Densities Observed")
ax.set_xlabel("Sample Size")
ax.set_ylabel("Integrated Squared Error (ISE)")

plt.show()

In [None]:
# ISE of the Fréchet regression estimates when the densities are estimated first,
# one box per column of the results array. Tick labels are derived from
# sample_sizes instead of being repeated as literals, and the title names the
# method so the figure can be told apart from the other box plots.
fig, ax = plt.subplots()
ax.boxplot(stored_ise_frechet_denstimation)
ax.set_xticklabels([str(size) for size in sample_sizes])

ax.set_title("ISE Across Sample Sizes: Fréchet Regression, Densities Estimated")
ax.set_xlabel("Sample Size")
ax.set_ylabel("Integrated Squared Error (ISE)")

plt.show()

In [None]:
# ISE of the transformation-method (functional regression) estimates when the
# densities are estimated first, one box per column of the results array. Tick
# labels are derived from sample_sizes instead of being repeated as literals, and
# the title names the method so the figure can be told apart from the other box
# plots.
fig, ax = plt.subplots()
ax.boxplot(stored_ise_func_reg_denstimation)
ax.set_xticklabels([str(size) for size in sample_sizes])

ax.set_title("ISE Across Sample Sizes: Functional Regression, Densities Estimated")
ax.set_xlabel("Sample Size")
ax.set_ylabel("Integrated Squared Error (ISE)")

plt.show()

Without the density estimation step, the effect of increasing the sample size is unclear:
the error decreases from 50 to 100 for both methods, but increases from 100 to 200. Maybe
numerical inaccuracies dominate here, whereas the error with density estimation included is
mainly driven by having to estimate the densities. Do those estimates profit from having
more densities observed?