In [None]:
"""Sandbox module."""
import warnings

import matplotlib.pyplot as plt
import numpy as np
from data_generation import gen_discretized_distributions, gen_grids_and_parameters
from fda_funcs import (
    compute_moments,
)
from misc import (
    cdf_from_density,
    dens_from_qd,
    norm_cdf,
    norm_pdf,
    quantile_from_cdf,
    riemann_sum_arrays,
    trunc_norm_pdf,
)

%matplotlib inline

## Fréchet mean estimation

In [None]:
def gen_grids_and_parameters(n, gridnum, truncation_point, delta, seed=28071995):
    """Generate parameters for the density samples and define appropriate grids.

    Parameters
    ----------
    n : int
        Number of (mu, sigma) pairs to generate.
    gridnum : int
        Number of points in each of the two grids.
    truncation_point : float
        The density grid spans ``[-truncation_point, truncation_point]``.
    delta : float
        Margin that keeps the quantile grid away from the boundaries; the grid
        spans ``[delta, 1 - delta]``.
    seed : int, optional
        Seed of the random generator used to draw the log standard deviations.
        The default is the seed that used to be hard-coded, so existing calls
        keep producing the same draws.

    Returns
    -------
    tuple
        ``(grid_densities, grid_quantiles, mus, sigmas)`` where ``mus`` is an
        array of ``n`` zeros and ``sigmas`` is ``exp`` of ``n`` draws from
        ``Uniform(-1.5, 1.5)``.
    """
    grid_densities = np.linspace(
        start=-truncation_point,
        stop=truncation_point,
        num=gridnum,
    )
    grid_quantiles = np.linspace(start=delta, stop=1 - delta, num=gridnum)

    # A fresh generator is created on every call, so two calls with the same
    # seed return identical sigmas (only the grids differ between calls).
    rng = np.random.default_rng(seed=seed)
    log_sigmas = rng.uniform(-1.5, 1.5, n)
    mus = np.zeros(n)
    sigmas = np.exp(log_sigmas)

    return (grid_densities, grid_quantiles, mus, sigmas)

In [None]:
def gen_discretized_distributions(grid_pdfs, grid_qfs, mus, sigmas, truncation_point):
    """Build discretized pdfs, cdfs, qfs and qdfs of truncated normals.

    The densities are evaluated with ``trunc_norm_pdf`` on ``grid_pdfs`` for
    every ``(mu, sigma)`` pair, truncated to
    ``[-truncation_point, truncation_point]``. The remaining three objects are
    derived from those densities through ``cdf_from_density``,
    ``quantile_from_cdf`` and ``qd_from_dens``.

    Returns the tuple ``(pdfs, cdfs, qfs, qdfs)`` in that order.
    """
    lower_bound, upper_bound = -truncation_point, truncation_point
    # Column-vector view of the grid so it broadcasts against mus / sigmas.
    column_grid = grid_pdfs[:, np.newaxis]

    # Densities first: everything else is computed from them.
    pdfs = trunc_norm_pdf(column_grid, mus, sigmas, lower_bound, upper_bound)

    # Distribution functions on the density grid.
    cdfs = cdf_from_density(grid_pdfs, pdfs, axis=-1)

    # Quantile functions on the quantile grid (grid passed as a row vector).
    qfs = quantile_from_cdf(column_grid.transpose(), cdfs, grid_qfs)

    # Quantile densities computed straight from the densities.
    qdfs = qd_from_dens(pdfs, dsup=grid_pdfs, qdsup=grid_qfs)

    return pdfs, cdfs, qfs, qdfs

In [None]:
def riemann_sum_arrays(support_grid, array, axis=-1, cumsum=False):
    """Compute a (cumulative) left Riemann sum of ``array`` over ``support_grid``.

    Every value ``array[i]`` is weighted with the distance to the next grid
    point; the last value reuses the previous step size so that the output is
    not one element shorter than the input.

    Parameters
    ----------
    support_grid : array_like
        Grid the values are defined on; needs at least two points. Step sizes
        are taken along its last axis. A 1-D grid is interpreted as the grid of
        ``axis``; a multi-dimensional grid is broadcast against ``array`` as is.
    array : array_like
        Function values to integrate.
    axis : int, optional
        Axis of ``array`` that runs along the grid (default: last axis).
    cumsum : bool, optional
        If True return the running integral up to each grid point, otherwise
        the total integral.

    Returns
    -------
    numpy.ndarray or scalar
        Cumulative sums (same shape as ``array``) if ``cumsum`` is True, else
        the sums with ``axis`` reduced away.
    """
    # Calculate the step size between consecutive grid points
    step_sizes = np.diff(support_grid)
    # Repeat last element so the output is not one element shorter. Should be approx.
    # ok
    step_sizes = np.append(step_sizes, step_sizes[..., -1][..., np.newaxis], axis=-1)

    # A 1-D grid belongs to `axis`. Without re-orienting, the weights would
    # always broadcast along the LAST axis of `array`, which is wrong (or fails
    # outright) whenever `axis` points somewhere else.
    array_ndim = np.ndim(array)
    if step_sizes.ndim == 1 and array_ndim > 1:
        weight_shape = [1] * array_ndim
        weight_shape[axis] = step_sizes.size
        step_sizes = step_sizes.reshape(weight_shape)

    weighted = array * step_sizes
    if cumsum:
        # Running integral up to each grid point
        return np.cumsum(weighted, axis=axis)
    # Total integral over the whole grid
    return np.sum(weighted, axis=axis)

In [None]:
def qd_from_dens(dens, dsup=None, qdsup=None):
    """Compute quantile densities directly from densities.

    'Inspired' from dens2qd in fdadensity package in R.

    Parameters
    ----------
    dens : array_like
        Density values; the last axis runs along the grid.
    dsup : array_like, optional
        Support grid of the densities. Defaults to an equidistant grid on
        [0, 1] with as many points as the last axis of ``dens``.
    qdsup : array_like, optional
        Support grid of the quantile densities; must start at 0 and end at 1
        (tolerance 1e-5). Defaults to an equidistant grid on [0, 1].

    Returns
    -------
    numpy.ndarray
        ``1 / dens`` rescaled so that its Riemann sum over ``qdsup`` equals the
        range of ``dsup``.

    Raises
    ------
    ValueError
        If ``qdsup`` does not span [0, 1].

    Notes
    -----
    The input array is never modified. Densities that do not integrate to 1
    trigger a warning and are normalized on a copy.
    """
    dens = np.asarray(dens)
    n_points = dens.shape[-1]
    if dsup is None:
        dsup = np.linspace(0, 1, n_points)
    if qdsup is None:
        qdsup = np.linspace(0, 1, n_points)

    # Validate input
    eps = 1e-5
    if not np.allclose([np.min(qdsup), np.max(qdsup)], [0, 1], atol=eps):
        print([np.min(qdsup), np.max(qdsup)])
        msg = "Please check the support of the QF domain's boundaries."
        raise ValueError(msg)

    integral_dens = riemann_sum_arrays(dsup, array=dens, axis=-1, cumsum=False)
    deviations_from_1 = abs(integral_dens - 1)
    if np.any(deviations_from_1 > eps):
        warnings.warn(
            f"Not all provided densities integrate to 1 with tolerance {eps}!"
            f"\n Max case of deviation is: {deviations_from_1.max()} "
            f"\n In position: {deviations_from_1.argmax()} "
            "\n Performing normalization...",
        )
        # Divide out of place: an in-place `/=` would silently overwrite the
        # caller's array and fails for integer-typed input.
        dens = dens / integral_dens[..., np.newaxis]

    # NOTE(review): 1 / dens is evaluated at the points of `dsup`, yet it is
    # integrated and returned as if it were sampled on `qdsup`. A quantile
    # density q(t) = 1 / f(Q(t)) would normally require interpolating from the
    # CDF values onto `qdsup` (dens_from_qd does the analogous step in the
    # opposite direction). Confirm whether skipping this step is intended.
    qd = 1 / dens
    integral_qd = riemann_sum_arrays(qdsup, qd, axis=-1, cumsum=False)
    # Rescale so the quantile density integrates to the length of the support
    qd *= np.ptp(dsup) / integral_qd[..., np.newaxis]

    return qd

In [None]:
def trunc_norm_pdf(x, mu, sigma, a, b):
    """Define truncated normal density function.

    Evaluates the density of a normal distribution with parameters ``mu`` and
    ``sigma`` truncated to ``[a, b]`` at the points ``x``; points outside
    ``[a, b]`` get density zero.

    To test: columns of x must align with mu and sigma.

    Parameters
    ----------
    x : array_like
        Evaluation points. Its first axis is treated as the grid axis: the
        result is transposed before returning, so a column vector ``x``
        combined with 1-D ``mu`` / ``sigma`` yields one density per row.
    mu, sigma : array_like
        Location and scale parameters, broadcast against ``x``.
    a, b : float
        Lower and upper truncation points.

    Returns
    -------
    numpy.ndarray
        Transposed array of density values. If the Riemann sum of a density
        over ``x`` deviates from 1 by more than 1e-5, a warning is issued and
        the densities are normalized.
    """
    x = np.array(x)  # to vectorize the input
    mu = np.array(mu)
    sigma = np.array(sigma)
    x_std = (x - mu) / sigma
    a_std = (a - mu) / sigma
    b_std = (b - mu) / sigma
    # presumably norm_pdf / norm_cdf(., 0, 1) are the standard normal pdf / cdf
    # from misc -- verify against that module
    numerator = norm_pdf(x_std, 0, 1)
    denominator = norm_cdf(b_std, 0, 1) - norm_cdf(a_std, 0, 1)

    result = numerator / denominator / sigma

    # Create a boolean mask for values outside the interval [a, b]
    mask = (x_std < a_std) | (x_std > b_std)

    # Set the PDF to zero for values of x outside the interval [a, b]
    result[mask] = 0
    result = result.transpose()

    # Check whether each density integrates to 1. Integrate over the points the
    # density was actually evaluated at: a made-up linspace(a, b, len(x)) is
    # only correct when x happens to be exactly that grid and otherwise leads
    # to a wrong "normalization". x is transposed like the result so that its
    # grid axis is the last one.
    eps = 1e-5
    integrals = riemann_sum_arrays(x.transpose(), result, axis=-1)
    deviations_from_1 = abs(integrals - 1)
    if np.any(deviations_from_1 > eps):
        warnings.warn(
            f"Not all provided densities integrate to 1 with tolerance {eps}!"
            f"\n Max case of deviation is: {deviations_from_1.max()} "
            f"\n In position: {deviations_from_1.argmax()} "
            "\n Performing normalization...",
        )
        result /= integrals[..., np.newaxis]
    return result

In [None]:
def dens_from_qd(qds_discretized, qdsup=None, dsup=None):
    """Compute density from a quantile density function.

    'Inspired' from qd2dens in fdadensity package in R.

    Parameters
    ----------
    qds_discretized : numpy.ndarray
        One discretized quantile density (1-D: the checks and the interpolation
        below operate on a single function).
    qdsup : array_like, optional
        Support grid of the quantile density; must start at 0 and end at 1
        (tolerance 1e-5). Defaults to an equidistant grid on [0, 1] with one
        point per value of ``qds_discretized``.
    dsup : array_like
        Support grid on which the density is returned. Required in practice,
        although it has a ``None`` default in the signature.

    Returns
    -------
    numpy.ndarray
        Density on ``dsup``, normalized so its Riemann sum over ``dsup`` is 1.

    Raises
    ------
    ValueError
        If ``qdsup`` does not span [0, 1], or if the quantile density does not
        integrate to the range of ``dsup``.
    """
    if qdsup is None:
        qdsup = np.linspace(0, 1, np.shape(qds_discretized)[-1])

    # Validate input
    eps = 1e-5
    if not np.allclose([np.min(qdsup), np.max(qdsup)], [0, 1], atol=eps):
        print([np.min(qdsup), np.max(qdsup)])
        msg = "Please check the support of the qds_discretized domain's boundaries."
        raise ValueError(msg)

    integral_qd = riemann_sum_arrays(qdsup, array=qds_discretized, axis=-1, cumsum=True)
    if not np.isclose(integral_qd[-1], np.ptp(dsup), atol=eps):
        print(integral_qd, np.ptp(dsup))
        # Parentheses are required: without them the f-string on the second
        # line is a separate no-op statement and the message gets cut off.
        msg = (
            "Quantile Density does not integrate to the range of the densities with "
            f"tolerance {eps}."
        )
        raise ValueError(msg)

    # Calculate new support grid: the running integral of the quantile density
    # shifted to the left end of the density support
    dtemp = dsup[0] + integral_qd

    # Calculate density
    dens_temp = 1 / qds_discretized
    # Drop duplicated support points before interpolating
    dtemp, idx_unique = np.unique(dtemp, return_index=True, axis=-1)
    dens_temp = dens_temp[idx_unique]
    dens = np.interp(dsup, dtemp, dens_temp)

    # Normalize the density
    dens /= riemann_sum_arrays(dsup, dens, axis=-1, cumsum=False)[..., np.newaxis]

    return dens

In [None]:
def density_from_qd_old(qds_discretized, dsup, qdsup=None):
    """Compute density from a quantile density function.

    'Inspired' from qd2dens in fdadensity package in R.

    Parameters
    ----------
    qds_discretized : numpy.ndarray
        One discretized quantile density (1-D).
    dsup : array_like
        Support grid on which the density is returned.
    qdsup : array_like, optional
        Support grid of the quantile density. Defaults to an equidistant grid
        on [0, 1] with ``len(qds_discretized)`` points.

    Returns
    -------
    tuple
        ``(dens, quantile_oplus)``: the density on ``dsup`` (normalized so its
        Riemann sum is 1) and the de-duplicated support points obtained by
        integrating the quantile density.
    """
    if qdsup is None:
        qdsup = np.linspace(0, 1, len(qds_discretized))
    # Running integral of the quantile density, shifted to the start of dsup
    quantile_oplus = dsup[0] + riemann_sum_arrays(
        support_grid=qdsup, array=qds_discretized, axis=0, cumsum=True,
    )

    dens_temp = 1 / qds_discretized
    # Indices of the first occurrence of every unique support point
    ind = np.unique(quantile_oplus, return_index=True, axis=-1)[1]
    quantile_oplus = np.atleast_1d(quantile_oplus)[ind]
    # Select the density values with the SAME indices. `~ind` on an integer
    # index array is a bitwise NOT (-(ind + 1)) and would pick the values in
    # reversed order from the end instead of the unique ones.
    dens_temp = dens_temp[ind]
    dens = np.interp(dsup, quantile_oplus, dens_temp)
    dens /= riemann_sum_arrays(dsup, dens, axis=0, cumsum=False)[..., np.newaxis]

    return dens, quantile_oplus

In [None]:
def wasserstein_frechet_mean(qds_discretized, dsup, qdsup=None):
    """Compute Wasserstein-Fréchet mean from sample.

    The quantile densities are averaged over the first axis, the average is
    rescaled so that its Riemann sum over ``qdsup`` equals the length of the
    density support ``dsup[-1] - dsup[0]``, and the result is mapped to a
    density on ``dsup`` with ``dens_from_qd``.

    ``qdsup`` defaults to an equidistant grid on [0, 1] with one point per
    entry of the last axis of ``qds_discretized``.
    """
    if qdsup is None:
        n_grid_points = qds_discretized.shape[-1]
        qdsup = np.linspace(0, 1, n_grid_points)

    # Pointwise average of the sample of quantile densities
    average_qd = np.mean(qds_discretized, axis=0)

    # Rescale the average so that it integrates to the support length
    support_length = dsup[-1] - dsup[0]
    total = riemann_sum_arrays(qdsup, array=average_qd, axis=-1, cumsum=False)
    average_qd *= support_length / total

    return dens_from_qd(average_qd, qdsup, dsup)

In [None]:
# Set up data
n = 200  # number of (mu, sigma) pairs, i.e. sampled distributions
gridnum = 1000  # number of points in the density grid and in the quantile grid
truncation_point = 3  # density grid spans [-truncation_point, truncation_point]

# Variant 1: delta = 0, so the quantile grid covers the full interval [0, 1]
grid_pdfs1, grid_qfs1, mus1, sigmas1 = gen_grids_and_parameters(
    n, gridnum, truncation_point, 0,
)
# Variant 2: delta = 1e-5, so the quantile grid covers [1e-5, 1 - 1e-5]
grid_pdfs2, grid_qfs2, mus2, sigmas2 = gen_grids_and_parameters(
    n, gridnum, truncation_point, 1e-5,
)

In [None]:
# Generate distributions
# Variant 1: pdfs, cdfs, qfs and qdfs built on the grids with delta = 0
(
    pdfs_discretized1,
    cdfs_discretized1,
    qfs_discretized1,
    qdfs_discretized1,
) = gen_discretized_distributions(
    grid_pdfs1, grid_qfs1, mus1, sigmas1, truncation_point,
)

In [None]:
# Variant 2: same construction on the grids with delta = 1e-5
(
    pdfs_discretized2,
    cdfs_discretized2,
    qfs_discretized2,
    qdfs_discretized2,
) = gen_discretized_distributions(
    grid_pdfs2, grid_qfs2, mus2, sigmas2, truncation_point,
)

In [None]:
# Recompute the quantile densities from the variant-1 pdfs and keep only the
# first one (row 0) for the sanity checks below
tempi = qd_from_dens(pdfs_discretized1, grid_pdfs1, grid_qfs1)[0]

In [None]:
# Compare shapes: a single quantile density vs. the full array of them
tempi.shape, qdfs_discretized1.shape

In [None]:
# Integrate the stored and the recomputed first quantile density over the first
# `lim` points of the quantile grid
lim1 = 999
lim2 = 999
integral1 = riemann_sum_arrays(grid_qfs1[:lim1], qdfs_discretized1[0][:lim1], axis=-1)
integral2 = riemann_sum_arrays(grid_qfs1[:lim2], tempi[:lim2], axis=-1)
# Show both integrals together with the quantile-grid values at the cut-off
# indices (taken from grid 1 and grid 2, respectively)
integral1, integral2, grid_qfs1[lim1], grid_qfs2[lim2]

In [None]:
# Moments of the discretized pdfs for both variants. Only the mean functions get
# distinct names; `centered_densities` and `cov_matrix` are overwritten by the
# second call and therefore refer to variant 2 afterwards.
mean_function1, centered_densities, cov_matrix = compute_moments(pdfs_discretized1)
mean_function2, centered_densities, cov_matrix = compute_moments(pdfs_discretized2)

In [None]:
whi = 40  # index of the density to inspect
# Plot the selected density against its grid index
plt.plot(pdfs_discretized1[whi])
# Riemann sum of the selected density over the density grid (cell output)
riemann_sum_arrays(grid_pdfs1, pdfs_discretized1[whi], axis=-1)

In [None]:
# `tempi` already is row 0 of the recomputed quantile densities, so it must be
# compared as a whole against the first stored quantile density; `tempi[0]`
# would be a single scalar. Use the absolute difference so that negative
# deviations are not hidden by the maximum.
np.abs(tempi - qdfs_discretized1[0]).max()

In [None]:
# Fréchet mean density (f_oplus) for each variant, plus the cdf (F_oplus)
# obtained from it with cdf_from_density on the matching density grid
f_oplus1 = wasserstein_frechet_mean(qdfs_discretized1, grid_pdfs1, grid_qfs1)
F_oplus1 = cdf_from_density(grid_pdfs1, f_oplus1, axis=0)
f_oplus2 = wasserstein_frechet_mean(qdfs_discretized2, grid_pdfs2, grid_qfs2)
F_oplus2 = cdf_from_density(grid_pdfs2, f_oplus2, axis=0)

In [None]:
# Reference density: normal with mu = 0 and sigma = 1, truncated to the same
# interval the density grid was built on. Reusing `truncation_point` (instead of
# a literal -3, 3) keeps the truncation bounds in sync with `grid_pdfs1`.
nat_center = trunc_norm_pdf(grid_pdfs1, 0, 1, -truncation_point, truncation_point)

In [None]:
# Compare cross sectional and frechet means to true center
fig, ax = plt.subplots()
ax.plot(grid_pdfs1, f_oplus1, label="Fréchet mean")
ax.plot(grid_pdfs1, mean_function1, label="Cross sectional mean ")
# nat_center was evaluated on grid_pdfs1, so plot it against that same grid
ax.plot(grid_pdfs1, nat_center, label="True center")
ax.set_xlabel("x")
ax.set_ylabel("density")
ax.legend()
plt.show()

In [None]:
# Compare the results based on qdfs defined on exactly [0, 1] (variant 1) with
# those based on qdfs defined on almost [0, 1] (variant 2)
wiggle = 0.05  # vertical offset added to the variant-2 curves
fig, ax = plt.subplots()
ax.plot(grid_pdfs1, f_oplus1, label="Fréchet mean 1")
ax.plot(grid_pdfs2, f_oplus2 + wiggle, label="Fréchet mean 2")
ax.plot(grid_pdfs1, mean_function1, label="Cross sectional mean 1")
ax.plot(grid_pdfs2, mean_function2 + wiggle, label="Cross sectional mean 2")
plt.legend()
plt.show()

In [None]:
# Variant 1: Fréchet mean as pdf and as cdf next to the cross sectional mean
fig, ax = plt.subplots()
ax.plot(grid_pdfs1, f_oplus1, label="Fréchet mean pdf")
ax.plot(grid_pdfs1, F_oplus1, label="Fréchet mean cdf")
ax.plot(grid_pdfs1, mean_function1, label="Cross sectional mean")
plt.legend()
plt.show()