In [None]:
"""Sandbox module."""

import numpy as np
from data_generation import (
    gen_discretized_distributions,
    gen_grids_and_parameters,
)
from misc import (
    cdf_from_density,
    quantile_from_density,
    riemann_sum_arrays,
    trunc_norm_pdf,
)

%matplotlib inline

import warnings

from scipy.stats import truncnorm

warnings.filterwarnings("always")
import matplotlib.pyplot as plt
from scipy.interpolate import CubicSpline

In [None]:
# def riemann_sum_arrays(support_grid, array, axis=-1, cumsum=False):
#     """Computes the Riemann sum using the Midpoint rule for the given array, along
#     the axis that contains the grid of values."""

#     # Calculate the step size between consecutive grid points

#     # Calculate midpoints

#     # Sample the function at the midpoints
#     if array.ndim > 1:
#         for i in range(len(midpoint_values)):

#     # Compute the cumulative sum along the specified axis (i.e.,
#     # the integral up to each grid point)
#     if cumsum:
#         # Append last value so we have same shape as before
#     # Or just the integral

In [None]:
def dens_from_qd(qd, qdsup=None, dsup=None):
    """Compute density from a quantile density function.

    'Inspired' from qd2dens in fdadensity package in R.

    The cumulative integral of ``qd`` (shifted by ``dsup[0]``) gives the points at
    which the density equals ``1 / qd``. A natural cubic spline through those
    points is evaluated on ``dsup`` and the result is divided by its Riemann sum
    over ``dsup``.

    Parameters
    ----------
    qd : np.ndarray
        Quantile density values, either 1-D (a single function) or 2-D (one
        function per row), with the last axis running along ``qdsup``.
    qdsup : np.ndarray
        Support grid of ``qd``. Required despite the ``None`` default; its minimum
        and maximum must match 0 and 1 up to an absolute tolerance of 1e-3.
    dsup : np.ndarray
        1-D support grid on which the density is returned. Required despite the
        ``None`` default.

    Returns
    -------
    np.ndarray
        Density values on ``dsup``: 1-D for 1-D input, one row per input row for
        2-D input.

    Raises
    ------
    ValueError
        If ``qdsup`` does not span [0, 1], or if any quantile density does not
        integrate to ``np.ptp(dsup)`` (both checked with tolerance 1e-3).

    """
    # Validate input
    eps = 1e-3
    boundaries = [np.min(qdsup), np.max(qdsup)]
    if not np.allclose(boundaries, [0, 1], atol=eps):
        msg = f"Please check the support of the QF domain's boundaries: {boundaries}"
        raise ValueError(msg)

    integral_qd = riemann_sum_arrays(qdsup, array=qd, axis=-1, cumsum=True)
    # Total integral per function: index the last AXIS (not the last row) and
    # reduce with allclose, so 2-D input is validated for every row at once.
    if not np.allclose(integral_qd[..., -1], np.ptp(dsup), atol=eps):
        msg = (
            "Quantile Density does not integrate to the range of the densities with "
            f"tolerance {eps}."
            f"\n Integral is: {integral_qd[...,-1]}"
            f"\n Range is: {np.ptp(dsup)}"
        )
        raise ValueError(msg)

    # Calculate new support grid
    dsup_temp = dsup[0] + integral_qd

    # Calculate density
    dens_temp = 1 / qd

    # CubicSpline needs strictly increasing abscissae, hence the np.unique step
    # (it returns sorted unique values plus the index of their first occurrence).
    if qd.ndim > 1:
        # One row per input function, one column per requested support point.
        dens = np.zeros((len(qd), len(dsup)))
        for i in range(len(dens)):
            temp_sup, ind = np.unique(dsup_temp[i], return_index=True)
            temp = dens_temp[i][..., ind]
            dens[i] = CubicSpline(temp_sup, temp, bc_type="natural", axis=-1)(dsup)
    else:
        temp_sup, ind = np.unique(dsup_temp, return_index=True)
        temp = dens_temp[..., ind]
        dens = CubicSpline(temp_sup, temp, bc_type="natural", axis=-1)(dsup)

    # Normalize the density
    dens /= riemann_sum_arrays(dsup, dens, axis=-1, cumsum=False)[..., np.newaxis]

    return dens

In [None]:
def qd_from_dens(dens, dsup=None, qdsup=None):
    """Compute quantile densities directly from densities.

    'Inspired' from dens2qd in fdadensity package in R.

    The cumulative integral of ``dens`` gives the points at which the quantile
    density equals ``1 / dens``. A natural cubic spline through those points is
    evaluated on ``qdsup`` and rescaled so that its Riemann sum over ``qdsup``
    equals ``np.ptp(dsup)``.

    Parameters
    ----------
    dens : np.ndarray
        Density values, either 1-D (a single density) or 2-D (one density per
        row), with the last axis running along ``dsup``. The array is not
        modified; normalization works on a copy.
    dsup : np.ndarray
        Support grid of ``dens``. Required despite the ``None`` default.
    qdsup : np.ndarray
        Support grid on which the quantile density is returned. Required despite
        the ``None`` default; its minimum and maximum must match 0 and 1 up to an
        absolute tolerance of 1e-3.

    Returns
    -------
    np.ndarray
        Quantile density values on ``qdsup``: 1-D for 1-D input, one row per input
        row for 2-D input.

    Raises
    ------
    ValueError
        If ``qdsup`` does not span [0, 1] up to the tolerance.

    Warns
    -----
    UserWarning
        If any density's integral deviates from 1 by more than 1e-3; such input
        is normalized before the transformation.

    """
    # Validate input
    eps = 1e-3
    boundaries = [np.min(qdsup), np.max(qdsup)]
    if not np.allclose(boundaries, [0, 1], atol=eps):
        msg = f"Please check the support of the QF domain's boundaries: {boundaries}"
        raise ValueError(msg)

    integral_dens = riemann_sum_arrays(dsup, array=dens, axis=-1, cumsum=True)
    total_mass = integral_dens[..., -1]
    deviations_from_1 = abs(total_mass - 1)
    if np.any(deviations_from_1 > eps):
        warnings.warn(
            f"Not all provided densities integrate to 1 with tolerance {eps}!"
            f"\n Max case of deviation is: {deviations_from_1.max()}"
            f"\n In position: {deviations_from_1.argmax()} "
            "\n Performing normalization...",
            stacklevel=2,
        )
        scale = total_mass[..., np.newaxis]
        # Out-of-place division: an in-place `/=` would silently rewrite the
        # caller's array (or the array a passed-in view belongs to).
        dens = dens / scale
        # Keep the cumulative integral consistent with the normalized density so
        # the interpolation grid below ends at 1 (the Riemann sum is linear in the
        # integrand, so scaling it equals recomputing it).
        integral_dens = integral_dens / scale

    qdsup_temp = integral_dens

    qd_temp = 1 / dens

    # CubicSpline needs strictly increasing abscissae, hence the np.unique step
    # (it returns sorted unique values plus the index of their first occurrence).
    if dens.ndim > 1:
        # One row per input density, one column per requested quantile level.
        qd = np.zeros((len(dens), len(qdsup)))
        for i in range(len(qd)):
            temp_sup, ind = np.unique(qdsup_temp[i], return_index=True)
            temp = qd_temp[i][..., ind]
            qd[i] = CubicSpline(temp_sup, temp, bc_type="natural", axis=-1)(qdsup)
    else:
        temp_sup, ind = np.unique(qdsup_temp, return_index=True)
        temp = qd_temp[..., ind]
        qd = CubicSpline(temp_sup, temp, bc_type="natural", axis=-1)(qdsup)

    # Rescale so the quantile density integrates to the range of the density support
    integral_qd = riemann_sum_arrays(qdsup, qd, axis=-1, cumsum=False)
    qd *= np.ptp(dsup) / integral_qd[..., np.newaxis]

    return qd

In [None]:
def gen_discretized_distributions(grid_pdfs, grid_qfs, mus, sigmas, truncation_point):
    """Build discretized pdfs, cdfs, qfs and qdfs from truncated-normal parameters.

    NOTE: this notebook-level definition shadows the function of the same name
    imported from ``data_generation`` at the top of the notebook.

    The densities come from ``trunc_norm_pdf`` with bounds
    ``[-truncation_point, truncation_point]``; the other three representations
    are derived from those densities.

    Returns
    -------
    tuple
        ``(pdfs, cdfs, qfs, qdfs)`` in that order.

    """
    lower, upper = -truncation_point, truncation_point

    # Densities on the pdf grid (the grid gets a trailing axis before the call)
    pdfs = trunc_norm_pdf(grid_pdfs[:, np.newaxis], mus, sigmas, lower, upper)

    # Distribution functions on the pdf grid
    cdfs = cdf_from_density(grid_pdfs, pdfs, axis=-1)

    # Quantile functions on the quantile grid
    qfs = quantile_from_density(pdfs, grid_pdfs, grid_qfs)

    # Quantile densities on the quantile grid
    qdfs = qd_from_dens(pdfs, dsup=grid_pdfs, qdsup=grid_qfs)

    return pdfs, cdfs, qfs, qdfs

In [None]:
# Simulation settings passed to gen_grids_and_parameters
# (n is presumably the number of distributions and gridnum the grid size — TODO confirm)
n, gridnum, truncation_point = 200, 1000, 3

grid_pdfs, grid_qfs, mus, sigmas = gen_grids_and_parameters(
    n, gridnum, truncation_point, delta=0
)

In [None]:
# Build all four discretized representations from the grids and parameters above
(
    pdfs_discretized,
    cdfs_discretized,
    qfs_discretized,
    qdfs_discretized,
) = gen_discretized_distributions(grid_pdfs, grid_qfs, mus, sigmas, truncation_point)

In [None]:
# Round trip for one selected density: pdf -> quantile density -> pdf
dens_to_look_at = 40
a = qd_from_dens(
    pdfs_discretized[dens_to_look_at],
    dsup=grid_pdfs,
    qdsup=grid_qfs,
)
b = dens_from_qd(a, qdsup=grid_qfs, dsup=grid_pdfs)

In [None]:
# Reference quantile function from scipy. truncnorm takes its bounds a and b in
# units of `scale` relative to `loc`, hence the division by sigma.
# NOTE(review): loc=0 presumes the selected distribution has mean 0 — confirm against mus.
ideal_quantiles = truncnorm.ppf(
    grid_qfs,
    a=(-truncation_point / sigmas[dens_to_look_at]),
    b=(truncation_point / sigmas[dens_to_look_at]),
    loc=0,
    scale=sigmas[dens_to_look_at],
)

fig, ax = plt.subplots()
# The reference curve is drawn with a +0.1 vertical offset
ax.plot(grid_qfs, ideal_quantiles + 0.1, label="Ideal")
ax.plot(grid_qfs, qfs_discretized[dens_to_look_at], label="Mine")
ax.legend()
plt.show()

In [None]:
# Reference density evaluated at the reference quantiles, i.e. f(Q(t)) for t in
# grid_qfs. Despite its name this is not a quantile density; the name is kept
# because it is a notebook-level variable.
# NOTE(review): loc=0 presumes the selected distribution has mean 0 — confirm against mus.
ideal_qdfs = truncnorm.pdf(
    ideal_quantiles,
    a=(-truncation_point / sigmas[dens_to_look_at]),
    b=(truncation_point / sigmas[dens_to_look_at]),
    loc=0,
    scale=sigmas[dens_to_look_at],
)

fig, ax = plt.subplots()
# These values belong to x = ideal_quantiles; plotting them against grid_pdfs
# (as before) pairs each value with the wrong abscissa and distorts the curve.
ax.plot(ideal_quantiles, ideal_qdfs, label='"Ideal" LOL')
# The round-tripped density is drawn with a +0.01 vertical offset
ax.plot(grid_pdfs, b + 0.01, label="Mine")
ax.plot(grid_pdfs, pdfs_discretized[dens_to_look_at], label="Original")
plt.legend()
plt.show()

In [None]:
# Integral of the quantile density `a` over the quantile grid. qd_from_dens
# rescales its output by np.ptp(dsup) / integral, so this presumably comes out
# close to np.ptp(grid_pdfs) — verify.
riemann_sum_arrays(grid_qfs, a)

In [None]:
# First and last five values of the quantile density `a` (both ends of grid_qfs)
a[:5], a[-5:]

In [None]:
# Quantile density of the selected distribution over the quantile grid
fig, ax = plt.subplots()
ax.plot(grid_qfs, a, label="qdf")
ax.legend()
plt.show()

In [None]:
# Round-trip check: does pdf -> qdf -> pdf reproduce the original pdf?
fig, ax = plt.subplots()
ax.plot(
    grid_pdfs,
    pdfs_discretized[dens_to_look_at],
    label="pdf",
)
# The reconstructed curve is drawn with a +0.001 vertical offset
ax.plot(
    grid_pdfs,
    b + 0.001,
    label="doubletransformed pdf",
)
ax.legend()
plt.show()

In [None]:
# Cubic spline through the discretized pdfs versus the pdf values themselves.
# The spline is fitted to every row and evaluated on its own knots (grid_pdfs);
# only the selected row is drawn.
fig, ax = plt.subplots()
ax.plot(
    grid_pdfs,
    CubicSpline(grid_pdfs, pdfs_discretized, axis=-1)(grid_pdfs)[dens_to_look_at],
    label="Spline",
)
ax.plot(
    grid_pdfs,
    pdfs_discretized[dens_to_look_at],
    label="Pdf",
)
ax.legend()
plt.show()

In [None]:
# Spot check at index 499 of entry 0: quantile density next to density.
# NOTE(review): the two arrays are produced on different grids (qdsup=grid_qfs vs
# grid_pdfs), so the same index need not refer to matching points — confirm this
# comparison is meaningful.
qdfs_discretized[0][499], pdfs_discretized[0][499]

In [None]:
# Sanity check for entry 0: log q(t) + log f(Q(t)), with f(Q(t)) obtained by
# linearly interpolating the pdf at the quantile values. Presumably expected to
# be near zero, since qd_from_dens builds the quantile density from 1 / dens —
# NOTE(review): confirm.
(np.log(qdfs_discretized[0]) + np.log(np.interp(qfs_discretized[0], grid_pdfs, pdfs_discretized[0])))

In [None]:
# NOTE(review): this rebinds the global `a`, which earlier cells use for the
# quantile density returned by qd_from_dens; re-running those cells after this
# one would pick up this small array instead. Consider a distinct name.
a = np.array([0,1,2,3,4])