In [None]:
"""Sandbox module."""
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

%matplotlib inline

## Nonparametric kernel density estimation

In [None]:
# Kernel functions K(u), keyed by the name callers pass as ``kernel_type``.
# Every kernel is zero outside the support |u| <= 1.  The support indicator is
# a boolean mask (instead of ``int(...)``, which only accepts scalars), so each
# kernel can be evaluated on a scalar or on a whole NumPy array at once.
kernels = {
    "Epanechnikov": lambda u: 0.75 * (1 - u**2) * (np.abs(u) <= 1),
    "Uniform": lambda u: 0.5 * (np.abs(u) <= 1),
    "Triangular": lambda u: (1 - np.abs(u)) * (np.abs(u) <= 1),
}

In [None]:
def kernel_estimator(x, h, sample, kernel_type):
    """Evaluate the kernel density estimate at a single point.

    Computes ``1 / (n * h) * sum_i K((x - X_i) / h)``, where ``K`` is the kernel
    stored under ``kernel_type`` in ``kernels`` and ``n`` is ``len(sample)``.

    Parameters
    ----------
    x : float
        Point at which the density is estimated.
    h : float
        Bandwidth; must be strictly positive.
    sample : array-like
        Observations ``X_1, ..., X_n``.
    kernel_type : str
        Key into ``kernels``.

    Returns
    -------
    float
        Estimated density at ``x``.

    Raises
    ------
    ValueError
        If ``h`` is not strictly positive or ``sample`` is empty.
    KeyError
        If ``kernel_type`` is not a key of ``kernels``.
    """
    # A negative bandwidth would silently produce a negative "density".
    if h <= 0:
        raise ValueError(f"bandwidth h must be strictly positive, got {h!r}")
    observations = np.asarray(sample, dtype=float)
    n_obs = len(observations)
    if n_obs == 0:
        raise ValueError("sample must contain at least one observation")
    # np.vectorize keeps this working with kernels that only accept scalars;
    # otypes pins the result dtype instead of inferring it from the first call.
    k = np.vectorize(kernels[kernel_type], otypes=[float])
    return 1 / (n_obs * h) * np.sum(k((x - observations) / h))

In [None]:
sample_size = 200
grid_size = 100
grid_ending = 10
bandwidth = 0.75  # TODO: replace the hand-picked value with an optimal bandwidth
rng_seed = 0  # fixed so that "Restart & Run All" reproduces the same sample

# Observations are drawn from N(0, 10^2); the evaluation grid spans
# [-grid_ending, grid_ending].
rng = np.random.default_rng(rng_seed)
sample = rng.normal(scale=10, size=sample_size)
grid = np.linspace(start=-grid_ending, stop=grid_ending, num=grid_size)

In [None]:
# Bind the simulated sample once, so each call only has to supply the
# evaluation point ``x``, the bandwidth ``h`` and the ``kernel_type``.
kernel_estimator_given_sample = partial(kernel_estimator, sample=sample)

### Generate fitted values

In [None]:
# Evaluate the density estimate at every grid point, once per kernel, instead
# of repeating the same comprehension for each kernel name.
fitted_values = {
    kernel_name: [
        kernel_estimator_given_sample(x=point, h=bandwidth, kernel_type=kernel_name)
        for point in grid
    ]
    for kernel_name in ("Epanechnikov", "Uniform", "Triangular")
}
values_epa = fitted_values["Epanechnikov"]
values_uni = fitted_values["Uniform"]
values_tri = fitted_values["Triangular"]

### Plots

In [None]:
fig, ax = plt.subplots()
ax.plot(grid, values_epa, label="Epanechnikov")
ax.plot(grid, values_tri, label="Triangular")
# The sample is drawn with scale=10, so the reference curve has to be the
# N(0, 10^2) density, not the standard normal one.
ax.plot(grid, stats.norm.pdf(grid, loc=0, scale=10), label="True density")
# Histogram for comparison. Samples outside the grid fall into no bin, and
# density=True normalises over the binned samples only, so the histogram
# integrates to 1 on the grid and therefore sits above the true density.
ax.hist(
    sample,
    bins=grid,
    density=True,
    histtype="step",
    edgecolor="black",
    linewidth=0.5,
    label="Histogram",
)
ax.set_xlabel("x")
ax.set_ylabel("Density")
ax.set_title("Kernel density estimates")
ax.legend()
plt.show()

## Kernel Regression

Context: we want to estimate the nonparametric regression relation $y_i = m(x_i) +
\epsilon_i$, where $y_i$ is the dependent variable, $x_i$ the explanatory variable, and
$\epsilon_i$ an i.i.d. error term, for observations $i = 1, \dots, n$.

### Sample generation

In [None]:
def m(x):
    """Return the true regression function ``3 * sin(x) + 2 * x`` at ``x``.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    oscillation = 3 * np.sin(x)
    linear_trend = 2 * x
    return oscillation + linear_trend

In [None]:
# Additive N(0, 4^2) noise, drawn from a generator with a fixed seed so the
# noise draws are the same on every run.
noise_seed = 1
epsilon = np.random.default_rng(noise_seed).normal(0, 4, size=sample_size)
y = m(sample) + epsilon

In [None]:
def m_hat(x, y, h, sample, kernel_type):
    """Nadaraya-Watson estimate of the regression function at a single point.

    Returns the kernel-weighted average ``sum_i w_i * y_i / sum_i w_i`` with
    weights ``w_i = K((x - sample_i) / h)``, where ``K`` is the kernel stored
    under ``kernel_type`` in ``kernels``.

    Parameters
    ----------
    x : float
        Point at which the regression function is estimated.
    y : array-like
        Responses, one per entry of ``sample``.
    h : float
        Bandwidth.
    sample : array-like
        Observed values of the explanatory variable.
    kernel_type : str
        Key into ``kernels``.

    Returns
    -------
    float
        The weighted average of ``y``, or ``nan`` when every weight is zero
        (no observation lies within the kernel window around ``x``).

    Raises
    ------
    KeyError
        If ``kernel_type`` is not a key of ``kernels``.
    """
    responses = np.asarray(y, dtype=float)
    observations = np.asarray(sample, dtype=float)
    # np.vectorize keeps this working with kernels that only accept scalars.
    k = np.vectorize(kernels[kernel_type], otypes=[float])
    # Evaluate the weights once; numerator and denominator share them.
    weights = k((x - observations) / h)
    total_weight = np.sum(weights)
    if total_weight == 0:
        # Same value as the 0 / 0 division would give, without the warning.
        return np.nan
    return np.sum(weights * responses) / total_weight

In [None]:
# Bandwidth for the regression estimate. NOTE: this rebinds the module-level
# ``bandwidth`` set earlier for the density estimate, so re-running the density
# cells after this one would use 1.4 instead of the earlier value.
bandwidth = 1.4
# Fix the data, bandwidth and kernel so that only the evaluation point ``x``
# remains to be supplied.
temp1 = partial(m_hat, y=y, h=bandwidth, sample=sample, kernel_type="Epanechnikov")
# np.vectorize applies the one-point estimator to each entry of an array.
temp2 = np.vectorize(temp1)

In [None]:
# Compare the true regression function with its kernel estimate on the grid.
fig, ax = plt.subplots()
ax.plot(grid, m(grid), label="True relation")
ax.plot(grid, temp2(x=grid), label="Nadaraya-Watson estimator")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Kernel regression")
ax.legend()
plt.show()