In [90]:
import numpy as np
import altair as alt
import pandas as pd
from scipy.stats import norm
import math
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [91]:
# Number of experiments. Not referenced by any cell visible here —
# presumably consumed by a later repeated-simulation step (TODO confirm).
experiments = 1000

In [92]:
# Sample size: how many values are drawn for `observations`, and the `n`
# passed to epsilon() when the band half-width is computed.
observations_count = 100

In [93]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same sample
# (and therefore the same bands and figure) on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample of `observations_count` draws from a normal distribution with
# mean 0 and standard deviation 1.
observations = rng.normal(0, 1, observations_count)

In [94]:
# Value passed to epsilon() as `alpha`; it enters the band half-width
# through log(2 / alpha), so a smaller alpha gives a wider band.
alpha = 0.05

In [132]:
def epsilon(n, alpha):
    """Return the band half-width sqrt((1 / (2 * n)) * log(2 / alpha)).

    Parameters
    ----------
    n : number
        Sample size; must be non-zero (a zero raises ZeroDivisionError).
    alpha : number
        Level entering through log(2 / alpha); must be non-zero.

    Returns
    -------
    float
        The half-width added to / subtracted from the empirical CDF.
    """
    inverse_two_n = 1 / (2 * n)
    log_term = math.log(2 / alpha)
    return math.sqrt(inverse_two_n * log_term)

In [133]:
def Fn_hat_closure(data):
    """Build the empirical CDF of a sample.

    Parameters
    ----------
    data : array-like of numbers
        The observed sample. It is converted to a NumPy array once, so plain
        Python lists are accepted as well as arrays.

    Returns
    -------
    callable
        ``Fn_hat(x)`` returning the fraction of observations that are
        ``<= x`` for a scalar ``x``.

    Raises
    ------
    ValueError
        If ``data`` contains no observations.
    """
    sample = np.asarray(data)
    if sample.size == 0:
        raise ValueError("data must contain at least one observation")

    def Fn_hat(x):
        # The empirical CDF counts observations <= x (it is right-continuous).
        # A strict `<` would under-report by 1/n at every observed value.
        # The mean of the boolean mask is exactly count / n.
        return np.mean(sample <= x)
    return Fn_hat

In [134]:
def Ux_closure(Fn_hat, epsilon_n):
    """Build the upper edge of the band around ``Fn_hat``.

    Parameters
    ----------
    Fn_hat : callable
        Function of ``x`` returning a number.
    epsilon_n : number
        Amount added to ``Fn_hat(x)``.

    Returns
    -------
    callable
        ``U(x)`` equal to ``Fn_hat(x) + epsilon_n``, capped at 1.
    """
    def U(x):
        raised = Fn_hat(x) + epsilon_n
        # Cap at 1: only switch to the literal 1 when the sum exceeds it.
        return 1 if 1 < raised else raised
    return U

In [135]:
def Lx_closure(Fn_hat, epsilon_n):
    """Build the lower edge of the band around ``Fn_hat``.

    Parameters
    ----------
    Fn_hat : callable
        Function of ``x`` returning a number.
    epsilon_n : number
        Amount subtracted from ``Fn_hat(x)``.

    Returns
    -------
    callable
        ``L(x)`` equal to ``Fn_hat(x) - epsilon_n``, floored at 0.
    """
    def L(x):
        lowered = Fn_hat(x) - epsilon_n
        # Floor at 0: only switch to the literal 0 when the difference is negative.
        return 0 if 0 > lowered else lowered
    return L

In [136]:
# Band half-width for this sample size and alpha.
epsilon_n = epsilon(n=observations_count, alpha=alpha)

# Empirical CDF of the sample, then the lower and upper band edges around it.
fn_hat = Fn_hat_closure(data=observations)
lx = Lx_closure(Fn_hat=fn_hat, epsilon_n=epsilon_n)
ux = Ux_closure(Fn_hat=fn_hat, epsilon_n=epsilon_n)

In [137]:
# Evaluation grid: 400 evenly spaced points from -2 to 2 (inclusive).
experiment_range = np.linspace(start=-2, stop=2, num=400)

# Evaluate each curve point by point on the grid, keeping plain Python lists.
estimated_Fn = list(map(fn_hat, experiment_range))
lower_band = list(map(lx, experiment_range))
upper_band = list(map(ux, experiment_range))

In [141]:
def _xy_frame(x_values, y_values):
    """Return a DataFrame with the two columns 'x' and 'y'."""
    return pd.DataFrame({'x': x_values, 'y': y_values})


# The sample in ascending order, shared by the two data-based frames below.
sorted_observations = sorted(observations)

# Curves evaluated on the grid.
upper_band_data = _xy_frame(experiment_range, upper_band)
lower_band_data = _xy_frame(experiment_range, lower_band)
estimated_fn_data = _xy_frame(experiment_range, estimated_Fn)

# The observations themselves, all placed at y = 0 (the scalar is broadcast).
data_data = _xy_frame(sorted_observations, 0)

# Standard normal CDF evaluated at the sorted observations.
true_cdf_data = _xy_frame(sorted_observations, norm.cdf(sorted_observations))

In [142]:
def _xy_chart(frame, color, point=False):
    """Chart of column 'y' against column 'x' in the given colour.

    Draws a line by default, or point marks when ``point`` is true. Every
    chart uses the same axis titles so the layers share one pair of axes.
    """
    base = alt.Chart(frame)
    marked = base.mark_point(color=color) if point else base.mark_line(color=color)
    return marked.encode(
        x=alt.X('x', axis=alt.Axis(title='y')),
        y=alt.Y('y', axis=alt.Axis(title='f(y)')),
    )


# Band edges and the estimate evaluated on the grid.
upper_band_chart = _xy_chart(upper_band_data, 'blue')
lower_band_chart = _xy_chart(lower_band_data, 'red')
estimated_fn_chart = _xy_chart(estimated_fn_data, 'black')

# The raw observations as points, and the reference CDF as a line.
data_chart = _xy_chart(data_data, 'blue', point=True)
true_cdf_chart = _xy_chart(true_cdf_data, 'green')

In [144]:
# Overlay all five charts in one figure: upper band (blue line), lower band
# (red line), estimated CDF (black line), observations (blue points) and the
# normal CDF (green line). As the cell's last expression it is displayed.
upper_band_chart + lower_band_chart + estimated_fn_chart + data_chart + true_cdf_chart