## Generación de datos y comparación de intervalos

El ejercicio pide comparar las barras de error para varias distribuciones. En este cuaderno genero 2,000 muestras por caso (tres gaussianas, dos uniformes y tres betas) y calculo intervalos de confianza para la media, con nivel alpha = 0.05, usando:

- Desigualdad de Chebyshev: requiere una estimación empírica de la varianza.
- Desigualdad de Hoeffding: exige cotas conocidas [a, b]. Como la distribución gaussiana no es acotada, en esos casos uso mu ± 4·sigma como cota práctica; el intervalo resultante es, por tanto, solo aproximado.

La tabla final permite contrastar directamente los anchos (longitudes) de los intervalos para las distintas familias.


In [1]:
import numpy as np
import pandas as pd

def hoeffding_interval(sample, low, high, alpha=0.05):
    """Return a two-sided Hoeffding interval for the mean of ``sample``.

    The half-width is ``(high - low) * sqrt(log(2 / alpha) / (2 * n))``,
    where ``n`` is the number of observations.

    Args:
        sample: Non-empty array-like of numeric observations.
        low: Lower bound assumed for the observations.
        high: Upper bound assumed for the observations.
        alpha: Miscoverage level, strictly between 0 and 1.

    Returns:
        Tuple ``(lower, upper, width)``; ``width`` is twice the half-width.

    Raises:
        ValueError: If ``sample`` is empty, ``high < low`` or ``alpha`` is
            not strictly between 0 and 1.
    """
    # Coerce so plain lists/tuples work as well as NumPy arrays.
    values = np.asarray(sample)
    n = values.size
    if n == 0:
        raise ValueError("sample must contain at least one observation")
    if high < low:
        raise ValueError("high must be greater than or equal to low")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    half_width = (high - low) * np.sqrt(np.log(2 / alpha) / (2 * n))
    mean = values.mean()
    return mean - half_width, mean + half_width, 2 * half_width

def chebyshev_interval(sample, alpha=0.05):
    """Return a two-sided Chebyshev interval for the mean of ``sample``.

    The half-width is ``sqrt(var / (n * alpha))``, where ``var`` is the
    sample variance computed with ``ddof=1`` and ``n`` is the number of
    observations.

    Args:
        sample: Array-like of numeric observations with at least two values.
        alpha: Miscoverage level, strictly between 0 and 1.

    Returns:
        Tuple ``(lower, upper, var, width)``; ``width`` is twice the
        half-width.

    Raises:
        ValueError: If ``sample`` has fewer than two observations or
            ``alpha`` is not strictly between 0 and 1.
    """
    # Coerce so plain lists/tuples work as well as NumPy arrays.
    values = np.asarray(sample)
    n = values.size
    if n < 2:
        # With ddof=1 the variance is undefined for fewer than two points.
        raise ValueError("sample must contain at least two observations")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    mean = values.mean()
    var = values.var(ddof=1)
    half_width = np.sqrt(var / (n * alpha))
    return mean - half_width, mean + half_width, var, 2 * half_width

def _interval_row(label, data, low, high, alpha):
    """Build one table row with the Chebyshev and Hoeffding intervals.

    ``low`` and ``high`` are the bounds passed to ``hoeffding_interval`` and
    are also rendered, with two decimals, in the ``bounds`` column.
    """
    cheb_low, cheb_high, var_hat, cheb_width = chebyshev_interval(data, alpha)
    hoe_low, hoe_high, hoe_width = hoeffding_interval(data, low, high, alpha)
    return dict(
        distribution=label,
        mean_hat=float(data.mean()),
        var_hat=float(var_hat),
        chebyshev_low=cheb_low,
        chebyshev_high=cheb_high,
        chebyshev_width=cheb_width,
        hoeffding_low=hoe_low,
        hoeffding_high=hoe_high,
        hoeffding_width=hoe_width,
        bounds=f"[{low:.2f}, {high:.2f}]",
    )


# Fixed seed so the table is reproducible; the draw order below
# (normal, uniform, beta) determines the generated samples.
rng = np.random.default_rng(42)
N = 2000
alpha = 0.05
rows = []

# Gaussians are unbounded, so mu +/- 4*sigma is used as the Hoeffding range.
for mu, var in [(0, 1), (0, 4), (3, 1)]:
    sigma = np.sqrt(var)
    data = rng.normal(mu, sigma, size=N)
    rows.append(_interval_row(
        f"Normal(mu={mu}, var={var})",
        data,
        mu - 4 * sigma,
        mu + 4 * sigma,
        alpha,
    ))

# Uniform(a, b): the distribution parameters are the Hoeffding bounds.
for a, b in [(0, 1), (2, 5)]:
    data = rng.uniform(a, b, size=N)
    rows.append(_interval_row(f"Uniform({a},{b})", data, a, b, alpha))

# Beta(a, b): the interval [0, 1] is used as the Hoeffding bounds.
for a, b in [(0.5, 0.5), (2, 5), (5, 1)]:
    data = rng.beta(a, b, size=N)
    rows.append(_interval_row(f"Beta(alpha={a}, beta={b})", data, 0, 1, alpha))

df = pd.DataFrame(rows)
pd.set_option('display.precision', 4)
print(df)


ModuleNotFoundError: No module named 'pandas'