# Homework 2

In [2]:
import numpy as np
from scipy.optimize import minimize_scalar
import plotly.offline as py
from plotly.graph_objs import Scatter, Figure, Layout

py.init_notebook_mode(connected=True)

## Sampling π


### Basic properties

The probability is just the ratio between the area of the circle and that of the square, i.e:

$$\mathbb{P}[s_i = 4] = \frac{\mathcal{A}_{circle}}{\mathcal{A}_{square}}$$

  The mean and variance are easily determined:

$$
\begin{align*}
m &= \mathbb{E}[s_i] = \frac{\pi}{4} \cdot 4 + 0 = \pi \\
\Delta &= \mathbb{E}[s_i^2] - \mathbb{E}[s_i]^2 = 4 \cdot \pi - \pi^2 = \pi(4 - \pi)
\end{align*}
$$


### Estimators

The estimators $\hat{m}$ and $\hat{\Delta}$ are both unbiased. In fact:

$$
\begin{align*}
\mathbb{E}[\hat{m}] &= \frac{1}{N} \cdot N \cdot \mathbb{E}[s_i] = m \qquad \mathrm{\blacksquare}
\end{align*}
$$

$$
\begin{align*}
\mathbb{E}[\hat{\Delta}] &= \frac{1}{N-1} \sum_{i = 1}^{N} (\mathbb{E}[s_i^2] - \mathbb{E}[\hat{m}^2]) 
= \frac{N}{N-1} \{ \mathbb{E}[s_i^2] - \frac{1}{N}\mathbb{E}[s_i^2] - (N - 1)\mathbb{E}[s_i]^2 \} = (\mathbb{E}[s_i^2] - \mathbb{E}[s_i]^2) = \Delta \qquad \mathrm{\blacksquare}
\end{align*}
$$


In [4]:
# Sample in a square 
x, y = np.random.uniform(-1, 1, (2, 1000))

# Visualize the sampling
data = [Scatter(x=x, y=y, mode='markers')]
lyt = Layout(
    yaxis=dict(scaleanchor="x"),
    shapes=[{'type': 'circle', 'x0': -1, 'y0': -1, 'x1': 1, 'y1': 1}]
)
py.iplot(Figure(data=data, layout=lyt))

In [12]:
def DKL(p, q):
    """Kullback–Leibler divergence between two Bernulli distributions"""
    return p*np.log(p/q) + (1-p)*np.log((1-p)/(1-q))

# n_est different pi estimation, for a different number of extracted points
# (choices put in the list N)
n_est = 1000
N = [1000]

# Empirical mean and variance for each pi estimates with a certain N
mean = []
variance = []

# Empirical probability of making a certain error (absolute value)
error_probability = []
error = 0.01


for i in N:
    pi_estimate = []
    error_counter = 0

    for j in range(n_est):
        s = 0.

        # cicle to compute one pi estimate using N[] different extracted points
        for k in range(i):
            [x, y] = [np.random.random() for k in [0,0]]
            if (x - .5)**2 + (y - .5)**2 < .25:
                s += 4

        pi_estimate += [s/i]

        # to count how many times I make a mistake bigger than error
        if abs(pi_estimate[-1] - np.pi) >= error:
            error_counter += 1


    # empirical estimation
    error_probability += [error_counter/float(n_est)]
    mean += [sum(pi_estimate)/n_est]
    variance += [sum([(pi_estimate[k]-mean[-1])**2 \
                      for k in range(len(pi_estimate))])/n_est]


    # various bound from the theory to have a displacement from the true pi value
    # bigger or equal to error
    variance_th = np.pi*(4 - np.pi)/i
    markov_bound = np.pi/(np.pi + error)
    cheb_bound = np.pi*(4 - np.pi)/(i*error**2)
    chern_bound = np.exp(-i*DKL((np.pi + error)/4, np.pi/4))
    hoeff_bound = np.exp(-i*error**2/8)


    print("N = ", i)

    print("empirical mean of estimator: ", mean[-1])
    print("empirical variance of estimator: ", variance[-1], "(theoretical:",
        variance_th, ")")
    print("theoretical typical error:", variance_th**(.5))
    print("probability (estimation) of making an error bigger than 0.01:",
        error_probability[-1])
    print("Markov bound:", markov_bound)
    print("Chebyshev bound:", cheb_bound)
    print("Chernoff bound:", chern_bound)
    print("Hoeffding bound:", hoeff_bound)

    print("-------------")

N =  1000
empirical mean of estimator:  3.142027999999997
empirical variance of estimator:  0.0028027192160000034 (theoretical: 0.0026967662132698143 )
theoretical typical error: 0.05193039777692651
probability (estimation) of making an error bigger than 0.01: 0.862
Markov bound: 0.996827001107326
Chebyshev bound: 26.967662132698145
Chernoff bound: 0.981578403256
Hoeffding bound: 0.987577800494
-------------



## 2. Find the lighthouse

Since the angular distribution is uniform, considering N total flashes spreaded in the $\pi$, the number of flashes in an angle $d\theta$ is: $n(\theta)=\frac{Nd\theta}{\pi}$, yielding an angular probability density of:

$$
\begin{equation}
p(\theta)=\dfrac{1}{\pi}
\end{equation}
$$

The $x_k$ position on the shore is a function of the angle $\theta$ through: $x_k=\beta tan(\theta_k)$. Each element of lenght $dx$ then depends on $d\theta$ through:

$$
\begin{equation}
 dx=\dfrac{\beta d\theta}{cos(\theta)^{2}}
\end{equation}
$$

but 

$$
\begin{equation}
cos(\theta)=\dfrac{\beta}{\sqrt{\beta^{2}+(x_k-\alpha)^{2}}}
\end{equation}
$$

consequently

$$
\begin{equation}
p(\theta)d\theta=\dfrac{1}{\pi} \dfrac{dx}{\dfrac{\beta}{cos(\theta)^{2}}}=\dfrac{\beta dx}{\pi(\beta^{2}+(x_k-\alpha)^{2})}
\end{equation}
$$

where

$$
\begin{equation}
p(x)=\dfrac{\beta}{\pi(\beta^{2}+(x_k-\alpha)^{2})}
\end{equation}
$$




## 3. Statistical inference & maximum likelihood

In [5]:
def sample_exp(tau, N=1):
    """Sample from the truncated exponential distribution."""
    return -tau*np.log(np.random.uniform(np.exp(-20/tau), np.exp(-1/tau), N))


def pdf_exp(tau, x):
    """Probability Density Function of the truncated exponential."""
    return np.exp(-x/tau) / (tau*(np.exp(-1/tau) - np.exp(-20/tau)))


def log_likelihood(tau, sample, sign=1.0):
    """Calculate the log-likelihood given a sample and tau."""
    return sign * np.sum(np.log(pdf_exp(tau, sample)))


In [9]:
lambda_true = 10.
Ns = [10, 100, 1000]

samples = [sample_exp(lambda_true, N) for N in Ns]
lambdas = np.arange(5, 50, 0.1)

data = []

for s in samples:
    lh = [log_likelihood(l, s) for l in lambdas]
    i_max = np.argmax(lh)

    data.append(Scatter(
        x = lambdas,
        y = lh / np.abs(lh[i_max]),
        name = len(s)
    ))

lyt = Layout(
    title="Log-likelihood for different sample sizes",
    xaxis=dict(title="λ")
) 
py.iplot(Figure(data=data, layout=lyt) )

In [7]:
def estimate_lambda(sample, bounds=(1, 100)):
    """Estimates the Maximum Likelihood lambda parameter."""
    r = minimize_scalar(log_likelihood, args=(sample, -1), method='bounded', bounds=bounds)
    if not r.success:
        raise Exception("estimate_lambda: {}".format(r.message))
    
    return r.x


def MSE(lambda_true, N=100, repeat=10000):
    """Calculate the Mean Square Error.
    
    Args:
        lambda_true: The true value of the lambda parameter.
        N: The sample size.
        repeat: The number of samples on which the average is calculated.
    """
    errors = np.zeros(repeat)
    
    for i in range(repeat):
        s = sample_exp(lambda_true, N)
        lambda_ml = estimate_lambda(s)
        errors[i] = (lambda_ml-lambda_true)**2

    return np.average(errors)


def I(lam, N):
    """Fisher Information."""
    # @todo
    return N / lam**3 * (np.exp(-1/lam) - 400*np.exp(-20/lam) + 2*((np.exp(-1/lam) - 20*np.exp(-20/lam))/(np.exp(-1/lam) - np.exp(-20/lam)) + lam))

In [13]:
lam = 10.
s = sample_exp(lam, N=1000)

lambdas_true = np.arange(1, 20, 1.)

lyt = Layout(
    title="MSE as function of λ",
    xaxis=dict(title="λ"),
    yaxis=dict(title="MSE")
)

data = [Scatter(
    x=lambdas_true,
    y=[MSE(l, N=1000, repeat=10000) for l in lambdas_true]
)]

py.iplot(Figure(data=data, layout=lyt))