## Exponential Distribution
---

The p.d.f. of an <font color=red>exponential distribution</font> $\mathrm{Exp}(\lambda)$ is

\begin{align*}
    & p(x|\lambda) = \lambda e^{-\lambda x}, \quad 0 < x < \infty,\quad \lambda > 0, \\
    & \mathrm{E}[X] = \frac1{\lambda},\quad \mathrm{Var}[X] = \frac1{\lambda^{2}}.
\end{align*}

The exponential distribution is often used to model durations, i.e., the length of time until an event occurs.

## Likelihood
---

Suppose we have the i.i.d. sample $D=(x_{1},\dots,x_{n})$ from $\mathrm{Exp}(\lambda)$.

The likelihood is
\begin{align*}
    p(D|\lambda) &= \prod_{i=1}^{n}p(x_{i}|\lambda) = \prod_{i=1}^{n}\lambda e^{-\lambda x_{i}} \\
                 &= \lambda^{n}e^{-\lambda\sum_{i=1}^{n}x_{i}}.
\end{align*}

## Derivation of the Posterior Distribution of $\lambda$
---

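We use the gamma distribution $\mathrm{Gamma}(\alpha_{0},\beta_{0})$ with shape $\alpha_{0}$ and rate $\beta_{0}$ as the prior of $\lambda$, i.e., $p(\lambda)\propto\lambda^{\alpha_{0}-1}e^{-\beta_{0}\lambda}$.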

The posterior distribution of $\lambda$ is derived as

\begin{align*}
    p(\lambda|D)
    &\propto p(D|\lambda)p(\lambda) \\
    &\propto \lambda^{n}e^{-\lambda\sum_{i=1}^{n}x_{i}}\times\lambda^{\alpha_{0}-1}e^{-\beta_{0}\lambda} \\
    &\propto \lambda^{n+\alpha_{0}-1}e^{-\left(\sum_{i=1}^{n}x_{i}+\beta_{0}\right)\lambda} \\
    &\propto \lambda^{\alpha_{\star}-1}e^{-\beta_{\star}\lambda}, \\
    \alpha_{\star} &= n + \alpha_{0},\quad \beta_{\star} = \sum_{i=1}^{n}x_{i} + \beta_{0},
\end{align*}

which is the gamma distribution $\mathrm{Gamma}(\alpha_{\star},\beta_{\star})$.


In [1]:
import numpy as np
import scipy.stats as st
import scipy.optimize as opt
import pandas as pd
from IPython.display import display
from bokeh.io import show, output_notebook
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, HoverTool, Slider, Span
from bokeh.plotting import figure, show
output_notebook()

### HPDI of the gamma distribution

In [2]:
def gamma_hpdi(ci0, alpha, theta, prob):
    """Compute the highest posterior density interval (HPDI) of a gamma distribution.

    The interval ``[lower, upper]`` is the solution of two conditions:
    it contains probability ``prob`` and the density takes the same value
    at both end points.

    Parameters
    ----------
    ci0 : array-like of length 2
        Initial guess ``(lower, upper)`` handed to the root finder,
        e.g. the equal-tailed interval.
    alpha : float
        Shape parameter of the gamma distribution.
    theta : float
        Scale parameter of the gamma distribution.
    prob : float
        Probability mass the interval must contain.

    Returns
    -------
    numpy.ndarray
        Array ``[lower, upper]``.
    """
    import warnings

    # For alpha <= 1 the gamma density is non-increasing on (0, inf), so the
    # equal-density condition has no solution with lower < upper. The highest
    # density region then starts at 0 and ends at the prob-quantile.
    if alpha <= 1.0:
        return np.array([0.0, st.gamma.ppf(prob, alpha, scale=theta)])

    def hpdi_conditions(v, a, t, p):
        # v[0] is the lower end point, v[1] the upper end point.
        eq1 = st.gamma.cdf(v[1], a, scale=t) - st.gamma.cdf(v[0], a, scale=t) - p
        eq2 = st.gamma.pdf(v[1], a, scale=t) - st.gamma.pdf(v[0], a, scale=t)
        return np.hstack((eq1, eq2))

    sol = opt.root(hpdi_conditions, ci0, args=(alpha, theta, prob))
    if not sol.success:
        # Still return the last iterate (as before), but do not hide the failure.
        warnings.warn(
            'gamma_hpdi: root finder did not converge: ' + str(sol.message),
            RuntimeWarning
        )
    return sol.x

### Posterior statistics of $\lambda$

In [3]:
def exponential_stats(data, hyper_param, prob):
    """Summarise the posterior distribution of the exponential rate lambda.

    With a Gamma(a0, b0) prior (shape a0, rate b0) the posterior is
    Gamma(n + a0, sum(data) + b0).

    Parameters
    ----------
    data : array-like
        Observed sample; anything ``numpy.asarray`` accepts.
    hyper_param : dict
        Prior hyperparameters under the keys ``'a0'`` (shape) and ``'b0'`` (rate).
    prob : float
        Probability mass of the credible interval and of the HPDI.

    Returns
    -------
    pandas.DataFrame
        One row (index ``'$\\lambda$'``) with the columns mean, median, mode,
        sd, ci (lower), ci (upper), hpdi (lower), hpdi (upper).
    """
    # Accept lists and other array-likes, not only numpy arrays.
    data = np.asarray(data)
    a0 = hyper_param['a0']
    b0 = hyper_param['b0']
    n = data.size
    a_star = n + a0
    b_star = data.sum() + b0
    # scipy parametrises the gamma distribution by scale = 1 / rate.
    theta_star = 1.0 / b_star
    mean_lam = st.gamma.mean(a_star, scale=theta_star)
    median_lam = st.gamma.median(a_star, scale=theta_star)
    # The gamma mode is (shape - 1) * scale only for shape >= 1; below that
    # the density peaks at 0, so clamp instead of reporting a negative mode.
    mode_lam = max(a_star - 1.0, 0.0) * theta_star
    sd_lam = st.gamma.std(a_star, scale=theta_star)
    ci_lam = st.gamma.interval(prob, a_star, scale=theta_star)
    # The equal-tailed interval serves as the starting point of the HPDI search.
    hpdi_lam = gamma_hpdi(ci_lam, a_star, theta_star, prob)
    stats = np.hstack((mean_lam, median_lam, mode_lam, sd_lam, ci_lam, hpdi_lam)).reshape((1, 8))
    stats_string = ['mean', 'median', 'mode', 'sd', 'ci (lower)', 'ci (upper)', 'hpdi (lower)', 'hpdi (upper)']
    param_string = ['$\\lambda$']
    results = pd.DataFrame(stats, index=param_string, columns=stats_string)
    return results

### Plotting the posterior distribution of $\lambda$

In [4]:
def exponential_posterior_plot(data, hyper_param, bounds):
    """Show an interactive Bokeh plot of the prior and posterior densities of lambda.

    Two sliders control the prior hyperparameters; moving either one
    recomputes both curves.

    Parameters
    ----------
    data : array-like
        Observed sample; anything ``numpy.asarray`` accepts.
    hyper_param : dict
        Initial slider values under the keys ``'a0'`` and ``'b0'``.
    bounds : sequence of length 2
        Lower and upper end of the lambda grid on which the densities are drawn.
    """
    data = np.asarray(data)
    n = data.size
    y = data.sum()
    x = np.linspace(bounds[0], bounds[1], 1001)

    def densities(a0, b0):
        # Gamma(shape, rate) maps to scipy's scale = 1 / rate.
        return dict(
            x=x,
            posterior=st.gamma.pdf(x, n + a0, scale=1.0/(y + b0)),
            prior=st.gamma.pdf(x, a0, scale=1.0/b0)
        )

    def exponential_posterior_interactive(doc):
        # A Bokeh model can belong to only one document, so the sliders are
        # built here (once per invocation) instead of being shared between
        # invocations of this callback.
        slider_a = Slider(value=hyper_param['a0'], start=0.01, end=10.0, step=0.01, title=r'$$\alpha_0$$')
        slider_b = Slider(value=hyper_param['b0'], start=0.01, end=10.0, step=0.01, title=r'$$\beta_0$$')
        source = ColumnDataSource(data=densities(slider_a.value, slider_b.value))
        hover = HoverTool(
            tooltips = [
                ('\u03BB', '@x'),
                ('posterior', '@posterior'),
                ('prior', '@prior')
            ]
        )
        p = figure(width=400, height=300, tools=[hover], toolbar_location=None)
        p.line('x', 'posterior', source=source, line_color='navy', line_width=2, legend_label='Posterior')
        p.line('x', 'prior', source=source, line_color='firebrick', line_width=2, line_dash='dotted', legend_label='Prior')
        p.xaxis.axis_label = r'$$\lambda$$'
        p.yaxis.axis_label = 'Probability density'
        p.x_range.range_padding = 0
        p.legend.click_policy = 'hide'
        p.legend.location = 'top_right'
        p.legend.border_line_color = p.xgrid.grid_line_color = p.ygrid.grid_line_color = p.outline_line_color = None

        def update_posterior(attr, old, new):
            # Replace all columns in one assignment so both curves are
            # updated together rather than in two separate changes.
            source.data = densities(slider_a.value, slider_b.value)

        for params in [slider_a, slider_b]:
            params.on_change('value', update_posterior)
        doc.add_root(column(row(slider_a, slider_b, width=400), p))

    show(exponential_posterior_interactive)

### Application 1: Simulated Data
---

We use artificial data generated from the exponential distribution:

$$
 x_1,\dots,x_{50} \sim \mathrm{Exp}(3).
$$

The prior distribution of $\lambda$ is $\mathrm{Gamma}(1,1)$, which is equivalent to the exponential distribution $\mathrm{Exp}(1)$:

$$
 p(\lambda) = e^{-\lambda},\quad \lambda > 0.
$$


In [5]:
# Rate of the data-generating exponential distribution and the sample size.
lam = 3.0
n = 50
# Seed NumPy's global generator so the draw below is reproducible.
np.random.seed(99)
data = st.expon.rvs(size=n, scale=1.0/lam)
# Gamma(1, 1) prior on lambda.
hyper_param = {'a0': 1.0, 'b0': 1.0}
# Probability mass of the credible interval and the HPDI.
prob = 0.95
results = exponential_stats(data, hyper_param, prob)
display(results)

Unnamed: 0,mean,median,mode,sd,ci (lower),ci (upper),hpdi (lower),hpdi (upper)
$\lambda$,2.925511,2.906412,2.868148,0.409653,2.178235,3.781295,2.143757,3.739807


In [7]:
# Draw prior and posterior of lambda on the grid [0, 6].
exponential_posterior_plot(data, hyper_param, bounds=[0.0, 6.0])

### Application 2: Strikes Duration Data
---

+ a cross-section from 1968 to 1976
+ number of observations : 566
+ country : United States

#### Variables:

+ dur - duration of the strike in days

+ gdp - measure of stage of business cycle (deviation of monthly log industrial production in manufacturing from prediction from OLS on time, time-squared and monthly dummies)

#### Source:

Kennan, J. (1985) “The Duration of Contract Strikes in U.S. Manufacturing”, Journal of Econometrics, 28, 5-28.

#### Reference:

Cameron, A.C. and P.K. Trivedi (2005) Microeconometrics : methods and applications, Cambridge, pp. 574–5 and 582.



In [8]:
# Load the strike data and pull out the duration column as a NumPy array.
strikes_data = pd.read_csv('StrikeDur.csv', index_col=0)
strikes_duration = strikes_data['dur'].values
bin_max = np.max(strikes_duration)
# One bin per whole day: edges 1, 2, ..., bin_max + 1. np.histogram closes only
# the last bin on the right, so without the extra edge the counts for
# bin_max - 1 and bin_max would be merged into a single bar.
# NOTE(review): assumes 'dur' holds whole days >= 1 -- confirm against the data.
hist, bins = np.histogram(strikes_duration, bins=np.arange(1, bin_max + 2))
p = figure(width=400, height=300, toolbar_location=None)
# bins[:-1] are the left edges, i.e. the day each count belongs to.
p.vbar(x=bins[:-1], top=hist, width=0.5)
p.xaxis.axis_label = 'Duration of Strikes (Days)'
p.yaxis.axis_label = 'Frequency'
p.y_range.range_padding = 0
p.xgrid.grid_line_color = p.ygrid.grid_line_color = p.outline_line_color = None
show(p)

In [9]:
# Gamma(1, 1) prior on lambda for the strike-duration model.
hyper_param_strikes = {'a0': 1.0, 'b0': 1.0}
# Probability mass of the credible interval and the HPDI.
prob = 0.95
results_strikes = exponential_stats(strikes_duration, hyper_param_strikes, prob)
display(results_strikes)

Unnamed: 0,mean,median,mode,sd,ci (lower),ci (upper),hpdi (lower),hpdi (upper)
$\lambda$,0.022963,0.022949,0.022922,0.000964,0.021111,0.024891,0.021085,0.024863


In [11]:
# Draw prior and posterior of lambda on the grid [0, 0.04].
exponential_posterior_plot(strikes_duration, hyper_param_strikes, bounds=[0.0, 0.04])