In [None]:
%matplotlib notebook
import numpy as np
import math
import matplotlib.pyplot as plt
from ipywidgets import widgets
from IPython.display import display
import scipy.stats

### Hypothesis testing 

The following widgets let you select the parameters of a one-sided or two-sided hypothesis test for the population mean. The null hypothesis is that the population mean equals a given value. The alternative hypothesis is that the population mean is lower/higher than that value (one-sided test) or different from that value (two-sided test).

The sampling distribution of the sample mean is close to normal, centered around the population mean, and with a standard deviation equal to the standard error of the mean. The null hypothesis is evaluated by using the Z-score of the sample mean, based on its sampling distribution, to compute the probability of obtaining a sample mean at least as extreme as the observed one when sampling from the population under the null hypothesis. This is the p-value. We reject the null hypothesis if the p-value is lower than the significance level.

We use the standard deviation of the sample as an approximation of the population's standard deviation when computing the standard error. 

In [None]:
# Number of x samples used when drawing the curve and the shaded regions.
POINTS = 1000

# Single figure/axes pair shared by every redraw of the interactive plot.
fig, ax = plt.subplots()
ax.grid(True)
# Interactive mode, so later draw calls update the already-displayed figure.
plt.ion()

def my_label(text, width='20%'):
    """Return a ``widgets.Label`` showing ``text`` with the given layout width.

    ``width`` is passed unchanged to ``widgets.Layout`` (default ``'20%'``).
    """
    label_layout = widgets.Layout(width=width)
    return widgets.Label(text, layout=label_layout)

# Input widgets.
# pm  - population mean under the null hypothesis
# sm  - observed sample mean
# ssd - sample standard deviation
# ss  - sample size
# sl  - significance level
pm = widgets.FloatText(value=0)
sm = widgets.FloatText(value=0.1)
ssd = widgets.FloatText(value=1)
# FloatText has no min/max traits, so bounds passed to it are ignored.
# BoundedFloatText enforces them; its default max is 100, hence the explicit
# large upper bound for the sample size.
ss = widgets.BoundedFloatText(value=100, min=10, max=1e9)
sl = widgets.BoundedFloatText(value=0.05, min=0.01, max=0.99)
osts = widgets.RadioButtons(options=['One-sided (left)', 'One-sided (right)', 'Two-sided'])
# Output labels: result1 shows the p-value, result2 the test decision.
result1 = my_label('', width='100%')
result2 = my_label('', width='100%')

box = widgets.VBox([
    my_label('Population'),
    widgets.HBox([my_label('Mean (null hypothesis)'), pm]),
    my_label('Sample'),
    widgets.HBox([my_label('Mean'), sm]),
    widgets.HBox([my_label('Standard deviation'), ssd]),
    widgets.HBox([my_label('Size'), ss]),
    my_label('Hypothesis test'),
    widgets.HBox([my_label('Significance level'), sl]),
    osts,
    result1,
    result2,
])

# Plots the normal distribution centered around the population mean and with
# standard deviation = standard error. The area of the curve corresponding to
# the alternate hypothesis is highlighted
def on_change(change):
    """Redraw the sampling distribution and update the test result labels.

    Reads the current values of the input widgets (``pm``, ``sm``, ``ssd``,
    ``ss``, ``sl``, ``osts``), plots the normal sampling distribution of the
    sample mean under the null hypothesis on the module-level ``ax``, shades
    the region whose area is the p-value, and writes the p-value and the
    decision into ``result1`` and ``result2``.

    Args:
        change: The change notification passed by ``observe`` (or ``None``
            for the initial draw). It is not inspected; every call redraws
            from the widgets' current values.
    """
    # Remove the previous curve and shaded regions. Artist.remove() is used
    # because Axes.lines / Axes.collections cannot be mutated directly on
    # current matplotlib versions.
    for artist in list(ax.lines) + list(ax.collections):
        artist.remove()
    # Forget the data limits of the removed artists so the y axis rescales.
    ax.relim()

    # A non-positive sample size or standard deviation gives no valid
    # standard error (division by zero / degenerate distribution).
    if ss.value <= 0 or ssd.value <= 0:
        result1.value = 'Invalid input: sample size and standard deviation must be positive'
        result2.value = ''
        fig.canvas.draw()
        return

    # Sampling distribution of the sample mean under the null hypothesis:
    # normal, centered on the hypothesized mean, with standard deviation
    # equal to the standard error.
    mean = pm.value
    std = ssd.value / math.sqrt(ss.value)
    dist = scipy.stats.norm(mean, std)

    # Plot three standard errors on either side of the hypothesized mean.
    lower = mean - 3*std
    upper = mean + 3*std
    x = np.linspace(lower, upper, POINTS)
    ax.plot(x, dist.pdf(x), color='blue')
    ax.set_xlim([lower, upper])

    def plot_alternate_hypothesis(start, end):
        # Shade the area under the curve between start and end, restricted
        # to the plotted range; nothing is drawn for an empty interval.
        start = max(start, lower)
        end = min(end, upper)
        if start >= end:
            return
        xs = np.linspace(start, end, POINTS)
        ax.fill_between(xs, 0, dist.pdf(xs), color='red', alpha=0.3)

    if osts.value == 'One-sided (left)':
        plot_alternate_hypothesis(lower, sm.value)
        p_value = dist.cdf(sm.value)
    elif osts.value == 'One-sided (right)':
        plot_alternate_hypothesis(sm.value, upper)
        # Survival function = 1 - cdf, computed without cancellation error.
        p_value = dist.sf(sm.value)
    else:
        # Two-sided: both tails at the same distance from the hypothesized
        # mean as the observed sample mean (mirrored around the mean).
        offset = abs(sm.value - mean)
        plot_alternate_hypothesis(lower, mean - offset)
        plot_alternate_hypothesis(mean + offset, upper)
        p_value = 2*dist.cdf(mean - offset)

    # Result of the hypothesis test
    result1.value = 'Result of the test: p-value = ' + str(p_value)
    if p_value < sl.value:
        result2.value = 'We reject the null hypothesis'
    else:
        result2.value = 'We fail to reject the null hypothesis'

    fig.canvas.draw()
    
# Redraw whenever an input's value changes. Observing only the 'value' trait
# avoids redundant redraws triggered by other trait changes of the same widget.
wid = [pm, sm, ssd, ss, sl, osts]
for w in wid:
    w.observe(on_change, names='value')
# Initial draw using the widgets' default values.
on_change(None)

display(box)