In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import math
import numpy as np
from ipywidgets import widgets
from IPython.display import display

In [None]:
# I am using the following code to compute n choose k. The code 
# is copied from https://stackoverflow.com/questions/3025162/statistics-combinations-in-python.

def choose(n, k):
    """Return the binomial coefficient "n choose k" as an exact integer.

    Args:
        n: total number of items.
        k: number of items selected.

    Returns:
        The number of k-element subsets of an n-element set, or 0 when
        ``k`` lies outside ``0..n``.
    """
    if k < 0 or k > n:
        return 0

    # C(n, k) == C(n, n - k), so only the shorter product is accumulated.
    steps = min(k, n - k)
    numerator = 1
    denominator = 1
    for offset in range(steps):
        numerator *= n - offset
        denominator *= offset + 1
    # The division is exact: the numerator is a product of `steps`
    # consecutive integers, which is divisible by steps!.
    return numerator // denominator

### Normal distribution

The normal distribution is a symmetric, unimodal, bell-shaped distribution described by two parameters: the mean and the standard deviation. The mean determines where the centre of the distribution lies on the x axis, whereas the standard deviation determines the width of the central part of the distribution.

In [None]:
# Figure and axes that on_change_normal redraws on every widget change.
fig_normal, ax_normal = plt.subplots()
ax_normal.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_NORMAL = ['red', 'blue', 'green']
# Initial standard deviation, also used as the fallback when a
# non-positive value is entered (see on_change_normal).
DEFAULT_STD_NORMAL = 1

def define_input_normal(description, value):
    """Build a float text box for one normal-distribution parameter.

    Args:
        description: label shown next to the input box.
        value: initial number shown in the box.

    Returns:
        The newly created ``widgets.FloatText``.
    """
    text_box = widgets.FloatText(value=value, description=description)
    return text_box

# Selector for how many of the three curves are drawn; its value is a string
# that the handler converts with int().
number_dist_normal = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# One (mean, standard deviation) pair of inputs per curve.
mean1_normal = define_input_normal('Mean 1: ', 0)
mean2_normal = define_input_normal('Mean 2: ', -1)
mean3_normal = define_input_normal('Mean 3: ', 1)
std1_normal = define_input_normal('Standard deviation 1: ', DEFAULT_STD_NORMAL)
std2_normal = define_input_normal('Standard deviation 2: ', DEFAULT_STD_NORMAL)
std3_normal = define_input_normal('Standard deviation 3: ', DEFAULT_STD_NORMAL)

# Selector on top, then one row per distribution.
layout_normal = widgets.VBox([
    number_dist_normal,
    widgets.HBox([mean1_normal, std1_normal]),
    widgets.HBox([mean2_normal, std2_normal]),
    widgets.HBox([mean3_normal, std3_normal])
])

# The normal distributions are plotted in the range between -3 and +3 standard
# deviations around the mean. However, if more than one distribution is plotted,
# we extend the tails of all of them so that they cover the whole x axis
def on_change_normal(change):
    """Redraw the normal-distribution curves from the current widget values.

    Serves both as the widget observer callback and as the initial draw.
    Widgets are read through ``wid_normal``, laid out as
    ``[selector, mean1..mean3, std1..std3]``.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_normal.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_normal.relim()

    n_dist = int(number_dist_normal.value)
    mean_widgets = wid_normal[1:1 + n_dist]
    std_widgets = wid_normal[4:4 + n_dist]

    # Checking that the standard deviation is higher than 0
    for std_widget in std_widgets:
        if std_widget.value <= 0:
            std_widget.value = DEFAULT_STD_NORMAL

    # Read the parameters only after validation so the fallbacks are used.
    params = [
        (float(mean_widget.value), float(std_widget.value))
        for mean_widget, std_widget in zip(mean_widgets, std_widgets)
    ]

    # Common x-range: the widest mean +/- 3 standard deviations interval.
    min_x_normal = min(mean - 3*std for mean, std in params)
    max_x_normal = max(mean + 3*std for mean, std in params)
    x_normal = np.arange(min_x_normal, max_x_normal, 0.01)

    # Plotting itself: the PDF is computed manually for each curve.
    for (mean_normal, std_normal), color in zip(params, COLORS_NORMAL):
        y_normal = (1/(math.sqrt(2*math.pi*std_normal**2))
                    * np.exp(-(x_normal - mean_normal)**2/(2*std_normal**2)))
        ax_normal.plot(x_normal, y_normal, color=color)
    ax_normal.set_xlim([min_x_normal, max_x_normal])
    fig_normal.canvas.draw()
    
# Order matters: on_change_normal indexes this list as
# [selector, mean1..mean3, std1..std3].
wid_normal = [number_dist_normal, mean1_normal, mean2_normal, mean3_normal, std1_normal, std2_normal, std3_normal]
# Redraw only when a widget's value changes (not on every trait update), and
# use a plain loop since the calls are made purely for their side effect.
for _widget in wid_normal:
    _widget.observe(on_change_normal, names='value')
# Initial draw.
on_change_normal(None)

display(layout_normal)

### Bernoulli distribution

A Bernoulli random variable is a random variable with only two possible values corresponding to the two possible outcomes of a trial: success and failure. If the probability of obtaining a success is p, then we say that the random variable follows a Bernoulli distribution with mean p and standard deviation √(p(1-p)).

In [None]:
# Figure and axes that on_change_ber redraws on every widget change.
fig_ber, ax_ber = plt.subplots()
plt.ion()
# Colour and marker per distribution; indexed by its position (0..2).
COLORS_BER = ['r', 'b', 'g']
MARKERS_BER = ['o', 's', '*']
# Template for the read-only summary labels (typo "deviaton" fixed).
BUTTON_DESCRIPTION = 'mean = %.2f, standard deviation = %.2f'
# Initial success probabilities of the three distributions.
P_BER = [0.3, 0.5, 0.7]

def define_input_ber(description, value):
    """Build a numeric input for a probability, restricted to [0, 1].

    ``BoundedFloatText`` is used so that the ``min``/``max`` limits are
    actually enforced by the widget; the plain ``FloatText`` is the unbounded
    text widget.

    Args:
        description: label shown next to the input box.
        value: initial probability shown in the box.

    Returns:
        The newly created ``widgets.BoundedFloatText``.
    """
    return widgets.BoundedFloatText(
        value=value,
        min=0,
        max=1,
        description=description
    )

def define_label_ber(i):
    """Build the read-only summary label for the i-th distribution.

    The label is a disabled button whose text is ``BUTTON_DESCRIPTION``
    filled with the Bernoulli mean and standard deviation for ``P_BER[i]``.

    Args:
        i: index into ``P_BER``.

    Returns:
        The newly created, disabled ``widgets.Button``.
    """
    p = P_BER[i]
    text = BUTTON_DESCRIPTION % (p, math.sqrt(p * (1 - p)))
    half_width = widgets.Layout(width='50%')
    return widgets.Button(description=text, layout=half_width, disabled=True)

# Selector for how many of the three distributions are drawn; its value is a
# string that the handler converts with int().
number_dist_ber = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Success probability input for each distribution.
p1_ber = define_input_ber('p1: ', P_BER[0])
p2_ber = define_input_ber('p2: ', P_BER[1])
p3_ber = define_input_ber('p3: ', P_BER[2])

# Read-only labels showing the mean and standard deviation of each one.
b1_ber = define_label_ber(0)
b2_ber = define_label_ber(1)
b3_ber = define_label_ber(2)

# Selector on top, then each input followed by its summary label.
layout_ber = widgets.VBox([number_dist_ber, p1_ber, b1_ber, p2_ber, b2_ber, p3_ber, b3_ber])

# This is just a suggestion on how to represent a Bernoulli distribution,
# even though, in my opinion, it does not make much sense to represent such
# a distribution with a plot
def on_change_ber(change):
    """Redraw the Bernoulli stem plot and refresh the mean/std labels.

    Widgets are read through ``wid_ber``, laid out as
    ``[selector, p1..p3, label1..label3]``.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Wipe the whole axes instead of only ``ax_ber.lines``: ``stem`` may draw
    # its vertical segments as a collection, which clearing the line list
    # would leave behind, and ``Axes.lines`` is read-only in recent
    # matplotlib releases. The axis limits are re-applied below.
    ax_ber.clear()

    # Updating information
    for i in range(3):
        p_widget = wid_ber[i+1]
        # Keep p inside [0, 1]: outside it p*(1-p) is negative and the
        # square root below would raise ValueError.
        p = min(max(p_widget.value, 0.0), 1.0)
        if p != p_widget.value:
            p_widget.value = p
        wid_ber[4+i].description = BUTTON_DESCRIPTION % (
            p,
            math.sqrt(p*(1 - p))
        )

    # Plotting itself: one stem at x=0 (failure) and one at x=1 (success).
    for i in range(int(number_dist_ber.value)):
        p = wid_ber[i+1].value
        markerline_ber, stemlines_ber, baseline_ber = ax_ber.stem(
            [0, 1], [1 - p, p],
            linefmt=COLORS_BER[i],
            markerfmt=COLORS_BER[i] + MARKERS_BER[i])
        plt.setp(baseline_ber, visible=False)

    ax_ber.set_xlim([-1, 2])
    ax_ber.set_ylim([0, 1])
    fig_ber.canvas.draw()
    
# Order matters: on_change_ber indexes this list as
# [selector, p1..p3, label1..label3].
wid_ber = [number_dist_ber, p1_ber, p2_ber, p3_ber, b1_ber, b2_ber, b3_ber]
# Only the inputs (first four entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_ber[:4]:
    _widget.observe(on_change_ber, names='value')
# Initial draw.
on_change_ber(None)

display(layout_ber)

### Geometric distribution

A Geometric distribution describes the waiting time required to obtain the first success in a series of independent and identically distributed Bernoulli trials.  

The mean of a geometric distribution is 1/p and the standard deviation is √((1-p)/p**2)

In [None]:
# Figure and axes that on_change_geo redraws on every widget change.
fig_geo, ax_geo = plt.subplots()
ax_geo.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_GEO = ['r', 'b', 'g']
P_GEO = [0.3, 0.5, 0.7] # Also used if any probability is set to zero
                    # We cannot set p=0 for this distribution

# Selector for how many of the three distributions are drawn.
number_dist_geo = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Success probability input for each distribution.
p1_geo = define_input_ber('p1: ', P_GEO[0])
p2_geo = define_input_ber('p2: ', P_GEO[1])
p3_geo = define_input_ber('p3: ', P_GEO[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with geometric statistics by on_change_geo on the first draw.
b1_geo = define_label_ber(0)
b2_geo = define_label_ber(1)
b3_geo = define_label_ber(2)

# Selector on top, then each input followed by its summary label.
layout_geo = widgets.VBox([number_dist_geo, p1_geo, b1_geo, p2_geo, b2_geo, p3_geo, b3_geo])

# The x-range of the plot is set between 0 and the mean + 
# 3 standard deviations of the distribution. If more than one distribution
# is displayed, we used the maximum range
def on_change_geo(change):
    """Redraw the geometric-distribution curves and refresh the labels.

    Widgets are read through ``wid_geo``, laid out as
    ``[selector, p1..p3, label1..label3]``. The x-range runs from 0 to the
    largest mean + 3 standard deviations among the displayed distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_geo.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_geo.relim()

    n_dist = int(number_dist_geo.value)

    # Updating labels and computing the range
    max_x_geo = 0
    for i in range(3):
        p_widget = wid_geo[i+1]
        # p must lie in (0, 1]: p == 0 divides by zero and p > 1 makes the
        # variance negative (ValueError in sqrt), so any invalid entry falls
        # back to its default.
        if not 0 < p_widget.value <= 1:
            p_widget.value = P_GEO[i]
        p = p_widget.value

        mean_geo = 1/p
        std_geo = math.sqrt((1 - p)/p**2)

        wid_geo[4+i].description = BUTTON_DESCRIPTION % (mean_geo, std_geo)

        if i < n_dist:
            max_x_geo = max(max_x_geo, mean_geo + 3*std_geo)

    # Plotting itself: the support starts at 1 (trial of the first success).
    x_geo = np.arange(1, max_x_geo)
    for i in range(n_dist):
        p_geo = wid_geo[i+1].value
        y_geo = p_geo*(1 - p_geo)**(x_geo - 1)
        ax_geo.plot(x_geo, y_geo, color=COLORS_GEO[i])

    ax_geo.set_xlim([0, max_x_geo])
    fig_geo.canvas.draw()
    
# Order matters: on_change_geo indexes this list as
# [selector, p1..p3, label1..label3].
wid_geo = [number_dist_geo, p1_geo, p2_geo, p3_geo, b1_geo, b2_geo, b3_geo]
# Only the inputs (first four entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_geo[:4]:
    _widget.observe(on_change_geo, names='value')
# Initial draw.
on_change_geo(None)

display(layout_geo)

### Binomial distribution

The binomial distribution describes the probability of obtaining exactly k successes out of n independent and identically distributed Bernoulli tests with a probability p of success.

The mean of the binomial distribution is np and the standard deviation is √(np(1-p))

In [None]:
# Figure and axes that on_change_bin redraws on every widget change.
fig_bin, ax_bin = plt.subplots()
ax_bin.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_BIN = ['r', 'b', 'g']
P_BIN = [0.3, 0.5, 0.7] # Also used if any probability is set to zero
                    # We cannot set p=0 for this distribution
# Initial number of trials; also the fallback when n is set to zero.
N_BIN = [20, 20, 20]

# Selector for how many of the three distributions are drawn.
number_dist_bin = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

def define_input_prob_bin(description, value):
    """Build a float text box, nominally for a probability.

    NOTE(review): ``FloatText`` is the unbounded text widget, so the
    ``min``/``max`` passed here are presumably not enforced -- confirm. The
    Beta and Gamma cells reuse this helper for shape parameters above 1, so
    it must keep accepting such values.

    Args:
        description: label shown next to the input box.
        value: initial number shown in the box.

    Returns:
        The newly created ``widgets.FloatText``.
    """
    options = dict(value=value, min=0, max=1, description=description)
    return widgets.FloatText(**options)

def define_input_natural_bin(description, value):
    """Build an integer text box, nominally for a non-negative count.

    NOTE(review): ``IntText`` is the unbounded text widget, so the ``min``
    passed here is presumably not enforced -- confirm. The handlers validate
    the entered value themselves.

    Args:
        description: label shown next to the input box.
        value: initial integer shown in the box.

    Returns:
        The newly created ``widgets.IntText``.
    """
    options = dict(value=value, min=0, description=description)
    return widgets.IntText(**options)

# Success probability input for each distribution.
p1_bin = define_input_prob_bin('p1: ', P_BIN[0])
p2_bin = define_input_prob_bin('p2: ', P_BIN[1])
p3_bin = define_input_prob_bin('p3: ', P_BIN[2])

# Number-of-trials input for each distribution.
n1_bin = define_input_natural_bin('n1: ', N_BIN[0])
n2_bin = define_input_natural_bin('n2: ', N_BIN[1])
n3_bin = define_input_natural_bin('n3: ', N_BIN[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with binomial statistics by on_change_bin on the first draw.
b1_bin = define_label_ber(0)
b2_bin = define_label_ber(1)
b3_bin = define_label_ber(2)

# Selector on top, then one (n, p) row followed by its label per distribution.
layout_bin = widgets.VBox([
            number_dist_bin, 
            widgets.HBox([n1_bin, p1_bin]), 
            b1_bin, 
            widgets.HBox([n2_bin, p2_bin]),
            b2_bin, 
            widgets.HBox([n3_bin, p3_bin]),
            b3_bin
])

# The x-range of the plot is set between 0 and the maximum value of n for
# the selected distributions
def on_change_bin(change):
    """Redraw the binomial curves and refresh the mean/std labels.

    Widgets are read through ``wid_bin``, laid out as
    ``[selector, p1..p3, n1..n3, label1..label3]``. The x-range runs from 0
    to the largest n among the displayed distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_bin.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_bin.relim()

    n_dist = int(number_dist_bin.value)

    # Validating input. All three rows are checked, not only the displayed
    # ones, because the labels below are computed for every row and a
    # negative n or a p above 1 would make the square root raise.
    for i in range(3):
        n_widget = wid_bin[4+i]
        p_widget = wid_bin[i+1]
        if n_widget.value <= 0:
            n_widget.value = N_BIN[i]
        if not 0 < p_widget.value <= 1:
            p_widget.value = P_BIN[i]

    # Computing the range
    max_x_bin = max(int(wid_bin[4+i].value) for i in range(n_dist))

    # Updating labels
    for i in range(3):
        n = wid_bin[4+i].value
        p = wid_bin[i+1].value
        mean_bin = float(n * p)
        std_bin = float(math.sqrt((1 - p)*p*n))

        wid_bin[7+i].description = BUTTON_DESCRIPTION % (mean_bin, std_bin)

    # Plotting itself
    x_bin = np.arange(0, max_x_bin + 1)
    for i in range(n_dist):
        p_bin = wid_bin[i+1].value
        n_bin = int(wid_bin[i+4].value)
        # Outside the support (k > n) the probability is zero; short-circuit
        # so that no negative power of (1 - p) is ever evaluated.
        y_bin = [
            choose(n_bin, int(k))*p_bin**int(k)*(1 - p_bin)**(n_bin - int(k))
            if k <= n_bin else 0.0
            for k in x_bin
        ]
        ax_bin.plot(x_bin, y_bin, color=COLORS_BIN[i])

    ax_bin.set_xlim([0, max_x_bin])
    fig_bin.canvas.draw()
    
# Order matters: on_change_bin indexes this list as
# [selector, p1..p3, n1..n3, label1..label3].
wid_bin = [number_dist_bin, p1_bin, p2_bin, p3_bin, n1_bin, n2_bin, n3_bin, b1_bin, b2_bin, b3_bin]
# Only the inputs (first seven entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_bin[:7]:
    _widget.observe(on_change_bin, names='value')
# Initial draw.
on_change_bin(None)

display(layout_bin)

### Negative binomial distribution

The negative binomial distribution is a generalisation of the geometric distribution that expresses the probability of obtaining the k-th success on the n-th trial, i.e. the last trial is a success and exactly k-1 successes occurred in the previous n-1 trials. The mean and the standard deviation are k/p and sqrt(k*(1-p)/p^2), respectively.

In [None]:
# Figure and axes that on_change_nbin redraws on every widget change.
fig_nbin, ax_nbin = plt.subplots()
ax_nbin.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_NBIN = ['r', 'b', 'g']
P_NBIN = [0.3, 0.5, 0.7] # Also used if any probability is set to zero
                    # We cannot set p=0 for this distribution
# Initial number of successes k; also the fallback when k is set to zero.
K_NBIN = [20, 20, 20]

# Selector for how many of the three distributions are drawn.
number_dist_nbin = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Success probability input for each distribution.
p1_nbin = define_input_prob_bin('p1: ', P_NBIN[0])
p2_nbin = define_input_prob_bin('p2: ', P_NBIN[1])
p3_nbin = define_input_prob_bin('p3: ', P_NBIN[2])

# Number-of-successes input for each distribution.
k1_nbin = define_input_natural_bin('k1: ', K_NBIN[0])
k2_nbin = define_input_natural_bin('k2: ', K_NBIN[1])
k3_nbin = define_input_natural_bin('k3: ', K_NBIN[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with negative binomial statistics by on_change_nbin.
b1_nbin = define_label_ber(0)
b2_nbin = define_label_ber(1)
b3_nbin = define_label_ber(2)

# Selector on top, then one (k, p) row followed by its label per distribution.
layout_nbin = widgets.VBox([
            number_dist_nbin, 
            widgets.HBox([k1_nbin, p1_nbin]), 
            b1_nbin, 
            widgets.HBox([k2_nbin, p2_nbin]),
            b2_nbin, 
            widgets.HBox([k3_nbin, p3_nbin]),
            b3_nbin
])

# The x-range of the plot is set between 0 and the mean + 3 standard
# deviations, using the maximum over the selected distributions
def on_change_nbin(change):
    """Redraw the negative binomial curves and refresh the mean/std labels.

    Widgets are read through ``wid_nbin``, laid out as
    ``[selector, p1..p3, k1..k3, label1..label3]``. The x-range runs from 0
    to the largest mean + 3 standard deviations among the displayed
    distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_nbin.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_nbin.relim()

    n_dist = int(number_dist_nbin.value)

    # Validating input. All three rows are checked, not only the displayed
    # ones, because the labels below are computed for every row and a
    # negative k or a p above 1 would make the square root raise.
    for i in range(3):
        k_widget = wid_nbin[4+i]
        p_widget = wid_nbin[i+1]
        if k_widget.value <= 0:
            k_widget.value = K_NBIN[i]
        if not 0 < p_widget.value <= 1:
            p_widget.value = P_NBIN[i]

    # Updating labels and computing the range
    max_x_nbin = 0
    for i in range(3):
        k = wid_nbin[4+i].value
        p = wid_nbin[i+1].value
        mean_nbin = float(k / p)
        # sqrt(k(1-p)/p^2): p is divided out once, outside the root. Dividing
        # by p**2 outside the root would overstate the spread.
        std_nbin = float(math.sqrt((1 - p)*k)/p)

        wid_nbin[7+i].description = BUTTON_DESCRIPTION % (mean_nbin, std_nbin)

        if i < n_dist:
            max_x_nbin = max(max_x_nbin, mean_nbin + 3*std_nbin)

    # Plotting itself: the k-th success cannot happen before trial k.
    for i in range(n_dist):
        p_nbin = wid_nbin[i+1].value
        k_nbin = wid_nbin[i+4].value
        x_nbin = np.arange(k_nbin, max_x_nbin + 1)
        y_nbin = [
            choose(int(n - 1), int(k_nbin - 1))*p_nbin**k_nbin*(1 - p_nbin)**(n - k_nbin)
            for n in x_nbin
        ]
        ax_nbin.plot(x_nbin, y_nbin, color=COLORS_NBIN[i])

    ax_nbin.set_xlim([0, max_x_nbin])
    fig_nbin.canvas.draw()
    
# Order matters: on_change_nbin indexes this list as
# [selector, p1..p3, k1..k3, label1..label3].
wid_nbin = [number_dist_nbin, p1_nbin, p2_nbin, p3_nbin, k1_nbin, k2_nbin, k3_nbin, b1_nbin, b2_nbin, b3_nbin]
# Only the inputs (first seven entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_nbin[:7]:
    _widget.observe(on_change_nbin, names='value')
# Initial draw.
on_change_nbin(None)

display(layout_nbin)

### Poisson distribution

The Poisson distribution expresses the probability of k events occurring in a fixed time interval when these events occur at an average rate of l (lambda) per interval, independently of the time since the last event. The mean and standard deviation of the Poisson distribution are l and sqrt(l), respectively.

In [None]:
# factorial is used by on_change_poi for the Poisson probability.
from scipy.special import factorial

# Figure and axes that on_change_poi redraws on every widget change.
fig_poi, ax_poi = plt.subplots()
ax_poi.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_POI = ['r', 'b', 'g']
# Initial rates (lambda); also the fallback when a rate is set to zero.
L_POI = [5, 10, 15] 

# Selector for how many of the three distributions are drawn.
number_dist_poi = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Rate input for each distribution (integer text boxes).
l1_poi = define_input_natural_bin('lambda1: ', L_POI[0])
l2_poi = define_input_natural_bin('lambda2: ', L_POI[1])
l3_poi = define_input_natural_bin('lambda3: ', L_POI[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with Poisson statistics by on_change_poi on the first draw.
b1_poi = define_label_ber(0)
b2_poi = define_label_ber(1)
b3_poi = define_label_ber(2)

# Selector on top, then each input followed by its summary label.
layout_poi = widgets.VBox([number_dist_poi, l1_poi, b1_poi, l2_poi, b2_poi, l3_poi, b3_poi])

# The x-range of the plot is set between 0 and the mean + 
# 3 standard deviations of the distribution. If more than one distribution
# is displayed, we used the maximum range
def on_change_poi(change):
    """Redraw the Poisson curves and refresh the mean/std labels.

    Widgets are read through ``wid_poi``, laid out as
    ``[selector, lambda1..lambda3, label1..label3]``. The x-range runs from 0
    to the largest mean + 3 standard deviations among the displayed
    distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_poi.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_poi.relim()

    n_dist = int(number_dist_poi.value)

    # Updating labels and computing the range
    max_x_poi = 0
    for i in range(3):
        # The rate must be positive; a negative one would make the square
        # root below raise ValueError.
        if wid_poi[i+1].value <= 0:
            wid_poi[i+1].value = L_POI[i]

        mean_poi = wid_poi[i+1].value
        std_poi = math.sqrt(mean_poi)

        wid_poi[4+i].description = BUTTON_DESCRIPTION % (mean_poi, std_poi)

        if i < n_dist:
            max_x_poi = max(max_x_poi, mean_poi + 3*std_poi)

    # Plotting itself. The support of the Poisson distribution starts at
    # k = 0 (no events), so the grid starts there too.
    x_poi = np.arange(0, max_x_poi)
    for i in range(n_dist):
        l_poi = wid_poi[i+1].value
        y_poi = l_poi**x_poi*math.exp(-l_poi)/factorial(x_poi)
        ax_poi.plot(x_poi, y_poi, color=COLORS_POI[i])

    ax_poi.set_xlim([0, max_x_poi])
    fig_poi.canvas.draw()
    
# Order matters: on_change_poi indexes this list as
# [selector, lambda1..lambda3, label1..label3].
wid_poi = [number_dist_poi, l1_poi, l2_poi, l3_poi, b1_poi, b2_poi, b3_poi]
# Only the inputs (first four entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_poi[:4]:
    _widget.observe(on_change_poi, names='value')
# Initial draw.
on_change_poi(None)

display(layout_poi)

### t-distribution

The t-distribution looks like the normal distribution, but its tails are thicker. The thickness of the tails is determined by the distribution's only parameter: the degrees of freedom (df). The higher the value of df, the closer the shape of the t-distribution is to that of the normal distribution. For df=30 and above, the t-distribution is practically indistinguishable from the normal distribution. This distribution is used in hypothesis testing when a normally distributed point estimate is involved but we do not have a large sample. The thicker tails of the distribution account for the less accurate estimate of the standard error in small samples.

In [None]:
# Figure and axes that on_change_t redraws on every widget change.
fig_t, ax_t = plt.subplots()
ax_t.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_T = ['r', 'b', 'g']
# Initial degrees of freedom; also the fallback when df <= 2 is entered.
DF_T = [5, 15, 30]
# Template for the read-only summary labels (typo "deviaton" fixed).
BUTTON_DESCRIPTION_T = 'standard deviation = %.2f'

# Selector for how many of the three distributions are drawn.
number_dist_t = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Degrees-of-freedom input for each distribution.
df1_t = define_input_natural_bin('df1: ', DF_T[0])
df2_t = define_input_natural_bin('df2: ', DF_T[1])
df3_t = define_input_natural_bin('df3: ', DF_T[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with the t-distribution's standard deviation by on_change_t.
b1_t = define_label_ber(0)
b2_t = define_label_ber(1)
b3_t = define_label_ber(2)

# Selector on top, then each input followed by its summary label.
layout_t = widgets.VBox([number_dist_t, df1_t, b1_t, df2_t, b2_t, df3_t, b3_t])

# The x-range of the plot is set between -3 standard deviations and 
# 3 standard deviations of the distribution (mean is always zero). If more than one distribution
# is displayed, we used the maximum range
def on_change_t(change):
    """Redraw the t-distribution curves and refresh the std labels.

    Widgets are read through ``wid_t``, laid out as
    ``[selector, df1..df3, label1..label3]``. The x-range is symmetric
    around zero and spans 3 standard deviations of the widest displayed
    distribution.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_t.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_t.relim()

    n_dist = int(number_dist_t.value)

    # Updating labels and computing the range
    max_x_t = 0
    for i in range(3):
        # The variance df/(df-2) only exists for df > 2.
        if wid_t[i+1].value <= 2:
            wid_t[i+1].value = DF_T[i]
        df = wid_t[i+1].value

        std_t = math.sqrt(df/float(df - 2))

        wid_t[4+i].description = BUTTON_DESCRIPTION_T % (std_t)

        if i < n_dist:
            max_x_t = max(max_x_t, 3*std_t)

    # Plotting itself: the PDF is computed manually for each curve.
    x_t = np.arange(-max_x_t, max_x_t, 0.01)
    for i in range(n_dist):
        df = wid_t[i+1].value
        # Normalising constant Gamma((df+1)/2) / (sqrt(pi*df) * Gamma(df/2)),
        # evaluated in log space: the individual gamma values overflow a
        # float for large df even though their ratio is small.
        log_norm = (math.lgamma((df + 1)/2.0)
                    - math.lgamma(df/2.0)
                    - 0.5*math.log(math.pi*df))
        y_t = math.exp(log_norm)*(1 + x_t**2/df)**(-(df + 1)/2.0)
        ax_t.plot(x_t, y_t, color=COLORS_T[i])
    ax_t.set_xlim([-max_x_t, max_x_t])
    fig_t.canvas.draw()
    
# Order matters: on_change_t indexes this list as
# [selector, df1..df3, label1..label3].
wid_t = [number_dist_t, df1_t, df2_t, df3_t, b1_t, b2_t, b3_t]
# Only the inputs (first four entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_t[:4]:
    _widget.observe(on_change_t, names='value')
# Initial draw.
on_change_t(None)

display(layout_t)


### F-distribution

The F-distribution is used to test the null hypothesis of the analysis of variances test (ANOVA). It has two parameters corresponding to the degrees of freedom of an ANOVA test.

In [None]:
# scipy.stats supplies the F-distribution PDF used by on_change_f.
import scipy.stats

# Figure and axes that on_change_f redraws on every widget change.
fig_f, ax_f = plt.subplots()
ax_f.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_F = ['r', 'b', 'g']
# Initial degrees of freedom (d1 and d2); also the fallbacks used by
# on_change_f when d1 <= 2 or d2 <= 4 is entered.
DF1_F = [7, 20, 100]
DF2_F = [5, 15, 100]

# Selector for how many of the three distributions are drawn.
number_dist_f = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# First degrees-of-freedom input (d1) for each distribution.
df1_1_f = define_input_natural_bin('df1_1: ', DF1_F[0])
df1_2_f = define_input_natural_bin('df1_2: ', DF1_F[1])
df1_3_f = define_input_natural_bin('df1_3: ', DF1_F[2])

# Second degrees-of-freedom input (d2) for each distribution.
df2_1_f = define_input_natural_bin('df2_1: ', DF2_F[0])
df2_2_f = define_input_natural_bin('df2_2: ', DF2_F[1])
df2_3_f = define_input_natural_bin('df2_3: ', DF2_F[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with F-distribution statistics by on_change_f.
b1_f = define_label_ber(0)
b2_f = define_label_ber(1)
b3_f = define_label_ber(2)

# Selector on top, then one (d1, d2) row followed by its label per distribution.
layout_f = widgets.VBox([
            number_dist_f, 
            widgets.HBox([df1_1_f, df2_1_f]), 
            b1_f, 
            widgets.HBox([df1_2_f, df2_2_f]), 
            b2_f, 
            widgets.HBox([df1_3_f, df2_3_f]), 
            b3_f,
])

# The x-range of the plot is set between 0 and 
# mean + 3 standard deviations of the distribution. If more than one distribution
# is displayed, we used the maximum range
def on_change_f(change):
    """Redraw the F-distribution curves and refresh the mean/std labels.

    Widgets are read through ``wid_f``, laid out as
    ``[selector, d1 inputs (3), d2 inputs (3), label1..label3]``. The x-range
    runs from 0 to the largest mean + 3 standard deviations among the
    displayed distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_f.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_f.relim()

    n_dist = int(number_dist_f.value)

    # Updating labels and computing the range
    max_x_f = 0
    for i in range(3):
        if wid_f[i+1].value <= 2:
            wid_f[i+1].value = DF1_F[i]
        # The variance formula below needs d2 > 4.
        if wid_f[i+4].value <= 4:
            wid_f[i+4].value = DF2_F[i]

        d1 = wid_f[i+1].value
        d2 = wid_f[i+4].value

        mean_f = d2/float(d2 - 2)
        std_f = math.sqrt((2*d2**2*(d1 + d2 - 2))/float(d1*(d2 - 2)**2*(d2 - 4)))

        wid_f[7+i].description = BUTTON_DESCRIPTION % (mean_f, std_f)

        if i < n_dist:
            max_x_f = max(max_x_f, mean_f + 3*std_f)

    # Plotting itself. The PDF comes from scipy due to the large numbers
    # obtained when using the gamma function directly.
    x_f = np.arange(0, max_x_f, 0.01)
    for i in range(n_dist):
        d1 = wid_f[i+1].value
        d2 = wid_f[i+4].value
        y_f = scipy.stats.f.pdf(x_f, d1, d2)
        ax_f.plot(x_f, y_f, color=COLORS_F[i])
    ax_f.set_xlim([0, max_x_f])
    fig_f.canvas.draw()
    
# Order matters: on_change_f indexes this list as
# [selector, d1 inputs (3), d2 inputs (3), label1..label3].
wid_f = [number_dist_f, df1_1_f, df1_2_f, df1_3_f, df2_1_f, df2_2_f, df2_3_f, b1_f, b2_f, b3_f]
# Only the inputs (first seven entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_f[:7]:
    _widget.observe(on_change_f, names='value')
# Initial draw.
on_change_f(None)

display(layout_f)


### Chi-square distribution

The Chi-square distribution is used to represent distributions and random variables that are right skewed and always positive. It is the distribution associated to the test statistic in hypothesis tests in which we try to evaluate whether the observed frequencies of groups in a sample reflect the expected frequencies of groups in the population. 

In [None]:
# NOTE(review): gamma does not appear to be used in the visible cells
# (the PDFs come from scipy.stats) -- confirm before removing.
from scipy.special import gamma

# Figure and axes that on_change_chi redraws on every widget change.
fig_chi, ax_chi = plt.subplots()
ax_chi.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_CHI = ['r', 'b', 'g']
# Initial degrees of freedom; also the fallback when df <= 1 is entered.
DF_CHI = [2, 5, 10]

# Selector for how many of the three distributions are drawn.
number_dist_chi = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Degrees-of-freedom input for each distribution.
df1_chi = define_input_natural_bin('df1: ', DF_CHI[0])
df2_chi = define_input_natural_bin('df2: ', DF_CHI[1])
df3_chi = define_input_natural_bin('df3: ', DF_CHI[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with chi-square statistics by on_change_chi.
b1_chi = define_label_ber(0)
b2_chi = define_label_ber(1)
b3_chi = define_label_ber(2)

# Selector on top, then each input followed by its summary label.
layout_chi = widgets.VBox([
            number_dist_chi, 
            df1_chi, 
            b1_chi, 
            df2_chi, 
            b2_chi, 
            df3_chi, 
            b3_chi,
])

# The x-range of the plot is set between 0 and 
# mean + 3 standard deviations of the distribution. If more than one distribution
# is displayed, we used the maximum range
def on_change_chi(change):
    """Redraw the chi-square curves and refresh the mean/std labels.

    Widgets are read through ``wid_chi``, laid out as
    ``[selector, df1..df3, label1..label3]``. The x-range runs from 0 to the
    largest mean + 3 standard deviations among the displayed distributions.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_chi.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_chi.relim()

    n_dist = int(number_dist_chi.value)

    # Updating labels and computing the range
    max_x_chi = 0
    for i in range(3):
        if wid_chi[i+1].value <= 1:
            wid_chi[i+1].value = DF_CHI[i]

        df = wid_chi[i+1].value

        # Chi-square with df degrees of freedom: mean df, variance 2*df.
        mean_chi = df
        std_chi = math.sqrt(2*df)

        wid_chi[4+i].description = BUTTON_DESCRIPTION % (mean_chi, std_chi)

        if i < n_dist:
            max_x_chi = max(max_x_chi, mean_chi + 3*std_chi)

    # Plotting itself. The chi-square PDF comes from scipy due to the large
    # numbers obtained when using the gamma function directly.
    x_chi = np.arange(0, max_x_chi, 0.01)
    for i in range(n_dist):
        df = wid_chi[i+1].value
        y_chi = scipy.stats.chi2.pdf(x_chi, df)
        ax_chi.plot(x_chi, y_chi, color=COLORS_CHI[i])
    ax_chi.set_xlim([0, max_x_chi])
    fig_chi.canvas.draw()
    
# Order matters: on_change_chi indexes this list as
# [selector, df1..df3, label1..label3].
wid_chi = [number_dist_chi, df1_chi, df2_chi, df3_chi, b1_chi, b2_chi, b3_chi]
# Only the inputs (first four entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_chi[:4]:
    _widget.observe(on_change_chi, names='value')
# Initial draw.
on_change_chi(None)

display(layout_chi)

### Beta distribution

The Beta distribution actually represents a whole family of distributions which depend on the value of two shape parameters a and b. It is of great importance in Bayesian analysis, since the Beta distribution is the conjugate prior of the Bernoulli likelihood. 

In [None]:
# Figure and axes that on_change_beta redraws on every widget change.
fig_beta, ax_beta = plt.subplots()
ax_beta.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_BETA = ['r', 'b', 'g']
# Initial shape parameters a and b; also the fallbacks used by
# on_change_beta when a non-positive value is entered.
A_BETA = [0.1, 5, 1]
B_BETA = [0.1, 1, 5]

# Selector for how many of the three distributions are drawn.
number_dist_beta = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Shape parameter a for each distribution (some defaults exceed 1).
a_1_beta = define_input_prob_bin('a_1: ', A_BETA[0])
a_2_beta = define_input_prob_bin('a_2: ', A_BETA[1])
a_3_beta = define_input_prob_bin('a_3: ', A_BETA[2])

# Shape parameter b for each distribution.
b_1_beta = define_input_prob_bin('b_1: ', B_BETA[0])
b_2_beta = define_input_prob_bin('b_2: ', B_BETA[1])
b_3_beta = define_input_prob_bin('b_3: ', B_BETA[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with Beta statistics by on_change_beta on the first draw.
b1_beta = define_label_ber(0)
b2_beta = define_label_ber(1)
b3_beta = define_label_ber(2)

# Selector on top, then one (a, b) row followed by its label per distribution.
layout_beta = widgets.VBox([
              number_dist_beta, 
              widgets.HBox([a_1_beta, b_1_beta]), 
              b1_beta, 
              widgets.HBox([a_2_beta, b_2_beta]), 
              b2_beta, 
              widgets.HBox([a_3_beta, b_3_beta]), 
              b3_beta,
])

def on_change_beta(change):
    """Redraw the Beta curves on [0, 1) and refresh the mean/std labels.

    Widgets are read through ``wid_beta``, laid out as
    ``[selector, a inputs (3), b inputs (3), label1..label3]``.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_beta.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_beta.relim()

    # Updating labels
    for i in range(3):
        # Both shape parameters must be positive.
        if wid_beta[i+1].value <= 0:
            wid_beta[i+1].value = A_BETA[i]
        if wid_beta[i+4].value <= 0:
            wid_beta[i+4].value = B_BETA[i]

        a = wid_beta[i+1].value
        b = wid_beta[i+4].value

        mean_beta = a/float(a + b)
        std_beta = math.sqrt(a * b / ((a + b) ** 2 * (a + b + 1)))

        wid_beta[7+i].description = BUTTON_DESCRIPTION % (mean_beta, std_beta)

    # Plotting itself. The PDF comes from scipy due to the large numbers
    # obtained when using the gamma function directly.
    x_beta = np.arange(0, 1, 0.01)
    for i in range(int(number_dist_beta.value)):
        a = wid_beta[i+1].value
        b = wid_beta[i+4].value
        y_beta = scipy.stats.beta.pdf(x_beta, a, b)
        ax_beta.plot(x_beta, y_beta, color=COLORS_BETA[i])
    ax_beta.set_xlim([0, 1])
    fig_beta.canvas.draw()
    
# Order matters: on_change_beta indexes this list as
# [selector, a inputs (3), b inputs (3), label1..label3].
wid_beta = [number_dist_beta, a_1_beta, a_2_beta, a_3_beta, b_1_beta, b_2_beta, b_3_beta, b1_beta, b2_beta, b3_beta]
# Only the inputs (first seven entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_beta[:7]:
    _widget.observe(on_change_beta, names='value')
# Initial draw.
on_change_beta(None)

display(layout_beta)


In [None]:
# Gamma distribution: interactive plot parameterised by a and b, where
# on_change_gamma passes a as the shape and 1/b as the scale.

# Figure and axes that on_change_gamma redraws on every widget change.
fig_gamma, ax_gamma = plt.subplots()
ax_gamma.grid(True)
plt.ion()
# One colour per curve; indexed by the position of the distribution (0..2).
COLORS_GAMMA = ['r', 'b', 'g']
# Initial a and b parameters; also the fallbacks used by on_change_gamma
# when a non-positive value is entered.
A_GAMMA = [1, 9, 7.5]
B_GAMMA = [0.5, 2, 1]

# Selector for how many of the three distributions are drawn.
number_dist_gamma = widgets.Dropdown(
    options=['1', '2', '3'],
    value='1',
    description='Number of distributions:',
)

# Parameter a for each distribution (defaults exceed 1).
a_1_gamma = define_input_prob_bin('a_1: ', A_GAMMA[0])
a_2_gamma = define_input_prob_bin('a_2: ', A_GAMMA[1])
a_3_gamma = define_input_prob_bin('a_3: ', A_GAMMA[2])

# Parameter b for each distribution.
b_1_gamma = define_input_prob_bin('b_1: ', B_GAMMA[0])
b_2_gamma = define_input_prob_bin('b_2: ', B_GAMMA[1])
b_3_gamma = define_input_prob_bin('b_3: ', B_GAMMA[2])

# Summary labels. They are created with the Bernoulli default text and are
# overwritten with Gamma statistics by on_change_gamma on the first draw.
b1_gamma = define_label_ber(0)
b2_gamma = define_label_ber(1)
b3_gamma = define_label_ber(2)

# Selector on top, then one (a, b) row followed by its label per distribution.
layout_gamma = widgets.VBox([
              number_dist_gamma, 
              widgets.HBox([a_1_gamma, b_1_gamma]), 
              b1_gamma,
              widgets.HBox([a_2_gamma, b_2_gamma]), 
              b2_gamma, 
              widgets.HBox([a_3_gamma, b_3_gamma]), 
              b3_gamma,
])

def on_change_gamma(change):
    """Redraw the Gamma curves on [0, 20) and refresh the mean/std labels.

    Widgets are read through ``wid_gamma``, laid out as
    ``[selector, a inputs (3), b inputs (3), label1..label3]``. ``a`` is
    passed to scipy as the shape and ``1/b`` as the scale.

    Args:
        change: change notification supplied by the widget ``observe``
            mechanism (``None`` on the initial call); it is not inspected.
    """
    # Remove the previously drawn curves. Each artist is detached through its
    # own remove() because ``Axes.lines`` is a read-only view in recent
    # matplotlib releases and no longer supports pop().
    for old_line in list(ax_gamma.lines):
        old_line.remove()
    # Forget the limits of the removed curves so the y-range can shrink again.
    ax_gamma.relim()

    # Updating labels
    for i in range(3):
        # Both parameters must be positive.
        if wid_gamma[i+1].value <= 0:
            wid_gamma[i+1].value = A_GAMMA[i]
        if wid_gamma[i+4].value <= 0:
            wid_gamma[i+4].value = B_GAMMA[i]

        a = wid_gamma[i+1].value
        b = wid_gamma[i+4].value

        mean_gamma = a / float(b)
        std_gamma = math.sqrt(a / float(b ** 2))

        wid_gamma[7+i].description = BUTTON_DESCRIPTION % (mean_gamma, std_gamma)

    # Plotting itself. The PDF comes from scipy due to the large numbers
    # obtained when using the gamma function directly.
    x_gamma = np.arange(0, 20, 0.01)
    for i in range(int(number_dist_gamma.value)):
        a = wid_gamma[i+1].value
        b = wid_gamma[i+4].value
        y_gamma = scipy.stats.gamma.pdf(x_gamma, a=a, scale=1 / float(b))
        ax_gamma.plot(x_gamma, y_gamma, color=COLORS_GAMMA[i])
    ax_gamma.set_xlim([0, 20])
    fig_gamma.canvas.draw()
    
# Order matters: on_change_gamma indexes this list as
# [selector, a inputs (3), b inputs (3), label1..label3].
wid_gamma = [number_dist_gamma, a_1_gamma, a_2_gamma, a_3_gamma, b_1_gamma, b_2_gamma, b_3_gamma, b1_gamma, b2_gamma, b3_gamma]
# Only the inputs (first seven entries) trigger a redraw, and only when their
# value changes; observing the labels would re-run the handler every time
# the handler itself rewrites a label.
for _widget in wid_gamma[:7]:
    _widget.observe(on_change_gamma, names='value')
# Initial draw.
on_change_gamma(None)

display(layout_gamma)