# Unit 7 - Example 3

Import of Python libraries

In [1]:
## imports
import math
import pandas as pd
from IPython.display import HTML, display


## The following code implements the feature that lets code cells be shown and hidden.
def toggle_code():
    """Render a "Toggle code" button in the output of the calling cell.

    The emitted HTML defines a JavaScript function ``code_toggle`` and a form
    whose submit button invokes it. The script hides the input area of the
    currently selected, rendered code cell when it is visible and shows it
    again otherwise.

    NOTE(review): the script relies on a global jQuery ``$`` and on the
    ``div.cell.code_cell`` DOM layout; presumably this targets the classic
    Notebook front end - confirm before using it elsewhere.
    """
    button_markup = HTML('''
    <script>
    function code_toggle() {
        if ($('div.cell.code_cell.rendered.selected div.input').css('display')!='none'){
            $('div.cell.code_cell.rendered.selected div.input').hide();
        } else {
            $('div.cell.code_cell.rendered.selected div.input').show();
        }
    }
    </script>
    <form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Toggle code"></form>
    ''')
    display(button_markup)


toggle_code()

# Exercise

In some circumstances students have to pass a language test as part of the accreditation process for German universities. The attribute X "Points in the language test" is normally distributed. A sample yielded the following results:  

66, 38, 73, 64, 85, 58, 32, 42, 77 and 45.  

Determine the point estimators for the expectation and the standard deviation of the attribute in the population. With these values construct the 95% confidence interval for the expectation in the population.  
Illustrate the difference between the two estimation procedures.

# Solution

**1. Determining the probability of confidence 1 - $\alpha$**

The probability of confidence is 1 - $\alpha$ = 0.95, i.e. $\alpha$ = 0.05

**2. Determine the degrees of freedom and the t-value**

The degrees of freedom are n - 1 = 10 - 1 = 9.  

The t-value for 9 degrees of freedom and a probability of confidence of 95% is 2.262.

In [2]:
import math
from scipy.stats import t

In [3]:
# A two-sided interval splits alpha evenly between both tails, so the quantile
# to look up is 1 - alpha / 2 = 1 - 0.05 / 2 = 0.975.
quantile_level = 0.975
degrees_of_freedom = 9  # n - 1 = 10 - 1
t_value = t.ppf(quantile_level, degrees_of_freedom)
print(t_value)

2.2621571627409915


**3.1 Point estimator for the expectation $\hat{\mu}$**
$$ \hat{\mu} = \bar{x} = \frac{1}{n} \sum_{t=1}^n x_t $$

In [5]:
# Keep the observations in one list so that the sample size is derived from
# the data instead of being maintained by hand next to it.
sample = [66, 38, 73, 64, 85, 58, 32, 42, 77, 45]
n = len(sample)

# Sample mean = point estimator for the expectation.
x_quer = sum(sample) / n
print(x_quer)

58.0


**3.2 Point estimator for the variance $\sigma^2$ and standard deviation $\sigma$**  

$$ \hat{\sigma}^2 = s^2 = \frac{1}{n - 1} \sum_{t=1}^n (x_t - \bar{x})^2 $$  
$$ \hat{\sigma} = s = \sqrt{s^2} $$

In [6]:
# Sample variance: squared deviations from the sample mean, summed and
# divided by n - 1; the standard deviation is its square root.
observations = (66, 38, 73, 64, 85, 58, 32, 42, 77, 45)
squared_deviations = [(value - x_quer) ** 2 for value in observations]
s_2 = sum(squared_deviations) / (n - 1)
s = math.sqrt(s_2)
print(s)

18.0


**4. Computation of the estimated standard deviation of the sample mean $\hat{\sigma}_{\bar{x}} = \frac{s}{\sqrt{n}}$**

In [7]:
# Estimated standard deviation of the sample mean: s / sqrt(n).
sqrt_n = math.sqrt(n)
standard_error = s / sqrt_n
print(standard_error)

5.692099788303082


**5. Computation of the sample error $\delta = t_{1-\alpha/2} \, \hat{\sigma}_{\bar{x}}$**

In [8]:
# Half-width of the confidence interval: the standard error scaled by the
# t-value computed above.
sample_error = standard_error * t_value
print(sample_error)

12.8764243071463


**6. Construction of the confidence interval $CI = [\bar{x} - t_{1-\alpha/2} \hat{\sigma}_{\bar{x}} ;  \bar{x} + t_{1-\alpha/2} \hat{\sigma}_{\bar{x}}]$**

In [9]:
# The interval is centred on the sample mean and extends one sample error to
# each side.
lower_bound = x_quer - sample_error
upper_bound = x_quer + sample_error

# Built-in round() works for plain Python floats as well as NumPy scalars,
# whereas the .round() method only exists on NumPy types.
print(round(lower_bound, 2), round(upper_bound, 2))

45.12 70.88


# Interactive Chart
## Changes of the confidence interval if the parameters change

In [18]:
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t, norm
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual


%matplotlib inline


def make_plot(konfp=0.95, sigma=18, n=10):
    """Draw the two-sided confidence interval around the mean 58 as a bar.

    Parameters
    ----------
    konfp : float
        Probability of confidence 1 - alpha (for example 0.95).
    sigma : float
        Standard deviation used to compute the standard error.
    n : int
        Sample size; the t quantile is taken with n - 1 degrees of freedom.

    The function creates a new matplotlib figure and returns nothing.
    """
    loc = 58  # centre of the interval (shown as x-bar in the title)

    alpha = 1 - konfp
    # Two-sided interval: alpha is split evenly between both tails.
    t_quantile = t.ppf(1 - alpha / 2, n - 1)
    standard_error = sigma / math.sqrt(n)
    margin = t_quantile * standard_error

    lower_bound = loc - margin
    upper_bound = loc + margin

    # The interval is drawn as a thick horizontal line at y = 1.
    x_values = np.linspace(lower_bound, upper_bound, 100)
    y_values = np.ones_like(x_values)

    fig, ax = plt.subplots(figsize=(14, 2))
    ax.plot(x_values, y_values, linewidth=4)

    ## x axis
    ax.set_xlim(0, 100)
    ax.set_xticks(np.arange(0, 100, 5))

    ## y axis (carries no information, so ticks are hidden)
    ax.set_ylim(0.9, 1.1)
    ax.set_yticks([])

    ## Title and grid
    # .format() is applied to the second string only; the first one contains
    # the literal braces of \bar{x} and must not be passed through format().
    ax.set_title(r'Confidence interval with $\bar{x}=58$,'
                 + r' $\sigma=${}, probability of confidence {}, sample size {}'.format(sigma, konfp, n),
                 size=16)
    ax.grid(True, axis='x')

    ## Label for the lower bound
    # The x position is clamped to at most 54.3, presumably so that the two
    # labels do not overlap around the mean when the interval is narrow.
    ax.annotate(
        '{:.2f}'.format(lower_bound),
        xy=(min(lower_bound, 54.3), 1.01),
        ha='center',
        size=14
    )

    ## Label for the upper bound
    # Clamped to at least 61.6. The original read `61,6` (decimal comma), which
    # passed 61 and 6 as two separate arguments to max().
    ax.annotate(
        '{:.2f}'.format(upper_bound),
        xy=(max(upper_bound, 61.6), 1.01),
        ha='center',
        size=14
    )


toggle_code()

In [14]:
# Long slider labels such as 'Probability of confidence' are cut off at the
# default label width; 'initial' lets the label take the width of its text.
slider_style = {'description_width': 'initial'}

interactive(
    make_plot,
    konfp=widgets.FloatSlider(value=0.95, min=0.9, max=0.99, step=0.01,
                              continuous_update=False, description='Probability of confidence',
                              style=slider_style),
    sigma=widgets.IntSlider(value=18, min=1, max=30, step=1,
                            continuous_update=False, description='Sigma',
                            style=slider_style),
    n=widgets.IntSlider(value=10, min=5, max=20, step=1,
                        continuous_update=False, description='Sample size',
                        style=slider_style)
)

interactive(children=(FloatSlider(value=0.95, continuous_update=False, description='Probability of confidence'…

# Interactive Chart
## Comparison of the t-distribution and the standard normal distribution depending on the degrees of freedom

If the standard deviation of the population is unknown and has to be estimated from the sample, we use the t-distribution to determine the t-value for the confidence interval.  
If the standard deviation of the population is known, we can use the normal distribution.  
For a sample size of about n = 30 or above, the t-distribution is so close to the standard normal distribution that the latter can be used as an approximation.

In [15]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
from scipy.stats import norm
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual


%matplotlib inline


# NOTE(review): this redefines make_plot from the confidence-interval chart
# earlier in the notebook; after this cell has run, re-executing the earlier
# interactive() cell would call this version instead.
def make_plot(df=5):
    """Plot the standard normal density next to a t density.

    Parameters
    ----------
    df : int
        Degrees of freedom of the t-distribution to draw.

    The function creates a new matplotlib figure and returns nothing.
    """
    x_values = np.linspace(-5, 5, 300)
    # pdf() accepts arrays, so each curve is evaluated in one vectorised call.
    y_values_norm = norm.pdf(x_values, loc=0, scale=1)
    y_values_t = t.pdf(x_values, df, loc=0, scale=1)

    fig, ax = plt.subplots(figsize=(14, 8))

    ax.plot(x_values, y_values_norm, label="Normal distribution")
    ax.plot(x_values, y_values_t, label="t-distribution, degrees of freedom {}".format(df))

    ax.set_title('Comparison of the density function of the standard normal distribution and the t-distribution', size=16)
    ax.grid(True, axis='both')
    ax.legend()


toggle_code()

In [17]:
# 'Degrees of freedom' is longer than the default label width and would be
# cut off; 'initial' lets the label take the width of its text.
interactive(
    make_plot,
    df=widgets.IntSlider(value=5, min=5, max=50, step=1, continuous_update=False,
                         description='Degrees of freedom',
                         style={'description_width': 'initial'}))

interactive(children=(IntSlider(value=5, continuous_update=False, description='Degrees of freedom', max=50, mi…