# Unit 7 - Example 1

Import of Python libraries

In [1]:
## imports
import math
import pandas as pd
from IPython.display import HTML, display


## The following code implements the feature that lets code be shown and hidden.
def toggle_code():
    """Display a "Toggle code" button in the output of the calling cell.

    The displayed HTML consists of a ``<script>`` element defining the
    JavaScript function ``code_toggle`` and a form whose submit button calls
    that function. ``code_toggle`` hides the input area of the currently
    selected code cell when it is visible and shows it otherwise.

    NOTE(review): the script relies on jQuery (``$``) and on the
    ``div.cell.code_cell.rendered.selected`` selector, so it presumably only
    works in front ends that provide both -- confirm before using it elsewhere.
    """
    button_markup = HTML('''
    <script>
    function code_toggle() {
        if ($('div.cell.code_cell.rendered.selected div.input').css('display')!='none'){
            $('div.cell.code_cell.rendered.selected div.input').hide();
        } else {
            $('div.cell.code_cell.rendered.selected div.input').show();
        }
    }
    </script>
    <form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Toggle code"></form>
    ''')
    display(button_markup)


# Show the toggle button for this cell.
toggle_code()

# Exercise

In a company, the number of overtime hours worked by the employees is to be estimated. Assume that the attribute X, "hours of overtime", follows a normal distribution in the population. A sample of size 41 yields a sample mean of 10 hours and a sample variance of 15 hours$^2$. The probability of confidence (confidence level) is set to 95%.  

Compute the confidence interval and visualize the result. Discuss how the confidence interval changes (given all other parameters are constant) if  
(i) the sample size increases  
(ii) the probability of confidence increases  
(iii) the sample standard deviation decreases.  

# Solution

**1. Determine the probability of confidence 1 - $\alpha$**

The probability of confidence is 1 - $\alpha$ = 0.95, i.e. $\alpha$ = 0.05

**2. Determine the degrees of freedom and the t-value**

The degrees of freedom are n - 1 = 41 - 1 = 40.  

The t-value for 40 degrees of freedom and a probability of confidence of 95% is 2.0211.

In [3]:
import math
from scipy.stats import t

In [4]:
# A two-sided confidence interval splits alpha = 0.05 evenly over both tails,
# so the critical value is the 1 - alpha / 2 = 1 - 0.025 = 0.975 quantile
# of the t-distribution with n - 1 = 40 degrees of freedom.
t_value = t.ppf(0.975, df=40)
print(t_value)

2.0210753829953374


**3. Computation of the sample mean $\bar{x}$ and the sample standard deviation s**

In [5]:
# Sample mean given in the exercise: 10 hours of overtime.
x_mean = 10
print(x_mean)

10


In [6]:
# Sample variance given in the exercise (15 hours^2); the sample standard
# deviation s is its square root.
x_variance = 15
x_std = math.sqrt(x_variance)
print(x_std)

3.872983346207417


**4. Computation of the estimated standard deviation of the sample mean (standard error) $\hat{\sigma}_{\bar{x}} = \frac{s}{\sqrt{n}}$**

In [7]:
# Standard error of the mean: s / sqrt(n) for the sample of size n = 41.
sample_size = 41
standard_error = x_std / math.sqrt(sample_size)
print(standard_error)

0.604858378909134


**5. Computation of the sampling error (margin of error) $\delta = t_{1-\alpha/2;\,n-1} \, \hat{\sigma}_{\bar{x}}$**

In [8]:
# Half-width of the confidence interval: critical t-value times standard error.
sample_error = t_value * standard_error
print(sample_error)

1.2224643798117167


**6. Construction of the confidence interval $CI = [\bar{x} - t_{1-\alpha/2;\,n-1} \, \hat{\sigma}_{\bar{x}} ;  \bar{x} + t_{1-\alpha/2;\,n-1} \, \hat{\sigma}_{\bar{x}}]$**

In [9]:
# The confidence interval is the sample mean plus/minus the sampling error.
lower_bound = x_mean - sample_error
upper_bound = x_mean + sample_error
# Use the built-in round() instead of the .round() method: the method only
# exists on NumPy scalars, so the cell would fail with an AttributeError if
# the bounds were plain Python floats.
print(round(lower_bound, 2), round(upper_bound, 2))

8.78 11.22


# Interactive Chart
## How the confidence interval changes when the parameters change

In [10]:
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t, norm
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual


%matplotlib inline


def make_plot(konfp=0.95, sigma=4, n=41):
    """Draw a two-sided confidence interval for the mean as a horizontal bar.

    The interval is centred on a fixed sample mean of 10 and is computed with
    the standard normal quantile:
    ``10 +/- z_(1 - alpha / 2) * sigma / sqrt(n)`` with ``alpha = 1 - konfp``.
    Both bounds are written next to the bar, rounded to two decimals.

    Parameters
    ----------
    konfp : float
        Probability of confidence; must lie strictly between 0 and 1.
    sigma : float
        Standard deviation used for the standard error; must not be negative.
    n : int
        Sample size; must be positive.

    Raises
    ------
    ValueError
        If ``konfp``, ``sigma`` or ``n`` is outside the range given above.
    """
    if not 0 < konfp < 1:
        raise ValueError('konfp must lie strictly between 0 and 1, got {}'.format(konfp))
    if sigma < 0:
        raise ValueError('sigma must not be negative, got {}'.format(sigma))
    if n <= 0:
        raise ValueError('n must be positive, got {}'.format(n))

    # Sample mean; fixed for this chart.
    loc = 10

    alpha = 1 - konfp
    z_value = norm.ppf(1 - alpha / 2)
    standard_error = sigma / math.sqrt(n)

    lower_bound = loc - z_value * standard_error
    upper_bound = loc + z_value * standard_error

    # The interval is drawn as a thick horizontal line at y = 1.
    x_values = np.linspace(lower_bound, upper_bound, 100)
    y_values = np.ones_like(x_values)

    fig, ax = plt.subplots(figsize=(14, 2))
    ax.plot(x_values, y_values, linewidth=4)

    ## x-axis
    ## The axis spans 0..20 as long as the interval fits. For wide intervals
    ## (e.g. sigma=10, n=5, konfp=0.99) it is widened so that the bar and its
    ## labels are not cut off at the axis limits.
    x_min = 0 if lower_bound >= 0 else math.floor(lower_bound) - 1
    x_max = 20 if upper_bound <= 20 else math.ceil(upper_bound) + 1
    ax.set_xlim(x_min, x_max)
    ax.set_xticks(np.arange(x_min, x_max, 1))

    ## y-axis
    ## Only a narrow band around the bar is shown; the y-axis carries no
    ## information, so ticks and tick labels are removed.
    ax.set_ylim(0.9, 1.1)
    ax.set_yticks([])
    ax.set_yticklabels([])

    ## Miscellaneous
    ## Only the second part of the title is passed through str.format(); the
    ## first part contains literal LaTeX braces.
    ax.set_title(r'Confidence interval with $\bar{x}=' + str(loc) + r'$,'
                 + r' $\sigma=${}, probability of confidence {}, sample size {}'.format(sigma, konfp, n),
                 size=16)
    ax.grid(True, axis='x')

    ## Label for the lower bound
    ## The label is never placed to the right of 8.98 -- presumably so that
    ## the two labels do not overlap for very narrow intervals.
    ax.annotate(
        '{:.2f}'.format(lower_bound),
        xy=(min(lower_bound, 8.98), 1.01),
        ha='center',
        size=14
    )

    ## Label for the upper bound
    ## The label is never placed to the left of 11.0 (see lower bound).
    ax.annotate(
        '{:.2f}'.format(upper_bound),
        xy=(max(upper_bound, 11.0), 1.01),
        ha='center',
        size=14
    )

toggle_code()

In [13]:
# 'initial' lets each description take its natural width; with the default
# label width long descriptions such as 'Probability of confidence' are
# truncated in the rendered widget.
_ci_slider_style = {'description_width': 'initial'}

interactive(
    make_plot,
    konfp=widgets.FloatSlider(value=0.9, min=0.9, max=0.99, step=0.01,
                              continuous_update=False, description='Probability of confidence',
                              style=_ci_slider_style),
    sigma=widgets.IntSlider(value=4, min=1, max=10, step=1,
                            continuous_update=False, description='Sigma',
                            style=_ci_slider_style),
    n=widgets.IntSlider(value=41, min=5, max=60, step=5,
                        continuous_update=False, description='Sample size',
                        style=_ci_slider_style)
)

interactive(children=(FloatSlider(value=0.9, continuous_update=False, description='Probability of confidence',…

# Interactive Chart
## Comparison of the t-distribution and the standard normal distribution depending on the degrees of freedom

In [19]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
from scipy.stats import norm
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual


%matplotlib inline


def make_plot(df=5):
    """Plot the standard normal density together with a t-distribution density.

    Both densities are evaluated on 300 evenly spaced points in [-5, 5] and
    drawn into one figure with a legend.

    NOTE(review): this definition reuses the name ``make_plot`` and therefore
    replaces the confidence interval chart function defined earlier in this
    notebook; re-running the earlier ``interactive`` cell afterwards would
    call this function instead.

    Parameters
    ----------
    df : int or float
        Degrees of freedom of the t-distribution.
    """
    x_values = np.linspace(-5, 5, 300)
    # Evaluate each density once on the whole array instead of once per point.
    y_values_norm = norm.pdf(x_values, loc=0, scale=1)
    y_values_t = t.pdf(x_values, df, loc=0, scale=1)

    fig, ax = plt.subplots(figsize=(14, 8))

    ax.plot(x_values, y_values_norm, label="Normal distribution")
    ax.plot(x_values, y_values_t, label="t-distribution, degrees of freedom {}".format(df))

    ax.set_title('Comparison of the density function of the standard normal distribution and the t-distribution', size=16)
    ax.grid(True, axis='both')
    ax.legend()

toggle_code()

In [20]:
# description_width 'initial' keeps the label 'Degrees of freedom' from being
# truncated at the default label width.
interactive(
    make_plot,
    df=widgets.IntSlider(value=5, min=5, max=50, step=1, continuous_update=False,
                         description='Degrees of freedom',
                         style={'description_width': 'initial'}))

interactive(children=(IntSlider(value=5, continuous_update=False, description='Degrees of freedom', max=50, mi…

Copyright © 2020 IUBH Internationale Hochschule