# Content and Objective

+ Show result of chi-square test for uniform distribution:
    + Sample uniformly distributed values in {1, ..., N_random}
    + Get the chi-square test statistic and compare it to the corresponding chi-square quantile
    + You may use either actual random samples or the example from the lecture

# Import

In [1]:
# importing
import numpy as np
import scipy.signal as signal
import scipy.stats as stats
import scipy as sp

import matplotlib.pyplot as plt
import matplotlib

from ipywidgets import interactive
import ipywidgets as widgets

# showing figures inline
%matplotlib inline

In [2]:
# global plotting defaults: large font, LaTeX rendering of text, wide figures
font = dict( size = 30 )

plt.rcParams.update( {
    'font.size'      : font[ 'size' ],
    'text.usetex'    : True,
    'figure.figsize' : ( 24, 12 ),
} )

# Here we go

In [3]:
# size of the sample space, i.e., number of classes
# e.g., 6 for a die, 2 for a coin, etc.
N_random = 6

# pmf to be tested as hypothesis H_0 (here: uniform on all N_random classes)
# You may also define a different pmf if you like to
p = np.full( N_random, 1.0 / N_random )

In [4]:
# define number of observations
N = 100

# simulate N draws out of the classes 1, ..., N_random according to pmf p
sample = np.random.choice( range( 1, N_random + 1 ), size = N, p = p )

# absolute frequency (integer count) of every class
# NOTE: classes are numbered 1, ..., N_random whereas indexing in Python starts at 0,
#       so _n + 1 has to be used
h = np.array( [ np.sum( sample == _n + 1 ) for _n in range( N_random ) ] )

###
# set this flag to True if you want to have the (deterministic) example of
# [K. Bosch: Elementare Einführung in die angewandte Statistik, Vieweg];
# set it to False in order to evaluate the random sample drawn above
###
use_lecture_example = True

if use_lecture_example:
    N = 120
    h = np.array( [ 30, 25, 18, 10, 22, 15 ] )

    # the example consists of 6 classes; with another number of classes the comparison
    # of h against N * p would either fail or (for a single class) broadcast silently
    if len( h ) != N_random:
        raise ValueError(
            'Lecture example has {} classes, but N_random = {}'.format( len( h ), N_random )
        )


print( h )

[30 25 18 10 22 15]


In [5]:
# chi-square test statistic: squared deviation of the observed counts from the
# counts expected under H_0, normalized by the expected counts and summed over all classes
expected_counts = N * p
t = np.sum( ( h - expected_counts )**2 / expected_counts )

print( 'Test statistic for {} trials: \t{}'.format( N, t ) )

Test statistic for 120 trials: 	12.899999999999999


In [6]:
# niveau (significance level) alpha of the test
alpha = 0.1

# threshold of the test: ( 1 - alpha )-quantile of the chi-square distribution
# with N_random - 1 degrees of freedom
chi2 = stats.chi2.ppf( 1 - alpha, df = N_random - 1 )

print( 'Value of chi square quantile: {}'.format( chi2 ) )

Value of chi square quantile: 9.236356899781123


In [7]:
# H_0 is declined if the test statistic exceeds the chi-square quantile
if t > chi2:
    test_result = 'H_0 declined'
else:
    test_result = 'H_0 not declined'

# collect all lines of the report first; empty strings yield the blank lines
report = [
    'Results of test:',
    '----------------\n',
    'Hypothesis: \t\t{}'.format( p ),
    'Observations: \t\t{}'.format( h ),
    '',
    'Empirical value: \t{}'.format( t ),
    'Chi square quantile: \t{}'.format( chi2 ),
    '',
    'Niveau: \t\t{}'.format( alpha),
    'Test result: \t\t' + test_result + ' with respect to error probability {}'.format( alpha),
]

print( '\n'.join( report ) )



Results of test:
----------------

Hypothesis: 		[0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667]
Observations: 		[30 25 18 10 22 15]

Empirical value: 	12.899999999999999
Chi square quantile: 	9.236356899781123

Niveau: 		0.1
Test result: 		H_0 declined with respect to error probability 0.1


# Now some graphical illustration

In [8]:
# define pdf of chi2 distribution

def get_pdf_under_H_0( x, degrees_of_freedom ):
    '''
    Evaluate the pdf of a chi-square distribution.

    IN:  x, point(s) at which the pdf is evaluated (handed over to scipy.stats.chi2.pdf)
         degrees_of_freedom, degrees of freedom of the chi-square distribution
    OUT: value(s) of the chi2( degrees_of_freedom ) pdf at x
    '''

    return stats.chi2.pdf( x, degrees_of_freedom )

In [9]:
# get index of the bound ensuring that the test has niveau alpha
# to that end, values of the pdf are integrated from the upper end of x towards 0, as long as P( H_1 | H_0 ) < alpha
#
# Note: integration is performed numerically, starting from the largest value in x (5 * degrees_of_freedom in show_test)
#       in case very precise results are required, increasing the upper limit or finer sampling of x might be necessary

def find_index_of_bound_in_test( x, degrees_of_freedom, alpha ):
    '''
    Find the index in x bounding the critical region of the chi-square test at niveau alpha.

    The chi-square pdf is summed up numerically (rectangle rule), starting at the last
    sample of x and stepping towards x[0], until the accumulated probability exceeds alpha.

    IN:  x, equidistant sampling points (at least two, since the spacing is taken as x[1] - x[0])
         degrees_of_freedom, degrees of freedom of the chi-square distribution
         alpha, niveau of the test
    OUT: index k = i + 2, with i being the index of the sample whose contribution first
         pushed the accumulated probability above alpha; hence, the samples with
         index >= k - 1 carry an accumulated probability of at most alpha.
         If alpha is never exceeded, 2 is returned.

    NOTE: k may be as large as len( x ) + 1 (if already the last sample exceeds alpha),
          so the range has to be checked before x is indexed with k.
    '''

    # get pdf of chi2
    pdf = get_pdf_under_H_0( x, degrees_of_freedom )

    # spacing of the sampling points, used as width of the rectangles
    delta_x = x[1] - x[0]

    # probability accumulated so far (not named "sum" in order not to shadow the builtin)
    tail_probability = 0.0

    # step from upper end of x towards 0 until niveau alpha is violated
    for _n in range( len(x), 0, -1 ):

        tail_probability += pdf[ _n - 1 ] * delta_x
        if tail_probability > alpha:
            break

    return _n + 1

In [10]:

# interactive plotting function
def show_test( N_random, alpha ):
    '''
    Plot the chi-square pdf and highlight the region K_1 obtained for niveau alpha.

    IN:  N_random, number of classes; the pdf has N_random - 1 degrees of freedom
         alpha, niveau of the test
    OUT: None; the bound and the numerically obtained error probability are printed
         and a figure is drawn

    NOTE: the global matplotlib settings for font size and usetex are changed by this function.
    '''

    # numerical values for sampling pdf
    delta_x = 0.1
    x = np.arange( 0, 5 * ( N_random-1), delta_x )

    # lower bound for K_1
    k = find_index_of_bound_in_test( x, N_random-1, alpha )

    # get pdf under H_0
    pdf = get_pdf_under_H_0( x, N_random - 1 )

    # part of the pdf drawn for K_0: everything from index k onwards is removed
    pdf_K_0 = np.copy( pdf )
    pdf_K_0[ k: ] = 0

    # part of the pdf drawn for K_1: everything below index k - 1 is removed
    pdf_K_1 = np.copy( pdf )
    pdf_K_1[ : k-1 ] = 0

    # k may point beyond the sampled range (e.g., for alpha = 0, where already the last
    # sample exceeds alpha); in this case no bound exists within x
    # NOTE(review): K_1 as integrated below starts at x[ k - 1 ] while x[ k ] is reported - confirm intent
    if k < len( x ):
        print( 'Min. required t:\t\t{}'.format( x[ k ] ) )
    else:
        print( 'Min. required t:\t\t> {}'.format( x[ -1 ] ) )
    print( 'Actual prob. error 1. kind:\t{}'.format( np.sum( delta_x * pdf_K_1 )) )


    # figure showing both parts of the pdf; smaller font and no LaTeX rendering from here on
    plt.figure( figsize=( 10, 4 ) )
    font = {'size'   : 14}
    plt.rc('font', **font)
    plt.rc('text', usetex=False)


    # K_0 in default color, K_1 in red
    plt.fill_between( x, pdf_K_0 )
    plt.fill_between( x, pdf_K_1, color='red' )

    plt.xlabel(r'$x$',fontsize=14)
    #plt.ylabel(r'$P_{H_0}(k)$',fontsize=14)
    plt.grid( True )


# sliders for number of classes and niveau; show_test is called again whenever a slider is released
interactive_update = interactive( 
    show_test, 
    N_random = widgets.IntSlider(
            min = 5, max = 100, step = 1, value = 10, 
            continuous_update = False, 
            description = 'Number of classes r', 
            style={'description_width': 'initial'}, 
            layout=widgets.Layout(width='50%'),
            ),
    alpha = widgets.FloatSlider(
            # alpha = 0 is excluded (smallest value is one step): for alpha = 0 no bound
            # exists within the sampled range of the pdf and show_test fails when indexing x
            min = 0.001, max = 0.5, step = 0.001, value = 0.05, 
            continuous_update = False, 
            description = 'niveau alpha', 
            style={'description_width': 'initial'}, 
            layout=widgets.Layout(width='50%'),
            )
    )


# last child of the interactive widget is its output area; give it a fixed height
output = interactive_update.children[-1]
output.layout.height = '500px'
interactive_update

interactive(children=(IntSlider(value=10, continuous_update=False, description='Number of classes r', layout=L…