In [1]:
# Let's test the rendom number generator
# Divide the unit interval into K equal subintervals
# Run the generator N times and count the number 
# of numbers in each subinterval.  There should be
# about N/K in each.  This statement will be made
# more precise using the Chi-square statistic

In [2]:
from random import uniform
from math import floor

In [3]:
uniform(0,1)

0.07723453606538411

In [4]:
def run_experiment(n):
    output = []
    for i in list(range(0,n)):
        output.append(uniform(0,1))
    return output

In [5]:
data = run_experiment(10)
data

[0.23392621016937232,
 0.23949637220766962,
 0.654968673177244,
 0.8375871774953202,
 0.7379509700815392,
 0.14910454399870232,
 0.9670378519953056,
 0.40562672477534134,
 0.7906216393716077,
 0.546241922284476]

In [6]:
def bin_index(a,b,n,x):
    bin_width = (b-a)/n
    return min(floor((x-a)/bin_width),n-1)

list(map( lambda x: bin_index(0,1,10,x), data))

[2, 2, 6, 8, 7, 1, 9, 4, 7, 5]

In [7]:
def bins(data, a, b, n):
    bins = [0]*n
    for x in data:
        k = bin_index(a,b,n,x)
        bins[k] = bins[k] + 1
    return bins

In [8]:
bins([0, 0.1, 0.2, 0.3, 0.4 , 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], 0, 1, 10)

[1, 1, 2, 0, 1, 2, 1, 0, 1, 2]

In [9]:
bins(data, 0, 1, 10)

[0, 1, 2, 0, 1, 1, 1, 2, 1, 1]

In [10]:
data2 = run_experiment(1000)
bin_counts = bins(data2, 0, 1, 10)
bin_counts

[96, 93, 96, 115, 109, 103, 98, 96, 94, 100]

In [11]:
frequencies = list(map(lambda x: x/1000, bin_counts))
frequencies

[0.096, 0.093, 0.096, 0.115, 0.109, 0.103, 0.098, 0.096, 0.094, 0.1]

In [12]:
## Chi squared statistic
## Chi2(data) = sum of (O_i - E_i)^2/O_i
## 
## (1) Compute Ch2(data) using Python
## (2) What does it mean?
##     Obviously the bigger Chi2, the more "unexpected" the result
##     But this statement needs to be made precise
##
## Chi square table: https://faculty.elgin.edu/dkernler/statistics/ch09/chi-square-table.pdf

In [13]:
def chi2(observed, expected):
    for (o, e) in zip(observed, expected):
        print (o,e)
    

In [19]:
chi2([2,3,4], [3,3,3])

0.75

In [15]:
def chi2(observed, expected):
    output = 0
    for (o, e) in zip(observed, expected):
        output += (o - e)**2/o
    return output
    

In [21]:
print(bin_counts)

chi2(bin_counts, [100]*10)

[96, 93, 96, 115, 109, 103, 98, 96, 94, 100]


4.237696416327155