# 02443 - Computer Exercise 1: Generation and Testing of Random Numbers

In [1]:
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_dark"

import numpy as np
import pandas as pd
from scipy.stats import chi2, norm
from scipy.special import kolmogorov

## Part 1 - Linear Congruential Generator (LCG)

In [2]:
def LCG(a:int, c:int, M:int, x0:int, n:int, as_int=False):
    """
    LCG generates a list of random numbers using the Linear Congruent Generation method.
    """
    # Preallocate and initialize array for pseudorandom numbers
    X = np.zeros(n+1, dtype=int)
    X[0] = x0

    # Generate pseudorandom numbers
    for i in range(1, n+1):
        X[i] = (a*X[i-1] + c) % M
    
    if as_int:
        return X
    else:
        return X/M

Using example from slide 13 in lecture 2 to verify implementation.

In [3]:
M = 16; a = 5; c = 1; x0 = 3
print(LCG(5, 1, 16, 3, 16, as_int=True))

[ 3  0  1  6 15 12 13  2 11  8  9 14  7  4  5 10  3]


In [4]:
# Histogram
def plot_histogram(U, title="", n_bins=10): 
    counts, bins = np.histogram(U, bins=n_bins)
    bins = 0.5 * (bins[:-1] + bins[1:])
    fig = px.bar(x=bins, y=counts)
    fig.update_layout(
        title_text=title,
        xaxis_title="Value",
        yaxis_title="Count",
        width=600,
        height=400,
        bargap=0.1,
    )
    fig.show()

# Correlation plot
def plot_correlation(U, title="Correlation plot of consequtive numbers."):
    n = len(U)
    fig = go.Figure(data=go.Scatter(x=U[:n-1], y=U[1:n], mode='markers', marker=dict(size=2, color='blue')))
    fig.update_layout(
        title=title,
        xaxis_title=r"$U_{i-1}$",
        yaxis_title=r"$U_{i}$",
        width=600,
        height=600,
    )
    fig.show()

### (a) Generating 10000 pseudorandom numbers
To ensure full cycle length, the parameters of the RNG is chosen according to the criteria in slide 14 of lecture 2.

In [5]:
a = 257     # Prime
c = 659     # Prime
M = 65536   # 2^16
x0 = 69     # 3*23
n = 10000   # Number of samples

# Generate pseudorandom numbers
U = LCG(a, c, M, x0, n)

# Plot histogram
plot_histogram(U, title="Histogram of LCG random numbers.<br>a = {}, c = {}, M = {}, x_0 = {}, n = {}".format(a, c, M, x0, n))

# Plot correlation
plot_correlation(U, title="Correlation plot of LCG random numbers.<br>a = {}, c = {}, M = {}, x_0 = {}, n = {}".format(a, c, M, x0, n))

Considering the histogram, the numbers appear to be fairly uniformly distributed. The scatterplot clearly shows that there some underlying structure of these pseudorandom numbers.

## (b) RNG evaluation

**Functions for testing distribution of random numbers**

In [6]:
def chi_sq_test(U, n_classes=10):
    
    # Compute expected number of observations in each class
    n_expected = len(U) / n_classes
    
    # Count number of observations in each class
    n_obs, _ = np.histogram(U, bins=n_classes)

    # Compute test statistic
    T_obs = np.abs(np.sum((n_obs - n_expected)**2 / n_expected))

    # Compute p-value
    df = n_classes-1 # when number of estimated parameters is m=1
    p = 1 - chi2.cdf(T_obs, df)
    
    return T_obs, p

In [7]:
def kolmogorov_smirnov_test(U):

    # Get number of observations
    n = len(U)

    # Setup expected values of F
    F_exp = np.linspace(0, 1, n+1)[1:]

    # Compute test statistic
    Dn = max(abs(F_exp-np.sort(U)))

    # Compute p-value
    p = kolmogorov(Dn)

    return Dn, p

**Runtests for testing independence of random numbers**

In [8]:
def above_below_runtest1(U):

    median = np.median(U)
    n1 = np.sum(U < median)
    n2 = np.sum(median < U)

    # Compute total number of observed runs
    temp = U > median
    T_obs = sum(temp[1:] ^ temp[:-1])

    # Compute p-value
    mean = 2*n1*n2/(n1 + n2) + 1
    log_expr = np.log(2) + np.log(n1) + np.log(n2) + np.log(2*n1*n2 - n1 - n2) - 2*np.log(n1 + n2) - np.log(n1 + n2 - 1)
    var = np.exp(log_expr)
    Z_obs = (T_obs - mean) / np.sqrt(var)
    p = 2 * (1 - norm.cdf(np.abs(Z_obs)))

    return T_obs, p

In [9]:
def up_down_runtest2(U):

    n = len(U)

    # Get indeces where runs change (Append -1 and n-1 at ends to handle first and last run)
    idx = np.concatenate(([-1], np.where(U[1:]-U[:-1] < 0)[0], [len(U)-1]))

    # Compute run lengths and count them (clamp to 6)
    run_lengths = np.clip(idx[1:] - idx[:-1], 1, 6)
    R = np.array([np.count_nonzero(run_lengths == i) for i in range(1, 7)])

    # Compute test statistic
    A = np.array([
        [4529.4, 9044.9, 13568, 18091, 22615, 27892],
        [9044.9, 18097, 27139, 36187, 45234, 55789],
        [13568, 27139, 40721, 54281, 67852, 83685],
        [18091, 36187, 54281, 72414, 90470, 111580],
        [22615, 45234, 67852, 90470, 113262, 139476],
        [27892, 55789, 83685, 111580, 139476, 172860]
    ])
    B = np.array([1/6, 5/24, 11/120, 19/720, 29/5040, 1/840])
    Z_obs = (1/(n - 6)) * (R - n*B).T @ A @ (R - n*B)

    # Compute p-value
    p = 1 - chi2.cdf(Z_obs, 6)
    
    return Z_obs, p

In [10]:
def up_and_down_runtest3(U):

    n = len(U)

    # Find runs (Append 0 at ends to handle first and last run)
    seq = np.concatenate(([0], np.sign(U[1:] - U[:-1]), [0]))

    # Get indeces where runs change
    idx = np.flatnonzero(seq[:-1] != seq[1:])

    # Compute run lengths
    run_lengths = idx[1:] - idx[:-1]
    X_obs = len(run_lengths)

    # Compute test statistic
    Z_obs = (X_obs - (2*n-1)/3) / np.sqrt((16*n - 29) / 90)

    # Compute p-value
    p = 2*(1 - norm.cdf(np.abs(Z_obs)))

    return Z_obs, p


In [11]:
def test_random_numbers(U):

    Z_chisq, p_chisq = chi_sq_test(U)
    Z_ks, p_ks = kolmogorov_smirnov_test(U)
    Z_r1, p_r1 = above_below_runtest1(U)
    Z_r2, p_r2 = up_down_runtest2(U)
    Z_r3, p_r3 = up_and_down_runtest3(U)

    Zs = [Z_chisq, Z_ks, Z_r1, Z_r2, Z_r3]
    ps = [p_chisq, p_ks, p_r1, p_r2, p_r3]
    
    table = np.array([Zs, ps]).T

    df = pd.DataFrame(np.round(table, 2),
                      index=["Chi squared", "Kol-Smi", "Above/Below", "Up/Down", "Up and Down"],
                      columns=["Test statistic", "p-value"]
    )
    print(df)

In [12]:
test_random_numbers(U)

             Test statistic  p-value
Chi squared            1.36     1.00
Kol-Smi                0.00     1.00
Above/Below         4974.00     0.59
Up/Down              369.39     0.00
Up and Down           18.52     0.00


### (c) Experimenting with the RNG

### Part 2 - System Available Generator (NumPy)

In [13]:
# Using numpy to generate random numbers
import numpy as np
U = np.random.rand(10000)
test_random_numbers(U)

             Test statistic  p-value
Chi squared            9.94     0.36
Kol-Smi                0.01     1.00
Above/Below         5090.00     0.08
Up/Down                6.77     0.34
Up and Down            1.56     0.12


### Part 3 - Discussion of One Sample Approach

Something something genius...