In [1]:
import subprocess
import shlex
import re

import numpy as np

from scipy.stats import combine_pvalues
from IPython.display import display, Markdown

In [2]:
# Captures the chi-square exceedance percentage from ent's report (group 1).
# For extreme results ent prints a bound instead of a bare number
# ("less than 0.01 percent" / "more than than 99.99 percent" -- the doubled
# "than" is in ent's own output), so an optional qualifier is skipped and the
# bound itself is captured; otherwise the worst generators would not match.
RANDOMNESS_PATTERN = re.compile(
    r"exceed this value (?:(?:less|more) than (?:than )?)?(\d+\.\d+) percent"
)

# Zero-width match in front of every capital letter except one at the very
# start of the string; substituting "-" there and lowercasing the result turns
# a CamelCase mode name into kebab-case.
CC_PATTERN = re.compile(r"(?<!^)(?=[A-Z])")

# ENT - pseudorandom number sequence test

In [3]:
def _run_ent_once(mode, seed, limit):
    """Pipe one generator run into ``./ent`` and return ``(pvalue, report)``."""
    command = (
        f"cargo run --release -- --seed {seed} --limit {limit} "
        f"{CC_PATTERN.sub('-', mode).lower()}"
    )

    # The context manager closes our copy of the pipe and waits for the
    # generator even when ent fails, so neither the file descriptor nor the
    # child process is leaked.
    with subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) as generator:
        ent = subprocess.run(
            shlex.split("./ent"), stdin=generator.stdout,
            check=True, capture_output=True, text=True
        )

    # A generator that failed (e.g. a build error) would otherwise hand ent an
    # empty or truncated stream and the result would be silently meaningless.
    if generator.returncode != 0:
        raise subprocess.CalledProcessError(generator.returncode, generator.args)

    match = RANDOMNESS_PATTERN.search(ent.stdout)

    if match is None:
        raise ValueError(
            f"could not find the chi-square percentage in the ent output "
            f"for seed {seed}:\n{ent.stdout}"
        )

    # ent reports a percentage; convert it to a p-value in [0, 1].
    return float(match.group(1)) / 100.0, ent.stdout


def test_randomness_ent(
    mode, seeds=[42], limit='1GB',
):
    """Run the ENT randomness test on the generator output and display a report.

    For every seed the generator is started through ``cargo run`` with the
    kebab-cased ``mode`` as its final argument and its stdout is piped into
    ``./ent``. The chi-square percentage of each ent report is read as a
    p-value, and the p-values are combined with ``combine_pvalues``.

    The displayed heading is coloured by how extreme the combined p-value is
    at either tail: purple (<= 0.01 or >= 0.99), red (<= 0.05 or >= 0.95),
    orange (<= 0.1 or >= 0.9), otherwise green. The full ent report of the
    run whose own p-value lies closest to 0 or 1 is shown as the most suspect.

    Parameters:
        mode: CamelCase name of the generator mode.
        seeds: iterable of seeds, one generator run per seed. The default
            list is only read, never mutated.
        limit: value passed to the generator's ``--limit`` option.

    Raises:
        ValueError: if ``seeds`` is empty or an ent report does not contain
            the chi-square percentage.
        subprocess.CalledProcessError: if ent or the generator exits with a
            non-zero status.
    """
    outputs = [_run_ent_once(mode, seed, limit) for seed in seeds]

    # Without any run there is nothing to combine or to display.
    if not outputs:
        raise ValueError("seeds must contain at least one seed")

    gof_statistic, gof_pvalue = combine_pvalues([pvalue for pvalue, _ in outputs])

    if gof_pvalue <= 0.01 or gof_pvalue >= 0.99:
        display(Markdown(f"## <span style='color:purple'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.05 or gof_pvalue >= 0.95:
        display(Markdown(f"## <span style='color:red'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.1 or gof_pvalue >= 0.9:
        display(Markdown(f"## <span style='color:orange'>*{mode}*</span>"))
    else:
        display(Markdown(f"## <span style='color:green'>{mode}</span>"))

    # Suspicion grows as a p-value approaches either tail; max() keeps the
    # first run on ties.
    _, most_suspect_output = max(
        outputs, key=lambda item: 1 - min(item[0], 1 - item[0])
    )

    display(Markdown("#### Fisher’s combined Chi-squared:"))
    display(Markdown(f"* p-value: {gof_pvalue}\n* statistic: {gof_statistic}"))

    display(Markdown("#### Test output (most suspect):"))
    display(Markdown(">" + most_suspect_output.replace('\n', '\n>')))

    display(Markdown("#### Parameters:"))
    display(Markdown(f"* seeds: {seeds}\n* limit: {limit}"))

In [4]:
# Run the ENT report once per generator mode. Each mode gets its own batch of
# ten uint64 seeds drawn from NumPy's global RNG and a limit of '100MB'.
for mode in ("Monolithic", "Independent", "IndependentSimulation", "IndependentSimulationNoDispersal"):
    seeds = np.random.randint(
        low=0, high=np.iinfo("uint64").max, size=10, dtype="uint64",
    )

    test_randomness_ent(mode, limit='100MB', seeds=seeds)

## <span style='color:orange'>*Monolithic*</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.9203830053780299
* statistic: 11.87284504453826

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 217.27, and randomly
>would exceed this value 95.83 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5069 (127.5 = random).
>Monte Carlo value for Pi is 3.141757806 (error 0.01 percent).
>Serial correlation coefficient is 0.000141 (totally uncorrelated = 0.0).
>

#### Parameters:

* seeds: [11438208194157600746   807722557157693178 17550653521735143439
  9542332256426796632  4914276692573245219 16356419854488693534
 14980437654377072669 17806098081867856147 17337572593171120509
  7364022910693211996]
* limit: 100MB

## <span style='color:green'>Independent</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.2907559301788645
* statistic: 22.96014122574649

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 221.25, and randomly
>would exceed this value 93.78 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5014 (127.5 = random).
>Monte Carlo value for Pi is 3.141033486 (error 0.02 percent).
>Serial correlation coefficient is -0.000172 (totally uncorrelated = 0.0).
>

#### Parameters:

* seeds: [ 1033172182558178935 17060661050088092183 15563744042189496085
 10700258396870203697  9070988897145708188 18207424661263643070
 12616583184337137306  8040288011543582599  7983907059600088109
 14936626608515603020]
* limit: 100MB

## <span style='color:green'>IndependentSimulation</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.35052277705236573
* statistic: 21.81699930901328

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 302.64, and randomly
>would exceed this value 2.17 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5084 (127.5 = random).
>Monte Carlo value for Pi is 3.141088206 (error 0.02 percent).
>Serial correlation coefficient is 0.000007 (totally uncorrelated = 0.0).
>

#### Parameters:

* seeds: [12161772534524896259  2045254060635954830 17196094396943409166
  5739541112433987454  7717621565149612247 11609414428789322420
 17251671525350401836  8906859295787177297 17357911630322962708
 16924902073399398944]
* limit: 100MB

## <span style='color:green'>IndependentSimulationNoDispersal</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.15112128749977102
* statistic: 26.460976353035672

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 297.67, and randomly
>would exceed this value 3.42 percent of the times.
>
>Arithmetic mean value of data bytes is 127.4839 (127.5 = random).
>Monte Carlo value for Pi is 3.142323726 (error 0.02 percent).
>Serial correlation coefficient is -0.000005 (totally uncorrelated = 0.0).
>

#### Parameters:

* seeds: [17770599855847412276 15708050387024954144 15035841265197839949
  2523591470737598723  3735631641601737476 11099102083812552322
  5828206479837106746  8530539764271179718 11408569139797663507
 13707293129912994060]
* limit: 100MB