In [1]:
import subprocess
import shlex
import re

import numpy as np

from scipy.stats import combine_pvalues
from IPython.display import display, Markdown

In [2]:
# Captures the chi-square exceedance percentage from ent's report as group(1).
# Besides the plain "exceed this value 13.98 percent" form, ent reports extreme
# results as "less than 0.01 percent" / "more than than 99.99 percent" (sic);
# the optional prefix lets those lines match too, with group(1) holding the
# reported bound instead of failing to match on the most suspicious runs.
RANDOMNESS_PATTERN = re.compile(
    r"exceed this value (?:(?:less|more) than (?:than )?)?(\d+\.\d+) percent"
)

# Zero-width match in front of every upper-case letter except one at the very
# start of the string; substituting '-' there and lower-casing the result turns
# a CamelCase name into kebab-case.
CC_PATTERN = re.compile(r"(?<!^)(?=[A-Z])")

# ENT - pseudorandom number sequence test

In [3]:
def test_randomness_ent(
    mode, seeds=[42], limit='1GB',
):
    """Pipe the generator's output through ``./ent`` once per seed and display a report.

    For every seed, ``cargo run --release -- --seed <seed> --limit <limit> <mode>``
    is started and its stdout is fed to ``./ent``. The chi-square exceedance
    percentage is parsed from each ent report, the per-seed p-values are
    combined with ``combine_pvalues`` and the result is rendered as Markdown.

    Parameters
    ----------
    mode : str
        CamelCase mode name; it is converted to kebab-case and passed as the
        last command-line argument of the generator.
    seeds : iterable
        Seeds to test, one generator run per seed. Must not be empty.
    limit : str
        Value passed to the generator's ``--limit`` option.

    Raises
    ------
    subprocess.CalledProcessError
        If ``./ent`` or the generator exits with a non-zero status.
    ValueError
        If ``seeds`` is empty or an ent report contains no parsable
        chi-square percentage.
    """
    # The sub-command does not depend on the seed, so derive it once.
    subcommand = CC_PATTERN.sub('-', mode).lower()

    outputs = []

    for seed in seeds:
        generator_args = shlex.split(
            f"cargo run --release -- --seed {seed} --limit {limit} {subcommand}"
        )

        # The context manager closes our end of the pipe and waits for the
        # generator, so no zombie process or open descriptor is left behind,
        # even when ent fails.
        with subprocess.Popen(generator_args, stdout=subprocess.PIPE) as generator:
            ent = subprocess.run(
                shlex.split("./ent"), stdin=generator.stdout,
                check=True, capture_output=True, text=True
            )

        # A failed generator means ent analysed a missing or truncated stream;
        # refuse to report statistics for it.
        if generator.returncode != 0:
            raise subprocess.CalledProcessError(generator.returncode, generator_args)

        match = RANDOMNESS_PATTERN.search(ent.stdout)

        if match is None:
            raise ValueError(
                f"could not find the chi-square percentage in the ent output "
                f"for seed {seed}:\n{ent.stdout}"
            )

        pvalue = float(match.group(1)) / 100.0

        outputs.append((pvalue, ent.stdout))

    if not outputs:
        raise ValueError("test_randomness_ent requires at least one seed")

    gof_statistic, gof_pvalue = combine_pvalues([pvalue for pvalue, output in outputs])

    # Colour the heading by how close the combined p-value is to either tail.
    if gof_pvalue <= 0.01 or gof_pvalue >= 0.99:
        display(Markdown(f"## <span style='color:purple'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.05 or gof_pvalue >= 0.95:
        display(Markdown(f"## <span style='color:red'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.1 or gof_pvalue >= 0.9:
        display(Markdown(f"## <span style='color:orange'>*{mode}*</span>"))
    else:
        display(Markdown(f"## <span style='color:green'>{mode}</span>"))

    # The most suspect run is the one whose p-value lies closest to 0 or 1;
    # min() keeps the first such run on ties.
    _, most_suspect_output = min(
        outputs, key=lambda result: min(result[0], 1 - result[0])
    )

    display(Markdown("#### Fisher’s combined Chi-squared:"))
    display(Markdown(f"* p-value: {gof_pvalue}\n* statistic: {gof_statistic}"))

    display(Markdown("#### Test output (most suspect):"))
    # Prefix every line with '>' so the report renders as a Markdown blockquote.
    display(Markdown(">" + most_suspect_output.replace('\n', '\n>')))

    display(Markdown("#### Configuration:"))
    display(Markdown(f"* seeds: {seeds}\n* limit: {limit}"))

In [4]:
# Test every mode against its own batch of ten freshly drawn 64-bit seeds.
for mode in (
    "Monolithic",
    "Independent",
    "IndependentSimulation",
    "IndependentSimulationNoDispersal",
):
    seeds = np.random.randint(
        low=0, high=np.iinfo("uint64").max, size=10, dtype="uint64",
    )

    test_randomness_ent(mode, seeds=seeds, limit='100MB')

## <span style='color:green'>Monolithic</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.4070611425211245
* statistic: 20.832310754533395

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 279.50, and randomly
>would exceed this value 13.98 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5031 (127.5 = random).
>Monte Carlo value for Pi is 3.142117086 (error 0.02 percent).
>Serial correlation coefficient is -0.000095 (totally uncorrelated = 0.0).
>

#### Configuration:

* seeds: [ 5971835272702868478  2114132761498108405  9015447351495465784
 11213259191635696868  5079814388047370343 16728729476119530712
  1897032354548496386 13519412175737025821  3128571018571274929
  2366035730914088892]
* limit: 100MB

## <span style='color:green'>Independent</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.4715011771750339
* statistic: 19.784178049865723

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 283.73, and randomly
>would exceed this value 10.44 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5065 (127.5 = random).
>Monte Carlo value for Pi is 3.141276366 (error 0.01 percent).
>Serial correlation coefficient is -0.000274 (totally uncorrelated = 0.0).
>

#### Configuration:

* seeds: [4862681072913350338 7752163063292758363  928144651139258977
  916705093039732952 4731746930049523154 3913460021304599207
  111459139077761779 8149104512309705179 3639355440670271048
 6049203734858299484]
* limit: 100MB

## <span style='color:green'>IndependentSimulation</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.7447822249870877
* statistic: 15.538972593341462

#### Test output (most suspect):

>Entropy = 7.999998 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 282.43, and randomly
>would exceed this value 11.45 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5175 (127.5 = random).
>Monte Carlo value for Pi is 3.140712606 (error 0.03 percent).
>Serial correlation coefficient is -0.000002 (totally uncorrelated = 0.0).
>

#### Configuration:

* seeds: [12374108081095622295  1863568801586178255   890659494463218326
 15765282812387042083  8075008416815596691  5100045249690513314
 15386672215388746600 16622963562626149362 14662120825122126571
  5933115546056365778]
* limit: 100MB

## <span style='color:green'>IndependentSimulationNoDispersal</span>

#### Fisher’s combined Chi-squared:

* p-value: 0.5399266109787992
* statistic: 18.722460892248378

#### Test output (most suspect):

>Entropy = 7.999999 bits per byte.
>
>Optimum compression would reduce the size
>of this 100000000 byte file by 0 percent.
>
>Chi square distribution for 100000000 samples is 201.74, and randomly
>would exceed this value 99.41 percent of the times.
>
>Arithmetic mean value of data bytes is 127.5033 (127.5 = random).
>Monte Carlo value for Pi is 3.141724206 (error 0.00 percent).
>Serial correlation coefficient is 0.000026 (totally uncorrelated = 0.0).
>

#### Configuration:

* seeds: [14054721351886115996 10692850619163283170  9332136837527969522
  6645581341209973866  3727505421131897686   745523408135193919
   453107618080699856  7822223744270375835  9183033282550356157
 14664796754646485335]
* limit: 100MB