In [1]:
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gmpy2 import digits
from scipy import integrate
from scipy.stats import chi2
# NOTE: chisqprob was deprecated in SciPy 0.17 and removed in SciPy 1.0;
# chi2.sf is the equivalent call.
from scipy.stats import chisqprob

In [2]:
def read_results(filename):
    """Load a JSON results file into a DataFrame.

    The file must contain an object keyed by the strings "0", "1", ...,
    "<n-1>", where every value is itself an object holding the seven
    statistics listed in ``headers`` below.

    Parameters
    ----------
    filename : str
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        One row per entry (in key order 0..n-1) and one column per statistic.
        All values pass through a single 2-D numpy array, so every column
        shares that array's dtype.

    Raises
    ------
    KeyError
        If an index key or one of the expected statistic names is missing.
    """
    headers = ["File-bytes", "Monte-Carlo-Pi", "Rule", "Serial-Correlation", "Entropy", "Chi-square", "Mean"]
    with open(filename) as handle:
        data = json.load(handle)

    # Entries are addressed by their stringified position, not by iteration order.
    entry_keys = [str(position) for position in range(len(data))]

    # Build one array per statistic, stack them (7 x n) and flip to (n x 7).
    per_column = [np.array([data[key][name] for key in entry_keys]) for name in headers]
    table = np.array(per_column).T
    return pd.DataFrame(table, columns=headers)

In [3]:
# Load the results file into a DataFrame with one row per JSON entry.
raw = read_results("../results/results-3colors-run1.json")

In [4]:
# Preview the first ten rows of the unfiltered results.
raw.head(10)

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean
0,100000.0,3.304212,5989006000000.0,-0.097655,4.534318,1347180.0,113.12843
1,100000.0,4.0,5748833000000.0,-100000.0,0.0,25500000.0,0.0
2,100000.0,2.771631,4612166000000.0,0.619945,3.924278,3215508.0,124.88797
3,100000.0,2.969879,3317978000000.0,-0.009058,7.375186,96173.75,138.35305
4,100000.0,3.141006,5838097000000.0,-0.001768,6.291396,357265.6,120.15874
5,100000.0,0.00024,1946686000000.0,0.910383,0.001565,25494880.0,251.99167
6,100000.0,2.110164,3865028000000.0,0.936311,1.176898,12104480.0,144.13443
7,26.0,0.0,5736569000000.0,-0.452072,2.083551,1785.692,165.192308
8,100000.0,3.680307,1465354000000.0,-0.102109,4.36732,1283542.0,107.45517
9,1.0,,5788441000000.0,-100000.0,0.0,255.0,43.0


In [5]:
# Keep only the entries whose output file has exactly 100000 bytes; the
# preview of `raw` also contains much shorter files (e.g. 26 bytes or 1 byte).
# .copy() makes `sample` an independent frame, so columns added to it later do
# not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
sample = raw[raw['File-bytes'] == 1E5].copy()

# Row counts before and after filtering.
print(len(raw))
print(len(sample))

500
342


In [6]:
# Show column dtypes and non-null counts of the filtered frame.
sample.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 342 entries, 0 to 497
Data columns (total 7 columns):
File-bytes            342 non-null float64
Monte-Carlo-Pi        342 non-null float64
Rule                  342 non-null float64
Serial-Correlation    342 non-null float64
Entropy               342 non-null float64
Chi-square            342 non-null float64
Mean                  342 non-null float64
dtypes: float64(7)
memory usage: 21.4 KB


In [17]:
# Rule numbers of the filtered sample, converted from float to Python ints.
[int(rule) for rule in sample['Rule'].values]

[5989006295292,
 5748832795437,
 4612165577222,
 3317977576426,
 5838096803942,
 1946686331884,
 3865028493065,
 1465353539133,
 6850331787594,
 4613978046583,
 2625090576158,
 3319988386940,
 124723850236,
 5915545771774,
 5907208654456,
 3015329105051,
 4460042326162,
 525912977146,
 3860021781816,
 4549016652917,
 7456130671624,
 6159094443070,
 3157185622003,
 4088743146511,
 482606267307,
 7470509026006,
 6503126245971,
 6341583888743,
 6838077422360,
 1717632374212,
 7139542732864,
 1197972210798,
 5337573025373,
 2687678424003,
 2153289662500,
 3585149073284,
 334939687869,
 4976609363796,
 6048010766914,
 5918955022368,
 5984803946110,
 5776648922595,
 184135265379,
 6605991548009,
 4578231101236,
 5954059530209,
 1393253325592,
 3358616176037,
 3975068091454,
 687808464674,
 2433922965451,
 7208261907910,
 4072971058871,
 907027594630,
 4930997140826,
 2852889647442,
 483824553087,
 295982060992,
 129772853025,
 3039338877499,
 4106931952183,
 7444095796286,
 3242214071430,
 9

In [18]:
# Underlying numpy array of the Rule column (stored as float64 in this frame).
sample["Rule"].values

array([  5.98900630e+12,   5.74883280e+12,   4.61216558e+12,
         3.31797758e+12,   5.83809680e+12,   1.94668633e+12,
         3.86502849e+12,   1.46535354e+12,   6.85033179e+12,
         4.61397805e+12,   2.62509058e+12,   3.31998839e+12,
         1.24723850e+11,   5.91554577e+12,   5.90720865e+12,
         3.01532911e+12,   4.46004233e+12,   5.25912977e+11,
         3.86002178e+12,   4.54901665e+12,   7.45613067e+12,
         6.15909444e+12,   3.15718562e+12,   4.08874315e+12,
         4.82606267e+11,   7.47050903e+12,   6.50312625e+12,
         6.34158389e+12,   6.83807742e+12,   1.71763237e+12,
         7.13954273e+12,   1.19797221e+12,   5.33757303e+12,
         2.68767842e+12,   2.15328966e+12,   3.58514907e+12,
         3.34939688e+11,   4.97660936e+12,   6.04801077e+12,
         5.91895502e+12,   5.98480395e+12,   5.77664892e+12,
         1.84135265e+11,   6.60599155e+12,   4.57823110e+12,
         5.95405953e+12,   1.39325333e+12,   3.35861618e+12,
         3.97506809e+12,

In [29]:
# Take an explicit copy before adding columns, so the assignments below never
# write to a slice of `raw` (avoids SettingWithCopyWarning) and the cell can be
# re-run safely.
sample = sample.copy()

# Absolute distance of the Monte Carlo estimate from pi, and of the byte mean
# from 255 / 2 = 127.5.
sample["pi_deviation"] = np.abs(sample["Monte-Carlo-Pi"] - np.pi)
sample["mean_deviation"] = np.abs(sample["Mean"] - 255 / 2)

# Upper-tail chi-square probability with 255 degrees of freedom.
# chi2.sf is the replacement for scipy.stats.chisqprob, which was removed in
# SciPy 1.0; both compute the same survival function.
sample["p-value"] = chi2.sf(sample["Chi-square"], 255)

# Fraction of non-zero digits in the rule number written as a 27-digit base-3
# string. NOTE(review): presumably 27 = 3**3 transitions of a 3-colour rule —
# confirm against the generator.
# gmpy2.digits gets plain Python ints, hence the int() conversion.
sample["langton"] = [
    (27 - digits(rule, 3).zfill(27).count("0")) / 27
    for rule in map(int, sample["Rule"])
]

In [33]:
# Summary statistics for every column, including the derived ones.
sample.describe()

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
count,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0
mean,100000.0,3.162756,3707309000000.0,-6725.097772,4.037536,6857875.0,117.655896,0.645245,30.596031,0.0,0.659303
std,0.0,0.943781,2253618000000.0,25082.40884,2.644576,9619144.0,44.597013,0.688193,33.870664,0.0,0.091703
min,100000.0,0.0,466726300.0,-100000.0,0.0,37608.31,0.0,0.000587,0.03795,0.0,0.407407
25%,100000.0,2.973959,1526599000000.0,-0.060315,1.87635,241696.8,107.22117,0.215662,8.31645,0.0,0.592593
50%,100000.0,3.378735,3792148000000.0,-1e-05,4.204439,1598972.0,119.58215,0.44211,18.63526,0.0,0.666667
75%,100000.0,3.803492,5745565000000.0,0.080112,6.725144,9726336.0,137.546943,0.858407,39.068192,0.0,0.703704
max,100000.0,4.0,7623430000000.0,0.958508,7.728144,25500000.0,251.99167,3.141593,127.5,0.0,0.888889


In [34]:
# Rows whose byte mean is within 1 of 127.5.
sample[sample["mean_deviation"] < 1]

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
63,100000.0,3.383655,1393253000000.0,0.009533,7.039077,158294.8,127.71775,0.242062,0.21775,0.0,0.666667
64,100000.0,2.933637,3358616000000.0,-0.053594,6.765806,195617.4,128.35755,0.207956,0.85755,0.0,0.592593
92,100000.0,3.731189,928013200000.0,-0.162199,4.652444,1188798.0,126.60353,0.589596,0.89647,0.0,0.62963
116,100000.0,3.513021,1087077000000.0,-0.080698,4.398785,1391142.0,127.01398,0.371428,0.48602,0.0,0.777778
125,100000.0,3.51158,1460254000000.0,0.000373,7.227673,103628.8,126.78272,0.369987,0.71728,0.0,0.777778
174,100000.0,3.117005,7350385000000.0,0.026893,4.616635,1325720.0,128.10162,0.024588,0.60162,0.0,0.666667
205,100000.0,3.408136,2610728000000.0,0.000572,6.740882,218497.4,126.90819,0.266543,0.59181,0.0,0.592593
238,100000.0,3.094204,2745191000000.0,0.016374,6.77797,186506.4,128.33574,0.047389,0.83574,0.0,0.555556
323,100000.0,3.050282,3956179000000.0,-0.005755,7.728144,37608.31,127.62304,0.091311,0.12304,0.0,0.703704
384,100000.0,3.24805,6265382000000.0,0.001973,7.029287,175290.8,127.46205,0.106457,0.03795,0.0,0.62963


In [35]:
# Scatter plot: Entropy of every sampled rule versus its langton value
ax1 = plt.gca()
sample.plot(x="langton", y="Entropy", kind="scatter", ax=ax1, s=40, alpha=.5, marker='o')
plt.show()

In [None]:
# Scatter plot: mean_deviation of every sampled rule versus its langton value
ax1 = plt.gca()
sample.plot(x="langton", y="mean_deviation", kind="scatter", ax=ax1, s=40, alpha=.5, marker='o')
plt.show()

In [None]:
# Split d_five into three subsets: rules whose p-value lies strictly inside
# the 10-90% band, strictly inside the 5-95% band, and rules listed in
# s_rands_paper.
# NOTE(review): d_five and s_rands_paper are not defined in any cell visible
# above — confirm they are created before this cell runs.
d_five_p10_90 = d_five[(0.1 < d_five['p-value']) & (d_five['p-value'] < 0.9)]
d_five_p05_95 = d_five[(0.05 < d_five['p-value']) & (d_five['p-value'] < 0.95)]
d_rands_paper = d_five[d_five.rule.isin(s_rands_paper)]

len_five_p10_90, len_five_p05_95, len_rands_paper = (
    len(d_five_p10_90),
    len(d_five_p05_95),
    len(d_rands_paper),
)

# Report each subset as "<label> #<count> {rules}" on a single line.
for label, count, subset in (
    ("Random according to paper: #%d ", len_rands_paper, d_rands_paper),
    ("Between  5 - 95%%: #%d ", len_five_p05_95, d_five_p05_95),
    ("Between 10 - 90%%: #%d ", len_five_p10_90, d_five_p10_90),
):
    print(label % count, end="")
    print(set(subset.rule))

In [None]:
# Rule sets of the two p-value bands.
s_five_p05_p95 = set(d_five_p05_95.rule)
s_five_p10_p90 = set(d_five_p10_90.rule)

# Rules that fall inside the 5-95% band but outside the 10-90% band.
print("p05_95 rules not good enough for p10_90: ", end="")
print(s_five_p05_p95.difference(s_five_p10_p90))

In [None]:
# Plot Entropy of all rules against the langton parameter; the rules in
# d_five_p05_95 (p-value inside the 5-95% band) are overlaid in red.
#
# The figure is drawn once on its own axes and then written to both output
# formats. Re-plotting onto plt.gca() before each savefig adds a second copy of
# every point to the still-open figure, so the SVG would not match the PNG.
fig, ax1 = plt.subplots()
d_five.plot(x="langton", y="Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot(x="langton", y="Entropy", ax=ax1, kind="scatter", color="r", marker='o', alpha=.5, s=40)

# Save before showing: plt.show() may close the figure depending on the backend.
fig.savefig('plots/entropy-langton.png', format='png', dpi=400)
fig.savefig('plots/entropy-langton.svg', format='svg', dpi=400)
plt.show()

In [None]:
# Plot Chi-Square of all rules against the langton parameter (log y-axis),
# keeping only rules with Chi-square below 1E5; the 5-95% band is overlaid in red.
d_five_chi = d_five[d_five["Chi-square"] < 10**5]

ax2 = plt.gca()
for frame, extra in ((d_five_chi, {}), (d_five_p05_95, {"color": "r"})):
    frame.plot(x="langton", y="Chi-square", ax=ax2, logy=True, kind="scatter",
               marker='o', alpha=.5, s=40, **extra)

plt.show()

In [None]:
# Plot Chi-Square of the paper's random rules against the langton parameter
# (log y-axis); the 5-95% band is overlaid in red.
ax2 = plt.gca()
for frame, extra in ((d_rands_paper, {}), (d_five_p05_95, {"color": "r"})):
    frame.plot(x="langton", y="Chi-square", ax=ax2, logy=True, kind="scatter",
               marker='o', alpha=.5, s=40, **extra)

plt.show()

In [None]:
# Drop paper rules whose Chi-Square is too high (not random)
low_chi_mask = d_rands_paper["Chi-square"] < 300  # 300 or 1E5 is same cutoff
d_rands_paper_chi = d_rands_paper[low_chi_mask]

n_paper_rules = len(d_rands_paper)
n_high_chi = n_paper_rules - len(d_rands_paper_chi)

print("Number of random rules according to paper: %d" % n_paper_rules)
print("Number of paper rules with high Chi-Square: %d " % n_high_chi, end="")
# The rules removed by the cutoff.
print(set(d_rands_paper.rule) - set(d_rands_paper_chi.rule))