In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
from scipy.stats import chisqprob
from gmpy2 import digits

In [2]:
def read_results(filename):
    results = (File_bytes, Monte_Carlo_Pi, Rule, Serial_Correlation, Entropy, Chi_square, Mean) = [[] for _ in range(7)]
    with open(filename) as f:
        data = json.load(f)
    variables = {"File-bytes": File_bytes, "Monte-Carlo-Pi": Monte_Carlo_Pi, "Rule": Rule, "Serial-Correlation": Serial_Correlation,
                 "Entropy": Entropy, "Chi-square": Chi_square, "Mean": Mean}
    for i in range(len(data)):
        for k, v in variables.items():
            v.append(data[str(i)][k])
    results = np.array([np.array(r) for r in results]).T
    headers = ["File-bytes", "Monte-Carlo-Pi", "Rule", "Serial-Correlation", "Entropy", "Chi-square", "Mean"]
    return pd.DataFrame(results, columns=headers)

In [4]:
raw = read_results("results/results-3colors-run2.json")

In [5]:
raw.head(10)

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean
0,100000.0,4.0,411318300000.0,-1e-05,0.000181,25499490.0,108.00013
1,100000.0,3.885755,6605369000000.0,0.250278,3.914079,1780723.0,110.79919
2,100000.0,2.892116,663336300000.0,-0.164928,4.578474,1195098.0,134.03575
3,100000.0,3.195248,1980947000000.0,0.001541,7.256266,101760.3,117.14244
4,100000.0,3.374295,1558258000000.0,0.098804,4.189589,1554628.0,149.70255
5,13739.0,3.349934,5405423000000.0,0.091629,6.790931,36129.54,120.242812
6,54452.0,3.223361,4294112000000.0,0.013476,7.487716,47370.94,124.792588
7,251.0,1.658537,1974610000000.0,0.116582,5.142711,2516.044,184.549801
8,1.0,,694512600000.0,-100000.0,0.0,255.0,161.0
9,100000.0,2.893796,5312357000000.0,-0.11463,4.41839,1539882.0,116.49434


In [6]:
sample = raw[raw['File-bytes'] == float(1E5)]
print(len(raw))
print(len(sample))

500
338


In [7]:
sample.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 338 entries, 0 to 498
Data columns (total 7 columns):
File-bytes            338 non-null float64
Monte-Carlo-Pi        338 non-null float64
Rule                  338 non-null float64
Serial-Correlation    338 non-null float64
Entropy               338 non-null float64
Chi-square            338 non-null float64
Mean                  338 non-null float64
dtypes: float64(7)
memory usage: 21.1 KB


In [9]:
list(map(int, sample['Rule'].values))

[411318349948,
 6605368871054,
 663336300896,
 1980946634238,
 1558257575547,
 5312356669121,
 1111875794266,
 5889722232766,
 706797040706,
 4653870006790,
 4205168508315,
 6472255895265,
 7055850526606,
 6076448339146,
 6605957167694,
 6403811615049,
 5382447932350,
 2272777877453,
 251887722743,
 7587853547609,
 4802780373615,
 5916334199491,
 2164840655734,
 3602351110602,
 5988285506370,
 1003702545324,
 2640201217974,
 3558851511880,
 2668996727952,
 6051637250338,
 938179112037,
 1418285878620,
 3449620063320,
 992016260355,
 917604434397,
 6609466713641,
 5796652930682,
 1104491785793,
 2007082878471,
 6022241400207,
 6118152585999,
 6598599586138,
 1538928305653,
 1370461052848,
 5599093794355,
 1101860893076,
 345722925628,
 7544494594118,
 6047038488538,
 1218003969544,
 6249147835275,
 7033765792174,
 5618775854170,
 6065088247230,
 5214559053808,
 61073331255,
 3388567446027,
 3156935116072,
 5171130976530,
 5329486612389,
 1732363698559,
 3169998684636,
 4986612582775,
 4

In [10]:
sample["Rule"].values

array([  4.11318350e+11,   6.60536887e+12,   6.63336301e+11,
         1.98094663e+12,   1.55825758e+12,   5.31235667e+12,
         1.11187579e+12,   5.88972223e+12,   7.06797041e+11,
         4.65387001e+12,   4.20516851e+12,   6.47225590e+12,
         7.05585053e+12,   6.07644834e+12,   6.60595717e+12,
         6.40381162e+12,   5.38244793e+12,   2.27277788e+12,
         2.51887723e+11,   7.58785355e+12,   4.80278037e+12,
         5.91633420e+12,   2.16484066e+12,   3.60235111e+12,
         5.98828551e+12,   1.00370255e+12,   2.64020122e+12,
         3.55885151e+12,   2.66899673e+12,   6.05163725e+12,
         9.38179112e+11,   1.41828588e+12,   3.44962006e+12,
         9.92016260e+11,   9.17604434e+11,   6.60946671e+12,
         5.79665293e+12,   1.10449179e+12,   2.00708288e+12,
         6.02224140e+12,   6.11815259e+12,   6.59859959e+12,
         1.53892831e+12,   1.37046105e+12,   5.59909379e+12,
         1.10186089e+12,   3.45722926e+11,   7.54449459e+12,
         6.04703849e+12,

In [11]:
sample["pi_deviation"] = np.abs(sample["Monte-Carlo-Pi"] - np.pi)
sample["mean_deviation"] = np.abs(sample["Mean"] - 255 / 2)
sample["p-value"] = chisqprob(sample["Chi-square"], 255)
sample["langton"] = [(27 - digits(r, 3).zfill(27).count("0"))/27 for r in list(map(int,sample["Rule"]))]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: ht

In [12]:
sample.describe()

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
count,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0
mean,100000.0,3.222844,3739090000000.0,-7396.448195,4.017576,6603855.0,114.10477,0.592599,30.574848,0.0,0.656038
std,0.0,0.860926,2178847000000.0,26210.112173,2.534566,9507254.0,43.017466,0.628967,33.05861,0.0,0.095427
min,100000.0,0.0,21337750000.0,-100000.0,0.0,44271.2,0.0,0.002773,0.00756,0.0,0.37037
25%,100000.0,2.9979,1712792000000.0,-0.086712,2.26759,297692.6,100.59171,0.1758,8.862285,0.0,0.592593
50%,100000.0,3.355454,3717093000000.0,-0.000486,4.205084,1722141.0,120.224965,0.461058,19.383025,0.0,0.666667
75%,100000.0,3.858934,5596833000000.0,0.051101,6.561086,6551231.0,137.145382,0.858407,39.44509,0.0,0.703704
max,100000.0,4.0,7604736000000.0,0.783507,7.689736,25500000.0,242.44355,3.141593,127.5,0.0,0.888889


In [13]:
sample[sample.mean_deviation < 1]

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
36,100000.0,3.127325,938179100000.0,-0.041062,4.809285,1097448.0,126.96237,0.014268,0.53763,0.0,0.62963
50,100000.0,2.981399,2007083000000.0,-0.02664,4.58944,1306385.0,127.0747,0.160194,0.4253,0.0,0.666667
86,100000.0,2.99724,4072651000000.0,-0.007934,7.311541,101526.6,126.55526,0.144353,0.94474,0.0,0.518519
140,100000.0,3.24133,2453827000000.0,-0.05747,6.628862,341027.6,127.75273,0.099737,0.25273,0.0,0.703704
156,100000.0,2.99532,2566393000000.0,0.007654,7.237639,114526.3,128.37223,0.146273,0.87223,0.0,0.592593
162,100000.0,3.23773,3162927000000.0,0.003802,6.734073,298140.1,127.31256,0.096137,0.18744,0.0,0.703704
220,100000.0,3.090844,4876568000000.0,0.009873,7.423288,94454.27,127.31454,0.050749,0.18546,0.0,0.666667
279,100000.0,3.364455,6630534000000.0,0.051399,6.665114,245399.3,127.62971,0.222862,0.12971,0.0,0.851852
309,100000.0,3.095884,6695664000000.0,-0.008912,7.298041,104029.4,127.46819,0.045709,0.03181,0.0,0.592593
329,100000.0,3.235089,4781620000000.0,-0.058853,6.5218,332436.9,127.904,0.093496,0.404,0.0,0.62963


In [35]:
# Plot Entropy of all rules against the langton parameter
ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

In [None]:
# Plot Entropy of all rules against the langton parameter
ax1 = plt.gca()
sample.plot("langton", "mean_deviation", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

In [None]:
d_five_p10_90 = d_five[(d_five['p-value'] > 0.1) & (d_five['p-value'] < 0.9)]
d_five_p05_95 = d_five[(d_five['p-value'] > 0.05) & (d_five['p-value'] < 0.95)]
d_rands_paper = d_five[d_five.rule.isin(s_rands_paper)]

len_five_p10_90 = len(d_five_p10_90)
len_five_p05_95 = len(d_five_p05_95)
len_rands_paper = len(d_rands_paper)

print("Random according to paper: #%d " % len_rands_paper, end="")
print(set(d_rands_paper.rule))
print("Between  5 - 95%%: #%d " % len_five_p05_95, end="")
print(set(d_five_p05_95.rule))
print("Between 10 - 90%%: #%d " % len_five_p10_90, end="")
print(set(d_five_p10_90.rule))

In [None]:
s_five_p05_p95 = set(d_five_p05_95.rule)
s_five_p10_p90 = set(d_five_p10_90.rule)

print("p05_95 rules not good enough for p10_90: ", end="") 
print(set(d_five_p05_95.rule) - set(d_five_p10_90.rule))

In [None]:
# Plot Entropy of all rules against the langton parameter
ax1 = plt.gca()
d_five.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot("langton", "Entropy", ax=ax1, kind="scatter", color="r", marker='o', alpha=.5, s=40)
plt.show()

ax1 = plt.gca()
d_five.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot("langton", "Entropy", ax=ax1, kind="scatter", color="r", marker='o', alpha=.5, s=40)
plt.savefig('plots/entropy-langton.png', format='png', dpi=400)

ax1 = plt.gca()
d_five.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot("langton", "Entropy", ax=ax1, kind="scatter", color="r", marker='o', alpha=.5, s=40)
plt.savefig('plots/entropy-langton.svg', format='svg', dpi=400)

In [None]:
# Plot Chi-Sqaure of all rules against the langton parameter, cutoff at 1E5
d_five_chi = d_five[(d_five["Chi-square"] < 10**5)]

ax2 = plt.gca()
d_five_chi.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", color="r", marker='o', alpha=.5, s=40)

plt.show()

In [None]:
# Plot Chi-Sqaure of all random paper rules against the langton parameter
ax2 = plt.gca()
d_rands_paper.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
d_five_p05_95.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", color="r", marker='o', alpha=.5, s=40)

plt.show()

In [None]:
# Cutoff rules with high Chi-Square (not random)
d_rands_paper_chi = d_rands_paper[(d_rands_paper["Chi-square"] < 300)] # 300 or 1E5 is same cutoff

print("Number of random rules according to paper: %d" % len(d_rands_paper))
print("Number of paper rules with high Chi-Square: %d " % (len(d_rands_paper) - len(d_rands_paper_chi)), end="")
print(set(d_rands_paper.rule) - set(d_rands_paper_chi.rule))