In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
from scipy.stats import chisqprob
from gmpy2 import digits

In [2]:
def read_results(filename):
    results = (File_bytes, Monte_Carlo_Pi, Rule, Serial_Correlation, Entropy, Chi_square, Mean) = [[] for _ in range(7)]
    with open(filename) as f:
        data = json.load(f)
    variables = {"File-bytes": File_bytes, "Monte-Carlo-Pi": Monte_Carlo_Pi, "Rule": Rule, "Serial-Correlation": Serial_Correlation,
                 "Entropy": Entropy, "Chi-square": Chi_square, "Mean": Mean}
    for i in range(len(data)):
        for k, v in variables.items():
            v.append(data[str(i)][k])
    results = np.array([np.array(r) for r in results]).T
    headers = ["File-bytes", "Monte-Carlo-Pi", "Rule", "Serial-Correlation", "Entropy", "Chi-square", "Mean"]
    return pd.DataFrame(results, columns=headers)

In [3]:
raw = read_results("results/results-3colors-run2.json")

In [4]:
raw.head(10)

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean
0,100000.0,4.0,411318300000.0,-1e-05,0.000181,25499490.0,108.00013
1,100000.0,3.885755,6605369000000.0,0.250278,3.914079,1780723.0,110.79919
2,100000.0,2.892116,663336300000.0,-0.164928,4.578474,1195098.0,134.03575
3,100000.0,3.195248,1980947000000.0,0.001541,7.256266,101760.3,117.14244
4,100000.0,3.374295,1558258000000.0,0.098804,4.189589,1554628.0,149.70255
5,13739.0,3.349934,5405423000000.0,0.091629,6.790931,36129.54,120.242812
6,54452.0,3.223361,4294112000000.0,0.013476,7.487716,47370.94,124.792588
7,251.0,1.658537,1974610000000.0,0.116582,5.142711,2516.044,184.549801
8,1.0,,694512600000.0,-100000.0,0.0,255.0,161.0
9,100000.0,2.893796,5312357000000.0,-0.11463,4.41839,1539882.0,116.49434


In [5]:
sample = raw[raw['File-bytes'] == float(1E5)]
print(len(raw))
print(len(sample))

500
338


In [6]:
sample.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 338 entries, 0 to 498
Data columns (total 7 columns):
File-bytes            338 non-null float64
Monte-Carlo-Pi        338 non-null float64
Rule                  338 non-null float64
Serial-Correlation    338 non-null float64
Entropy               338 non-null float64
Chi-square            338 non-null float64
Mean                  338 non-null float64
dtypes: float64(7)
memory usage: 21.1 KB


In [7]:
sample["pi_deviation"] = np.abs(sample["Monte-Carlo-Pi"] - np.pi)
sample["mean_deviation"] = np.abs(sample["Mean"] - 255 / 2)
sample["p-value"] = chisqprob(sample["Chi-square"], 255)
sample["langton"] = [(27 - digits(r, 3).zfill(27).count("0"))/27 for r in list(map(int,sample["Rule"]))]

sample['Entropy_norm'] = sample['Entropy'] / 8
sample['Entropy'] = sample['Entropy_norm']
sample = sample.drop('Entropy_norm', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: ht

In [8]:
sample.describe()

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
count,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0,338.0
mean,100000.0,3.222844,3739090000000.0,-7396.448195,0.502197,6603855.0,114.10477,0.592599,30.574848,0.0,0.656038
std,0.0,0.860926,2178847000000.0,26210.112173,0.316821,9507254.0,43.017466,0.628967,33.05861,0.0,0.095427
min,100000.0,0.0,21337750000.0,-100000.0,0.0,44271.2,0.0,0.002773,0.00756,0.0,0.37037
25%,100000.0,2.9979,1712792000000.0,-0.086712,0.283449,297692.6,100.59171,0.1758,8.862285,0.0,0.592593
50%,100000.0,3.355454,3717093000000.0,-0.000486,0.525635,1722141.0,120.224965,0.461058,19.383025,0.0,0.666667
75%,100000.0,3.858934,5596833000000.0,0.051101,0.820136,6551231.0,137.145382,0.858407,39.44509,0.0,0.703704
max,100000.0,4.0,7604736000000.0,0.783507,0.961217,25500000.0,242.44355,3.141593,127.5,0.0,0.888889


In [9]:
sample[sample.mean_deviation < 1]

Unnamed: 0,File-bytes,Monte-Carlo-Pi,Rule,Serial-Correlation,Entropy,Chi-square,Mean,pi_deviation,mean_deviation,p-value,langton
36,100000.0,3.127325,938179100000.0,-0.041062,0.601161,1097448.0,126.96237,0.014268,0.53763,0.0,0.62963
50,100000.0,2.981399,2007083000000.0,-0.02664,0.57368,1306385.0,127.0747,0.160194,0.4253,0.0,0.666667
86,100000.0,2.99724,4072651000000.0,-0.007934,0.913943,101526.6,126.55526,0.144353,0.94474,0.0,0.518519
140,100000.0,3.24133,2453827000000.0,-0.05747,0.828608,341027.6,127.75273,0.099737,0.25273,0.0,0.703704
156,100000.0,2.99532,2566393000000.0,0.007654,0.904705,114526.3,128.37223,0.146273,0.87223,0.0,0.592593
162,100000.0,3.23773,3162927000000.0,0.003802,0.841759,298140.1,127.31256,0.096137,0.18744,0.0,0.703704
220,100000.0,3.090844,4876568000000.0,0.009873,0.927911,94454.27,127.31454,0.050749,0.18546,0.0,0.666667
279,100000.0,3.364455,6630534000000.0,0.051399,0.833139,245399.3,127.62971,0.222862,0.12971,0.0,0.851852
309,100000.0,3.095884,6695664000000.0,-0.008912,0.912255,104029.4,127.46819,0.045709,0.03181,0.0,0.592593
329,100000.0,3.235089,4781620000000.0,-0.058853,0.815225,332436.9,127.904,0.093496,0.404,0.0,0.62963


In [15]:
# Plot Entropy of all rules against the langton parameter
ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-entropy-langton.png', format='png', dpi=400)

ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-entropy-langton.svg', format='svg', dpi=400)

In [10]:
# Plot Chisquare of all rules against the langton parameter
ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-chisquare-langton.png', format='png', dpi=400)

ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-chisquare-langton.svg', format='svg', dpi=400)