In [2]:
import util.bit_stream as bs
from tree import tree, encoding
import random
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Auto-correlation with lag 1
def gen_signal(length, auto_correlation):
    """Generate a random binary signal with a tunable bit-repeat probability.

    The first bit is drawn uniformly from {0, 1}. Every following bit copies
    its predecessor with probability ``auto_correlation`` and is the
    complement of its predecessor otherwise.

    Parameters
    ----------
    length : int
        Number of bits to generate; must be positive.
    auto_correlation : float
        Probability in [0, 1] that a bit repeats the previous one.

    Returns
    -------
    numpy.ndarray
        Integer array of ``length`` entries, each 0 or 1.

    Raises
    ------
    AssertionError
        If ``auto_correlation`` is outside [0, 1] or ``length`` is not positive.
    """
    assert 0 <= auto_correlation <= 1
    assert length > 0

    # Draw the starting bit before allocating so the RNG is consumed in a
    # fixed order: one randint, then one random() per remaining position.
    seed_bit = random.randint(0, 1)
    signal = np.zeros(length, dtype=int)
    signal[0] = seed_bit

    for pos in range(1, length):
        previous = signal[pos - 1]
        if random.random() < auto_correlation:
            signal[pos] = previous        # repeat the previous bit
        else:
            signal[pos] = 1 - previous    # flip the previous bit

    return signal

def data_point(signal, k):
    """Encode ``signal`` as a tree for one or several values of ``k``.

    Parameters
    ----------
    signal
        Bit sequence handed to ``bs.bit_stream_literal``.
    k
        Either a single k value or an iterable of k values; each one is
        forwarded as the second argument of ``encoding.bits_to_tree_k``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the list of encoding results, one per k value,
        in the order given. A scalar ``k`` yields a one-element list.
    """
    # Previously the comprehension variable was also called `k` and iterated
    # over the notebook-global `ks`, so the argument was silently ignored and
    # the function raised NameError until a later cell had defined `ks`.
    try:
        k_values = list(k)
    except TypeError:
        # Not iterable: treat it as a single k value.
        k_values = [k]

    # A fresh bit stream is created per k, exactly as before.
    ts = np.array([
        encoding.bits_to_tree_k(bs.bit_stream_literal(signal), k_value)
        for k_value in k_values
    ])
    return ts

In [25]:
import plotly.graph_objects as go
import pandas as pd
from tree.encoding import bits_to_tree_k  as btt_k 

# Experiment grid: the k values handed to the tree encoder, the signal sizes
# (in bits) to sweep, and how many random signals to draw per size.
ks = [3, 5, 9, 17, 33, 65]
signal_lengths = list(range(500, 5000, 500))
tries_per_length = 20

# With a repeat probability of 0.5 every bit is equally likely to copy or
# flip its predecessor.
signals = [
    gen_signal(n_bits, 0.5)
    for n_bits in signal_lengths
    for _ in range(tries_per_length)
]


def gen_tree(sig, k):
    """Wrap ``sig`` in a bit stream and encode it with ``bits_to_tree_k``."""
    return encoding.bits_to_tree_k(bs.bit_stream_literal(sig), k)


# One record per (signal, k) pair: signal size, k, and the resulting tree's
# ``n_descendants`` value.
data = [
    [len(sig), width, gen_tree(sig, width).n_descendants]
    for sig in signals
    for width in ks
]
df = pd.DataFrame(data, columns=['Bits', 'k', 'Nodes'])
df

Unnamed: 0,Bits,k,Nodes
0,500,3,265
1,500,5,306
2,500,9,412
3,500,17,645
4,500,33,1048
...,...,...,...
1075,4500,5,2686
1076,4500,9,3732
1077,4500,17,5648
1078,4500,33,9371


In [26]:
import plotly.express as px

# Box plot of node counts per signal size, with one colour group per k.
fig = px.box(df, x="Bits", y="Nodes", color="k").update_traces(
    quartilemethod="exclusive"  # alternatives: "inclusive", or the default "linear"
)
fig.show()
df


Unnamed: 0,Bits,k,Nodes
0,500,3,265
1,500,5,306
2,500,9,412
3,500,17,645
4,500,33,1048
...,...,...,...
1075,4500,5,2686
1076,4500,9,3732
1077,4500,17,5648
1078,4500,33,9371
