In [5]:
import sys
sys.path.append('../')

import tree_lib.bit_stream as bs
from tree_lib.encodings import two_choices
import plotly.express as px
import pandas as pd
import numpy as np
import random

In [6]:
# Auto-correlation with lag 1
def gen_signal(length, auto_correlation, rng=None):
    """Generate a random bit string whose bits tend to repeat their predecessor.

    The first bit is drawn uniformly from {0, 1}. Every later bit copies the
    previous bit with probability ``auto_correlation`` and is flipped
    otherwise. Note that the parameter is a *repeat probability*: 0.5 yields
    independent bits, 1 a constant signal and 0 a strictly alternating one.

    Args:
        length: Number of bits to generate; must be positive.
        auto_correlation: Probability in [0, 1] that a bit equals the
            previous one.
        rng: Optional ``random.Random``-like object exposing ``randint`` and
            ``random``. When omitted, the module-level ``random`` generator is
            used, so existing callers (and ``random.seed``) behave as before.

    Returns:
        A ``str`` of ``length`` characters, each ``"0"`` or ``"1"``.

    Raises:
        AssertionError: If ``auto_correlation`` is outside [0, 1] or
            ``length`` is not positive.
    """
    assert 0 <= auto_correlation <= 1
    assert length > 0

    # The `random` module itself offers randint()/random(), so it can stand in
    # for a Random instance when no explicit generator is supplied.
    source = random if rng is None else rng

    bit = source.randint(0, 1)
    bits = [bit]
    for _ in range(1, length):
        # Keep the previous bit with probability `auto_correlation`, else flip it.
        if not source.random() < auto_correlation:
            bit = 1 - bit
        bits.append(bit)

    return ''.join(map(str, bits))

def data_point(signal, k):
    """Build the tree(s) for ``signal`` at the requested ``k`` value(s).

    The previous implementation iterated over a global ``ks`` (defined in a
    later cell) with a comprehension variable that shadowed the ``k``
    parameter, so the argument was ignored and the function failed on a fresh
    kernel. The argument is now the only source of k values.

    Args:
        signal: Bit string, passed to ``bs.bit_stream_literal``.
        k: Either a single k value or an iterable of k values (e.g. the
            notebook's ``ks`` list).

    Returns:
        A NumPy array with one entry per k value, each being the result of
        ``two_choices.bits_to_tree_k`` for that k.
    """
    try:
        k_values = list(k)
    except TypeError:
        # A scalar k was given; treat it as a one-element collection.
        k_values = [k]

    ts = np.array([two_choices.bits_to_tree_k(bs.bit_stream_literal(signal), k_value)
                   for k_value in k_values])
    return ts

In [7]:
# Experiment configuration.
SEED = 0
random.seed(SEED)  # gen_signal draws from the global `random` generator; seed it so reruns match

ks = [3,5,9,17,33,65]
signal_lengths = list(range(500, 5000, 500))
tries_per_length = 20

# `tries_per_length` independent signals for every length, all with repeat probability 0.5.
signals = [gen_signal(n_bits, 0.5)
           for n_bits in signal_lengths
           for _ in range(tries_per_length)]


def gen_tree(sig, k):
    """Return the tree built by ``two_choices.bits_to_tree_k`` from bit string ``sig`` for ``k``."""
    return two_choices.bits_to_tree_k(bs.bit_stream_literal(sig), k)


# One row per (signal, k): signal length in bits, k, and the tree's n_descendants.
data = [[len(signal), k, gen_tree(signal, k).n_descendants]
            for signal in signals
            for k in ks]
df = pd.DataFrame(data, columns=['Bits', 'k', 'Nodes'])
df

Unnamed: 0,Bits,k,Nodes
0,500,3,265
1,500,5,281
2,500,9,396
3,500,17,646
4,500,33,916
...,...,...,...
1075,4500,5,2737
1076,4500,9,3803
1077,4500,17,5744
1078,4500,33,9687


In [8]:
# Box plot of tree size per signal length, one box colour per k.
fig = px.box(
    df,
    x="Bits",
    y="Nodes",
    color="k",
    title="Tree size by signal length for each k",
    # Axis labels only; the underlying column names are unchanged.
    labels={"Bits": "Signal length (bits)", "Nodes": "Tree size (n_descendants)"},
)
fig.update_traces(quartilemethod="exclusive") # or "inclusive", or "linear" by default
fig.show()
# Show the frame that backs the figure.
df


Unnamed: 0,Bits,k,Nodes
0,500,3,265
1,500,5,281
2,500,9,396
3,500,17,646
4,500,33,916
...,...,...,...
1075,4500,5,2737
1076,4500,9,3803
1077,4500,17,5744
1078,4500,33,9687
