### create figure for hierarchy example
1. get latent distances vs sequential distances
2. get MI of Markov model
3. get MI of recursively generated sequence - store the average latent distance for each of the sequential distances
4. Plot latent distance by MI
5. plot sequential distance by MI

In [11]:
import numpy as np
import pandas as pd
from scipy import random as sr
import matplotlib.pyplot as plt
import networkx as nx
from tqdm.autonotebook import tqdm



In [38]:
def _try_place_values(na, ps):
    """Make one random placement attempt; return the filled matrix or None."""
    probs = np.zeros((na, na))
    for p in ps:
        for i in range(na):
            # A cell is eligible when it is still empty in row i and its
            # column does not already contain the value p.
            eligible = (probs[i, :] == 0) & ~np.any(probs == p, axis=0)
            candidates = np.flatnonzero(eligible)
            if candidates.size == 0:
                # Dead end: this attempt cannot be completed, so stop now
                # instead of filling in the remaining rows for nothing.
                return None
            # Same draw as before (first element of a random permutation),
            # so a seeded run whose first attempt succeeds is unchanged.
            probs[i, np.random.permutation(candidates)[0]] = p
    return probs


def gen_balanced_matrix(na=5, ps=(0.7, 0.2, 0.1)):
    """Generate a random, column-normalised matrix for the hierarchical and
    Markov models.

    Every value in ``ps`` is placed once in each row, always in a cell that
    is still zero and in a column that does not yet hold that value. The
    filled matrix is then divided by its column sums, so each column sums
    to one. Placement is random and can dead-end; up to 1000 attempts are
    made.

    Parameters
    ----------
    na : int
        Number of states; the matrix has shape ``(na, na)``.
    ps : sequence of float
        Values to place in the matrix. With an empty ``ps`` the column sums
        are zero and the normalisation yields NaNs.

    Returns
    -------
    numpy.ndarray or str
        The normalised ``(na, na)`` matrix, or the string
        ``"Generation Failed"`` if none of the 1000 attempts succeeded.
        Callers should check for the string before using the result.
    """
    for _ in range(1000):
        probs = _try_place_values(na, ps)
        if probs is not None:
            return probs / np.sum(probs, axis=0)
    # Sentinel string kept (rather than raising) so existing callers that
    # test for it keep working.
    return "Generation Failed"

def generate_hierarchical_graph(branching_factor, depth, alphabet, probs):
    """Sample a tree of symbols level by level and return its bottom level.

    The root (node 0) gets a symbol drawn from ``alphabet`` with weights
    equal to the row sums of ``probs`` divided by its total. Each of the
    ``depth`` levels then gives every node of the previous level
    ``branching_factor`` children, whose symbols are drawn from ``alphabet``
    using the column of ``probs`` indexed by the parent's symbol. Symbols
    are therefore used directly as column indices into ``probs``.

    Returns a tuple ``(G, lowest_level, lowest_level_sequence)``:
    ``G`` is the networkx graph with one edge per parent/child pair,
    ``lowest_level`` holds the node ids of the deepest level (an integer
    array, or the list ``[0]`` when ``depth`` is 0), and
    ``lowest_level_sequence`` is the list of symbols of those nodes.
    """
    tree = nx.Graph()

    # draw the root symbol from the normalised row sums of the matrix
    root_weights = np.sum(probs, axis=1) / np.sum(probs)
    root_symbol = np.random.choice(alphabet, p=root_weights, size=1)[0]
    symbol_of = {0: root_symbol}
    tree.add_node(0)

    frontier = [0]  # node ids of the deepest level built so far
    next_id = 1     # id that the next created node will receive

    # grow the tree one level at a time
    for level in tqdm(range(depth)):
        children = np.empty(branching_factor ** (level + 1), dtype='int')
        slot = 0
        for parent in frontier:
            # the parent's symbol selects the column used to draw its children
            drawn = np.random.choice(
                alphabet, p=probs[:, symbol_of[parent]], size=branching_factor
            )
            for child_symbol in drawn:
                symbol_of[next_id] = child_symbol
                children[slot] = next_id
                tree.add_node(next_id)
                tree.add_edge(parent, next_id)
                slot += 1
                next_id += 1
        frontier = children

    leaf_symbols = [symbol_of[leaf] for leaf in frontier]
    return tree, frontier, leaf_symbols

In [44]:
# Number of children sampled under every node of the hierarchy.
branching_factor = 2
# Number of levels sampled below the root.
depth = 8
# Alphabet size, and the symbols themselves (0 .. a_n - 1).
a_n = 2
alphabet = np.arange(a_n)
# Number of sequences to use.
nseq = 1
# The deepest level of the hierarchy holds branching_factor ** depth symbols.
print('seq len ', branching_factor ** depth)

seq len  256


In [45]:
# Generate the probability matrix (a_n x a_n). Its columns are used as the
# distributions from which a parent's children are sampled; the result is the
# string "Generation Failed" if no valid matrix could be built.
probs = gen_balanced_matrix(na=a_n, ps=[.9, .1])

In [46]:
# Sample the hierarchy: G is the tree, lowest_level the node ids of the deepest
# level, and lowest_level_sequence the symbols carried by those nodes.
G, lowest_level, lowest_level_sequence = generate_hierarchical_graph(branching_factor, depth, alphabet, probs)

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




In [52]:
#pos=nx.drawing.nx_agraph.graphviz_layout(G,prog='dot')
#fig, ax = plt.subplots(figsize=(12,5))
#nx.draw(G,pos,with_labels=False,arrows=False, width = 3, ax=ax)

In [53]:
# Sanity check: the sequence length should equal branching_factor ** depth.
len(lowest_level_sequence)

256