# Hierarchical model
1. Generate sequences from the recursive hierarchical model
2. Calculate the mutual information (MI) of the sequences

In [None]:
import numpy as np
from parallelspaper.models import gen_seq_hierarchical, gen_balanced_matrix
import pandas as pd
from parallelspaper.utils import nowstring
import parallelspaper.information_theory as it
from parallelspaper.config.paths import DATA_DIR

### Parameters 

In [None]:
# branching factors handed to the hierarchical generator
# NOTE(review): presumably gen_seq_hierarchical picks from this list at each
# level -- confirm against its implementation
n_subsamples = [2]
# number of hierarchical levels; the expected sequence length printed below
# is mean(n_subsamples) ** depth
depth = 12
# alphabet size, and the integer symbols 0 .. a_n - 1
a_n = 5
alphabet = np.arange(a_n)
# how many sequences to generate
nseq = 1000
# expected length of a single generated sequence
print('seq len ', np.mean(n_subsamples) ** depth)

In [None]:
# distances at which MI is sampled: the integers 1 through 100 inclusive
distances = np.arange(100) + 1

In [None]:
# joblib is a standalone package. The copy vendored under sklearn.externals
# was deprecated in scikit-learn 0.21 and removed in 0.23, so import joblib
# directly and only fall back to the vendored copy on old environments.
try:
    from joblib import Parallel, delayed
except ImportError:
    from sklearn.externals.joblib import Parallel, delayed

# number of parallel jobs and joblib verbosity, passed to Parallel and to the
# MI computation further down
n_jobs = 12
verbosity = 0

### Generate sequences

In [None]:
from tqdm.autonotebook import tqdm

In [None]:
# generate probability matrix
# NOTE(review): the meaning of the two `ps` values is defined by
# gen_balanced_matrix -- confirm there before changing them
probs = gen_balanced_matrix(ps=[0.85, 0.15])

In [None]:
# sample nseq sequences in parallel; the tqdm wrapper only drives a progress
# bar, and every job receives the same arguments
# NOTE(review): if gen_seq_hierarchical draws from numpy's global RNG and the
# workers inherit identical RNG state, jobs could return identical sequences --
# confirm that the generated sequences actually differ
seqs_list = tqdm(range(nseq), leave=False)
with Parallel(n_jobs=n_jobs, verbose=verbosity) as parallel:
    sequences = parallel(
        delayed(gen_seq_hierarchical)(alphabet, probs, depth, n_subsamples)
        for _ in seqs_list
    )

### Calculate MI

In [None]:
# sequence statistics
# number of distinct symbols across all sequences (np.unique flattens its
# input, so no extra list wrapper is needed)
unique_elements = np.unique(np.concatenate(sequences)).size
# length of each individual sequence
bout_lens = [len(seq) for seq in sequences]
# total number of elements, i.e. the length of the concatenated sequences
seq_len = sum(bout_lens)

In [None]:
# empty results table; rows are appended after each MI computation
MI_DF = pd.DataFrame(
    columns=[
        'name',
        'type',
        'rep',
        'MI',
        'MI_shuff',
        'distances',
        'MI_var',
        'MI_shuff_var',
        'n_elements',
        'unique_elements',
        'bout_lens',
    ]
)

In [None]:
# calculate MI over all sequences joined end-to-end into one long sequence;
# the call returns an (MI, variance) pair for the data and another pair for
# what is presumably a shuffled baseline (judging by the `_shuff` names)
(MI, var_MI), (MI_shuff, MI_shuff_var) = it.sequential_mutual_information(
    [np.concatenate(sequences)],
    distances,
    n_jobs=n_jobs,
    verbosity=verbosity,
)
# add to MI_DF as a new row; values are positional and follow the column order
MI_DF.loc[len(MI_DF)] = [
    'hierarchical',
    'full',
    0,
    MI,
    MI_shuff,
    distances,
    var_MI,
    MI_shuff_var,
    seq_len,
    unique_elements,
    bout_lens,
]

### Save MI

In [None]:
# string embedded in the output filename below
# NOTE(review): presumably a timestamp of the current time, so repeated runs
# do not overwrite each other -- confirm in parallelspaper.utils.nowstring
now_string = nowstring()

In [None]:
# Save the MI table under DATA_DIR/MI_DF/models.
# to_pickle does not create missing directories, so make sure the target
# folder exists first; otherwise the computed results are lost on a fresh
# checkout.
import os

save_path = str(DATA_DIR / ('MI_DF/models/hierarchical_' + now_string + '.pickle'))
os.makedirs(os.path.dirname(save_path), exist_ok=True)
MI_DF.to_pickle(save_path)