# Markov models
1. Generate sequences from Markov models
2. Compute the mutual information of the generated sequences

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

from parallelspaper.utils import nowstring
import parallelspaper.information_theory as it
from parallelspaper.models import transition_diagrams, sample_sequence_MM
from parallelspaper.config.paths import DATA_DIR

### Peek at Markov transition matrices

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Draw the 'Bird1' and 'Okada' transition matrices side by side,
# one matrix per subplot column.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6, 3))
left_panel, right_panel = ax
# `.values` presumably yields the raw matrix of a DataFrame-like entry -- TODO confirm
left_panel.matshow(np.array(transition_diagrams['Bird1'].values))
right_panel.matshow(np.array(transition_diagrams['Okada'].values))

### Generate sequences and calculate MI

In [None]:
# Sequential distances (1 through 100, inclusive) at which MI is computed.
distances = np.arange(100) + 1
# Number of sequences to sample: 2**16, kept as a NumPy integer scalar.
n_seq = np.exp2(16).astype(int)

In [None]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23; joblib is a hard dependency of scikit-learn, so import it directly
# and only fall back to the vendored path on old environments.
try:
    from joblib import Parallel, delayed
except ImportError:
    from sklearn.externals.joblib import Parallel, delayed
import parallelspaper.information_theory as it
# Worker count; handed to joblib's Parallel and to the MI computation.
n_jobs = 24
# Verbosity level forwarded to the same two calls.
verbosity = 0

In [None]:
# Results table: one row is appended per Markov model in the loop below.
MI_DF = pd.DataFrame(columns=[
    'name', 'type', 'rep', 'MI', 'MI_shuff', 'distances',
    'MI_var', 'MI_shuff_var', 'n_elements', 'unique_elements', 'bout_lens',
])

# for each markov model
for name, transition_diagram in tqdm(transition_diagrams.items()):
    # Sample `n_seq` sequences from this model; fan out across joblib
    # workers when more than one job is requested, otherwise run serially.
    if n_jobs > 1:
        with Parallel(n_jobs=n_jobs, verbose=verbosity) as parallel:
            sequences = parallel(
                delayed(sample_sequence_MM)(transition_diagram)
                for _ in tqdm(range(n_seq), leave=False)
            )
    else:
        sequences = [
            sample_sequence_MM(transition_diagram)
            for _ in tqdm(range(n_seq), leave=False)
        ]

    # Join every sampled sequence into one array a single time; the same
    # array feeds the summary statistics and the MI computation below
    # (previously the concatenation was recomputed for each use).
    all_elements = np.concatenate(sequences)

    # get stats on generated sequences
    unique_elements = len(np.unique(all_elements))
    bout_lens = [len(seq) for seq in sequences]
    seq_len = len(all_elements)
    print(seq_len)

    # calculate MI on the concatenated sequence at each distance; the call is
    # unpacked as (MI, variance) pairs for the data and -- judging by the
    # names -- a shuffled baseline (TODO confirm against information_theory)
    (MI, var_MI), (MI_shuff, MI_shuff_var) = it.sequential_mutual_information(
        [all_elements], distances, n_jobs=n_jobs, verbosity=verbosity
    )

    # add to MI_DF as a new row at the next integer index
    MI_DF.loc[len(MI_DF)] = [
        name + '_markov', 'full', 0, MI, MI_shuff, distances,
        var_MI, MI_shuff_var, seq_len, unique_elements, bout_lens,
    ]

### Save MI

In [None]:
# Tag the output file with the string returned by nowstring()
# (presumably a timestamp -- TODO confirm) and pickle the results table
# under DATA_DIR.
now_string = nowstring()
pickle_name = 'MI_DF/models/markov_'+now_string+'.pickle'
pickle_path = DATA_DIR / pickle_name
MI_DF.to_pickle(str(pickle_path))