# Posterior decoding of simulated data

## Load packages and define model

In [10]:
import sys
from trails.optimizer import trans_emiss_calc
from trails.cutpoints import cutpoints_ABC
import numpy as np
from trails.optimizer import forward_loglik, post_prob_wrapper, viterbi
import pandas as pd
from tqdm import tqdm
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [2]:
seed = 1

In [3]:
# Read the output from TRAILS
df = pd.read_csv('../../chr1/results/chr1_Nelder-Mead_third_run.csv')
# Find iteration with largest likelihood
df = df[df['loglik'] == df['loglik'].max()]
# Convert parameter estimates into dictionary
dct = dict(zip(list(df.columns), df.iloc[0].to_list()))
dct

{'n_eval': 136.0,
 't_A': 0.0027561296181322,
 't_B': 0.0029189333873319,
 't_C': 0.0058515616197657,
 't_2': 0.0024428164101813,
 't_upper': 0.0040645452754154,
 'N_AB': 0.0041849972728295,
 'N_ABC': 0.0025322380849019,
 'r': 0.9527482356906288,
 'loglik': -76112897.5348381,
 'time': 413789.77893161774}

In [13]:
n_int_AB = 1
n_int_ABC = 5

n_sites = 50_000

cut_ABC = cutpoints_ABC(n_int_ABC, 1)*dct["N_ABC"]
t_out = (((dct["t_A"]+dct["t_B"])/2+dct["t_2"])+dct["t_C"])/2 + cut_ABC[n_int_ABC-1]*dct["N_ABC"] + dct["t_upper"] + 2*dct["N_ABC"]

t_out

0.01470529629717112

In [5]:
2/3*np.exp(-dct["t_2"]/dct["N_AB"])

0.37188406988327083

In [6]:
transitions, emissions, starting, hidden_states, observed_states = trans_emiss_calc(
        dct["t_A"], dct["t_B"], dct["t_C"], dct["t_2"], dct["t_upper"], t_out, 
        dct["N_AB"], dct["N_ABC"], dct["r"], n_int_AB, n_int_ABC)

2023-11-03 10:54:56,797	INFO worker.py:1625 -- Started a local Ray instance.


In [14]:
np.random.seed(seed)
idx_lst = list(range(0, len(starting)))
idx = np.random.choice(idx_lst, size = 1, p = starting)[0]
# hid = np.zeros(n_sites, dtype = np.int16)
obs = np.zeros(n_sites, dtype = np.int16)
# hid[0] = idx
obs[0] = np.random.choice(list(range(256)), size = 1, p = emissions[idx])[0]
for i in tqdm(range(1, n_sites)):
    idx = np.random.choice(idx_lst, size = 1, p = transitions[idx])[0]
    # hid[i] = idx
    obs[i] = np.random.choice(list(range(256)), size = 1, p = emissions[idx])[0]


100%|██████████| 49999/49999 [00:05<00:00, 9756.79it/s] 


In [15]:
obs

array([170,  85,   0, ..., 255, 255, 170], dtype=int16)