# train_hmm.ipynb

In [2]:
from pandas import DataFrame, read_csv
import pandas as pd
import numpy as np
import sys, os

from pomegranate import *


In [3]:
# Directory that holds the input TSV files read by the cells below
# (trans_mat.tsv, emit_params.tsv, state_seq.tsv, emit_seq.tsv).
# Defaults to the original hard-coded location; set the HMMICRO_DIR
# environment variable to point the notebook at a different checkout.
dir_path = os.environ.get('HMMICRO_DIR', '/Users/Felix/GitHub/HMMicro/hmm_scripts')

In [4]:
# Load the 2-state transition matrix and label its rows with the column (state) names
trans_mat_path = os.path.join(dir_path, 'trans_mat.tsv')
trans_mat = pd.read_csv(trans_mat_path, sep='\t', header=0, index_col=None)
state_nms = trans_mat.columns.tolist()
trans_mat.index = state_nms
state_nms = np.array(state_nms)  # from here on the state names are kept as a NumPy array

# Load the emission parameters (mean, sd)
emit_params_path = os.path.join(dir_path, 'emit_params.tsv')
emit_params = pd.read_csv(emit_params_path, sep='\t', header=0, index_col=None)

In [5]:
# Display the loaded transition matrix; its row index was set to the
# column (state) names when it was loaded.
trans_mat

Unnamed: 0,stateA,stateB
stateA,0.7,0.3
stateB,0.2,0.8


In [6]:
# Display the loaded emission parameters, one column per state.
# NOTE(review): rows 0 and 1 presumably hold the mean and sd respectively — confirm against emit_params.tsv.
emit_params

Unnamed: 0,stateA,stateB
0,5,1
1,1,4


In [42]:
# Simulated state sequence: first column of a headerless TSV, as a plain list
state_seq_path = os.path.join(dir_path, 'state_seq.tsv')
sim_state_seq = (
    pd.read_csv(state_seq_path, sep='\t', header=None, index_col=None)
    .iloc[:, 0]
    .tolist()
)

# Simulated emission sequence: first column of a headerless TSV, as a plain list
emit_seq_path = os.path.join(dir_path, 'emit_seq.tsv')
sim_emit_seq = (
    pd.read_csv(emit_seq_path, sep='\t', header=None, index_col=None)
    .iloc[:, 0]
    .tolist()
)

In [43]:
# Build the untrained two-state HMM
model = HiddenMarkovModel()

# Both states start from the same initial emission distribution; each gets
# its own NormalDistribution object and is named after a loaded state name.
s1 = State(NormalDistribution(3, 3), name=str(state_nms[0]))
s2 = State(NormalDistribution(3, 3), name=str(state_nms[1]))
model.add_states([s1, s2])

# Initial edges as (from, to, probability), added in this exact order.
# NOTE(review): the start edges (1 vs 0) are the only asymmetry between the
# two states before fitting — presumably intentional; confirm before changing.
for _src, _dst, _prob in (
    (model.start, s1, 1),
    (model.start, s2, 0),
    (s1, s1, 0.5),
    (s2, s2, 0.5),
    (s1, s2, 0.5),
    (s2, s1, 0.5),
):
    model.add_transition(_src, _dst, _prob)

model.bake()

In [44]:
# Fit HMM
# The simulated emission sequence is wrapped in a list, so training sees
# exactly one sequence.
# NOTE(review): per pomegranate's documentation the inertia arguments control
# how much of the previous distribution/edge parameters is retained at each
# update — confirm the semantics for the installed pomegranate version.
model.fit([sim_emit_seq], distribution_inertia=0.1, edge_inertia=0.1)

Training improvement: 66.5677068218
Training improvement: 0.0499314811209
Training improvement: 0.00333554999588
Training improvement: 0.00601677506938
Training improvement: 0.0126805580617
Training improvement: 0.0267301344797
Training improvement: 0.0562698360691
Training improvement: 0.118103457156
Training improvement: 0.246329202117
Training improvement: 0.507180671619
Training improvement: 1.01841886335
Training improvement: 1.95616683899
Training improvement: 3.50894291276
Training improvement: 5.76696027615
Training improvement: 8.65405890022
Training improvement: 12.003677145
Training improvement: 15.6220680259
Training improvement: 19.0580391909
Training improvement: 21.2105429557
Training improvement: 20.8581410935
Training improvement: 18.3377521651
Training improvement: 15.6013675075
Training improvement: 13.484724073
Training improvement: 10.7520472082
Training improvement: 7.22252085628
Training improvement: 4.18157576116
Training improvement: 2.20656470701
Training impr

251.14589894714481

In [46]:
# Inspect the fitted transition probabilities as a dense array.
# NOTE(review): the recorded 4x4 output suggests the two named states come
# first, followed by the model's start and end states — confirm the ordering.
model.dense_transition_matrix()

array([[ 0.7059275 ,  0.2940725 ,  0.        ,  0.        ],
       [ 0.17954023,  0.82045977,  0.        ,  0.        ],
       [ 1.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

In [48]:
# Print the name and fitted emission distribution of every model state whose
# name is one of the loaded state names; states with any other name are skipped.
# print(...) with a single argument produces the same output on Python 2 and
# is valid on Python 3, unlike the print statement.
for st in model.states:
    if st.name in state_nms:
        print(st.name)
        print(st.distribution)

stateA
{
    "frozen" :false,
    "class" :"Distribution",
    "parameters" :[
        5.03967111684152,
        0.940528007441866
    ],
    "name" :"NormalDistribution"
}
stateB
{
    "frozen" :false,
    "class" :"Distribution",
    "parameters" :[
        1.150181922290824,
        4.056530371158894
    ],
    "name" :"NormalDistribution"
}
