# train_hmm.ipynb

In [1]:
from pandas import DataFrame, read_csv
import pandas as pd
import numpy as np
import sys, os

from pomegranate import *


In [2]:
# Directory holding the HMM input files (trans_mat.tsv, emit_params.tsv,
# state_seq.tsv, emit_seq.tsv). Set the HMM_SCRIPTS_DIR environment variable
# to run the notebook on another machine; the original location stays as the
# fallback so existing setups behave exactly as before.
dir_path = os.environ.get('HMM_SCRIPTS_DIR', '/Users/Felix/GitHub/HMMicro/hmm_scripts')

In [3]:
# Resolve both parameter files inside dir_path up front
trans_mat_path, emit_params_path = (
    os.path.join(dir_path, fname)
    for fname in ('trans_mat.tsv', 'emit_params.tsv')
)

# 2-state transition matrix: the header row supplies the state names,
# which are reused as the row labels
trans_mat = read_csv(trans_mat_path, sep='\t', header=0, index_col=None)
state_nms = np.array(trans_mat.columns.tolist())  # state names as an array
trans_mat.index = trans_mat.columns.tolist()

# Emission parameters (mean, sd)
emit_params = read_csv(emit_params_path, sep='\t', header=0, index_col=None)

In [4]:
# Display the loaded transition matrix (rows and columns are both labelled
# with the state names).
trans_mat

Unnamed: 0,stateA,stateB
stateA,0.7,0.3
stateB,0.2,0.8


In [5]:
# Display the loaded emission parameters, one column per state.
# NOTE(review): rows presumably hold (mean, sd) in that order — confirm
# against emit_params.tsv.
emit_params

Unnamed: 0,stateA,stateB
0,5,1
1,1,4


In [6]:
# Simulated state sequence: first column of state_seq.tsv as a plain list
state_seq_path = os.path.join(dir_path, 'state_seq.tsv')
sim_state_seq = (
    read_csv(state_seq_path, sep='\t', header=None, index_col=None)
    .iloc[:, 0]
    .tolist()
)

# Simulated emission sequence: first column of emit_seq.tsv as a plain list
emit_seq_path = os.path.join(dir_path, 'emit_seq.tsv')
sim_emit_seq = (
    read_csv(emit_seq_path, sep='\t', header=None, index_col=None)
    .iloc[:, 0]
    .tolist()
)

In [7]:
# Construct HMM: both states start from the same NormalDistribution(3, 3)
# emission and take their names from the first two transition-matrix labels.
s1, s2 = (
    State(NormalDistribution(3, 3), name=str(label))
    for label in state_nms[:2]
)

model = HiddenMarkovModel()
model.add_states([s1, s2])

# Initial edges as (source, destination, probability), added in this order:
# the start state always enters s1, and every state-to-state move begins at 0.5.
for edge in (
    (model.start, s1, 1),
    (model.start, s2, 0),
    (s1, s1, 0.5),
    (s2, s2, 0.5),
    (s1, s2, 0.5),
    (s2, s1, 0.5),
):
    model.add_transition(*edge)

model.bake()

In [21]:
# Scratch check: display the emission distribution that both states are
# initialised with.
# NOTE(review): this cell's execution count (In [21]) is out of sequence with
# the surrounding cells.
NormalDistribution(3,3)

{
    "frozen" :false,
    "parameters" :[
        3.0,
        3.0
    ],
    "class" :"Distribution",
    "name" :"NormalDistribution"
}

In [8]:
# Fit HMM on the single simulated emission sequence; the value returned by
# fit() is shown as the cell result.
model.fit(
    [sim_emit_seq],
    distribution_inertia=0.1,
    edge_inertia=0.1,
)

Training improvement: 66.56770682184879
Training improvement: 0.049931481120893295
Training improvement: 0.0033355499958815926
Training improvement: 0.006016775069383584
Training improvement: 0.012680558061674674
Training improvement: 0.026730134479748813
Training improvement: 0.05626983606907743
Training improvement: 0.11810345715639414
Training improvement: 0.24632920211661258
Training improvement: 0.5071806716191531
Training improvement: 1.0184188633497797
Training improvement: 1.9561668389887927
Training improvement: 3.5089429127633593
Training improvement: 5.766960276148438
Training improvement: 8.654058900220207
Training improvement: 12.00367714497088
Training improvement: 15.622068025925728
Training improvement: 19.058039190923864
Training improvement: 21.210542955651363
Training improvement: 20.858141093573522
Training improvement: 18.337752164938593
Training improvement: 15.601367507427312
Training improvement: 13.484724073068719
Training improvement: 10.752047208324711
Traini

251.14589894717983

In [9]:
# Inspect the model's transition probabilities after fitting, as a dense array.
# NOTE(review): the array is 4x4 for a 2-state model — presumably the extra
# rows/columns are the model's start and end states; confirm against the
# pomegranate documentation.
model.dense_transition_matrix()

array([[ 0.7059275 ,  0.2940725 ,  0.        ,  0.        ],
       [ 0.17954023,  0.82045977,  0.        ,  0.        ],
       [ 1.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

In [11]:
# Print the name and emission distribution of every model state whose name
# is one of the loaded state names; all other states are skipped.
for st in model.states:
    if st.name not in state_nms:
        continue
    print(st.name, st.distribution, sep='\n')

stateA
{
    "frozen" :false,
    "parameters" :[
        5.039671116842356,
        0.940528007442133
    ],
    "class" :"Distribution",
    "name" :"NormalDistribution"
}
stateB
{
    "frozen" :false,
    "parameters" :[
        1.1501819222924201,
        4.056530371160682
    ],
    "class" :"Distribution",
    "name" :"NormalDistribution"
}


In [12]:
# Exploratory: print the built-in documentation for pomegranate's State class.
# NOTE(review): leftover lookup — consider removing before sharing the notebook.
help(State)

Help on class State in module pomegranate.base:

class State(builtins.object)
 |  Represents a state in an HMM. Holds emission distribution, but not
 |  transition distribution, because that's stored in the graph edges.
 |  
 |  Methods defined here:
 |  
 |  __init__(...)
 |      Make a new State emitting from the given distribution. If distribution
 |      is None, this state does not emit anything. A name, if specified, will
 |      be the state's name when presented in output. Name may not contain
 |      spaces or newlines, and must be unique within a model.
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __reduce__(...)
 |      helper for pickle
 |  
 |  __repr__(...)
 |      The string representation of a state is the json, so call that format.
 |  
 |  __str__(...)
 |      The string representation of a state is the json, so call that format.
 |  
 |  copy(...)
 |      Return a hard co