# Hidden Markov Model (HMM)

## Imports

In [1]:
import os

from HMM.utils import *
from HMM.hmm import HiddenMarkovModel

## Paths

In [2]:
# All input files are read from one directory; later cells write their results there too.
DATA_PATH = 'data'


def _in_data_dir(filename):
    """Return the path of `filename` inside DATA_PATH."""
    return os.path.join(DATA_PATH, filename)


# Observation sequences.
TRAIN_PATH = _in_data_dir('train.json')
TEST_PATH = _in_data_dir('test.json')

# Initial model parameters and label names.
GRAPH_PATH = _in_data_dir('adjacency_mat.json')
INITIAL_PROBS_PATH = _in_data_dir('initial_probs.json')
OBSERVATION_NAMES = _in_data_dir('observation_names.json')
EMISSION_PATH = _in_data_dir('emission_mat.json')

## Load Data

In [3]:
# Training observation sequences; passed to hmm.update_parameters further down.
train_data = read_json(TRAIN_PATH)
train_size = len(train_data)
print('Train observed dataset length: {}'.format(train_size))

Train observed dataset length: 10000


In [4]:
# Held-out observation sequences; used for evaluation and decoding further down.
test_data = read_json(TEST_PATH)
test_size = len(test_data)
print('Test observed dataset length: {}'.format(test_size))

Test observed dataset length: 1000


In [5]:
# JSON object keys are strings, so both nesting levels (outer key -> inner key)
# are passed through cast_keys_to_int: the inner rows first, then the outer mapping.
adjacency_mat = cast_keys_to_int({
    state: cast_keys_to_int(row)
    for state, row in read_json(GRAPH_PATH).items()
})

adjacency_mat

{0: {0: 0.166, 1: 0.166, 2: 0.166, 3: 0.166, 4: 0.166, 5: 0.166},
 1: {0: 0.33, 1: 0.33, 2: 0.33},
 2: {0: 0.5, 5: 0.5},
 3: {0: 0.33, 3: 0.33, 4: 0.33},
 4: {0: 0.5, 5: 0.5},
 5: {0: 0.5, 5: 0.5}}

In [6]:
# JSON object keys are strings, so both nesting levels (outer key -> inner key)
# are passed through cast_keys_to_int: the inner rows first, then the outer mapping.
# NOTE(review): in the recorded output the rows for keys 2, 4 and 5 add up to 1.18
# rather than 1 - worth checking emission_mat.json.
emission_mat = cast_keys_to_int({
    state: cast_keys_to_int(row)
    for state, row in read_json(EMISSION_PATH).items()
})

emission_mat

{0: {0: 0.166, 1: 0.166, 2: 0.166, 3: 0.166, 4: 0.166, 5: 0.166},
 1: {0: 0.1, 1: 0.7, 2: 0.1, 3: 0.03, 4: 0.03, 5: 0.03},
 2: {0: 0.2, 5: 0.19, 1: 0.03, 2: 0.7, 3: 0.03, 4: 0.03},
 3: {0: 0.1, 3: 0.7, 4: 0.1, 2: 0.03, 1: 0.03, 5: 0.03},
 4: {0: 0.19, 5: 0.2, 1: 0.03, 2: 0.03, 3: 0.03, 4: 0.7},
 5: {0: 0.19, 5: 0.7, 1: 0.03, 2: 0.03, 3: 0.03, 4: 0.2}}

In [7]:
# JSON object keys are strings; convert them to the integer ids used elsewhere.
initial_probs_json = read_json(INITIAL_PROBS_PATH)
initial_probs = cast_keys_to_int(initial_probs_json)
initial_probs

{0: 0.166, 1: 0.166, 2: 0.166, 3: 0.166, 4: 0.166, 5: 0.166}

In [8]:
# Maps each integer observation id to its human-readable label.
observation_names_json = read_json(OBSERVATION_NAMES)
observation_names = cast_keys_to_int(observation_names_json)
observation_names

{0: 'normal',
 1: 'happy',
 2: 'very happy',
 3: 'sad',
 4: 'very sad',
 5: 'not available'}

## Hidden Markov Model

In [9]:
# Build the model from the four inputs loaded above: the transition table
# (adjacency_mat), the initial distribution, the observation labels, and the
# emission table. Every later cell in the notebook uses this `hmm` object.
hmm = HiddenMarkovModel(
    adjacency_mat=adjacency_mat,
    initial_probs=initial_probs,
    observation_names=observation_names,
    emission_probs=emission_mat
)

### a) Parameter estimation (Baum-Welch)

### Transition matrix before Baum-Welch

In [10]:
# Display the model's transition parameters before any training.
# NOTE(review): values such as 0.16600000000000004 (0.166 in the input file) suggest
# the model keeps its parameters in log space and calculate_probability exponentiates
# them - confirm in HMM.utils.
calculate_probability(hmm.transition_probs)

{0: {0: 0.16600000000000004,
  1: 0.16600000000000004,
  2: 0.16600000000000004,
  3: 0.16600000000000004,
  4: 0.16600000000000004,
  5: 0.16600000000000004},
 1: {0: 0.33, 1: 0.33, 2: 0.33},
 2: {0: 0.5, 5: 0.5},
 3: {0: 0.33, 3: 0.33, 4: 0.33},
 4: {0: 0.5, 5: 0.5},
 5: {0: 0.5, 5: 0.5}}

### Emission matrix before Baum-Welch

In [11]:
# Display the model's emission parameters before any training.
# NOTE(review): presumably the same conversion as for the transition table
# (stored values -> probabilities) - confirm in HMM.utils.
calculate_probability(hmm.emission_probs)

{0: {0: 0.16600000000000004,
  1: 0.16600000000000004,
  2: 0.16600000000000004,
  3: 0.16600000000000004,
  4: 0.16600000000000004,
  5: 0.16600000000000004},
 1: {0: 0.10000000000000002,
  1: 0.7,
  2: 0.10000000000000002,
  3: 0.03000000000000001,
  4: 0.03000000000000001,
  5: 0.03000000000000001},
 2: {0: 0.19999999999999998,
  5: 0.19,
  1: 0.03000000000000001,
  2: 0.7,
  3: 0.03000000000000001,
  4: 0.03000000000000001},
 3: {0: 0.10000000000000002,
  3: 0.7,
  4: 0.10000000000000002,
  2: 0.03000000000000001,
  1: 0.03000000000000001,
  5: 0.03000000000000001},
 4: {0: 0.19,
  5: 0.19999999999999998,
  1: 0.03000000000000001,
  2: 0.03000000000000001,
  3: 0.03000000000000001,
  4: 0.7},
 5: {0: 0.19,
  5: 0.7,
  1: 0.03000000000000001,
  2: 0.03000000000000001,
  3: 0.03000000000000001,
  4: 0.19999999999999998}}

### Baum-Welch

In [12]:
# Re-estimate the model parameters from the training sequences (one iteration).
# The recorded run needed about 30 minutes for the 10,000 training sequences,
# so a full Restart & Run All will stall at this cell for a long time.
hmm.update_parameters(train_data, iterations=1, verbose=False)

100%|██████████| 10000/10000 [30:11<00:00,  5.52it/s]


### Transition matrix after Baum-Welch

In [13]:
# Display the transition parameters after the update_parameters call above.
calculate_probability(hmm.transition_probs)

{0: {0: 0.4830807200952925,
  1: 0.14402245424790971,
  2: 0.006864870718720916,
  3: 0.2654339357374656,
  4: 1.5355303677274023e-05,
  5: 0.10058266389693576},
 1: {0: 0.820745655264335, 1: 0.1707187671043396, 2: 0.008535577631323793},
 2: {0: 0.8201889699494384, 5: 0.17981103005056448},
 3: {0: 0.8207192826501282, 3: 0.17925636302853948, 4: 2.4354321334690404e-05},
 4: {0: 0.8193774370034099, 5: 0.18062256299658708},
 5: {0: 0.820719099614597, 5: 0.17928090038540268}}

In [14]:
# Save the learned transition table next to the input data.
learned_transitions = calculate_probability(hmm.transition_probs)
transitions_out_path = os.path.join(DATA_PATH, 'baum_welch_transitions.json')
write_json(learned_transitions, transitions_out_path)

### Emission matrix after Baum-Welch

In [15]:
# Display the emission parameters after the update_parameters call above.
# NOTE(review): in the recorded output every row is close to uniform (about 0.16-0.17
# per symbol), unlike the peaked initial rows - worth checking that the emission
# update behaves as intended.
calculate_probability(hmm.emission_probs)

{0: {0: 0.16348548765776935,
  1: 0.17494855707874685,
  2: 0.17472086746153845,
  3: 0.17494855707874685,
  4: 0.16348548765776935,
  5: 0.17494855707874685},
 1: {0: 0.15935190659639234,
  1: 0.16906786804399718,
  2: 0.1739052523421918,
  3: 0.16906786804399718,
  4: 0.15935190659639234,
  5: 0.16906786804399718},
 2: {0: 0.15929694221694193,
  5: 0.16900671888139726,
  1: 0.16900671888139726,
  2: 0.1738385688610511,
  3: 0.16900671888139726,
  4: 0.15929694221694193},
 3: {0: 0.159349299563928,
  3: 0.16906496684335556,
  4: 0.159349299563928,
  2: 0.17390208883293012,
  1: 0.16906496684335556,
  5: 0.16906496684335556},
 4: {0: 0.15921689528866453,
  5: 0.1689176655330661,
  1: 0.1689176655330661,
  2: 0.17374145510831593,
  3: 0.1689176655330661,
  4: 0.15921689528866453},
 5: {0: 0.15934928144792346,
  5: 0.16906494667468233,
  1: 0.16906494667468233,
  2: 0.1739020668434554,
  3: 0.16906494667468233,
  4: 0.15934928144792346}}

In [16]:
# Save the learned emission table next to the input data.
learned_emissions = calculate_probability(hmm.emission_probs)
emissions_out_path = os.path.join(DATA_PATH, 'baum_welch_emissions.json')
write_json(learned_emissions, emissions_out_path)

## Forward-Backward probabilities

In [17]:
# Score the test sequences with method='forward' (presumably one value per sequence).
# NOTE(review): the recorded results show 1.791759469228055 (about ln 6) for every
# sequence, while method='backward' yields values around -170. The output file name
# calls these log-probabilities, and a log-probability cannot be positive, so the
# 'forward' path of hmm.evaluate likely needs investigating.
forward_probs = hmm.evaluate(observed_seqs=test_data, method='forward')

In [18]:
# Score the same test sequences with method='backward' (presumably one value per sequence).
backward_probs = hmm.evaluate(observed_seqs=test_data, method='backward')

In [19]:
# Pair the forward and backward score of each sequence, keyed by its index in the
# two result lists, and echo every pair as it is stored.
forward_backward_probs = {}
for seq_idx, scores in enumerate(zip(forward_probs, backward_probs)):
    forward_score, backward_score = scores
    forward_backward_probs[seq_idx] = {
        'forward_probability': forward_score,
        'backward_probability': backward_score,
    }
    print(forward_score, backward_score)

1.791759469228055 -170.37220653366208
1.791759469228055 -170.20188024520596
1.791759469228055 -170.39226238440753
1.791759469228055 -170.18121688528498
1.791759469228055 -170.90356805261624
1.791759469228055 -170.32706126149068
1.791759469228055 -170.5528271447152
1.791759469228055 -170.27947921980797
1.791759469228055 -170.38728513900801
1.791759469228055 -170.33655650798394
1.791759469228055 -170.53047286148237
1.791759469228055 -170.17788502092301
1.791759469228055 -170.63203141913613
1.791759469228055 -170.08142606201372
1.791759469228055 -169.72789518832985
1.791759469228055 -170.23162185877115
1.791759469228055 -170.97474993848522
1.791759469228055 -170.47469650499406
1.791759469228055 -170.19783244296454
1.791759469228055 -170.4425228616427
1.791759469228055 -170.06776511656926
1.791759469228055 -170.6529724929367
1.791759469228055 -170.43595820900921
1.791759469228055 -170.3336128385803
1.791759469228055 -170.31680246754877
1.791759469228055 -169.88149203091808
1.79175946922805

In [20]:
# Save the per-sequence forward/backward scores next to the input data.
scores_out_path = os.path.join(DATA_PATH, 'forward_backward_log_probs.json')
write_json(forward_backward_probs, scores_out_path)

## Viterbi

In [21]:
# Decode each test sequence and keep the returned path under the sequence's index.
# hmm.decode is called once per sequence and the result reused for both storing and
# printing; the earlier version called it twice, doubling the decoding work.
# NOTE(review): the recorded paths shown below are all state 0 - worth confirming
# this is expected given the learned parameters.
most_probable_paths = {}

for i, test_seq in enumerate(test_data):
    best_path = hmm.decode(test_seq)
    most_probable_paths[i] = best_path
    print(best_path)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [22]:
# Save the decoded paths next to the input data.
paths_out_path = os.path.join(DATA_PATH, 'viterbi_paths.json')
write_json(most_probable_paths, paths_out_path)