![HMM Probs](data/HMM_with_two_states_probs.png)

The HMM described in Problem 11.4.

## Problem 11.4
The figure shows an HMM with two states, α and β. In the α state, it is more likely to emit purines (A and G); in the β state, it is more likely to emit pyrimidines (C and T). Decode the most likely sequence of states (α/β) for the sequence GGCT. Use log-scores rather than raw probability scores.

### Without Log Scores

In [1]:
# Parameters of the two-state HMM from Problem 11.4, as plain probabilities.

# Labels of the hidden states.
states = ('α', 'β')

# emission_probability[state][symbol]: probability that `state` emits `symbol`.
# State α favors purines (A, G); state β favors pyrimidines (C, T).
emission_probability = {
    'α': {'A': 0.4, 'G': 0.4, 'T': 0.1, 'C': 0.1},
    'β': {'T': 0.3, 'C': 0.3, 'A': 0.2, 'G': 0.2},
}

# The observed nucleotide sequence to decode (GGCT), one symbol per step.
observations = ('G', 'G', 'C', 'T')

# start_probability[state]: probability that the path begins in `state`.
start_probability = {'α': 0.5, 'β': 0.5}

# transition_probability[prev][next]: probability of moving from `prev` to `next`.
transition_probability = {
    'α': {'α': 0.9, 'β': 0.1},
    'β': {'α': 0.1, 'β': 0.9},
}

In [2]:
def viterbi(observations, states, start_probability, transition_probability, emission_probability):
    """Find the most probable hidden-state path using plain probabilities.

    Scores are products of probabilities, so they shrink quickly with the
    length of the observation sequence.

    Args:
        observations: Sequence of observed symbols (must support ``len`` and
            integer indexing).
        states: Iterable of hidden-state labels. When several states share
            the best score, the one listed first wins.
        start_probability: Mapping ``state -> probability`` of starting there.
        transition_probability: Nested mapping ``[prev_state][state]``.
        emission_probability: Nested mapping ``[state][symbol]``.

    Returns:
        A tuple ``(trellis, max_probability, best_path)`` where ``trellis`` is
        a list with one ``{state: best probability}`` dict per observation,
        ``max_probability`` is the best score in the last column and
        ``best_path`` is the list of states achieving it. For an empty
        observation sequence the result is ``([], 1.0, [])`` (the empty
        product).
    """
    if len(observations) == 0:
        return [], 1.0, []

    # First column: start in a state and emit the first symbol from it.
    first_symbol = observations[0]
    trellis = [{
        state: start_probability[state] * emission_probability[state][first_symbol]
        for state in states
    }]
    # path[state] is the best state sequence found so far that ends in `state`.
    path = {state: [state] for state in states}

    for t in range(1, len(observations)):
        symbol = observations[t]
        previous = trellis[-1]
        column = {}
        new_path = {}

        for state in states:
            # Score of reaching `state` at step t from each possible predecessor.
            candidates = {
                prev_state: previous[prev_state]
                * transition_probability[prev_state][state]
                * emission_probability[state][symbol]
                for prev_state in states
            }
            # max() returns the first maximal key, i.e. the first-listed state.
            best_prev = max(candidates, key=candidates.get)
            column[state] = candidates[best_prev]
            new_path[state] = path[best_prev] + [state]

        trellis.append(column)
        path = new_path

    # Pick the best-scoring state in the last column and return its path.
    last_column = trellis[-1]
    final_state = max(states, key=last_column.__getitem__)
    return trellis, last_column[final_state], path[final_state]

# Decode the observation sequence with the probability-based Viterbi, then
# report the trellis, the best final score and the winning state path.
trellis, max_score, best_path = viterbi(
    observations,
    states,
    start_probability,
    transition_probability,
    emission_probability,
)

# The trellis holds one {state: score} column per observation.
print("Viterbi Table:")
for step, column in enumerate(trellis, start=1):
    print(f"Step {step}: {column}")

# Best score reached in the final column.
print(f"\nMax Score: {max_score}")

# State sequence that achieves the best score.
arrow = ' -> '
print(f"Best Path: {arrow.join(best_path)}")

Viterbi Table:
Step 1: {'α': 0.2, 'β': 0.1}
Step 2: {'α': 0.07200000000000001, 'β': 0.018000000000000002}
Step 3: {'α': 0.006480000000000001, 'β': 0.004860000000000001}
Step 4: {'α': 0.0005832000000000002, 'β': 0.0013122000000000001}

Max Score: 0.0013122000000000001
Best Path: β -> β -> β -> β


### With Log Scores

In [3]:
import math

# Parameters of the two-state HMM from Problem 11.4, stored as base-2
# log-scores so that path scores are sums rather than products.
states = ('α', 'β')

# emission_probability[state][symbol] = log2 P(symbol | state)
emission_probability = {
    state: {symbol: math.log2(p) for symbol, p in row.items()}
    for state, row in {
        'α': {'A': 0.4, 'G': 0.4, 'T': 0.1, 'C': 0.1},
        'β': {'T': 0.3, 'C': 0.3, 'A': 0.2, 'G': 0.2},
    }.items()
}

# The nucleotide sequence to decode.
observations = tuple('GGCT')

# start_probability[state] = log2 P(path starts in state)
start_probability = {
    state: math.log2(p)
    for state, p in {'α': 0.5, 'β': 0.5}.items()
}

# transition_probability[prev][next] = log2 P(next | prev)
transition_probability = {
    prev: {nxt: math.log2(p) for nxt, p in row.items()}
    for prev, row in {
        'α': {'α': 0.9, 'β': 0.1},
        'β': {'α': 0.1, 'β': 0.9},
    }.items()
}


def viterbi(observations, states, start_probability, transition_probability, emission_probability):
    """Find the best-scoring hidden-state path using additive log-scores.

    Every table passed in is expected to hold log-scores, which are added
    along a path instead of multiplied.

    Args:
        observations: Sequence of observed symbols (must support ``len`` and
            integer indexing).
        states: Iterable of hidden-state labels. When several states share
            the best score, the one listed first wins; labels are never
            compared with each other, so they need not be orderable.
        start_probability: Mapping ``state -> log-score`` of starting there.
        transition_probability: Nested mapping ``[prev_state][state]`` of
            log-scores.
        emission_probability: Nested mapping ``[state][symbol]`` of
            log-scores.

    Returns:
        A tuple ``(trellis, max_score, best_path)`` where ``trellis`` is a
        list with one ``{state: best log-score}`` dict per observation,
        ``max_score`` is the best score in the last column and ``best_path``
        is the list of states achieving it. For an empty observation
        sequence the result is ``([], 0.0, [])`` (the empty sum).
    """
    if len(observations) == 0:
        return [], 0.0, []

    # First column: start in a state and emit the first symbol from it.
    first_symbol = observations[0]
    trellis = [{
        state: start_probability[state] + emission_probability[state][first_symbol]
        for state in states
    }]
    # path[state] is the best state sequence found so far that ends in `state`.
    path = {state: [state] for state in states}

    for t in range(1, len(observations)):
        symbol = observations[t]
        previous = trellis[-1]
        column = {}
        new_path = {}

        for state in states:
            # Score of reaching `state` at step t from each possible predecessor.
            candidates = {
                prev_state: previous[prev_state]
                + transition_probability[prev_state][state]
                + emission_probability[state][symbol]
                for prev_state in states
            }
            # Select by score only: comparing (score, label) tuples would let
            # the label decide ties and fail for labels that cannot be ordered.
            best_prev = max(candidates, key=candidates.get)
            column[state] = candidates[best_prev]
            new_path[state] = path[best_prev] + [state]

        trellis.append(column)
        path = new_path

    # Pick the best-scoring state in the last column and return its path.
    last_column = trellis[-1]
    final_state = max(states, key=last_column.__getitem__)
    return trellis, last_column[final_state], path[final_state]


# Decode the observation sequence with the log-score Viterbi, then report
# the trellis, the best final log-score and the winning state path.
trellis, max_score, best_path = viterbi(
    observations,
    states,
    start_probability,
    transition_probability,
    emission_probability,
)

# The trellis holds one {state: log-score} column per observation.
print("Viterbi Table:")
for step, column in enumerate(trellis, start=1):
    print(f"Step {step}: {column}")

# Best log-score reached in the final column.
print(f"\nMax Score: {max_score}")

# State sequence that achieves the best log-score.
arrow = ' -> '
print(f"Best Path: {arrow.join(best_path)}")

Viterbi Table:
Step 1: {'α': -2.321928094887362, 'β': -3.321928094887362}
Step 2: {'α': -3.7958592832197744, 'β': -5.795859283219775}
Step 3: {'α': -7.269790471552186, 'β': -7.68482797083103}
Step 4: {'α': -10.743721659884597, 'β': -9.573796658442285}

Max Score: -9.573796658442285
Best Path: β -> β -> β -> β
