-
Notifications
You must be signed in to change notification settings - Fork 0
/
forward_backward_pandas_adv.py
137 lines (123 loc) · 6.67 KB
/
forward_backward_pandas_adv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from pandas import DataFrame
from itertools import cycle
# Initialize tuples of conditions. Observations are the input
observations = ("Wearing Trenchcoat & Fedora", "Eating Pizza",
"Eating Doritos", "Browsing Reddit", "Playing WoW", "Smelly",
"Vaping", "Listening to Power Metal", "Brandishing Katana",
"Wearing Trenchcoat & Fedora", "Browsing 4chan",
"Playing Magic the Gathering", "Drinking Mountain Dew")
hidden_states = ("Depressed", "Confident", "Tired", "Hungry", "Thirsty",
"Angry", "Gamer", "Brony", "Libertarian", "Atheist", "End")
emit_states = ("Eating Pizza", "Browsing Reddit", "Drinking Mountain Dew",
"Eating Doritos", "Wearing Trenchcoat & Fedora",
"Browsing 4chan", "Playing Magic the Gathering", "Playing WoW",
"Brandishing Katana", "Watching My Little Pony",
"Listening to Power Metal", "Vaping", "Smelly")
# Color pattern for final output
colors = cycle(["red", "orange", "green", "blue", "purple"]) # Rainbow effect
colors_dict = {} # Each observation gets a color
# Probability of transition from state to state, remaining static, etc.
trans_prob_df = DataFrame(
data={
"Depressed": (0.10, 0.10, 0.10, 0.15, 0.05, 0.15, 0.05, 0.05, 0.05,
0.05),
"Confident": (0.05, 0.05, 0.05, 0.10, 0.10, 0.14, 0.05, 0.15, 0.10,
0.10),
"Tired": (0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05),
"Hungry": (0.14, 0.05, 0.10, 0.14, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05),
"Thirsty": (0.10, 0.14, 0.15, 0.10, 0.10, 0.05, 0.05, 0.10, 0.05,
0.05),
"Angry": (0.05, 0.10, 0.14, 0.05, 0.15, 0.15, 0.15, 0.05, 0.15, 0.15),
"Gamer": (0.10, 0.10, 0.10, 0.10, 0.14, 0.10, 0.19, 0.15, 0.05, 0.05),
"Brony": (0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 0.15, 0.29, 0.05, 0.05),
"Libertarian": (0.10, 0.15, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.29,
0.15),
"Atheist": (0.10, 0.15, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.15,
0.29),
"End": (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01)
},
columns=hidden_states,
index=hidden_states[:-1]) # All should vertically sum to 1
# Probability of observation given the hidden state
emit_prob_df = DataFrame(
data={
"Eating Pizza": (0.10, 0.05, 0.05, 0.20, 0.05, 0.05, 0.05, 0.05, 0.05,
0.05),
"Browsing Reddit": (0.10, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05,
0.15, 0.15),
"Drinking Mountain Dew": (0.10, 0.10, 0.15, 0.10, 0.20, 0.05, 0.15,
0.05, 0.05, 0.05),
"Eating Doritos": (0.10, 0.05, 0.05, 0.15, 0.10, 0.05, 0.10, 0.05,
0.05, 0.05),
"Wearing Trenchcoat & Fedora": (0.05, 0.20, 0.05, 0.05, 0.05, 0.05,
0.05, 0.10, 0.10, 0.10),
"Browsing 4chan": (0.10, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.10,
0.05, 0.05),
"Playing Magic the Gathering": (0.05, 0.05, 0.05, 0.05, 0.05, 0.10,
0.15, 0.05, 0.05, 0.05),
"Playing WoW": (0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05,
0.05),
"Brandishing Katana": (0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05,
0.15, 0.15),
"Watching My Little Pony": (0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05,
0.25, 0.05, 0.10),
"Listening to Power Metal": (0.05, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05,
0.05, 0.05, 0.10),
"Vaping": (0.05, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.05),
"Smelly": (0.15, 0.10, 0.15, 0.10, 0.10, 0.15, 0.05, 0.10, 0.10, 0.05)
},
columns=emit_states,
index=hidden_states[:-1]) # All should vertically sum to 1
# Initialize starting probabilities
start_probs = DataFrame(
data={
"(0) {}".format(observations[0]): (0.10, 0.10, 0.10, 0.15, 0.10, 0.10,
0.15, 0.10, 0.05, 0.05)
},
index=hidden_states[:-1])
# Initialize forward dataframe
forward_df = start_probs.multiply(emit_prob_df[observations[0]], axis="index")
colors_dict[forward_df.columns[0]] = next(colors) # For final colored output
# Start forward part - 1st pass
for i, observation in enumerate(observations[1:]): # Same as viterbi
previous_forward_sum = trans_prob_df.iloc[:, :-1].multiply(
forward_df.iloc[:, i], axis="index").sum()
forward_df["({}) {}".format(
i + 1, # Similar to Viterbi but sum, line below is identical
observation)] = previous_forward_sum * emit_prob_df.loc[:, observation]
colors_dict[forward_df.columns[i + 1]] = next(colors) # Update colors
# Calculate forward probability
# Multiply last columns and sum the result
forward_prob = (forward_df.iloc[:, -1] * trans_prob_df.iloc[:, -1]).sum()
# Initialize backward dataframe
backward_df = DataFrame(
data={ # The last column of trans_prob_df
"({}) {}".format(len(observations) - 1, observations[-1]):
trans_prob_df.iloc[:, -1]
})
# Start backward part - 2nd pass
for i, observation in zip( # Same as viterbi
range(len(observations) - 2, -1, -1), reversed(observations[1:])):
backward_df.insert( # Countdown to 2nd observation
0, # The left-most column updates itself by multiplying
# The entire trans_prob_df and emit_prob_df that matches observation
"({}) {}".format(i, observations[i]),
(backward_df.iloc[:, 0] * trans_prob_df.iloc[:, :-1] *
emit_prob_df.loc[:, observation]).sum(axis=1)) # Horizontal sum
# Calculate backward probability: Should == forward probability
# Now use beginning values, opposite of forward
backward_prob = (backward_df.iloc[:, 0] * start_probs.iloc[:, 0] *
emit_prob_df.loc[:, observations[0]]).sum()
# Now merge the two - vectorized multiplication of all and divide by either
# forward or backward probability
posterior_df = (forward_df * backward_df) / forward_prob
# Stylized output for reading top-down
posterior_df_style = posterior_df.style.apply( # Color the columns
lambda x: ["background-color: {}".format(colors_dict[x.name])] * len(x))
# Print final results - table should vertically sum to 1
print("The observations:", ", ".join(observations))
print("The most likely non-sequential hidden states are:")
print(posterior_df.idxmax())
print("The summed forward & backward probabilities: ", forward_prob, ",",
backward_prob)
posterior_df_style.highlight_max(color="black") # Highlight maximums