<a href="https://colab.research.google.com/github/supsi-dacd-isaac/TeachDecisionMakingUncertainty/blob/main/L10/MarkovChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
import matplotlib.animation as animation

class ThreeStateMDP:
    """Three-state chain (states 0, 1, 2) driven by noisy forward/backward actions.

    A recognised action moves the state one step in the requested direction
    with probability ``move_prob`` (clamped at the first and last state).
    Otherwise the next state is drawn uniformly from all states.
    """

    def __init__(self, move_prob: float = 0.8):
        """Create the chain in state 1.

        Args:
            move_prob: Probability that an action has its intended effect.
                Defaults to 0.8.

        Raises:
            ValueError: If ``move_prob`` is not within ``[0, 1]``.
        """
        if not 0.0 <= move_prob <= 1.0:
            raise ValueError(f"move_prob must be in [0, 1], got {move_prob!r}")
        self.num_states = 3
        self.state = 1  # Initial state
        self.actions = ["forward", "backward"]
        self.move_prob = move_prob

    def step(self, action: str) -> int:
        """Apply ``action``, update ``self.state`` and return the new state.

        Args:
            action: ``"forward"`` or ``"backward"``. Any other value leaves
                the state unchanged and draws no random numbers.

        Returns:
            The state after the transition.
        """
        if action == "forward":
            intended = min(self.state + 1, self.num_states - 1)
        elif action == "backward":
            intended = max(self.state - 1, 0)
        else:
            return self.state

        # One uniform draw decides whether the action succeeds; only on
        # failure is a second draw used to pick a uniformly random state.
        if random.random() < self.move_prob:
            self.state = intended
        else:
            self.state = random.choice(range(self.num_states))
        return self.state

# Simulation: run one long trajectory and record every state that is reached.
num_steps_per_episode = 10000

mdp = ThreeStateMDP()

# Row t is an indicator of the state reached at step t (reused by the animation).
state_distribution = np.zeros(shape=(num_steps_per_episode, 3))

for step in range(num_steps_per_episode):
    # In state 2 the action is always 'forward'; elsewhere it is picked
    # uniformly at random from the available actions.
    if mdp.state == 2:
        action = 'forward'
    else:
        action = random.choice(mdp.actions)
    next_state = mdp.step(action)
    state_distribution[step, next_state] += 1

# Running visit counts: row t holds how many times each state has been reached
# up to and including step t (not yet normalised).
average_state_distribution = state_distribution.cumsum(axis=0)




In [3]:

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
from IPython.display import HTML

# Assumes state_distribution, average_state_distribution and num_steps_per_episode are defined by the previous cell
# Figure: one fixed-size square per state; the animation only changes each
# square's opacity, its label, and the position of the marker above them.
fig, ax = plt.subplots(figsize=(5,2),layout='tight')
x = np.arange(3)
rects = ax.bar(x, [0.8]*3, width=0.8, color='red', alpha=0.0)  # fixed height, only alpha changes
# Marker starts empty and is positioned per frame via set_data.
triangle, = ax.plot([], [], marker='v', markersize=15, color='black', linestyle='None')
texts = [ax.text(pos, 0.4, '', ha='center', va='center', fontsize=10, color='white') for pos in x]
# Setup axis
# Fix the view explicitly. The marker is placed at y = 1.1 later through
# set_data, which does not rescale the axes, and limits autoscaled from the
# bars alone end just above y = 0.8, so the marker row must already be in view.
ax.set_xlim(-0.5, 2.5)
ax.set_ylim(0.0, 1.3)
ax.set_aspect('equal')
ax.axis('off')

def animate(i):
    """Draw animation frame ``i``.

    Sets each square's opacity and label to the fraction of visits its state
    has received up to step ``i``, moves the marker above the state reached
    at that step, and updates the title.

    Args:
        i: Frame index; values past the last simulated step are clamped to it.

    Returns:
        List with the bar patches followed by the marker line.
    """
    # Clamp so frame indices beyond the simulated horizon reuse the last step.
    t = min(i, num_steps_per_episode - 1)

    # Normalise the running visit counts into fractions (all zeros if empty).
    counts = average_state_distribution[t]
    total = np.sum(counts)
    distribution = counts / total if total > 0 else np.zeros(3)

    # Update alpha and label for each square
    for rect, alpha, text in zip(rects, distribution, texts):
        rect.set_alpha(alpha)
        text.set_text(f'{alpha:.2f}')

    # Row t of state_distribution marks the single state reached at step t,
    # so argmax yields its index as a plain scalar for every t.
    last_visited_state = int(np.argmax(state_distribution[t]))

    # Update triangle position (set_data expects sequences)
    triangle.set_data([last_visited_state], [1.1])

    ax.set_title(f"State Distribution at Time Step: {t}")

    return [*rects, triangle]

# Build the animation: 200 frames with a 200 ms interval, no blitting.
ani = animation.FuncAnimation(
    fig=fig,
    func=animate,
    frames=200,
    interval=200,
    blit=False,
)

# Close the figure so that no static image is shown next to the player.
plt.close(fig)

# Last expression of the cell: display the animation as an embedded JS/HTML player.
HTML(ani.to_jshtml())