In [None]:
# imports
import numpy as np
import matplotlib.pyplot as plt
from mdp_module import MDP, run_simulation


In [None]:
# Parameters for example 1: a 3-state, 2-action MDP.
num_states, num_actions = 3, 2
T = 100  # presumably the trajectory horizon — confirm against mdp_module.MDP
s0 = 0   # presumably the initial state index — confirm against mdp_module.MDP

# Transition tensor of shape (num_states, num_actions, num_states); every
# P[s, a, :] row sums to 1. Presumably P[s, a, s'] = Pr(s' | s, a) — confirm.
P = np.array([
    # state 0
    [[0.7, 0.3, 0.0],   # action 0
     [0.0, 0.4, 0.6]],  # action 1
    # state 1
    [[0.8, 0.0, 0.2],   # action 0
     [0.0, 0.5, 0.5]],  # action 1
    # state 2: all mass stays on state 2 under either action
    [[0.0, 0.0, 1.0],
     [0.0, 0.0, 1.0]],
])

# Reward table of shape (num_states, num_actions); only state 0 has
# non-zero entries.
R = np.zeros((num_states, num_actions))
R[0] = [5.0, 1.0]

# Policy tables of shape (num_states, num_actions); each row sums to 1.
# NOTE(review): names suggest pi_b = behavior policy and pi_e = evaluation
# (target) policy — confirm against mdp_module.
pi_b = np.array([
    [0.3, 0.7],
    [0.8, 0.2],
    [0.1, 0.9],
])
pi_e = np.array([
    [0.5, 0.5],
    [0.0, 1.0],
    [0.0, 1.0],
])

# Bundle the parameters defined in this cell into the MDP object that the
# simulation cell passes to run_simulation. Arguments are positional, so their
# order must match MDP's constructor signature (not visible here — verify
# against mdp_module).
mdp = MDP(num_states, num_actions, T, s0, P, R, pi_b, pi_e)


In [None]:
# Simulate and get data
N_TRAJECTORIES = 1000
SEED = 42  # fixed seed; assumes run_simulation uses it for reproducible draws — confirm

# run_simulation returns two sequences that the plotting cells consume:
# eif_vals (histogram) and term_vals (running average).
# NOTE(review): presumably one entry per simulated trajectory — confirm.
eif_vals, term_vals = run_simulation(
    mdp,
    n_trajectories=N_TRAJECTORIES,
    seed=SEED,
)


In [None]:
# Histogram of EIF values, drawn on the current Axes (the same Axes the
# pyplot wrappers would target).
HIST_BINS = 30

ax = plt.gca()
ax.hist(eif_vals, bins=HIST_BINS)
ax.set(
    title="Distribution of EIF Values",
    xlabel="EIF",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Running average of term_vals: entry k is the mean of the first k+1 values.
# NOTE(review): the title below says "EIF" but the series plotted is
# term_vals, not eif_vals — confirm this is the intended series.
n_seen = np.arange(1, len(term_vals) + 1)
running_avg = np.cumsum(term_vals) / n_seen

ax = plt.gca()
ax.plot(running_avg)
# Dashed reference line at the mean over all values, i.e. where the running
# average ends up.
ax.axhline(y=np.mean(term_vals), linestyle='--', label='Final estimate')
ax.set(
    title="Running Average of EIF Estimate",
    xlabel="Trajectory #",
    ylabel="Estimate of ρ^πᵉ",
)
ax.legend()
plt.show()
