<a href="https://colab.research.google.com/github/mcnica89/Markov-Chains-RL-W24/blob/main/Drunkards_Walk_Markov_Chain_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

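A drunkard walks on the integer positions $x = 0, 1, \dots, n$. He falls off a cliff at $x=0$ and reaches home at $x=n$; once he is in either of these two states he stays there. At each step from any other position he moves up by 1 or down by 1, each with 50% probability.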

In [29]:
import numpy as np

# Show 4 decimal places and suppress scientific notation so the printed
# probability vectors and matrices stay readable.
np.set_printoptions(suppress=True, precision=4)

In [30]:
n = 10  # states are 0..n: the cliff is state 0 and home is state n

# TransitionMatrix[i, j] = probability of moving from state i to state j in one step.
TransitionMatrix = np.zeros((n+1, n+1), dtype=float)

# The two end states are absorbing: starting at 0 or at n, you stay there.
TransitionMatrix[[0, n], [0, n]] = 1

# Every intermediate state 1..n-1 moves up by 1 or down by 1 with 50% chance each.
interior = np.arange(1, n)
TransitionMatrix[interior, interior + 1] = 0.5
TransitionMatrix[interior, interior - 1] = 0.5



# Limiting Probabilities - Power method

In [31]:
# Initial distribution: all of the probability mass sits on state x_init.
x_init = 2
p_init = np.eye(n+1, dtype=float)[x_init]

# Approximate the limiting distribution by pushing the initial row vector
# through a large (but finite) number of steps: p_init @ M^big_t.
big_t = 100
M_to_the_big_t = np.linalg.matrix_power(TransitionMatrix, big_t)
p_final_approx = p_init @ M_to_the_big_t
print(f"{p_final_approx=}")

p_final_approx=array([0.7976, 0.    , 0.0009, 0.    , 0.0015, 0.    , 0.0015, 0.    ,
       0.0009, 0.    , 0.1976])


# Limiting Probabilities - Equation Method

In [56]:
# Idea: with M = V diag(lambda) V^{-1} we get M^t = V diag(lambda^t) V^{-1}, so as
# t -> infinity every eigenvalue that is strictly < 1 in absolute value vanishes
# and only the eigenvalues equal to 1 remain.

eigenvalues, eigenvectors = np.linalg.eig(TransitionMatrix)
# Invert the eigenvector matrix once; both products below reuse it.
eigenvectors_inv = np.linalg.inv(eigenvectors)

# Sanity check: the eigen decomposition should reproduce the transition matrix.
TransitionMatrix_eig = eigenvectors @ np.diag(eigenvalues) @ eigenvectors_inv
print(f"{np.allclose(TransitionMatrix_eig,TransitionMatrix)=}")


# Replace every eigenvalue by its t -> infinity limit: eigenvalues (numerically)
# equal to 1 stay 1, everything else is set to 0.
# i.e. rounded_eigs[k] = 1.0 if eigenvalues[k] is close to 1, and 0.0 otherwise.
# NOTE: this assumes 1 is the only eigenvalue with |lambda| = 1; an eigenvalue
# such as -1 would also be zeroed here even though its powers do not vanish.
rounded_eigs = 1.0*np.isclose(eigenvalues,1.0)


print(f"{rounded_eigs=}")

# Same decomposition with the limiting eigenvalues plugged in: this is the
# limiting transition matrix (row i = long-run distribution when starting at i),
# not the decomposition of the original transition matrix.
LimitingTransitionMatrix = eigenvectors @ np.diag(rounded_eigs) @ eigenvectors_inv
print(f"{LimitingTransitionMatrix=}")

np.allclose(TransitionMatrix_eig,TransitionMatrix)=True
rounded_eigs=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.])
LimitingTransitionMatrix=array([[1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0.9, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.1],
       [0.8, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.2],
       [0.7, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.3],
       [0.6, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.4],
       [0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5],
       [0.4, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.6],
       [0.3, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.7],
       [0.2, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.8],
       [0.1, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.9],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]])


# How many steps - Power method

In [52]:
# Start with certainty in state x_init.
x_init = 2
p_init = np.zeros(n+1, dtype=float)
p_init[x_init] = 1.0

# E_visits[x] accumulates P(X_t = x) over t = 0, ..., big_t-1, i.e. the expected
# number of those time steps spent in state x (a truncated version of the infinite sum).
big_t = 100
E_visits = np.zeros_like(p_init)
p_t = p_init  # distribution at the current time step
for t in range(big_t):
  E_visits = E_visits + p_t
  p_t = p_t @ TransitionMatrix  # advance the distribution by one step

print(f"{E_visits=}")

# Reward of 1 per time step spent in an intermediate state and 0 in the two
# end states, so the total reward counts the steps taken before stopping.
reward_v = np.ones(n+1)
reward_v[[0, -1]] = 0.0

E_steps = E_visits @ reward_v
print(f"{E_steps=}")

E_visits=array([70.4501,  1.5952,  3.1904,  2.7875,  2.3845,  1.9845,  1.5845,
        1.1875,  0.7904,  0.3952, 13.6501])
E_steps=15.899723201470946


# How many steps - Equation method

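Let $v_x$ be the expected number of steps taken when starting from state $x$. We must solve $\vec{v} = \text{Reward} + M\vec{v}$ subject to the boundary conditions $v_0 = 0$ and $v_n = 0$, where $M$ is the transition matrix and $\text{Reward}$ is 1 at every intermediate state and 0 at the two end states. To record all of these equations at once, we stack the two boundary conditions on top of $(I - M)\vec{v} = \text{Reward}$ to make an augmented matrix!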

In [54]:
# Rearranging v = Reward + M v gives (I - M) v = Reward.
identity = np.eye(n+1)
IminusM = identity - TransitionMatrix

# Boundary conditions written as one-row equations: row 0 of the identity picks
# out v[0] and row n picks out v[n].
eqn_v0_is_0 = identity[[0]]
eqn_vn_is_0 = identity[[n]]

# Stack the two boundary rows on top of the (I - M) rows.
augmented_matrix = np.vstack((eqn_v0_is_0, eqn_vn_is_0, IminusM))
print(f"{augmented_matrix=}")

# Right-hand side: 0 for each boundary equation, then the reward vector
# (reward_v is defined in the "How many steps - Power method" cell).
target_vec = np.concatenate(([0, 0], reward_v))
print(f"{target_vec=}")


# There are more equations than unknowns, so solve in the least-squares sense.
value_vec, *_ = np.linalg.lstsq(augmented_matrix, target_vec, rcond=None)
print(f"{value_vec=}")


augmented_matrix=array([[ 1. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  1. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [-0.5,  1. , -0.5,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. , -0.5,  1. , -0.5,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. , -0.5,  1. , -0.5,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. , -0.5,  1. , -0.5,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. , -0.5,  1. , -0.5,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. , -0.5,  1. , -0.5,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.5,  1. , -0.5,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.5,  1. , -0.5,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.5,  1. , -0.5],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ]])
target_