## Markov Decision Processes

In [1]:
import numpy as np
import pandas as pd

from environment import GridWorld5x5, GridWorld10x10, GridWorld3x4
from mdpagent import MDPAgent

### Grid world

In [2]:
# Move set handed to the environment; the list order is kept exactly as given.
actions = ['left', 'up', 'right', 'down']

# Environment used by every experiment below. To try another layout, swap in
# GridWorld10x10(actions) or GridWorld3x4(actions) on the next line.
model = GridWorld5x5(actions)

# Solver settings, one argument per line so each one is easy to spot and tweak.
# verbose=True is what produces the per-iteration "delta" log lines in the
# recorded outputs of the solver cells.
mdpAgent = MDPAgent(
    model.n_states,
    model.n_actions,
    model.action_sets,
    discount=0.9,
    threshold=0.0001,
    max_iter=1000,
    verbose=True,
)

In [3]:
# Uniform random policy: one row per state, every action given probability
# 1 / n_actions.
pi = np.full((model.n_states, model.n_actions), 1.0 / model.n_actions)

# Evaluate that fixed policy on the grid world.
mdpAgent.iterative_policy_evaluation(model, pi)

Iterative Policy Evaluation: iteration 1, delta 19.500000
Iterative Policy Evaluation: iteration 2, delta 12.600000
Iterative Policy Evaluation: iteration 3, delta 7.644375
Iterative Policy Evaluation: iteration 4, delta 5.245383
Iterative Policy Evaluation: iteration 5, delta 3.507316
Iterative Policy Evaluation: iteration 6, delta 2.533802
Iterative Policy Evaluation: iteration 7, delta 1.757414
Iterative Policy Evaluation: iteration 8, delta 1.233562
Iterative Policy Evaluation: iteration 9, delta 0.875924
Iterative Policy Evaluation: iteration 10, delta 0.597858
Iterative Policy Evaluation: iteration 11, delta 0.429113
Iterative Policy Evaluation: iteration 12, delta 0.292723
Iterative Policy Evaluation: iteration 13, delta 0.211146
Iterative Policy Evaluation: iteration 14, delta 0.152113
Iterative Policy Evaluation: iteration 15, delta 0.111109
Iterative Policy Evaluation: iteration 16, delta 0.083551
Iterative Policy Evaluation: iteration 17, delta 0.065910
Iterative Policy Eval

In [4]:
# Hand the agent's U attribute to the model's show_state_value helper and keep
# the result in `df`. U presumably holds the state values left by the policy
# evaluation run above -- confirm in MDPAgent.
df = model.show_state_value(mdpAgent.U)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,3.30903,8.789326,4.427653,5.322401,1.492212
1,1.521622,2.992351,2.250174,1.907605,0.547436
2,0.050856,0.738204,0.673147,0.35822,-0.403108
3,-0.973559,-0.435462,-0.354849,-0.585571,-1.183041
4,-1.857667,-1.345198,-1.229234,-1.422885,-1.975145


### Policy iteration

In [5]:
# Reset the agent before switching algorithms; presumably this clears the
# values left by the earlier policy-evaluation run -- confirm in MDPAgent.reset.
mdpAgent.reset()
# Run policy iteration on the grid world. The agent was built with
# verbose=True, so progress lines are printed while it runs.
mdpAgent.policy_iteration(model)

Iterative Policy Evaluation: iteration 1, delta 21.000000
Iterative Policy Evaluation: iteration 2, delta 11.700000
Iterative Policy Evaluation: iteration 3, delta 7.290000
Iterative Policy Evaluation: iteration 4, delta 6.561000
Iterative Policy Evaluation: iteration 5, delta 5.904900
Iterative Policy Evaluation: iteration 6, delta 5.314410
Iterative Policy Evaluation: iteration 7, delta 4.782969
Iterative Policy Evaluation: iteration 8, delta 4.304672
Iterative Policy Evaluation: iteration 9, delta 3.874205
Iterative Policy Evaluation: iteration 10, delta 3.486784
Iterative Policy Evaluation: iteration 11, delta 3.138106
Iterative Policy Evaluation: iteration 12, delta 2.824295
Iterative Policy Evaluation: iteration 13, delta 2.541866
Iterative Policy Evaluation: iteration 14, delta 2.287679
Iterative Policy Evaluation: iteration 15, delta 2.058911
Iterative Policy Evaluation: iteration 16, delta 1.853020
Iterative Policy Evaluation: iteration 17, delta 1.667718
Iterative Policy Eval

In [6]:
# Tabulate the agent's U attribute as it stands after policy iteration, using
# the model's show_state_value helper.
df = model.show_state_value(mdpAgent.U)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,21.977461,24.419401,21.977461,19.419401,17.477461
1,19.779702,21.977461,19.779702,17.801732,16.021556
2,17.801732,19.779702,17.801732,16.021556,14.419401
3,16.021556,17.801732,16.021556,14.419401,12.977461
4,14.419401,16.021556,14.419401,12.977461,11.679702


In [7]:
# extract_policy() is called before mdpAgent.pi is read; presumably it derives
# the policy from the values computed by policy iteration -- confirm in MDPAgent.
mdpAgent.extract_policy()
# Tabulate the agent's policy (mdpAgent.pi) via the model's show_policy helper.
df = model.show_policy(mdpAgent.pi)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,right,left,left,left,left
1,up,up,left,left,left
2,up,up,left,left,left
3,up,up,left,left,left
4,up,up,left,left,left


In [8]:
# Tabulate the agent's Q attribute via the model's show_action_value helper.
# In the recorded output each table cell is a dict keyed by action name.
df = model.show_action_value(mdpAgent.Q)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,"{'left': 18.78, 'up': 18.78, 'right': 21.98, '...","{'left': 24.42, 'up': 24.42, 'right': 24.42, '...","{'left': 21.98, 'up': 18.78, 'right': 17.48, '...","{'left': 19.42, 'up': 19.42, 'right': 19.42, '...","{'left': 17.48, 'up': 14.73, 'right': 14.73, '..."
1,"{'left': 16.8, 'up': 19.78, 'right': 19.78, 'd...","{'left': 17.8, 'up': 21.98, 'right': 17.8, 'do...","{'left': 19.78, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.48, 'right': 14.42, 'd...","{'left': 16.02, 'up': 15.73, 'right': 13.42, '..."
2,"{'left': 15.02, 'up': 17.8, 'right': 17.8, 'do...","{'left': 16.02, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.8, 'right': 14.42, 'do...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.98, '..."
3,"{'left': 13.42, 'up': 16.02, 'right': 16.02, '...","{'left': 14.42, 'up': 17.8, 'right': 14.42, 'd...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.68, '..."
4,"{'left': 11.98, 'up': 14.42, 'right': 14.42, '...","{'left': 12.98, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.51, '...","{'left': 11.68, 'up': 11.68, 'right': 9.51, 'd..."


### Value iteration

In [9]:
# Reset the agent so this run does not start from the policy-iteration
# results; presumably reset() clears the stored values -- confirm in MDPAgent.
mdpAgent.reset()
# Run value iteration on the grid world. The agent was built with
# verbose=True, so progress lines are printed while it runs.
mdpAgent.value_iteration(model)

Value Iteration: iteration 1, delta 15.000000
Value Iteration: iteration 2, delta 36.000000
Value Iteration: iteration 3, delta 32.400000
Value Iteration: iteration 4, delta 35.595000
Value Iteration: iteration 5, delta 35.037000
Value Iteration: iteration 6, delta 28.329750
Value Iteration: iteration 7, delta 25.270785
Value Iteration: iteration 8, delta 21.270762
Value Iteration: iteration 9, delta 20.244556
Value Iteration: iteration 10, delta 19.560552
Value Iteration: iteration 11, delta 16.402679
Value Iteration: iteration 12, delta 14.412568
Value Iteration: iteration 13, delta 12.081650
Value Iteration: iteration 14, delta 11.974356
Value Iteration: iteration 15, delta 11.461099
Value Iteration: iteration 16, delta 9.723740
Value Iteration: iteration 17, delta 8.750517
Value Iteration: iteration 18, delta 6.955475
Value Iteration: iteration 19, delta 6.960453
Value Iteration: iteration 20, delta 7.013161
Value Iteration: iteration 21, delta 6.107863
Value Iteration: iteration 2

In [10]:
# Tabulate the agent's U attribute as it stands after value iteration, using
# the model's show_state_value helper.
df = model.show_state_value(mdpAgent.U)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,21.977443,24.419382,21.977443,19.419382,17.477443
1,19.779699,21.977443,19.779699,17.801729,16.021535
2,17.801729,19.779699,17.801729,16.021535,14.419382
3,16.021535,17.801729,16.021535,14.419382,12.977443
4,14.419382,16.021535,14.419382,12.977443,11.679699


In [11]:
# extract_policy() is called before mdpAgent.pi is read; presumably it derives
# the policy from the values computed by value iteration -- confirm in MDPAgent.
mdpAgent.extract_policy()
# Tabulate the agent's policy (mdpAgent.pi) via the model's show_policy helper.
df = model.show_policy(mdpAgent.pi)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,right,left,left,left,left
1,up,up,left,left,left
2,up,up,left,left,left
3,up,up,left,left,left
4,up,up,left,left,left


In [12]:
# Tabulate the agent's Q attribute after value iteration via the model's
# show_action_value helper. In the recorded output each table cell is a dict
# keyed by action name.
df = model.show_action_value(mdpAgent.Q)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,"{'left': 18.78, 'up': 18.78, 'right': 21.98, '...","{'left': 24.42, 'up': 24.42, 'right': 24.42, '...","{'left': 21.98, 'up': 18.78, 'right': 17.48, '...","{'left': 19.42, 'up': 19.42, 'right': 19.42, '...","{'left': 17.48, 'up': 14.73, 'right': 14.73, '..."
1,"{'left': 16.8, 'up': 19.78, 'right': 19.78, 'd...","{'left': 17.8, 'up': 21.98, 'right': 17.8, 'do...","{'left': 19.78, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.48, 'right': 14.42, 'd...","{'left': 16.02, 'up': 15.73, 'right': 13.42, '..."
2,"{'left': 15.02, 'up': 17.8, 'right': 17.8, 'do...","{'left': 16.02, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.8, 'right': 14.42, 'do...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.98, '..."
3,"{'left': 13.42, 'up': 16.02, 'right': 16.02, '...","{'left': 14.42, 'up': 17.8, 'right': 14.42, 'd...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.68, '..."
4,"{'left': 11.98, 'up': 14.42, 'right': 14.42, '...","{'left': 12.98, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.51, '...","{'left': 11.68, 'up': 11.68, 'right': 9.51, 'd..."


### Gauss-Seidel value iteration

In [13]:
# Reset the agent so this run does not start from the value-iteration
# results; presumably reset() clears the stored values -- confirm in MDPAgent.
mdpAgent.reset()
# Run the Gauss-Seidel variant of value iteration (value_iteration_gs) on the
# grid world. The agent was built with verbose=True, so progress lines are
# printed while it runs.
mdpAgent.value_iteration_gs(model)

Gauss-Siedel Value Iteration: iteration 1, delta 134.940489
Gauss-Siedel Value Iteration: iteration 2, delta 122.146564
Gauss-Siedel Value Iteration: iteration 3, delta 72.126325
Gauss-Siedel Value Iteration: iteration 4, delta 42.589874
Gauss-Siedel Value Iteration: iteration 5, delta 25.148894
Gauss-Siedel Value Iteration: iteration 6, delta 14.850171
Gauss-Siedel Value Iteration: iteration 7, delta 8.768877
Gauss-Siedel Value Iteration: iteration 8, delta 5.177934
Gauss-Siedel Value Iteration: iteration 9, delta 3.057518
Gauss-Siedel Value Iteration: iteration 10, delta 1.805434
Gauss-Siedel Value Iteration: iteration 11, delta 1.066091
Gauss-Siedel Value Iteration: iteration 12, delta 0.629516
Gauss-Siedel Value Iteration: iteration 13, delta 0.371723
Gauss-Siedel Value Iteration: iteration 14, delta 0.219499
Gauss-Siedel Value Iteration: iteration 15, delta 0.129612
Gauss-Siedel Value Iteration: iteration 16, delta 0.076534
Gauss-Siedel Value Iteration: iteration 17, delta 0.04519

In [14]:
# Tabulate the agent's U attribute as it stands after Gauss-Seidel value
# iteration, using the model's show_state_value helper.
df = model.show_state_value(mdpAgent.U)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,21.977477,24.419422,21.97748,19.419422,17.47748
1,19.779729,21.97748,19.779732,17.801759,16.021583
2,17.801756,19.779732,17.801759,16.021583,14.419425
3,16.02158,17.801759,16.021583,14.419425,12.977482
4,14.419422,16.021583,14.419425,12.977482,11.679734


In [15]:
# extract_policy() is called before mdpAgent.pi is read; presumably it derives
# the policy from the values computed by the Gauss-Seidel run -- confirm in
# MDPAgent.
mdpAgent.extract_policy()
# Tabulate the agent's policy (mdpAgent.pi) via the model's show_policy helper.
df = model.show_policy(mdpAgent.pi)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,right,left,left,left,left
1,up,up,left,left,left
2,up,up,left,left,left
3,up,up,left,left,left
4,up,up,left,left,left


In [16]:
# Tabulate the agent's Q attribute after Gauss-Seidel value iteration via the
# model's show_action_value helper. In the recorded output each table cell is
# a dict keyed by action name.
df = model.show_action_value(mdpAgent.Q)
# Bare last expression: the notebook renders `df` as the cell's output.
df

Unnamed: 0,0,1,2,3,4
0,"{'left': 18.78, 'up': 18.78, 'right': 21.98, '...","{'left': 24.42, 'up': 24.42, 'right': 24.42, '...","{'left': 21.98, 'up': 18.78, 'right': 17.48, '...","{'left': 19.42, 'up': 19.42, 'right': 19.42, '...","{'left': 17.48, 'up': 14.73, 'right': 14.73, '..."
1,"{'left': 16.8, 'up': 19.78, 'right': 19.78, 'd...","{'left': 17.8, 'up': 21.98, 'right': 17.8, 'do...","{'left': 19.78, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.48, 'right': 14.42, 'd...","{'left': 16.02, 'up': 15.73, 'right': 13.42, '..."
2,"{'left': 15.02, 'up': 17.8, 'right': 17.8, 'do...","{'left': 16.02, 'up': 19.78, 'right': 16.02, '...","{'left': 17.8, 'up': 17.8, 'right': 14.42, 'do...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.98, '..."
3,"{'left': 13.42, 'up': 16.02, 'right': 16.02, '...","{'left': 14.42, 'up': 17.8, 'right': 14.42, 'd...","{'left': 16.02, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.68, '..."
4,"{'left': 11.98, 'up': 14.42, 'right': 14.42, '...","{'left': 12.98, 'up': 16.02, 'right': 12.98, '...","{'left': 14.42, 'up': 14.42, 'right': 11.68, '...","{'left': 12.98, 'up': 12.98, 'right': 10.51, '...","{'left': 11.68, 'up': 11.68, 'right': 9.51, 'd..."
