In [2]:
import numpy as np
import pandas as pd
from scipy import linalg as slin
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Price transition probability matrix: TPM[i, j] is the probability of moving
# from price level i to price level j in one step (each row sums to 1).
TPM = np.asarray(
    [
        [0.5, 0.3, 0.1, 0.1],
        [0.1, 0.5, 0.2, 0.2],
        [0.2, 0.1, 0.5, 0.2],
        [0.1, 0.1, 0.3, 0.5],
    ],
    dtype=float,
)


In [9]:
# State indices.
#   0-3: "buy" states at prices 0.1, 1, 2, 3
#   4-7: "sell" states at prices 0.1, 1, 2, 3
state = list(range(8))

# Action indices: 0 = do nothing, 1 = buy, 2 = sell (at the state's price).
action = list(range(3))

# Cost attached to each state, as an (8, 1) column: positive prices for the
# buy states 0-3, the same prices negated for the sell states 4-7.
price_levels = (0.1, 1, 2, 3)
action_cost = np.array(
    [[price] for price in price_levels] + [[-price] for price in price_levels]
)

# Allowed moves as (state, action, next_state) triples, in four groups of four:
# buy states doing nothing, buy states buying, sell states doing nothing,
# sell states selling.
policy_state_transition = (
    [(s, 0, s) for s in range(0, 4)]
    + [(s, 1, s + 4) for s in range(0, 4)]
    + [(s, 0, s) for s in range(4, 8)]
    + [(s, 2, s - 4) for s in range(4, 8)]
)


# Create the immediate reward matrix for states and actions -- this is difficult to make generic; it has to be modified for every problem

In [12]:
def state_action_imm_rew(policy_state_transition, action_cost, n_states=None, n_actions=None):
    """Build the immediate-reward array indexed by (state, action).

    Parameters
    ----------
    policy_state_transition : iterable of (state, action, next_state)
        The allowed moves. Only ``state`` and ``action`` are used here.
    action_cost : array-like, one entry (or one 1-element row) per state
        Cost attached to each state; the reward of a trade is its negation.
    n_states : int, optional
        Size of the state axis. Defaults to ``len(action_cost)``.
    n_actions : int, optional
        Size of the action axis. Defaults to one more than the largest action
        index appearing in ``policy_state_transition``.

    Returns
    -------
    numpy.ndarray of shape (n_states, n_actions, 1)
        Zero everywhere except for the listed (state, action) pairs that are
        trades, which hold ``-action_cost[state]``.

    Notes
    -----
    The state grouping is specific to this problem: states 0-3 earn nothing
    for actions 0 and 2, states 4-7 earn nothing for actions 0 and 1; every
    other listed pair is rewarded with the negated cost of its state.
    """
    moves = list(policy_state_transition)
    costs = np.asarray(action_cost, dtype=float)

    # Size the array from the arguments rather than from notebook globals.
    if n_states is None:
        n_states = len(costs)
    if n_actions is None:
        n_actions = 1 + max((a for _, a, _ in moves), default=-1)

    first_group = (0, 1, 2, 3)   # states where actions 0 and 2 earn nothing
    second_group = (4, 5, 6, 7)  # states where actions 0 and 1 earn nothing

    rewards = np.zeros((n_states, n_actions, 1))
    for s, a, _ in moves:
        if s in first_group and a in (0, 2):
            rewards[s, a, :] = 0
        elif s in second_group and a in (0, 1):
            rewards[s, a, :] = 0
        else:
            rewards[s, a, :] = -1 * costs[s]

    return rewards
        
        
# Build the (state, action) immediate-reward array and display it.
# NOTE(review): this rebinds the name `state_action_imm_rew` from the function
# to the array it returns, so the function is no longer callable after this
# line until the cell (including the `def`) is run again. Later cells index
# this name as an array.
state_action_imm_rew = state_action_imm_rew(policy_state_transition, action_cost)
state_action_imm_rew            

array([[[ 0. ],
        [-0.1],
        [ 0. ]],

       [[ 0. ],
        [-1. ],
        [ 0. ]],

       [[ 0. ],
        [-2. ],
        [ 0. ]],

       [[ 0. ],
        [-3. ],
        [ 0. ]],

       [[ 0. ],
        [ 0. ],
        [ 0.1]],

       [[ 0. ],
        [ 0. ],
        [ 1. ]],

       [[ 0. ],
        [ 0. ],
        [ 2. ]],

       [[ 0. ],
        [ 0. ],
        [ 3. ]]])

# Inputs to test the policy and reward -- incorporating the time period into the discount factoring is still a challenge

# LP formulation for MDP -- Unable to make the constraint and matrix formation generic

In [60]:
# NOTE(review): wildcard import kept on purpose -- the following cells rely on
# the names it brings in (LpProblem, LpMinimize, LpVariable, lpSum, GLPK, value).
from pulp import *
# initialize the model
prob = LpProblem("mdp02", LpMinimize)
discount = 0.95  # one-step discount factor applied to future state values
#########
# Allowed moves as (state, action, next_state) triples.
policy_state_transition = [(0,0,0), (1,0,1), (2,0,2), (3,0,3), (0,1,4), (1,1,5), (2,1,6), (3,1,7),
                           (4,0,4), (5,0,5), (6,0,6), (7,0,7), (4,2,0), (5,2,1), (6,2,2), (7,2,3),]

# For every triple, collect the TPM row that drives the next price level and
# the immediate reward of taking that action in that state.
n_price_levels = TPM.shape[0]
transitions = []
rewards = []
for s, a, s_next in policy_state_transition:
    # States 4-7 reuse the price rows of states 0-3 (custom for this problem).
    transitions.append(TPM[s_next % n_price_levels])
    # Store a plain float (not a 1-element array) so it enters each LP
    # constraint as an ordinary constant term.
    rewards.append(float(state_action_imm_rew[s, a, 0]))
new_mat = np.array(transitions) * discount  # discounted transition rows, one per triple
print(new_mat)

T = len(state)
# ---------------------
# VARIABLES
# ---------------------

# One value variable per state.
# NOTE(review): the lower bound of 0 is kept from the original; it is only
# harmless because action 0 ("do nothing") always has reward 0 here. For a
# problem where state values can be negative the variables must be left free.
dv = LpVariable.dicts("dv", range(T), lowBound=0)

[[0.475 0.285 0.095 0.095]
 [0.095 0.475 0.19  0.19 ]
 [0.19  0.095 0.475 0.19 ]
 [0.095 0.095 0.285 0.475]
 [0.475 0.285 0.095 0.095]
 [0.095 0.475 0.19  0.19 ]
 [0.19  0.095 0.475 0.19 ]
 [0.095 0.095 0.285 0.475]
 [0.475 0.285 0.095 0.095]
 [0.095 0.475 0.19  0.19 ]
 [0.19  0.095 0.475 0.19 ]
 [0.095 0.095 0.285 0.475]
 [0.475 0.285 0.095 0.095]
 [0.095 0.475 0.19  0.19 ]
 [0.19  0.095 0.475 0.19 ]
 [0.095 0.095 0.285 0.475]]


In [61]:
# Constraints
#
# One inequality per (state, action, next_state) triple k:
#     dv[state] >= rewards[k] + sum_j new_mat[k, j] * dv[block_start + j]
# where block_start is the first state of the 4-state block (0-3 or 4-7) that
# contains next_state, and new_mat[k] is the discounted price-transition row.

n_price_levels = new_mat.shape[1]

# Add the constraints grouped by source state (stable sort on the state index)
# so they are created in the same order -- and therefore get the same
# auto-generated names _C1.._C16 -- as when they were written out by hand.
constraint_order = sorted(
    range(len(policy_state_transition)),
    key=lambda k: policy_state_transition[k][0],
)

for k in constraint_order:
    s, _, s_next = policy_state_transition[k]
    block_start = (s_next // n_price_levels) * n_price_levels
    expected_future = lpSum(
        [new_mat[k, j] * dv[block_start + j] for j in range(n_price_levels)]
    )
    prob += dv[s] >= expected_future + rewards[k]

# Objective function: minimise the sum of all state values.
# NOTE(review): re-running this cell without re-running the model-setup cell
# adds the constraints a second time and overwrites the objective.
prob += lpSum([dv[i] for i in range(T)]), "Objective"


prob

mdp02:
MINIMIZE
1*dv_0 + 1*dv_1 + 1*dv_2 + 1*dv_3 + 1*dv_4 + 1*dv_5 + 1*dv_6 + 1*dv_7 + 0
SUBJECT TO
_C1: 0.525 dv_0 - 0.285 dv_1 - 0.095 dv_2 - 0.095 dv_3 >= 0

_C2: dv_0 - 0.475 dv_4 - 0.285 dv_5 - 0.095 dv_6 - 0.095 dv_7 >= -0.1

_C3: - 0.095 dv_0 + 0.525 dv_1 - 0.19 dv_2 - 0.19 dv_3 >= 0

_C4: dv_1 - 0.095 dv_4 - 0.475 dv_5 - 0.19 dv_6 - 0.19 dv_7 >= -1

_C5: - 0.19 dv_0 - 0.095 dv_1 + 0.525 dv_2 - 0.19 dv_3 >= 0

_C6: dv_2 - 0.19 dv_4 - 0.095 dv_5 - 0.475 dv_6 - 0.19 dv_7 >= -2

_C7: - 0.095 dv_0 - 0.095 dv_1 - 0.285 dv_2 + 0.525 dv_3 >= 0

_C8: dv_3 - 0.095 dv_4 - 0.095 dv_5 - 0.285 dv_6 - 0.475 dv_7 >= -3

_C9: 0.525 dv_4 - 0.285 dv_5 - 0.095 dv_6 - 0.095 dv_7 >= 0

_C10: - 0.475 dv_0 - 0.285 dv_1 - 0.095 dv_2 - 0.095 dv_3 + dv_4 >= 0.1

_C11: - 0.095 dv_4 + 0.525 dv_5 - 0.19 dv_6 - 0.19 dv_7 >= 0

_C12: - 0.095 dv_0 - 0.475 dv_1 - 0.19 dv_2 - 0.19 dv_3 + dv_5 >= 1

_C13: - 0.19 dv_4 - 0.095 dv_5 + 0.525 dv_6 - 0.19 dv_7 >= 0

_C14: - 0.19 dv_0 - 0.095 dv_1 - 0.475 dv_2 - 0.19 d

In [62]:

    
# Save the model in LP format so it can be inspected outside the notebook.
prob.writeLP("mdp02.lp")

# Solve with GLPK; "--ranges" makes the solver write its sensitivity analysis
# report to mdp02.sen.
glpk_solver = GLPK(options=["--ranges", "mdp02.sen"])
status = prob.solve(glpk_solver)
print(status)  # PuLP status code; 1 means an optimal solution was found

#print the result
print("dv")
for state_index in range(T):
    print(dv[state_index].value())

print("Objective", value(prob.objective))

1
dv
5.93303
5.30852
4.7838
4.63111
6.03303
6.30852
6.7838
7.63111
Objective 47.41292


In [None]:
# %load mdp02.sen
GLPK 4.65 - SENSITIVITY ANALYSIS REPORT                                                                         Page   1

Problem:    
Objective:  Objective = 47.41291369 (MINimum)

   No. Row name     St      Activity         Slack   Lower bound       Activity      Obj coef  Obj value at Limiting
                                          Marginal   Upper bound          range         range   break point variable
------ ------------ -- ------------- ------------- -------------  ------------- ------------- ------------- ------------
     1 _C1          BS        .70750       -.70750        .             1.48947     -25.95034      29.05305 _C2
                                            .               +Inf         .44680      32.43772      70.36260 _C4

     2 _C10         BS        .80750       -.70750        .10000        3.64882     -14.36013      35.81711 _C9
                                            .               +Inf       -1.36500      28.68196      70.57359 _C2

     3 _C11         NL        .             .             .             -.29458     -29.16327      38.82211 _C12
                                          29.16327          +Inf        3.85263          +Inf     159.76826 _C13

     4 _C12         BS       1.43450       -.43450       1.00000        7.11713     -19.77171      19.05040 _C11
                                            .               +Inf         .25533      19.46263      75.33206 _C4

     5 _C13         BS        .36600       -.36600        .              .77053     -22.10288      39.32326 _C14
                                            .               +Inf         .00580      61.07374      69.76590 _C16

     6 _C14         NL       2.00000        .            2.00000        1.30286     -11.60401      39.32326 _C13
                                          11.60401          +Inf        2.77053          +Inf      56.35411 _C6

     7 _C15         BS        .90050       -.90050        .             1.89579     -22.10288      27.50927 _C16
                                            .               +Inf         .68090      40.71583      84.07752 _C14

     8 _C16         NL       3.00000        .            3.00000        1.28476     -11.60401      27.50927 _C15
                                          11.60401          +Inf        4.89579          +Inf      69.41168 _C8

     9 _C2          NL       -.10000        .            -.10000       -1.44762     -13.62393      29.05305 _C1
                                          13.62393          +Inf        1.38947          +Inf      67.70540 _C10

    10 _C3          BS        .43450       -.43450        .              .91474     -17.60905      39.76178 _C4
                                            .               +Inf         .29300     143.40979     109.72447 _C2

GLPK 4.65 - SENSITIVITY ANALYSIS REPORT                                                                         Page   2

Problem:    
Objective:  Objective = 47.41291369 (MINimum)

   No. Row name     St      Activity         Slack   Lower bound       Activity      Obj coef  Obj value at Limiting
                                          Marginal   Upper bound          range         range   break point variable
------ ------------ -- ------------- ------------- -------------  ------------- ------------- ------------- ------------
    11 _C4          NL      -1.00000        .           -1.00000       -1.82762      -9.24475      39.76178 _C3
                                           9.24475          +Inf        -.08526          +Inf      55.86943 _C12

    12 _C5          NL        .             .             .             -.24814     -34.49261      38.85407 _C6
                                          34.49261          +Inf        2.28684          +Inf     126.29207 _C12

    13 _C6          BS      -1.63400       -.36600      -2.00000        1.73909     -23.38482      85.62371 _C5
                                            .               +Inf       -3.13483      24.42950       7.49511 _C14

    14 _C7          NL        .             .             .             -.61051     -29.08622      29.65553 _C8
                                          29.08622          +Inf        2.28684          +Inf     113.92850 _C12

    15 _C8          BS      -2.09950       -.90050      -3.00000        1.27359     -19.71947      88.81394 _C7
                                            .               +Inf       -3.01450      24.42950      -3.87682 _C16

    16 _C9          NL        .             .             .             -.47966     -21.18120      37.25312 _C10
                                          21.18120          +Inf        1.92632          +Inf      88.21459 _C13

GLPK 4.65 - SENSITIVITY ANALYSIS REPORT                                                                         Page   3

Problem:    
Objective:  Objective = 47.41291369 (MINimum)

   No. Column name  St      Activity      Obj coef   Lower bound       Activity      Obj coef  Obj value at Limiting
                                          Marginal   Upper bound          range         range   break point variable
------ ------------ -- ------------- ------------- -------------  ------------- ------------- ------------- ------------
     1 dv_0         BS       5.93303       1.00000        .             9.54236      -4.62224      14.05595 _C2
                                            .               +Inf        5.93303          +Inf          +Inf

     2 dv_1         BS       5.30852       1.00000        .             7.11234      -3.68811      22.52598 _C4
                                            .               +Inf        5.30852          +Inf          +Inf

     3 dv_2         BS       4.78380       1.00000        .            17.56678      -5.17064      17.89380 _C5
                                            .               +Inf        4.78380          +Inf          +Inf

     4 dv_3         BS       4.63111       1.00000        .            16.08386      -4.80782      20.51627 _C7
                                            .               +Inf        4.63111          +Inf          +Inf

     5 dv_4         BS       6.03303       1.00000        .            13.83558      -4.22928      15.86451 _C9
                                            .               +Inf        6.03303          +Inf          +Inf

     6 dv_5         BS       6.30852       1.00000        .            25.18549      -4.95198       9.86477 _C11
                                            .               +Inf        6.30852          +Inf          +Inf

     7 dv_6         BS       6.78380       1.00000        .             8.48782      -4.24712      11.81752 _C14
                                            .               +Inf        6.78380          +Inf          +Inf

     8 dv_7         BS       7.63111       1.00000        .            11.77089      -4.31399       6.86128 _C16
                                            .               +Inf        7.63111          +Inf          +Inf

End of report


In [65]:
# https://en.wikibooks.org/wiki/GLPK/Solution_information