In [26]:
import quantecon as qe
import numpy as np
import gurobipy as gp
from gurobipy import *

In [5]:
class SimpleOG(object):

    def __init__(self, B=10, M=5, alpha=0.5, beta=0.9):

        self.B, self.M, self.alpha, self.beta  = B, M, alpha, beta
        self.n = B + M + 1
        self.m = M + 1

        self.R = np.empty((self.n, self.m))
        self.Q = np.zeros((self.n, self.m, self.n))

        self.populate_Q()
        self.populate_R()

    def u(self, c):
        return c**self.alpha

    def populate_R(self):

        for s in range(self.n):
            for a in range(self.m):
                self.R[s, a] = self.u(s - a) if a <= s else -np.inf

    def populate_Q(self):

        for a in range(self.m):
            self.Q[:, a, a:(a + self.B + 1)] = 1.0 / (self.B + 1)

In [6]:
g = SimpleOG()

In [7]:
# n = 16
# m = 6
ddp = qe.markov.DiscreteDP(g.R, g.Q, g.beta)

In [9]:
# reward array
ddp.R

array([[ 0.        ,        -inf,        -inf,        -inf,        -inf,
               -inf],
       [ 1.        ,  0.        ,        -inf,        -inf,        -inf,
               -inf],
       [ 1.41421356,  1.        ,  0.        ,        -inf,        -inf,
               -inf],
       [ 1.73205081,  1.41421356,  1.        ,  0.        ,        -inf,
               -inf],
       [ 2.        ,  1.73205081,  1.41421356,  1.        ,  0.        ,
               -inf],
       [ 2.23606798,  2.        ,  1.73205081,  1.41421356,  1.        ,
         0.        ],
       [ 2.44948974,  2.23606798,  2.        ,  1.73205081,  1.41421356,
         1.        ],
       [ 2.64575131,  2.44948974,  2.23606798,  2.        ,  1.73205081,
         1.41421356],
       [ 2.82842712,  2.64575131,  2.44948974,  2.23606798,  2.        ,
         1.73205081],
       [ 3.        ,  2.82842712,  2.64575131,  2.44948974,  2.23606798,
         2.        ],
       [ 3.16227766,  3.        ,  2.82842712,  2.

In [10]:
# transition probability array
ddp.Q

array([[[ 0.09090909,  0.09090909,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.09090909,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.09090909,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.09090909,
          0.09090909,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.09090909,
          0.09090909,  0.09090909]],

       [[ 0.09090909,  0.09090909,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.09090909,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.09090909, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.09090909,
          0.  

In [11]:
# discount factor
ddp.beta

0.9

In [12]:
# number of states
ddp.num_states

16

In [13]:
# number of actions
ddp.num_actions

6

In [14]:
# number of pairs of actions and states
ddp.num_sa_pairs

81

In [34]:
#exact LP
#　変数の導入（ここではvalue function）
num_state = range(1, ddp.num_states + 1)
num_action = range(1, ddp.num_actions + 1)
LP = gp.Model()

x = {}
for i in num_state:
    for j in num_action:
        x[i,j] = LP.addVar(vtype = "C", name = "x(%s,%s)" %(i,j))
LP.update()




In [35]:
x

{(1, 1): <gurobi.Var x(1,1)>,
 (1, 2): <gurobi.Var x(1,2)>,
 (1, 3): <gurobi.Var x(1,3)>,
 (1, 4): <gurobi.Var x(1,4)>,
 (1, 5): <gurobi.Var x(1,5)>,
 (1, 6): <gurobi.Var x(1,6)>,
 (2, 1): <gurobi.Var x(2,1)>,
 (2, 2): <gurobi.Var x(2,2)>,
 (2, 3): <gurobi.Var x(2,3)>,
 (2, 4): <gurobi.Var x(2,4)>,
 (2, 5): <gurobi.Var x(2,5)>,
 (2, 6): <gurobi.Var x(2,6)>,
 (3, 1): <gurobi.Var x(3,1)>,
 (3, 2): <gurobi.Var x(3,2)>,
 (3, 3): <gurobi.Var x(3,3)>,
 (3, 4): <gurobi.Var x(3,4)>,
 (3, 5): <gurobi.Var x(3,5)>,
 (3, 6): <gurobi.Var x(3,6)>,
 (4, 1): <gurobi.Var x(4,1)>,
 (4, 2): <gurobi.Var x(4,2)>,
 (4, 3): <gurobi.Var x(4,3)>,
 (4, 4): <gurobi.Var x(4,4)>,
 (4, 5): <gurobi.Var x(4,5)>,
 (4, 6): <gurobi.Var x(4,6)>,
 (5, 1): <gurobi.Var x(5,1)>,
 (5, 2): <gurobi.Var x(5,2)>,
 (5, 3): <gurobi.Var x(5,3)>,
 (5, 4): <gurobi.Var x(5,4)>,
 (5, 5): <gurobi.Var x(5,5)>,
 (5, 6): <gurobi.Var x(5,6)>,
 (6, 1): <gurobi.Var x(6,1)>,
 (6, 2): <gurobi.Var x(6,2)>,
 (6, 3): <gurobi.Var x(6,3)>,
 (6, 4): <

In [36]:
#　制約の追加
for i in num_state:
    for j in num_action:
        LP.addConstr((x[i,j] - ddp.beta * quicksum(ddp.Q[i-1, j-1, k-1] * quicksum(x[k, l] for l in num_action) for k in num_state)) >= ddp.R[i-1,j-1])

LP.update()

In [37]:
#　目的関数の設定
LP.setObjective(quicksum(x[i,j] for i in num_state for j in num_action))
LP.update()

In [43]:
# 求解
LP.ModelSense = 1
LP.optimize()
LP.computeIIS()

for v in LP.getVars():
    print v.VarName ,v.X

Optimize a model with 96 rows, 96 columns and 6366 nonzeros
Coefficient statistics:
  Matrix range    [8e-02, 1e+00]
  Objective range [1e+00, 1e+00]
  Bounds range    [0e+00, 0e+00]
  RHS range       [1e+00, 1e+100]
Presolve removed 15 rows and 15 columns
Presolve time: 0.01s
Presolved: 81 rows, 81 columns, 4836 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    6.6856050e+03   1.383565e+04   0.000000e+00      0s

Solved in 12 iterations and 0.01 seconds
Infeasible model
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   1.874222e+02   0.000000e+00      0s

IIS computed: 18 constraints and 79 bounds
IIS runtime: 0.00 seconds
 x(1,1) 0.0
x(1,2) 0.0
x(1,3) 0.0
x(1,4) 0.0
x(1,5) 0.0
x(1,6) 0.0
x(2,1) 0.0
x(2,2) 0.0
x(2,3) 0.0
x(2,4) 0.0
x(2,5) 0.0
x(2,6) 0.0
x(3,1) 0.0
x(3,2) 0.0
x(3,3) 0.0
x(3,4) 0.0
x(3,5) 0.0
x(3,6) 0.0
x(4,1) 0.0
x(4,2) 0.0
x(4,3) 0.0
x(4,4) 0.0
x(4,5) 0.0
x(4,6) 0.0
x(5,1) 0.0
x(5,2) 0.0
x(5

In [44]:
# 真の値
results = ddp.solve(method='policy_iteration')

In [45]:
results.v

array([ 19.01740222,  20.01740222,  20.43161578,  20.74945302,
        21.04078099,  21.30873018,  21.54479816,  21.76928181,
        21.98270358,  22.18824323,  22.3845048 ,  22.57807736,
        22.76109127,  22.94376708,  23.11533996,  23.27761762])