In [1]:
%matplotlib inline

In [4]:
import numpy as np

In [None]:
"""
    SimpleOG{TI,T,TR,TQ}

Container for a small discrete dynamic program describing ore extraction.

# Fields
- `sbar`: upper bound of the ore stock; the states are the stock levels `0:sbar`.
- `beta`: discount factor.
- `price`: market price of ore.
- `R`: reward array. `R[i, j]` is the payoff of extracting `j - 1` units when the
  stock is `i - 1`; infeasible pairs (extracting more than the stock) hold `-Inf`.
- `Q`: transition array. `Q[i, j, k]` is the probability of moving from stock
  `i - 1` to stock `k - 1` when `j - 1` units are extracted.
"""
struct SimpleOG{TI <: Integer, T <: Real,
                TR <: AbstractArray{T}, TQ <: AbstractArray{T}}

    sbar :: TI
    beta :: T
    price :: T
    R :: TR
    Q :: TQ
end

"""
    SimpleOG(; sbar=100, price=1.0, beta=0.9)

Build the ore-extraction model with stock levels `0:sbar`, ore price `price`
and discount factor `beta`.

Extracting `x` units from a stock of `s` costs `x^2 / (1 + s)` and earns
`price * x`. The stock then drops deterministically to `s - x`.

# Keywords
- `sbar::Integer`: upper bound of the stock; must be non-negative.
- `price::Real`: market price of ore.
- `beta::Real`: discount factor.

`price` and `beta` are promoted to a common floating-point type, which is also
the element type of `R` and `Q`.

# Throws
- `ArgumentError` if `sbar` is negative.
"""
function SimpleOG(; sbar::Integer=100, price::Real=1.0, beta::Real=0.9)
    sbar >= 0 || throw(ArgumentError("sbar must be non-negative, got $sbar"))

    # R stores -Inf for infeasible actions, so the element type must be a float.
    T = float(promote_type(typeof(price), typeof(beta)))
    p = T(price)
    b = T(beta)

    # Widen before adding one so narrow integer types cannot overflow.
    n = Int(sbar) + 1   # number of states
    m = Int(sbar) + 1   # number of actions

    R = fill(T(-Inf), n, m)
    Q = zeros(T, n, m, n)

    # Index i is stock i - 1 and index j is extraction j - 1.
    # The first index runs innermost to follow column-major storage.
    for j in 1:m, i in 1:n
        if j <= i
            # Feasible: extraction does not exceed the stock.
            x = T(j - 1)
            R[i, j] = p * x - x^2 / i       # cost is x^2 / (1 + stock), stock = i - 1
            Q[i, j, i - j + 1] = one(T)     # next stock is (i - 1) - (j - 1)
        else
            # Infeasible action: R stays -Inf; the transition target is arbitrary.
            Q[i, j, 1] = one(T)
        end
    end

    # Positional order must match the field order: sbar, beta, price, R, Q.
    return SimpleOG(sbar, b, p, R, Q)
end

In [6]:

# Model primitives
sbar = 100    # Largest ore stock the model allows
price = 1     # Market price of one unit of ore
beta = 0.9    # Discount factor

# One state per stock level 0..sbar and one action per extraction 0..sbar
n = sbar + 1
m = sbar + 1


def c(s, x):
    """Return the cost of extracting `x` units when the stock is `s`."""
    return x**2 / (1 + s)

In [8]:
# Reward array: R[s, x] is the payoff of extracting x units from a stock of s.
# Every entry starts at -inf so infeasible actions (x > s) stay at -inf.
# Plain nested ranges are used because this notebook never imports itertools.
R = np.full((n, m), -np.inf)
for s in range(n):
    for x in range(m):
        if x <= s:
            # Revenue from selling x units minus the extraction cost
            R[s, x] = price * x - c(s, x)

In [9]:
R

array([[ 0.        ,        -inf,        -inf, ...,        -inf,
               -inf,        -inf],
       [ 0.        ,  0.5       ,        -inf, ...,        -inf,
               -inf,        -inf],
       [ 0.        ,  0.66666667,  0.66666667, ...,        -inf,
               -inf,        -inf],
       ..., 
       [ 0.        ,  0.98989899,  1.95959596, ...,  0.98989899,
               -inf,        -inf],
       [ 0.        ,  0.99      ,  1.96      , ...,  1.96      ,
         0.99      ,        -inf],
       [ 0.        ,  0.99009901,  1.96039604, ...,  2.91089109,
         1.96039604,  0.99009901]])

In [10]:
# Transition array: Q[s, x, s'] is the probability of moving from stock s to
# stock s' when x units are extracted. The dynamics are deterministic, so
# each (s, x) row has a single entry equal to 1.
# Plain nested ranges are used because this notebook never imports itertools.
Q = np.zeros((n, m, n))
for s in range(n):
    for x in range(m):
        if x <= s:
            Q[s, x, s - x] = 1  # Stock falls by the amount extracted
        else:
            Q[s, x, 0] = 1  # Infeasible action: target state is arbitrary

In [11]:
Q

array([[[ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  1.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  0.,  1., ...,  0.,  0.,  0.],
        [ 0.,  1.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       ..., 
       [[ 0.,  0.,  0., ...,  1.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0., 