In [29]:
import numpy
from whittle import *
from Markov import *

# Define the environment parameters for the test
beta = 0.9  # discount factor
num_states = 3  # three possible states
num_partitions = 100  # not referenced anywhere else in this cell
num_arms = 2  # two arms
horizon = 10  # time horizon

# nc and tt are opaque here: nc only scales na, tt is only forwarded to
# MarkovDynamics. NOTE(review): document their meaning from Markov.py.
nc = 3
tt = 3
na = nc * num_states  # 3 * 3 = 9; used as the size argument for Values/MarkovDynamics
ftype = numpy.ones(na, dtype=numpy.int32)  # all-ones int32 vector of length na
# na evenly spaced values from 0.1/num_states to 1/num_states, rounded to 2 decimals.
prob_remain = numpy.round(numpy.linspace(0.1 / num_states, 1 / num_states, na), 2)
# In-place shuffle using NumPy's global RNG. No seed is set in this cell, so the
# order (and everything derived from it) can differ between runs.
numpy.random.shuffle(prob_remain)

# Project helpers from `whittle` / `Markov`; only their .vals and .transitions
# attributes are used below. The trailing True flag is not explained here.
R = Values(horizon, na, num_states, ftype, True)
M = MarkovDynamics(na, num_states, prob_remain, tt, True)

reward, transition = R.vals, M.transitions

# Earlier hand-written reward/transition arrays, kept commented out.
# (They use the `np` alias, which this cell does not import.)
# # Define the reward structure for each state and action (can be changed as needed)
# reward = np.array([[[0.75, 0.25], [0.5, 0.5]], [[0, 0.5], [1, 0.25]]])  # Shape (num_states, num_actions, num_arms)

# # Define the transition matrix for state transitions
# transition = np.array([
#     [[[0.7, 0.3], [0.3, 0.7]], [[0.4, 0.6], [0.6, 0.4]]],
#     [[[0.2, 0.8], [0.8, 0.2]], [[0.5, 0.5], [0.5, 0.5]]]
# ])  # Shape (num_states, num_states, num_actions, num_arms)

# Initialize the class instances
# NOTE(review): reward/transition were built with na (= 9) as the size argument,
# but the solvers are told num_arms (= 2) — confirm this mismatch is intended.
whittle_class = WhittleDisT(beta, num_states, num_arms, reward, transition, horizon)
# The SAFE variant takes a (num_states, num_states) tuple plus u_type/u_order/thresholds;
# presumably utility-function settings — TODO confirm against whittle.py.
safe_whittle_class = SafeWhittleDisT(beta, (num_states, num_states), num_arms, reward, transition, horizon, u_type=1, u_order=4, thresholds=[0.3, 0.3])

# Define the test for backward induction with different lambda values
def test_backward_induction(whittle_class, lambdas):
    for lambda_value in lambdas:
        print(f"Testing backward induction for lambda = {lambda_value}")
        pi, V, Q = whittle_class.bellman_equation(0, lambda_value)
        print("Policy:\n", pi)
        print("Value Function:\n", V)
        print("State-action value function (Q):\n", Q)
        print("\n")
        print(f"Testing SAFE backward induction for lambda = {lambda_value}")
        pi, V, Q = safe_whittle_class.bellman_equation(0, lambda_value)
        print("Policy:\n", pi)
        print("Value Function:\n", V)
        print("State-action value function (Q):\n", Q)
        print("\n")

# Test backward induction for three different lambda values.
# Only whittle_class is passed in; test_backward_induction reads
# safe_whittle_class from the notebook's global namespace.
lambdas = [0, 5, 10]
test_backward_induction(whittle_class, lambdas)

# Define the test for Whittle index computation
def test_whittle_indices(whittle_class, lower_bound, upper_bound, n_trials):
    print(f"Computing Whittle indices between bounds {lower_bound} and {upper_bound} with {n_trials} trials")
    whittle_class.get_whittle_indices(computation_type=1, params=(lower_bound, upper_bound), n_trials=n_trials)
    print("Computed Whittle indices:\n", whittle_class.w_indices)
    print(f"Computing SAFE Whittle indices between bounds {lower_bound} and {upper_bound} with {n_trials} trials")
    safe_whittle_class.get_whittle_indices(computation_type=1, params=(lower_bound, upper_bound), n_trials=n_trials)
    print("Computed SAFE Whittle indices:\n", whittle_class.w_indices)

# Test the Whittle index computation with bounds 0 and 10 and n_trials=100.
# The bounds are forwarded as params=(lower_bound, upper_bound); the SAFE solver
# is taken from the global safe_whittle_class inside the function.
test_whittle_indices(whittle_class, lower_bound=0, upper_bound=10, n_trials=100)


Testing backward induction for lambda = 0
Policy:
 [[1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]]
Value Function:
 [[0.05369051 0.0498163  0.04551163 0.04072867 0.0354143  0.02950971
  0.02295123 0.01568124 0.00774    0.         0.        ]
 [0.05979598 0.05592177 0.0516171  0.04683413 0.04151972 0.03561483
  0.0290539  0.02176515 0.013685   0.005      0.        ]
 [0.06513216 0.06125795 0.05695328 0.05217031 0.0468559  0.040951
  0.03439    0.0271     0.019      0.01       0.        ]]
State-action value function (Q):
 [[[0.04483467 0.05369051]
  [0.04096047 0.0498163 ]
  [0.0366558  0.04551163]
  [0.03187287 0.04072867]
  [0.02655874 0.0354143 ]
  [0.02065611 0.02950971]
  [0.01411312 0.02295123]
  [0.006966   0.01568124]
  [0.         0.00774   ]
  [0.         0.        ]]

 [[0.05060396 0.05979598]
  [0.04672976 0.05592177]
  [0.04242509 0.0516171 ]
  [0.03764215 0.04683413]
  [0.03232799 0.04151972]
  [0.02642505 0.03561483]
  [0.01987969 0.0290539 ]
  [0.01

In [28]:
import numpy as np
from whittle import *

# Define the environment parameters for the test
beta = 0.9  # discount factor
num_states = 2  # two possible states
num_arms = 2  # two arms
horizon = 50  # time horizon (only passed to SafeWhittleDis below; WhittleDis receives no horizon)

# Define the reward structure for each state and action (can be changed as needed)
reward = np.array([[[0.75, 0.25], [0.5, 0.5]], [[0, 0.5], [1, 0.25]]])  # Shape (num_states, num_actions, num_arms)

# Define the transition matrix for state transitions
# NOTE(review): in the literal below every innermost pair sums to 1 (i.e. along
# the LAST axis), while sums along axis 1 do not (e.g. 0.7 + 0.4 = 1.1 and
# 0.2 + 0.5 = 0.7). If the library treats axis 1 as the next state, as the shape
# comment suggests, these are not probability distributions — confirm the axis
# convention in whittle.py. The recorded output for lambda = 0 shows plain-solver
# values around 1e31, which may be related.
transition = np.array([
    [[[0.7, 0.3], [0.3, 0.7]], [[0.4, 0.6], [0.6, 0.4]]],
    [[[0.2, 0.8], [0.8, 0.2]], [[0.5, 0.5], [0.5, 0.5]]]
])  # Shape (num_states, num_states, num_actions, num_arms)

# Initialize the class instances
whittle_class = WhittleDis(beta, num_states, num_arms, reward, transition)
# The SAFE variant takes a (num_states, num_states) tuple plus u_type/u_order/thresholds;
# presumably utility-function settings — TODO confirm against whittle.py.
safe_whittle_class = SafeWhittleDis(beta, (num_states, num_states), num_arms, reward, transition, horizon, u_type=1, u_order=2, thresholds=[0.5, 0.5])

# Define the test for backward induction with different lambda values
def test_backward_induction(whittle_class, lambdas):
    for lambda_value in lambdas:
        print(f"Testing backward induction for lambda = {lambda_value}")
        pi, V, Q = whittle_class.bellman_equation(0, lambda_value)
        print("Policy:\n", pi)
        print("Value Function:\n", V)
        print("State-action value function (Q):\n", Q)
        print("\n")
        print(f"Testing SAFE backward induction for lambda = {lambda_value}")
        pi, V, Q = safe_whittle_class.bellman_equation(0, lambda_value)
        print("Policy:\n", pi)
        print("Value Function:\n", V)
        print("State-action value function (Q):\n", Q)
        print("\n")

# Test backward induction for three different lambda values.
# Only whittle_class is passed in; test_backward_induction reads
# safe_whittle_class from the notebook's global namespace.
lambdas = [0, 50, 200]
test_backward_induction(whittle_class, lambdas)

# Define the test for Whittle index computation
def test_whittle_indices(whittle_class, lower_bound, upper_bound, n_trials):
    print(f"Computing Whittle indices between bounds {lower_bound} and {upper_bound} with {n_trials} trials")
    whittle_class.get_whittle_indices(computation_type=1, params=(lower_bound, upper_bound), n_trials=n_trials)
    print("Computed Whittle indices:\n", whittle_class.w_indices)
    print(f"Computing SAFE Whittle indices between bounds {lower_bound} and {upper_bound} with {n_trials} trials")
    safe_whittle_class.get_whittle_indices(computation_type=1, params=(lower_bound, upper_bound), n_trials=n_trials)
    print("Computed SAFE Whittle indices:\n", whittle_class.w_indices)

# Test the Whittle index computation with bounds 0 and 10000 and n_trials=100.
# The bounds are forwarded as params=(lower_bound, upper_bound); the SAFE solver
# is taken from the global safe_whittle_class inside the function.
test_whittle_indices(whittle_class, lower_bound=0, upper_bound=10000, n_trials=100)


Testing backward induction for lambda = 0
Policy:
 [0 1]
Value Function:
 [6.2395597e+31 7.7237893e+31]
State-action value function (Q):
 [[6.2395597e+31 5.4437881e+31]
 [4.3544270e+31 7.7237893e+31]]


Testing SAFE backward induction for lambda = 0
Policy:
 [[0 1]
 [0 1]]
Value Function:
 [[ 850.37775 1008.66766]
 [2902.3135  3442.552  ]]
State-action value function (Q):
 [[[ 850.37775  732.52106]
  [ 574.23114 1008.66766]]

 [[2902.3135  2500.072  ]
  [1959.8335  3442.552  ]]]


Testing backward induction for lambda = 50
Policy:
 [0 0]
Value Function:
 [0.29712036 0.09721202]
State-action value function (Q):
 [[ 0.29712036 -4.8173223 ]
 [ 0.09721202 -4.642343  ]]


Testing SAFE backward induction for lambda = 50
Policy:
 [[0 1]
 [0 1]]
Value Function:
 [[ 394.31686  462.5976 ]
 [2446.2522  2896.4814 ]]
State-action value function (Q):
 [[[ 394.31686  333.49063]
  [ 265.2099   462.5976 ]]

 [[2446.2522  2101.0413 ]
  [1650.812   2896.4814 ]]]


Testing backward induction for lambda = 