# Model predictive control is almost optimal for restless bandits: parameters

This notebook contains all the code to "pretty-print" the parameters of the problems used for the figures.

In [1]:
import numpy as np
import bandit_lp
import strategies

In [15]:
def print_bandit(bandit, alpha=0.5):
    """Pretty-print a bandit problem followed by its LP-priority summary.

    The following is written to standard output, in this order:

    1. whatever ``bandit.print_latex()`` emits;
    2. every entry of the LP-priority strategy's ``lp_index``, formatted with
       ``bandit_lp.float_to_str`` and each followed by ``' , '`` (so the line
       ends with a trailing separator and no newline);
    3. the strategy's ``order_of_states``;
    4. the first element of ``bandit.relaxed_lp_average_reward(alpha)``.

    Parameters
    ----------
    bandit
        Object exposing ``print_latex()`` and
        ``relaxed_lp_average_reward(alpha)``; it is also handed to
        ``strategies.LPPriorityStragegy``.
    alpha
        Forwarded both to the strategy constructor and to the relaxed-LP
        computation. Defaults to 0.5.

    Returns
    -------
    None
    """
    bandit.print_latex()
    priority_strategy = strategies.LPPriorityStragegy(bandit, alpha=alpha)

    # One chunk per index value; the separator is appended to every entry,
    # including the last one.
    for index_value in priority_strategy.lp_index:
        formatted_index = bandit_lp.float_to_str(index_value)
        print(str(formatted_index) + ' , ', end='')

    # The leading '\n' terminates the index line started above.
    state_order = priority_strategy.order_of_states
    print('\n order=', state_order)

    relaxed_solution = bandit.relaxed_lp_average_reward(alpha)
    print("relaxed-value = ", relaxed_solution[0])

In [16]:
# Print the BanditCounterExample problem using print_bandit's default alpha (0.5).
print_bandit(bandit_lp.BanditCounterExample())
    

\begin{align*}
    P^{ 0 }=\left(
    \begin{array}{cccccccc}
       1 & & & & & & & \\
       1 & & & & & & & \\
        &0.48 &0.52 & & & & & \\
        & &0.47 &0.53 & & & & \\
        & & & &0.9 &0.1 & & \\
        & & & & &0.9 &0.1 & \\
        & & & & & &0.9 &0.1 \\
       0.1 & & & & & & &0.9 \\
    \end{array}\right)
\end{align*}

R^{ 0 } = , , , , , , , 0.1,

\begin{align*}
    P^{ 1 }=\left(
    \begin{array}{cccccccc}
       0.9 &0.1 & & & & & & \\
        &0.9 &0.1 & & & & & \\
        & &0.9 &0.1 & & & & \\
        & & &0.9 &0.1 & & & \\
        & & &0.46 &0.54 & & & \\
        & & & &0.45 &0.55 & & \\
        & & & & &0.44 &0.56 & \\
        & & & & & &0.43 &0.57 \\
    \end{array}\right)
\end{align*}

R^{ 1 } = , , , , , , , ,

0.025 , 0.025 , 0.025 , 0.025 ,  , -0.113 , -0.110 , -0.108 , 
 order= [1 2 0 3 4 7 6 5]
relaxed-value =  0.0125


In [18]:
# Print the BanditCounterExampleYan2 problem with alpha set to 0.4 instead of
# print_bandit's default of 0.5.
print_bandit(bandit_lp.BanditCounterExampleYan2(), alpha=0.4)


\begin{align*}
    P^{ 0 }=\left(
    \begin{array}{ccc}
       0.022 &0.102 &0.875 \\
       0.034 &0.172 &0.794 \\
       0.523 &0.455 &0.022 \\
    \end{array}\right)
\end{align*}

R^{ 0 } = , , ,

\begin{align*}
    P^{ 1 }=\left(
    \begin{array}{ccc}
       0.149 &0.304 &0.547 \\
       0.568 &0.411 &0.020 \\
       0.253 &0.273 &0.474 \\
    \end{array}\right)
\end{align*}

R^{ 1 } = 0.374, 0.117, 0.079,

0.199 , -0.000 , -0.133 , 
 order= [0 1 2]
relaxed-value =  0.12380017120322259


In [13]:
# Print a BanditRandom problem built with 8 states and 2 actions, using
# print_bandit's default alpha (0.5).
# NOTE(review): seed=3 presumably makes the generated instance reproducible —
# confirm against bandit_lp.BanditRandom.
print_bandit(bandit_lp.BanditRandom(number_of_states=8, number_of_actions=2, seed=3))


\begin{align*}
    P^{ 0 }=\left(
    \begin{array}{cccccccc}
       0.101 &0.155 &0.043 &0.090 &0.281 &0.285 &0.017 &0.029 \\
       0.006 &0.207 &0.076 &0.136 &0.085 &0.299 &0.147 &0.043 \\
       0.317 &0.254 &0.065 &0.013 &0.144 &0.111 &0.061 &0.035 \\
       0.098 &0.183 &0.069 &0.068 &0.218 &0.028 &0.200 &0.136 \\
       0.053 &0.080 &0.009 &0.038 &0.483 &0.036 &0.159 &0.143 \\
       0.018 &0.105 &0.027 &0.397 &0.150 &0.102 &0.161 &0.040 \\
       0.110 &0.050 &0.088 &0.024 &0.023 &0.142 &0.169 &0.393 \\
       0.055 &0.043 &0.017 &0.494 &0.227 &0.034 &0.119 &0.011 \\
    \end{array}\right)
\end{align*}

R^{ 0 } = 0.073, 0.087, 0.778, 0.186, 1.178, 0.417, 1.996, 1.351,

\begin{align*}
    P^{ 1 }=\left(
    \begin{array}{cccccccc}
       0.011 &0.124 &0.006 &0.131 &0.224 &0.070 &0.241 &0.191 \\
       0.071 &0.138 &0.033 &0.023 &0.045 &0.250 &0.339 &0.101 \\
       0.093 &0.113 &0.056 &0.061 &0.109 &0.351 &0.157 &0.059 \\
       0.158 &0.176 &0.151 &0.150 &0.060 &0.142 &0.053 &0

'1.123'