In [1]:
from enum import Enum
from random import random
import numpy as np

In [2]:
class Action(Enum):
    """The two actions that can be taken.

    The integer values are used as indices into per-action arrays
    (index 0 for ``A``, index 1 for ``B``).
    """

    A = 0  # sampled with probability 1/7 by choose_action()
    B = 1  # sampled otherwise


def choose_action():
    """Draw one action at random.

    A single uniform sample from ``random()`` (in ``[0, 1)``) is scaled by 7
    and compared against 1, so ``Action.A`` is returned with probability 1/7
    and ``Action.B`` with probability 6/7.
    """
    picks_a = 7 * random() < 1
    return Action.A if picks_a else Action.B


In [3]:
# Draw 20 actions and show their names on one line; every name, including the
# last one, is followed by ', '.
print(*(choose_action().name for _ in range(20)), sep=', ', end=', ')

A, B, B, B, B, B, B, B, B, B, B, B, B, A, B, A, B, B, B, B, 

In [4]:
class Problem:
    """Transition model of a small two-action problem.

    The model holds one ``N_STATES x N_STATES`` row-stochastic matrix per
    action, stacked along axis 0 in the order ``[A, B]`` so that
    ``Action.<X>.value`` selects the matrix of action ``X``.

    NOTE(review): the layout resembles Baird's counterexample - confirm.
    """

    # Number of states; the last state is the one action A always leads to.
    N_STATES = 7

    def __init__(self):
        self._transition_probs = self.define_transition_probabilities()

    @property
    def transition_probs_a(self):
        """Transition matrix for action A (rows: current state, columns: next state)."""
        return self._transition_probs[Action.A.value]

    @property
    def transition_probs_b(self):
        """Transition matrix for action B (rows: current state, columns: next state)."""
        return self._transition_probs[Action.B.value]

    @classmethod
    def define_transition_probabilities(cls):
        """Build the stacked transition matrices.

        Returns:
            Array of shape ``(2, N_STATES, N_STATES)``. Slice 0 (action A)
            moves to the last state with probability 1 from every state.
            Slice 1 (action B) moves to each of the other ``N_STATES - 1``
            states with equal probability and never to the last state.
        """
        n_states = cls.N_STATES

        tp_a = np.zeros((n_states, n_states))
        tp_a[:, -1] = 1

        # Derive the uniform probability from the state count instead of
        # hard-coding 1/6, so every row still sums to 1 when a subclass
        # overrides N_STATES. With a single state there is no "other" state,
        # hence the matrix stays all-zero (and no division by zero occurs).
        tp_b = np.zeros((n_states, n_states))
        if n_states > 1:
            tp_b[:, :-1] = 1 / (n_states - 1)

        return np.stack([tp_a, tp_b])


In [5]:
# Build the problem once; `p` is reused by the next cell.
# Under action A every row puts probability 1 on the last state.
p = Problem()
p.transition_probs_a

array([[0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.]])

In [6]:
# Under action B each row spreads probability 1/6 over the first six states
# and assigns 0 to the last one.
p.transition_probs_b

array([[0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ],
       [0.16666667, 0.16666667, 0.16666667, 0.16666667, 0.16666667,
        0.16666667, 0.        ]])

In [7]:
# Parameter vector: 15 integer entries, all equal to 1 except index 6,
# which is set to 10.
w = np.ones(15, dtype=int)
w[6] = 10
w

array([ 1,  1,  1,  1,  1,  1, 10,  1,  1,  1,  1,  1,  1,  1,  1])

In [8]:
def define_weight_matrices(n_states: int, n_params: int):
    """Build the integer feature tensor that maps parameters to (state, action) values.

    Despite the name, the result holds feature coefficients, not weights: for
    a parameter vector ``w`` of length ``n_params``, ``result @ w`` has shape
    ``(n_states, 2)``.

    Layout, with ``shared = n_params // 2`` and ``last = n_states - 1``:

    * action slot 0 (``phi_a``): every state has coefficient 1 on the shared
      column; each state before the last has coefficient 2 on its own column
      (column index == state index); the last state has coefficient 1 on its
      own column and 2 on the shared column.
    * action slot 1 (``phi_b``): state ``s`` has a single coefficient 1 in
      column ``shared + n_params % 2 + s``.

    The notebook calls this with ``n_params == 2 * n_states + 1`` (7 and 15),
    which places the shared column exactly between the two groups of
    per-state columns.

    Args:
        n_states: Number of states (rows of each feature matrix).
        n_params: Length of the parameter vector (columns of each matrix).

    Returns:
        Integer array of shape ``(n_states, 2, n_params)`` indexed as
        ``[state, action slot, parameter]``.

    Raises:
        IndexError: If ``n_params`` is too small to hold the columns above.
    """
    # One index for the shared column. The original addressed it as
    # `n_params // 2` in one place and as `n_states` in another, which only
    # agree when n_params // 2 == n_states.
    shared_col = n_params // 2
    last = n_states - 1
    upper = np.arange(last)

    phi_a = np.zeros((n_states, n_params), dtype=int)
    phi_a[:, shared_col] = 1
    phi_a[upper, upper] = 2
    phi_a[last, last] = 1
    phi_a[last, shared_col] = 2  # overrides the 1 written to the shared column above

    phi_b = np.zeros((n_states, n_params), dtype=int)
    rows = np.arange(n_states)
    phi_b[rows, shared_col + n_params % 2 + rows] = 1

    # Stack along a new axis 1 so the result is indexed [state, action, param].
    return np.stack([phi_a, phi_b], axis=1)


In [9]:
# Feature tensor indexed as [state, action slot, parameter]; the parameter
# count is taken from `w` so that `phi @ w` is well defined.
phi = define_weight_matrices(Problem.N_STATES, len(w))
phi.shape

(7, 2, 15)

In [10]:
# Action slot 0 (the phi_a matrix), one row per state: coefficient 2 on the
# state's own column and 1 on column 7, swapped (1 and 2) for the last state.
phi[:, 0]

array([[2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0]])

In [11]:
# Action slot 1 (the phi_b matrix), one row per state: a single coefficient 1,
# in columns 8 through 14.
phi[:, 1]

array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [12]:
# Contract the parameter axis: one linear value per (state, action slot) pair.
# NOTE(review): presumably these are the initial action-value estimates - confirm.
phi @ w

array([[ 3,  1],
       [ 3,  1],
       [ 3,  1],
       [ 3,  1],
       [ 3,  1],
       [ 3,  1],
       [12,  1]])