# Q-learning
$$Q(x_t, u_t) \leftarrow Q(x_t, u_t) + \alpha \left[ R_{t+1} + \gamma \max_{u} Q(x_{t+1}, u) - Q(x_t, u_t) \right]$$

In [1]:
import os
import sys
import numpy as np

# Put the parent of the current working directory on the import path so the
# project package imported below can be resolved when running from this folder.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    # Guard keeps re-running the cell from adding duplicate entries.
    sys.path.append(module_path)

from network_control_rl_framework.rl import QLearning
from network_control_rl_framework.algebra import BaseNumber, FiniteField
from network_control_rl_framework.network import Network, calculate_next_state, calculate_next_state_base_number

In [2]:
# Set up a small network, define a start and a target state, then train a
# QLearning model on them.

# Build the network from an explicit edge list over node labels 0..3.
network = Network()
network.from_edges([(0, 1), (1, 2), (2, 3)])

# NOTE(review): presumably maps an input (signal) index to the node it acts
# on, i.e. a single input applied to node 0 -- confirm against QLearning.
input_matrix = {0: 0}
# Passed as the second argument to BaseNumber below.
# NOTE(review): looks like the base / field size of each state entry -- confirm.
q = 8
# Passed as the first argument to BaseNumber below; must be read after
# from_edges() has populated the network.
# NOTE(review): presumably the node count (4 here) -- confirm what
# Network.nodes actually returns.
n = network.nodes

# Start state, loaded from a 4-entry array (one entry per node label above).
initial_state = BaseNumber(n, q)
initial_state.from_array(np.array([1, 2, 3, 1]))
# Target state; differs from the start state only in the two middle entries.
end_state = BaseNumber(n, q)
end_state.from_array(np.array([1, 3, 2, 1]))

# NOTE(review): num_episodes / max_iteration presumably mean 200 training
# episodes with at most 10 steps each -- confirm against QLearning.
model = QLearning(
    initial_state,
    end_state,
    network,
    input_matrix,
    num_episodes=200,
    max_iteration=10
)
model.train()

In [3]:
# Query the trained model for its signals with vector=True.
# The recorded cell output below is a 3x1 float array.
model.get_signals(vector=True)

array([[2.],
       [3.],
       [1.]])