## Define the environment

In [8]:
import numpy as np 
from pbo.environment.linear_quadratic import LinearQuadraticEnv

# Build the linear-quadratic environment used by every cell below.
# The recorded output of this cell shows the transition and reward models
# being printed when it runs.
# NOTE(review): max_state / max_action look like clipping bounds (the recorded
# random rollout saturates at +/-10) -- confirm against LinearQuadraticEnv.
env = LinearQuadraticEnv(
    max_state=10,
    max_action=10,
)

Transition: s' = As + Ba
Transition: s' = -2.71s + -9.2a
Reward: Qs² + Ra² + 2 Ssa
Reward: -2.63s² + -9.41a² + -7.84sa


## Test random agent

In [9]:
# Roll out a uniformly random policy for at most 10 steps and report the
# per-step transitions plus the accumulated reward.
# NOTE(review): actions are drawn from NumPy's unseeded global RNG, so the
# trajectory changes on every run -- consider seeding for reproducibility.
initial_state = np.array([3])
state = env.reset(state=initial_state)
sum_reward = 0
step = 0
terminal = False

# Stop as soon as the environment reports termination or after 10 steps.
while not (terminal or step >= 10):
    # One action component sampled uniformly from [-1, 1).
    action = np.random.uniform(-1, 1, size=1)
    next_state, reward, terminal, _ = env.step(action)

    # `state` is still the state the action was applied to; it is only
    # advanced after the line has been printed.
    print(
        f"State: {np.around(state[0], 2)}",
        f"Action: {np.around(action[0], 2)}",
        f"Reward: {np.around(reward, 4)}",
    )

    step += 1
    sum_reward += reward
    state = next_state

print(f"Sum reward: {sum_reward}")

State: 3 Action: 0.91 Reward: -52.7361
State: -10.0 Action: 0.46 Reward: -228.6492
State: 10.0 Action: 0.5 Reward: -304.8608
State: -10.0 Action: -0.95 Reward: -345.7079
State: 10.0 Action: 0.69 Reward: -321.8144
State: -10.0 Action: 0.91 Reward: -199.3837
State: 10.0 Action: -1.0 Reward: -194.0642
State: -10.0 Action: -0.91 Reward: -342.4143
State: 10.0 Action: 0.12 Reward: -272.6523
State: -10.0 Action: 0.5 Reward: -225.9548
Sum reward: -2488.2379508988265


## Test optimal agent

In [14]:
# Roll out the environment's own optimal policy for at most 10 steps from the
# same start state as the random agent, so the two reward sums are comparable.
initial_state = np.array([3])
state = env.reset(state=initial_state)
sum_reward = 0
step = 0
terminal = False

# Stop as soon as the environment reports termination or after 10 steps.
while not (terminal or step >= 10):
    # optimal_action() takes no arguments.
    # NOTE(review): presumably it uses the environment's current internal
    # state -- confirm in LinearQuadraticEnv.
    action = env.optimal_action()
    next_state, reward, terminal, _ = env.step(action)

    # `state` is still the state the action was applied to; it is only
    # advanced after the line has been printed.
    print(
        f"State: {np.around(state[0], 2)}",
        f"Action: {np.around(action[0], 2)}",
        f"Reward: {np.around(reward, 4)}",
    )

    step += 1
    sum_reward += reward
    state = next_state

print(f"Sum reward: {sum_reward}")

State: 3 Action: -0.92 Reward: -10.0303
State: 0.3 Action: -0.09 Reward: -0.1013
State: 0.03 Action: -0.01 Reward: -0.001
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
Sum reward: -10.132686460508383
