## Define the environment

In [1]:
import numpy as np 
from pbo.environment.linear_quadratic import LinearQuadraticEnv

# Constructor arguments for the linear-quadratic environment, named so they
# can be tuned in one place.
MAX_STATE = 10
MAX_ACTION = 10

env = LinearQuadraticEnv(max_state=MAX_STATE, max_action=MAX_ACTION)

Transition: s' = As + Ba
Transition: s' = 0.75s + 7.68a
Reward: Qs² + Ra² + 2 Ssa
Reward: -6.63s² + 8.86a² + -9.98sa


## Test random agent

In [2]:
# Roll out a uniformly random policy from a fixed start state for a bounded
# number of steps, printing each transition and the accumulated reward.
max_steps = 10  # upper bound on the rollout length

# Dedicated seeded generator: the sampled action sequence is identical on
# every "Restart & Run All". The previous unseeded np.random.uniform call drew
# different actions on each run, so numbers recorded from earlier runs will
# not match.
rng = np.random.default_rng(0)

initial_state = np.array([3])
state = env.reset(state=initial_state)
terminal = False
step = 0
sum_reward = 0

while not terminal and step < max_steps:
    # Length-1 action array drawn uniformly from [-1, 1).
    action = rng.uniform(-1, 1, size=1)
    next_state, reward, terminal, _ = env.step(action)

    # Report the state the action was taken in (not the resulting state).
    print(f"State: {np.around(state[0], 2)}", end=" ")
    print(f"Action: {np.around(action[0], 2)}", end=" ")
    print(f"Reward: {np.around(reward, 4)}")

    state = next_state
    sum_reward += reward
    step += 1

print(f"Sum reward: {sum_reward}")

State: 3 Action: 0.15 Reward: -64.0117
State: 3.41 Action: 0.08 Reward: -79.9077
State: 3.16 Action: 0.34 Reward: -75.7654
State: 4.96 Action: -0.15 Reward: -155.355
State: 2.59 Action: -0.23 Reward: -38.2513
State: 0.22 Action: 0.8 Reward: 3.621
State: 6.29 Action: 0.18 Reward: -273.6145
State: 6.14 Action: 0.22 Reward: -262.8402
State: 6.3 Action: -0.36 Reward: -239.0955
State: 1.96 Action: 0.83 Reward: -35.6203
Sum reward: -1220.8405640331735


## Test optimal agent

In [3]:
# Roll out the action returned by env.optimal_action() from a fixed start
# state, for at most `max_steps` steps, printing each transition and the
# accumulated reward.
max_steps = 10

initial_state = np.array([3])
state = env.reset(state=initial_state)
terminal, step, sum_reward = False, 0, 0

# Stop as soon as the environment signals termination or the cap is reached.
while not (terminal or step >= max_steps):
    action = env.optimal_action()
    next_state, reward, terminal, _ = env.step(action)

    # Single print call; the default separator reproduces the spaces that
    # separate the three fields.
    print(
        f"State: {np.around(state[0], 2)}",
        f"Action: {np.around(action[0], 2)}",
        f"Reward: {np.around(reward, 4)}",
    )

    # Advance to the next transition.
    sum_reward += reward
    state = next_state
    step += 1

print(f"Sum reward: {sum_reward}")

State: 3 Action: -0.35 Reward: -48.1232
State: -0.43 Action: 0.05 Reward: -0.9947
State: 0.06 Action: -0.01 Reward: -0.0206
State: -0.01 Action: 0.0 Reward: -0.0004
State: 0.0 Action: -0.0 Reward: -0.0
State: -0.0 Action: 0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: -0.0 Action: 0.0 Reward: -0.0
State: 0.0 Action: -0.0 Reward: -0.0
State: -0.0 Action: 0.0 Reward: -0.0
Sum reward: -49.13890580906188
