In [1]:
import numpy as np
from numpy import radians, pi
from lib.cartpolesystem import CartPoleStepperMotorSystem, Cart, Pole, StepperMotor
from lib.cartpoleenv import CartPoleEnv
from lib.colors import Colors
from lib.controllers import LQR
import time
from time import perf_counter
from matplotlib import pyplot as plt
from lib.numerical import fe_step, rk4_step

In [2]:
# --- Simulation configuration ---------------------------------------------
dt = 0.02   # time step handed to the environment below
g = 9.81    # gravitational acceleration passed to the system model

# Physical components. The positional arguments follow the constructors in
# lib.cartpolesystem; see that module for their meaning and units.
cart = Cart(0.1, 0.01, (-0.8, 0.8), 0.1)
motor = StepperMotor(0.05, (-2, 2), 0.2, (-100, 100), 0.2)
poles = [Pole(0.1, 0.2, 0.01), Pole(0.1, 0.3, 0.01)]
n = len(poles)  # number of poles; later cells size their state vectors with it

system = CartPoleStepperMotorSystem(cart, motor, poles, g)

# Horizon expressed in the same time unit as dt, converted to a step count.
# Round before converting: plain int() truncates, so a quotient that lands a
# hair below an integer because of floating-point error (e.g. 0.3 / 0.1)
# would silently lose one step.
max_time = 10
N = int(round(max_time / dt))

env = CartPoleEnv(system, dt, N, rk4_step)
# Last expression of the cell: displays the observation-vector shape.
env.observation_space.shape

(6,)

In [3]:
# Operating point for the linearization: all-zero state and zero input.
# The state has two leading entries followed by two entries per pole
# (presumably cart position/velocity, then angle/angular velocity per pole --
# confirm against CartPoleStepperMotorSystem).
linearization_u0 = np.array([0])
linearization_x0 = np.array([0, 0] + n * [radians(0), 0])

A0, B0 = system.linearize(linearization_x0, linearization_u0)

# Output equation: identity C and zero D, sized to the full state vector.
C = np.identity(2 + 2 * n)
D = np.zeros((C.shape[0], 1))

# LQR weights: one diagonal entry per state in Q, a single input weight in R.
Q = np.diag([10, 1, *([100, 10] * n)])
R = np.diag([1])

# The controller is built with the same step size as the simulation.
ct_dt = dt
lqr = LQR(ct_dt)

# Discretize the linearized model, then compute the feedback gain from it.
# Only the second value returned by calculate_K_d is kept.
A_d0, B_d0 = lqr.discretize(A0, B0, C, D)
_, K_d0 = lqr.calculate_K_d(A_d0, B_d0, Q, R)

In [4]:
# Fixed three-decimal float formatting. np.set_printoptions changes NumPy's
# global print state, so it also affects arrays printed by later cells.
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

# Show each linearized model matrix under its label.
print("A", A0, sep="\n")
print("B", B0, sep="\n")

A
[[ 0.000  1.000  0.000  0.000  0.000  0.000]
 [ 0.000  0.000  0.000  0.000  0.000  0.000]
 [ 0.000  0.000  0.000  1.000  0.000  0.000]
 [ 0.000  0.000  42.043 -4.286  0.000  0.000]
 [ 0.000  0.000  0.000  0.000  0.000  1.000]
 [ 0.000  0.000  0.000  0.000  28.029 -1.905]]
B
[[ 0.000]
 [ 1.000]
 [ 0.000]
 [-4.286]
 [ 0.000]
 [-2.857]]


In [5]:
def is_controllable(A, B):
    """Return True when the pair (A, B) passes the Kalman rank test.

    The controllability matrix [B, AB, A^2 B, ..., A^(n-1) B] is assembled
    and its numerical rank is compared with the number of states n.

    Parameters
    ----------
    A : array_like, shape (n, n)
        State matrix.
    B : array_like, shape (n, m) or (n,)
        Input matrix; a 1-D array is treated as a single input column.

    Returns
    -------
    bool
        True if the controllability matrix has rank n, False otherwise.

    Raises
    ------
    ValueError
        If A is not a non-empty square 2-D array, or B does not have one row
        per state.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    if B.ndim == 1:
        B = B.reshape(-1, 1)

    if A.ndim != 2 or A.shape[0] != A.shape[1] or A.shape[0] == 0:
        raise ValueError(f"A must be a non-empty square matrix, got shape {A.shape}")
    n_states = A.shape[0]
    if B.ndim != 2 or B.shape[0] != n_states:
        raise ValueError(
            f"B must have {n_states} rows to match A, got shape {B.shape}"
        )

    # Build the blocks incrementally (A^k B = A @ A^(k-1) B) instead of
    # recomputing a full matrix power for every block.
    blocks = [B]
    for _ in range(n_states - 1):
        blocks.append(A @ blocks[-1])
    controllability_matrix = np.hstack(blocks)

    rank = np.linalg.matrix_rank(controllability_matrix)
    # Convert the NumPy boolean to a plain Python bool for callers.
    return bool(rank == n_states)

# Report the rank-test verdict for the linearized model (A0, B0).
print(
    "The system is controllable."
    if is_controllable(A0, B0)
    else "The system is not controllable."
)

The system is controllable.


In [6]:
# --- Closed-loop simulation under the LQR gain K_d0 -------------------------
# Initial state. NOTE(review): this literal has six entries, so it only fits
# the two-pole setup, whereas the reference below scales with n.
x0 = np.array([-0.4, 0, radians(5), 0, radians(4), 0])

state, _ = env.reset(x0)

# Reference state: first entry 0.1, every other entry zero.
r = np.array([0.1, 0] + [radians(0), 0]*n)

# time1/time2 bracket the env.step call; after the loop they hold the
# timestamps of the final step only.
time1 = time2 = 0

last_update = perf_counter()

# try/finally guarantees the environment is closed even when a step raises
# or the run is interrupted from the keyboard.
try:
    for i in range(N-1):
        # Pace the loop against the wall clock: spin until dt seconds have
        # passed since the previous iteration started.
        while perf_counter() < last_update + dt:
            pass
        last_update = perf_counter()

        error = system.calculate_error(state, r)
        control = lqr.feedback(K_d0, error)

        time1 = perf_counter()
        # NOTE(review): `done` is not inspected, so stepping continues for all
        # N-1 iterations regardless -- confirm this is intended.
        state, reward, done, msg, _ = env.step(control)
        time2 = perf_counter()

        env.render()
finally:
    env.close()

# Wall-clock duration of the last env.step call, in seconds.
print(time2-time1)

0.0002901999978348613
