In [25]:
import numpy as np
import pandas as pd
import importlib

import ipywidgets as widgets

import pysindy as ps
import do_mpc.model as mpc_model
import do_mpc.controller as mpc_contr

In [3]:
import cpagent
import cprender
import cpenvs
import cppid

_ = importlib.reload(cppid)
_ = importlib.reload(cpagent)
_ = importlib.reload(cprender)
_ = importlib.reload(cpenvs)

In [4]:
# Baseline agent
agent = cppid.PidAgentMoving2(
    (0.301, 0.0190, 0.0797),
    (0.0659, 0.00, 0.0499),
    1.33
)
df = cpagent.execute_cartpole(agent, env=cpenvs.MovingCartpoleEnv(), num_episodes=500)
df

Unnamed: 0,ep,t,cart_pos,cart_vel,pole_ang,pole_vel,pos_deviation,force,reward,cart_pos_setpoint
0,0,0,-0.036138,0.007657,0.002915,-0.031009,-0.036138,0.0,,0.000000
1,0,1,-0.035985,-0.187507,0.002294,0.262592,-0.035985,-10.0,0.999353,0.000000
2,0,2,-0.039735,-0.382662,0.007546,0.555998,-0.039735,-10.0,0.999211,0.000000
3,0,3,-0.047388,-0.187646,0.018666,0.265702,-0.047388,10.0,0.998877,0.000000
4,0,4,-0.051141,0.007204,0.023980,-0.021035,-0.051141,10.0,0.998692,0.000000
...,...,...,...,...,...,...,...,...,...,...
250495,499,496,-0.929140,0.369910,0.011134,-0.543009,-0.019890,10.0,0.999802,-0.909249
250496,499,497,-0.921741,0.174633,0.000274,-0.246838,-0.012492,-10.0,0.999922,-0.909249
250497,499,498,-0.918249,-0.020493,-0.004663,0.045931,-0.009000,-10.0,0.999960,-0.909249
250498,499,499,-0.918659,-0.215548,-0.003744,0.337139,-0.009409,-10.0,0.999956,-0.909249


In [5]:
trajectories_x = []
trajectories_xdot = []
trajectories_u = []


for ep in range(100):
    dff = df.loc[df['ep']==ep]
    trajectories_x.append(dff[["cart_pos", "cart_vel", "pole_ang", "pole_vel"]].to_numpy())
    #trajectories_x.append(dff[["cart_pos", "pole_ang"]].to_numpy())
    #trajectories_xdot.append(dff[["cart_vel", "pole_vel"]].to_numpy())
    trajectories_u.append(dff[["force"]].to_numpy())

In [6]:
mySindy = ps.SINDy(
    #feature_names=["cart_pos",  "pole_ang", "force"],
    feature_names=["cart_pos", "cart_vel", "pole_ang", "pole_vel", "force"],
    t_default = 0.02
)

mySindy.fit(
    multiple_trajectories=True,
    x=trajectories_x, 
    #x_dot=trajectories_xdot,
    u=trajectories_u,
)

  b[self.d] = np.math.factorial(self.d)


In [7]:
mySindy.print()

(cart_pos)' = 0.936 cart_vel
(cart_vel)' = -0.183 cart_pos + 3.661 cart_vel + 6.109 pole_ang + 12.237 pole_vel + 0.834 force + 0.758 cart_pos cart_vel + 3.591 cart_pos pole_ang + 0.642 cart_pos pole_vel + -0.090 cart_vel pole_ang + -8.697 pole_ang^2 + -4.126 pole_ang pole_vel + -0.187 pole_vel^2
(pole_ang)' = 0.485 pole_ang + 0.846 pole_vel
(pole_vel)' = 0.291 cart_pos + -5.487 cart_vel + 5.868 pole_ang + -18.484 pole_vel + -1.255 force + -0.158 cart_pos^2 + -1.769 cart_pos cart_vel + -6.641 cart_pos pole_ang + -1.407 cart_pos pole_vel + 0.275 cart_vel^2 + -2.038 cart_vel pole_ang + 0.369 cart_vel pole_vel + 15.095 pole_ang^2 + 4.147 pole_ang pole_vel + 0.522 pole_vel^2


In [8]:
mySindy.get_feature_names()

['1',
 'cart_pos',
 'cart_vel',
 'pole_ang',
 'pole_vel',
 'force',
 'cart_pos^2',
 'cart_pos cart_vel',
 'cart_pos pole_ang',
 'cart_pos pole_vel',
 'cart_pos force',
 'cart_vel^2',
 'cart_vel pole_ang',
 'cart_vel pole_vel',
 'cart_vel force',
 'pole_ang^2',
 'pole_ang pole_vel',
 'pole_ang force',
 'pole_vel^2',
 'pole_vel force',
 'force^2']

In [9]:
mySindy.coefficients()

array([[  0.        ,   0.        ,   0.93598395,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ],
       [  0.        ,  -0.18300184,   3.66145478,   6.10911718,
         12.23691456,   0.83426131,   0.        ,   0.75754658,
          3.59141184,   0.64181149,   0.        ,   0.        ,
         -0.08978934,   0.        ,   0.        ,  -8.69679913,
         -4.12617385,   0.        ,  -0.18681686,   0.        ,
          0.        ],
       [  0.        ,   0.        ,   0.        ,   0.48490049,
          0.84550783,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.    

In [13]:
mySindy.coefficients()[1, :]

array([ 0.        , -0.18300184,  3.66145478,  6.10911718, 12.23691456,
        0.83426131,  0.        ,  0.75754658,  3.59141184,  0.64181149,
        0.        ,  0.        , -0.08978934,  0.        ,  0.        ,
       -8.69679913, -4.12617385,  0.        , -0.18681686,  0.        ,
        0.        ])

In [26]:
myModel = mpc_model.Model(model_type="continuous")
cart_pos = myModel.set_variable(var_type='_x', var_name='cart_pos', shape=(1,1))
cart_vel = myModel.set_variable(var_type='_x', var_name='cart_vel', shape=(1,1))
pole_ang = myModel.set_variable(var_type='_x', var_name='pole_ang', shape=(1,1))
pole_vel = myModel.set_variable(var_type='_x', var_name='pole_vel', shape=(1,1))
u__force = myModel.set_variable(var_type='_u', var_name='u__force', shape=(1,1))

modelvars_x = [cart_pos, cart_vel, pole_ang, pole_vel]
modelvars_u = [u__force]
modelvars = modelvars_x + modelvars_u

In [27]:
# Output should match output of "mySindy.get_feature_names()"

sindy_candidates = []

sindy_candidates.append(1.0)

for i,v in enumerate(modelvars):
    sindy_candidates.append(v)

for i,v1 in enumerate(modelvars):
    for j,v2 in enumerate(modelvars):
        if i >= j:
            sindy_candidates.append(v1 * v2)

# Print
for c in sindy_candidates:
    print(c)

1.0
cart_pos
cart_vel
pole_ang
pole_vel
u__force
sq(cart_pos)
(cart_vel*cart_pos)
sq(cart_vel)
(pole_ang*cart_pos)
(pole_ang*cart_vel)
sq(pole_ang)
(pole_vel*cart_pos)
(pole_vel*cart_vel)
(pole_vel*pole_ang)
sq(pole_vel)
(u__force*cart_pos)
(u__force*cart_vel)
(u__force*pole_ang)
(u__force*pole_vel)
sq(u__force)


In [28]:
for i, v in enumerate(modelvars_x):
    rhs = 0.0
    coeffs_vector = mySindy.coefficients()[i, :]
    for j, cand in enumerate(sindy_candidates):
        coeff = coeffs_vector[j]
        if abs(coeff) >= 0.0001:
            rhs = rhs + coeff * cand
    print(f"({v.name()})' = {rhs}")
    myModel.set_rhs(v.name(), rhs)

myModel.setup()

(cart_pos)' = (0.935984*cart_vel)
(cart_vel)' = ((((((((((((-0.183002*cart_pos)+(3.66145*cart_vel))+(6.10912*pole_ang))+(12.2369*pole_vel))+(0.834261*u__force))+(0.757547*(cart_vel*cart_pos)))+(3.59141*sq(cart_vel)))+(0.641811*(pole_ang*cart_pos)))+(-0.0897893*(pole_vel*cart_pos)))+(-8.6968*sq(pole_vel)))+(-4.12617*(u__force*cart_pos)))+(-0.186817*(u__force*pole_ang)))
(pole_ang)' = ((0.4849*pole_ang)+(0.845508*pole_vel))
(pole_vel)' = (((((((((((((((0.29053*cart_pos)+(-5.48737*cart_vel))+(5.86842*pole_ang))+(-18.4839*pole_vel))+(-1.25455*u__force))+(-0.158161*sq(cart_pos)))+(-1.76917*(cart_vel*cart_pos)))+(-6.64134*sq(cart_vel)))+(-1.40708*(pole_ang*cart_pos)))+(0.275318*sq(pole_ang)))+(-2.03809*(pole_vel*cart_pos)))+(0.369465*(pole_vel*cart_vel)))+(15.0951*sq(pole_vel)))+(4.14738*(u__force*cart_pos)))+(0.522058*(u__force*pole_ang)))


In [43]:
mpc = mpc_contr.MPC(myModel)

setup_mpc = {
    'n_horizon': 20,
    't_step': 0.1,
    'n_robust': 1,
    'store_full_solution': True,
}
mpc.set_param(**setup_mpc)

# mterm = cart_pos**2 + pole_ang**2 
# lterm = cart_pos**2 + pole_ang**2 
mterm = pole_ang**2 
lterm = cart_pos**2 + pole_ang**2 
mpc.set_objective(mterm=mterm, lterm=lterm)

mpc.bounds['lower','_u', 'u__force'] = -10.0
mpc.bounds['upper','_u', 'u__force'] = 10.0
mpc.setup()

In [44]:
class MPCCartPoleAgentCont(cpagent.CartPoleAgentABC):
    def __init__(self, mpc: mpc_contr.MPC) -> None:
        self.mpc = mpc

    def step(self, env_state: np.ndarray) -> int|float:
        u = mpc.make_step(np.array([
            1.0*env_state[0],
            env_state[1],
            env_state[2],
            env_state[3],
        ]))
        return u[0]

In [45]:
# Baseline agent
mpcagent = MPCCartPoleAgentCont(mpc)
dfmpc = cpagent.execute_cartpole(agent, env=cpenvs.MovingCartpoleEnvEnvCont(), num_episodes=10)
dfmpc

Unnamed: 0,ep,t,cart_pos,cart_vel,pole_ang,pole_vel,pos_deviation,force,reward,cart_pos_setpoint
0,0,0,0.033795,-0.002600,-0.004143,0.028240,0.033795,0.0,,0.0
1,0,1,0.033743,0.192581,-0.003578,-0.265747,0.033743,10.0,0.999431,0.0
2,0,2,0.037595,0.387754,-0.008893,-0.559557,0.037595,10.0,0.999293,0.0
3,0,3,0.045350,0.387879,-0.020084,-0.562358,0.045350,-10.0,0.998972,0.0
4,0,4,0.053107,0.388161,-0.031331,-0.568685,0.053107,-10.0,0.998590,0.0
...,...,...,...,...,...,...,...,...,...,...
378,9,42,-0.050384,0.024437,-0.394971,-1.550363,-0.050384,-10.0,0.998731,0.0
379,9,43,-0.049895,0.028583,-0.425978,-1.669229,-0.049895,-10.0,0.998755,0.0
380,9,44,-0.049323,0.032803,-0.459363,-1.796479,-0.049323,-10.0,0.998784,0.0
381,9,45,-0.048667,0.037046,-0.495292,-1.932536,-0.048667,-10.0,0.998816,0.0


In [46]:
EP = 0

fig = cprender.lineplot(dfmpc, ep=EP, incl_velo=False)
fig.show()