In [1]:
import crocoddyl
import casadi
import numpy as np


In [2]:
class CasadiCrocoddylActionModelCartPole(crocoddyl.ActionModelAbstract):
    """Cart-pole action model whose dynamics, cost and derivatives are built with CasADi.

    State:   ``x = [y, th, ydot, thdot]`` (cart position, pole angle and their rates).
    Control: ``u = [f]`` (force on the cart).

    The continuous dynamics are integrated with one explicit Euler step of
    length ``dt``. The cost is ``0.5 * sum_i w_i * r_i**2`` over the residual
    ``r = [sin(th), 1 - cos(th), y, ydot, thdot, f]``; it vanishes at
    ``th = 0``, which is the unstable equilibrium of the dynamics below (the
    gravity term enters ``thddot`` with a positive sign).

    All derivatives are obtained symbolically from the CasADi expressions, so
    no numerical differentiation wrapper is required.

    Args:
        weights: six residual weights, ordered like the residual above. For
            the terminal cost (``u is None``) the ``f`` entry is multiplied by
            a zero residual and therefore has no effect.
    """
    # Physical parameters (cart mass, pole mass, pole length, gravity) and
    # the integration step.
    m1, m2, l, g, dt = 1., .1, .5, 9.81, 5e-2

    def __init__(self, weights):
        # Initialise the Crocoddyl base class before attaching any attribute
        # to the instance: 4-dimensional vector state, 1 control input.
        state = crocoddyl.StateVector(4)
        crocoddyl.ActionModelAbstract.__init__(self, state, 1)

        x_cas, u_cas = casadi.SX.sym('x', 4), casadi.SX.sym('u', 1)
        self.build_running_functions(x_cas, u_cas, weights)
        self.build_terminal_functions(x_cas, weights)

    @staticmethod
    def _weighted_cost(residual, weights):
        """Return the symbolic cost ``0.5 * sum_i w_i * r_i**2``."""
        w_cas = casadi.DM(weights)
        return 0.5 * casadi.dot(residual * w_cas, residual)

    def build_running_functions(self, x_cas, u_cas, weights):
        """Create the CasADi functions used at running nodes.

        Builds ``calc_running_func`` (next state and cost) together with the
        dynamics Jacobians ``Fx``/``Fu`` and the cost derivatives
        ``Lx``/``Lu``/``Lxx``/``Luu``/``Lxu``.

        Args:
            x_cas: symbolic state of size 4.
            u_cas: symbolic control of size 1.
            weights: six residual weights.
        """
        y, th, ydot, thdot = x_cas[0], x_cas[1], x_cas[2], x_cas[3]
        f = u_cas[0]
        s, c = casadi.sin(th), casadi.cos(th)
        m = self.m1 + self.m2
        mu = self.m1 + self.m2 * s**2

        # Continuous-time accelerations of the cart and of the pole.
        yddot = (f + self.m2 * s * (self.l * thdot**2 - self.g * c)) / mu
        thddot = (-f * c - self.m2 * self.l * c * s * thdot**2 + m * self.g * s) / (self.l * mu)

        # Explicit Euler step: positions advance with the current velocities,
        # velocities with the current accelerations.
        x_next = casadi.vertcat(y + ydot * self.dt, th + thdot * self.dt,
                                ydot + yddot * self.dt, thdot + thddot * self.dt)

        residual = casadi.vertcat(s, 1 - c, y, ydot, thdot, f)
        rcost = self._weighted_cost(residual, weights)

        args = [x_cas, u_cas]
        self.calc_running_func = casadi.Function('calc', args, [x_next, rcost])
        self.Fx_func = casadi.Function('Fx', args, [casadi.jacobian(x_next, x_cas)])
        self.Fu_func = casadi.Function('Fu', args, [casadi.jacobian(x_next, u_cas)])
        self.Lx_func = casadi.Function('Lx', args, [casadi.gradient(rcost, x_cas)])
        self.Lu_func = casadi.Function('Lu', args, [casadi.gradient(rcost, u_cas)])
        self.Lxx_func = casadi.Function('Lxx', args, [casadi.hessian(rcost, x_cas)[0]])
        self.Luu_func = casadi.Function('Luu', args, [casadi.hessian(rcost, u_cas)[0]])
        # Crocoddyl stores Lxu as an (ndx, nu) matrix, i.e. d/du of the
        # x-gradient. Differentiating the u-gradient with respect to x would
        # give the transposed (nu, ndx) shape instead.
        self.Lxu_func = casadi.Function('Lxu', args, [casadi.jacobian(casadi.gradient(rcost, x_cas), u_cas)])

    def build_terminal_functions(self, x_cas, weights):
        """Create the CasADi functions used at the terminal node.

        The terminal cost uses the same residual as the running cost with the
        control entry fixed to zero, so it depends on the state only.

        Args:
            x_cas: symbolic state of size 4.
            weights: six residual weights.
        """
        y, th, ydot, thdot = x_cas[0], x_cas[1], x_cas[2], x_cas[3]
        s, c = casadi.sin(th), casadi.cos(th)
        residual = casadi.vertcat(s, 1 - c, y, ydot, thdot, 0.0)  # f is 0
        rcost = self._weighted_cost(residual, weights)
        self.calc_terminal_func = casadi.Function('calc_term', [x_cas], [rcost])
        self.Lx_terminal_func = casadi.Function('Lx_term', [x_cas], [casadi.gradient(rcost, x_cas)])
        self.Lxx_terminal_func = casadi.Function('Lxx_term', [x_cas], [casadi.hessian(rcost, x_cas)[0]])

    def calc(self, data, x, u=None):
        """Evaluate the next state and the cost and store them in ``data``.

        With a control, writes ``data.xnext`` and ``data.cost`` from the
        running functions. Without one (terminal node), only ``data.cost`` is
        updated and ``data.xnext`` is left untouched.

        Returns:
            The pair ``(data.xnext, data.cost)``.
        """
        if u is not None:
            x_next, rcost = self.calc_running_func(x, u)
            # CasADi returns DM matrices; Crocoddyl expects a flat vector
            # and a plain float.
            data.xnext = np.asarray(x_next).flatten()
            data.cost = float(rcost)
        else:
            rcost = self.calc_terminal_func(x)
            data.cost = float(rcost)
        return data.xnext, data.cost

    def calcDiff(self, data, x, u=None):
        """Evaluate the dynamics and cost derivatives and store them in ``data``.

        With a control, fills ``Fx``, ``Fu``, ``Lx``, ``Lu``, ``Lxx``, ``Luu``
        and ``Lxu``. Without one (terminal node), fills ``Lx`` and ``Lxx`` and
        zeroes every control-related or dynamics term.
        """
        if u is not None:
            Fx, Fu = self.Fx_func(x, u), self.Fu_func(x, u)
            Lx, Lu = self.Lx_func(x, u), self.Lu_func(x, u)
            Lxx, Luu, Lxu = self.Lxx_func(x, u), self.Luu_func(x, u), self.Lxu_func(x, u)
            data.Fx, data.Fu = np.asarray(Fx), np.asarray(Fu)
            data.Lx, data.Lu = np.asarray(Lx).flatten(), np.asarray(Lu).flatten()
            data.Lxx, data.Luu, data.Lxu = np.asarray(Lxx), np.asarray(Luu), np.asarray(Lxu)
        else:
            Lx, Lxx = self.Lx_terminal_func(x), self.Lxx_terminal_func(x)
            data.Lx, data.Lxx = np.asarray(Lx).flatten(), np.asarray(Lxx)
            data.Lu.fill(0.)
            data.Luu.fill(0.)
            data.Lxu.fill(0.)
            data.Fx.fill(0.)
            data.Fu.fill(0.)

    def createData(self):
        """Return a generic Crocoddyl action data object sized for this model."""
        return crocoddyl.ActionDataAbstract(self)

In [3]:

### Simple Example to Test It

# Draw the test point from a seeded generator so that this cell (and any
# later cell that reuses x0) gives the same numbers after Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

# 1. Define weights for the cost function
#    (order: sin(th), 1-cos(th), y, ydot, thdot, f)
cost_weights = [10., 10., 1., 0.1, 0.1, 0.01]

# 2. Instantiate the CasADi-powered Crocoddyl model
model = CasadiCrocoddylActionModelCartPole(weights=cost_weights)

# 3. Create the data structure
data = model.createData()

# 4. Define a random (but reproducible) initial state and control,
#    both uniform in [0, 1)
x0 = rng.random(model.state.nx)
u_rand = rng.random(model.nu)

# --- Test the calc() and calcDiff() methods ---
print("--- Testing CasADi-Powered Crocoddyl Action Model ---")
print(f"Initial State (x0): {x0}")
print(f"Random Control (u): {u_rand}\n")

# Run the calculations (no NumDiff wrapper needed!)
model.calc(data, x0, u_rand)
model.calcDiff(data, x0, u_rand)

# --- Print the results stored in the data object ---
print("--- Results from calc() ---")
print(f"Next State (x_next): {data.xnext}")
print(f"Running Cost (cost): {data.cost}\n")

print("--- Results from calcDiff() (computed by CasADi) ---")
print(f"State Jacobian (Fx):\n{data.Fx}\n")
print(f"Cost Hessian w.r.t. state (Lxx):\n{data.Lxx}")

--- Testing CasADi-Powered Crocoddyl Action Model ---
Initial State (x0): [0.41337781 0.56290756 0.41683649 0.31698744]
Random Control (u): [0.5637139]

--- Results from calc() ---
Next State (x_next): [0.43421964 0.57875693 0.42284831 0.83032706]
Running Cost (cost): 1.6436704012626677

--- Results from calcDiff() (computed by CasADi) ---
State Jacobian (Fx):
[[ 1.00000000e+00  0.00000000e+00  5.00000000e-02  0.00000000e+00]
 [ 0.00000000e+00  1.00000000e+00  0.00000000e+00  5.00000000e-02]
 [ 0.00000000e+00 -2.08495682e-02  1.00000000e+00  8.22377969e-04]
 [ 0.00000000e+00  8.71320279e-01  0.00000000e+00  9.98609018e-01]]

Cost Hessian w.r.t. state (Lxx):
[[1.         0.         0.         0.        ]
 [0.         8.45707076 0.         0.        ]
 [0.         0.         0.1        0.        ]
 [0.         0.         0.         0.1       ]]


In [4]:
# Quick inspection: show the Python type of the next-state vector stored by calc().
type(data.xnext)

numpy.ndarray

In [7]:
# Swing-up target: cart at the origin, pole angle at zero, everything at rest.
x_goal = np.zeros(4)

# Number of running knots in the horizon.
T = 50

# --- Action models (running and terminal) ---
# Both models share the same dynamics and differ only in their cost weights.
# Weight order: [sin(th), 1-cos(th), y, ydot, thdot, f].

# Running cost: mild state penalties and a very cheap control effort, so the
# solver is free to swing the pole up.
running_weights = [1.0, 1.0, 0.1, 0.1, 0.1, 0.001]

# Terminal cost: heavy penalties on any deviation from the goal state; the
# final control force is not penalised.
terminal_weights = [1000.0, 1000.0, 1000.0, 100.0, 100.0, 0.0]

running_model = CasadiCrocoddylActionModelCartPole(weights=running_weights)
terminal_model = CasadiCrocoddylActionModelCartPole(weights=terminal_weights)

# --- Shooting problem ---
# The same running-model instance is reused at each of the T knots.
running_models = [running_model for _ in range(T)]
problem = crocoddyl.ShootingProblem(x0, running_models, terminal_model)

# --- DDP solver, with logging and per-iteration console output ---
solver = crocoddyl.SolverDDP(problem)
solver.setCallbacks([crocoddyl.CallbackLogger(), crocoddyl.CallbackVerbose()])

# Warm start: states interpolated linearly from x0 to x_goal, zero controls.
init_xs = [x0 + (k / T) * (x_goal - x0) for k in range(T + 1)]
init_us = [np.zeros(running_model.nu) for _ in range(T)]

# Solve the problem
print("--- Solving the cart-pole swing-up problem ---")
solver.solve(init_xs=init_xs, init_us=init_us, maxiter=500)

: 