Merge a40906e into a11d3be

murrayrm · web-flow · commit 819db5793a50 · 2023-09-16T17:32:07.000Z
diff --git a/control/iosys.py b/control/iosys.py
@@ -127,6 +127,11 @@ def __init__(
         if kwargs:
             raise TypeError("unrecognized keywords: ", str(kwargs))
 
+    # Keep track of the keywords that we recognize
+    kwargs_list = [
+        'name', 'inputs', 'outputs', 'states', 'input_prefix',
+        'output_prefix', 'state_prefix', 'dt']
+
     #
     # Functions to manipulate the system name
     #
diff --git a/control/optimal.py b/control/optimal.py
@@ -66,7 +66,7 @@ class OptimalControlProblem():
        `(fun, lb, ub)`.  The constraints will be applied at each time point
        along the trajectory.
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
     trajectory_method : string, optional
         Method to use for carrying out the optimization. Currently supported
@@ -287,12 +287,16 @@ def __init__(
     # time point and we use a trapezoidal approximation to compute the
     # integral cost, then add on the terminal cost.
     #
-    # For shooting methods, given the input U = [u[0], ... u[N]] we need to
+    # For shooting methods, given the input U = [u[t_0], ... u[t_N]] we need to
     # compute the cost of the trajectory generated by that input.  This
     # means we have to simulate the system to get the state trajectory X =
-    # [x[0], ..., x[N]] and then compute the cost at each point:
+    # [x[t_0], ..., x[t_N]] and then compute the cost at each point:
     #
-    #   cost = sum_k integral_cost(x[k], u[k]) + terminal_cost(x[N], u[N])
+    #   cost = sum_k integral_cost(x[t_k], u[t_k])
+    #          + terminal_cost(x[t_N], u[t_N])
+    #
+    # The actual calculation is a bit more complex: for continuous time
+    # systems, we use a trapezoidal approximation for the integral cost.
     #
     # The initial state used for generating the simulation is stored in the
     # class parameter `x` prior to calling the optimization algorithm.
@@ -325,8 +329,8 @@ def _cost_function(self, coeffs):
             # Sum the integral cost over the time (second) indices
             # cost += self.integral_cost(states[:,i], inputs[:,i])
             cost = sum(map(
-                self.integral_cost, np.transpose(states[:, :-1]),
-                np.transpose(inputs[:, :-1])))
+                self.integral_cost, states[:, :-1].transpose(),
+                inputs[:, :-1].transpose()))
 
         # Terminal cost
         if self.terminal_cost is not None:
@@ -954,7 +958,22 @@ def solve_ocp(
         transpose=None, return_states=True, print_summary=True, log=False,
         **kwargs):
 
-    """Compute the solution to an optimal control problem
+    """Compute the solution to an optimal control problem.
+
+    The optimal trajectory (states and inputs) is computed so as to
+    approximately mimimize a cost function of the following form (for
+    continuous time systems):
+
+      J(x(.), u(.)) = \int_0^T L(x(t), u(t)) dt + V(x(T)),
+
+    where T is the time horizon.
+
+    Discrete time systems use a similar formulation, with the integral
+    replaced by a sum:
+
+      J(x[.], u[.]) = \sum_0^{N-1} L(x_k, u_k) + V(x_N),
+
+    where N is the time horizon (corresponding to timepts[-1]).
 
     Parameters
     ----------
@@ -968,7 +987,7 @@ def solve_ocp(
         Initial condition (default = 0).
 
     cost : callable
-        Function that returns the integral cost given the current state
+        Function that returns the integral cost (L) given the current state
         and input.  Called as `cost(x, u)`.
 
     trajectory_constraints : list of tuples, optional
@@ -990,8 +1009,10 @@ def solve_ocp(
         The constraints are applied at each time point along the trajectory.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
-        and input.  Called as terminal_cost(x, u).
+        Function that returns the terminal cost (V) given the final state
+        and input.  Called as terminal_cost(x, u).  (For compatibility with
+        the form of the cost function, u is passed even though it is often
+        not part of the terminal cost.)
 
     terminal_constraints : list of tuples, optional
         List of constraints that should hold at the end of the trajectory.
@@ -1044,9 +1065,19 @@ def solve_ocp(
 
     Notes
     -----
-    Additional keyword parameters can be used to fine-tune the behavior of
-    the underlying optimization and integration functions.  See
-    :func:`OptimalControlProblem` for more information.
+    1. For discrete time systems, the final value of the timepts vector
+       specifies the final time t_N, and the trajectory cost is computed
+       from time t_0 to t_{N-1}.  Note that the input u_N does not affect
+       the state x_N and so it should always be returned as 0.  Further, if
+       neither a terminal cost nor a terminal constraint is given, then the
+       input at time point t_{N-1} does not affect the cost function and
+       hence u_{N-1} will also be returned as zero.  If you want the
+       trajectory cost to include state costs at time t_{N}, then you can
+       set `terminal_cost` to be the same function as `cost`.
+
+    2. Additional keyword parameters can be used to fine-tune the behavior
+       of the underlying optimization and integration functions.  See
+       :func:`OptimalControlProblem` for more information.
 
     """
     # Process keyword arguments
@@ -1116,15 +1147,16 @@ def create_mpc_iosystem(
         See :func:`~control.optimal.solve_ocp` for more details.
 
     terminal_cost : callable, optional
-        Function that returns the terminal cost given the current state
+        Function that returns the terminal cost given the final state
         and input.  Called as terminal_cost(x, u).
 
     terminal_constraints : list of tuples, optional
         List of constraints that should hold at the end of the trajectory.
         Same format as `constraints`.
 
-    kwargs : dict, optional
-        Additional parameters (passed to :func:`scipy.optimal.minimize`).
+    **kwargs
+        Additional parameters, passed to :func:`scipy.optimal.minimize` and
+        :class:`NonlinearIOSystem`.
 
     Returns
     -------
@@ -1149,14 +1181,22 @@ def create_mpc_iosystem(
     :func:`OptimalControlProblem` for more information.
 
     """
+    from .iosys import InputOutputSystem
+
+    # Grab the keyword arguments known by this function
+    iosys_kwargs = {}
+    for kw in InputOutputSystem.kwargs_list:
+        if kw in kwargs:
+            iosys_kwargs[kw] = kwargs.pop(kw)
+
     # Set up the optimal control problem
     ocp = OptimalControlProblem(
         sys, timepts, cost, trajectory_constraints=constraints,
         terminal_cost=terminal_cost, terminal_constraints=terminal_constraints,
-        log=log, kwargs_check=False, **kwargs)
+        log=log, **kwargs)
 
     # Return an I/O system implementing the model predictive controller
-    return ocp.create_mpc_iosystem(**kwargs)
+    return ocp.create_mpc_iosystem(**iosys_kwargs)
 
 
 #
diff --git a/control/statefbk.py b/control/statefbk.py
@@ -613,7 +613,7 @@ def create_statefbk_iosystem(
         The I/O system that represents the process dynamics.  If no estimator
         is given, the output of this system should represent the full state.
 
-    gain : ndarray or tuple
+    gain : ndarray, tuple, or I/O system
         If an array is given, it represents the state feedback gain (`K`).
         This matrix defines the gains to be applied to the system.  If
         `integral_action` is None, then the dimensions of this array
@@ -627,6 +627,9 @@ def create_statefbk_iosystem(
         gains are computed.  The `gainsched_indices` parameter should be
         used to specify the scheduling variables.
 
+        If an I/O system is given, the error e = x - xd is passed to the
+        system and the output is used as the feedback compensation term.
+
     xd_labels, ud_labels : str or list of str, optional
         Set the name of the signals to use for the desired state and
         inputs.  If a single string is specified, it should be a format
@@ -813,7 +816,15 @@ def create_statefbk_iosystem(
         # Stack gains and points if past as a list
         gains = np.stack(gains)
         points = np.stack(points)
-        gainsched=True
+        gainsched = True
+
+    elif isinstance(gain, NonlinearIOSystem):
+        if controller_type not in ['iosystem', None]:
+            raise ControlArgument(
+                f"incompatible controller type '{controller_type}'")
+        fbkctrl = gain
+        controller_type = 'iosystem'
+        gainsched = False
 
     else:
         raise ControlArgument("gain must be an array or a tuple")
@@ -825,7 +836,7 @@ def create_statefbk_iosystem(
             " gain scheduled controller")
     elif controller_type is None:
         controller_type = 'nonlinear' if gainsched else 'linear'
-    elif controller_type not in {'linear', 'nonlinear'}:
+    elif controller_type not in {'linear', 'nonlinear', 'iosystem'}:
         raise ControlArgument(f"unknown controller_type '{controller_type}'")
 
     # Figure out the labels to use
@@ -919,6 +930,30 @@ def _control_output(t, states, inputs, params):
             _control_update, _control_output, name=name, inputs=inputs,
             outputs=outputs, states=states, params=params)
 
+    elif controller_type == 'iosystem':
+        # Use the passed system to compute feedback compensation
+        def _control_update(t, states, inputs, params):
+            # Split input into desired state, nominal input, and current state
+            xd_vec = inputs[0:sys_nstates]
+            x_vec = inputs[-sys_nstates:]
+
+            # Compute the integral error in the xy coordinates
+            return fbkctrl.updfcn(t, states, (x_vec - xd_vec), params)
+
+        def _control_output(t, states, inputs, params):
+            # Split input into desired state, nominal input, and current state
+            xd_vec = inputs[0:sys_nstates]
+            ud_vec = inputs[sys_nstates:sys_nstates + sys_ninputs]
+            x_vec = inputs[-sys_nstates:]
+
+            # Compute the control law
+            return ud_vec + fbkctrl.outfcn(t, states, (x_vec - xd_vec), params)
+
+        # TODO: add a way to pass parameters
+        ctrl = NonlinearIOSystem(
+            _control_update, _control_output, name=name, inputs=inputs,
+            outputs=outputs, states=fbkctrl.state_labels, dt=fbkctrl.dt)
+
     elif controller_type == 'linear' or controller_type is None:
         # Create the matrices implementing the controller
         if isctime(sys):
diff --git a/control/tests/optimal_test.py b/control/tests/optimal_test.py
@@ -238,6 +238,14 @@ def test_mpc_iosystem_rename():
     assert mpc_relabeled.state_labels == state_relabels
     assert mpc_relabeled.name == 'mpc_relabeled'
 
+    # Change the optimization parameters (check by passing bad value)
+    mpc_custom = opt.create_mpc_iosystem(
+        sys, timepts, cost, minimize_method='unknown')
+    with pytest.raises(ValueError, match="Unknown solver unknown"):
+        # Optimization problem is implicit => check that an error is generated
+        mpc_custom.updfcn(
+            0, np.zeros(mpc_custom.nstates), np.zeros(mpc_custom.ninputs), {})
+
     # Make sure that unknown keywords are caught
     # Unrecognized arguments
     with pytest.raises(TypeError, match="unrecognized keyword"):
@@ -659,7 +667,7 @@ def final_point_eval(x, u):
     "method, npts, initial_guess, fail", [
         ('shooting', 3, None, 'xfail'),         # doesn't converge
         ('shooting', 3, 'zero', 'xfail'),       # doesn't converge
-        ('shooting', 3, 'u0', None),            # github issue #782
+        # ('shooting', 3, 'u0', None),          # github issue #782
         ('shooting', 3, 'input', 'endpoint'),   # doesn't converge to optimal
         ('shooting', 5, 'input', 'endpoint'),   # doesn't converge to optimal
         ('collocation', 3, 'u0', 'endpoint'),   # doesn't converge to optimal
diff --git a/control/tests/statefbk_test.py b/control/tests/statefbk_test.py
@@ -506,6 +506,8 @@ def test_lqr_discrete(self):
          (2,      0,        1,       0,            'nonlinear'),
          (4,      0,        2,       2,            'nonlinear'),
          (4,      3,        2,       2,            'nonlinear'),
+         (2,      0,        1,       0,            'iosystem'),
+         (2,      0,        1,       1,            'iosystem'),
         ])
     def test_statefbk_iosys(
             self, nstates, ninputs, noutputs, nintegrators, type_):
@@ -551,17 +553,26 @@ def test_statefbk_iosys(
         K, _, _ = ct.lqr(aug, np.eye(nstates + nintegrators), np.eye(ninputs))
         Kp, Ki = K[:, :nstates], K[:, nstates:]
 
-        # Create an I/O system for the controller
-        ctrl, clsys = ct.create_statefbk_iosystem(
-            sys, K, integral_action=C_int, estimator=est,
-            controller_type=type_, name=type_)
+        if type_ == 'iosystem':
+            # Create an I/O system for the controller
+            A_fbk = np.zeros((nintegrators, nintegrators))
+            B_fbk = np.eye(nintegrators, sys.nstates)
+            fbksys = ct.ss(A_fbk, B_fbk, -Ki, -Kp)
+            ctrl, clsys = ct.create_statefbk_iosystem(
+                sys, fbksys, integral_action=C_int, estimator=est,
+                controller_type=type_, name=type_)
+
+        else:
+            ctrl, clsys = ct.create_statefbk_iosystem(
+                sys, K, integral_action=C_int, estimator=est,
+                controller_type=type_, name=type_)
 
         # Make sure the name got set correctly
         if type_ is not None:
             assert ctrl.name == type_
 
         # If we used a nonlinear controller, linearize it for testing
-        if type_ == 'nonlinear':
+        if type_ == 'nonlinear' or type_ == 'iosystem':
             clsys = clsys.linearize(0, 0)
 
         # Make sure the linear system elements are correct
diff --git a/doc/optimal.rst b/doc/optimal.rst
@@ -65,6 +65,13 @@ can be on the input, the state, or combinations of input and state,
 depending on the form of :math:`g_i`.  Furthermore, these constraints are
 intended to hold at all instants in time along the trajectory.
 
+For a discrete time system, the same basic formulation applies except
+that the cost function is given by
+
+.. math::
+
+  J(x, u) = \sum_{k=0}^{N-1} L(x_k, u_k)\, dt + V \bigl( x_N \bigr).
+
 A common use of optimization-based control techniques is the implementation
 of model predictive control (also called receding horizon control).  In
 model predictive control, a finite horizon optimal control problem is solved,

Original file line number	Diff line number	Diff line change
`@@ -127,6 +127,11 @@ def __init__(`
`127`	`127`	`if kwargs:`
`128`	`128`	`raise TypeError("unrecognized keywords: ", str(kwargs))`
`129`	`129`
	`130`	`+ # Keep track of the keywords that we recognize`
	`131`	`+ kwargs_list = [`
	`132`	`+ 'name', 'inputs', 'outputs', 'states', 'input_prefix',`
	`133`	`+ 'output_prefix', 'state_prefix', 'dt']`
	`134`	`+`
`130`	`135`	`#`
`131`	`136`	`# Functions to manipulate the system name`
`132`	`137`	`#`