In [17]:
import numpy as np
import scipy as sp
from scipy import linalg


- assume double integrator model

In [18]:
# M players
M = 3

# N time steps
N = 5

# n state size
n = 4 # (x, y, v_x, v_y)

# m control input size
m = 2 # (a_x, a_y)

# timestep
dt = 0.01

# collision avoidance radius
r = 0.5

### States

$X^v$ and $U^v$ trajectory for each player $v$ basically like the CFTOCP batch approach without substitution from HW1

$X^v = [x_1, ... x_N]$ and $U^v = [u_0, ... u_{N-1}]$

For the full Nash equilibium problem, just concatenate all the players trajectories together 

$X = [X^1, X^2, ..., X^m]$ and $U = [U^1, U^2, ..., U^m]$



### Dynamics

$D(X,U)$ contains all trajectories and control inputs stacked

In [19]:
# double integrator
A = np.eye(n) + np.diag([dt, dt], k=int(n / 2))
B = np.vstack((np.eye(m) * dt**2 / 2, (np.eye(m) * dt)))

# player dynamics
# A_eq * X_v + B_eq * U_v = E_eq
a_block = linalg.block_diag(*([-A] * (N - 1)))
A_eq = np.eye(N * n) + np.pad(a_block, [(n, 0), (0, n)])
B_eq = linalg.block_diag(*([-B] * N))
E_eq = np.vstack((A, np.zeros(((N - 1) * n, n))))

# system dynamics for all players
# A_sys * X + B_sys * U = E_sys
A_sys = linalg.block_diag(*([A_eq] * M))
B_sys = linalg.block_diag(*([B_eq] * M))
E_sys = np.vstack(([E_eq] * M))

In [20]:
print(np.shape(A_sys),np.shape(B_sys))
print(np.shape(A_eq),np.shape(B_eq))
print(np.shape(A),np.shape(B))


(60, 60) (60, 30)
(20, 20) (20, 10)
(4, 4) (4, 2)


### Inequality Constraints

1) walls: all position + radius should not exceed value (assume only horizontal/vertical walls)

$C_{wall}X - (D_{wall} - R) \leq 0$

2) collision avoidance: player position + radius not exceed another player position

$R^2 - ||x_k^v - x_k^{v'} ||_2^2 \leq 0$

3) control input bounds: bounds on control input centered at zero

$-B \leq U \leq B$

In [69]:
# position wall bounds
# example wall at y = 2 (extends forever)
wall_y = 2.0
ind_y = 1  # index corresponding to y position in state

# for single state k
# c_wall_ineq * x_k - d_wall_ineq <= 0
c_wall_ineq = np.zeros(n)
c_wall_ineq[ind_y] = 1
d_wall_ineq = wall_y - r

# for single player
# C_wall_ineq * X_v - D_wall_ineq <= 0
C_wall_ineq = linalg.block_diag(*([c_wall_ineq] * N))
D_wall_ineq = np.vstack(([d_wall_ineq]*N))

# for all players
# C_wall_sys * X - D_wall_sys <= 0
C_wall_sys = linalg.block_diag(*([C_wall_ineq] * M))
D_wall_sys = np.vstack(([D_wall_ineq]*M))

print(np.shape(C_wall_sys))

(15, 60)


In [22]:
# collision avoidance with other players
# will need to be formulated per timestep k, per players v1, v2
# r - (C_cola_k_v1_v2 * X).T @ (C_cola_k_v1_v2 * X) <= 0

# select position from the states
pos = np.hstack((np.eye(2), np.zeros((2, 2))))

matrices = []
for k in range(N):  # timestep
    for v1 in range(M):  # player 1
        for v2 in range(v1 + 1, M):  # player 2
            c_block = [np.zeros((2, n))] * N * M
            ind1 = v1 * N + k
            ind2 = v2 * N + k
            c_block[ind1] = pos
            c_block[ind2] = -1 * pos
            C_cola = np.hstack(c_block)
            matrices.append(C_cola)

# will need to stack up all of these individually after evaluating the quadratic

# there might be better way linear algebra wise to stack them together but prob 
# is gonna make the derivative worse to find

In [None]:
# control input bounds
max_x = 5
max_y = 5

ind_x = 0  # index corresponding to x position in control
ind_y = 1
f_ineq = np.zeros((4, m))
f_ineq[0, ind_x] = 1
f_ineq[1, ind_x] = -1
f_ineq[2, ind_y] = 1
f_ineq[3, ind_y] = -1
g_ineq = np.hstack((np.ones((2, 1)) * max_x, np.ones((2, 1)) * max_y))

F_ineq = linalg.block_diag(*([f_ineq] * N))
G_ineq = np.vstack(([g_ineq] * N))

F_sys = linalg.block_diag(*([F_ineq] * M))
G_sys = np.vstack(([G_ineq] * M))

print(np.shape(F_sys))

(60, 30) (30, 2)


### Objective

In [None]:
# X = np.random.rand(N*M*n)
X = np.array(range(M*N*n))
U = np.array(range(M*N*m)) * 0.1

xf = np.array([1,1,2,2])
Q = np.eye(n)*2
Qf = np.eye(n)*5
R = np.eye(m)*10

u_v = 2

cost = 0
# trajectory states
for k in range(0, N - 2):
    for v in range(M):
        ind = (v * N + k) * n
        xk = X[ind:ind + n]
        cost += 0.5 * (xk - xf).T @ Q @ (xk - xf)

# final state
k = N - 1
ind = (v * N + k) * n
xk = X[ind:ind + n]
cost += 0.5 * (xk - xf).T @ Qf @ (xk - xf)

# player control input
for k in range(0, N - 1):
    ind = (u_v * N + k) * m
    uk = U[ind:ind + m]
    cost += 0.5 * uk.T @ R @ uk
cost

62326.00000000001

In [None]:
# wrt x
xf_sys = np.tile(np.reshape(xf, (n, )), N * M)
Q_sys = linalg.block_diag(*(([Q] * (N - 1) + [Qf]) * M))
J_x = (X - xf_sys).T @ Q_sys

# wrt u
ind = (u_v * N) * m
Uv = U[ind: ind + N*m]
R_sys = linalg.block_diag(*([R] * N))
J_u = Uv.T @ R_sys

np.hstack((J_x, J_u))
print(np.shape(Uv), np.shape(R_sys))
print(np.shape(J_x), np.shape(J_u))
print(np.shape(X),np.shape(U))

(10,) (10, 10)
(60,) (10,)
(60,) (30,)


In [23]:
# control input bounds
# example u_x <= 5 (for all time)
u_x_max = 5.0
ind_x = 0  # index corresponding to x position in control

# for single state k
# f_ineq * x_k - g_ineq <= 0
f_ineq = np.zeros((2,m))
f_ineq[0,ind_x] = 1
f_ineq[1,ind_x] = -1
g_ineq = np.ones((2,1)) * u_x_max

# for single player
# F_ineq * X_v - G_ineq <= 0
F_ineq = linalg.block_diag(*([f_ineq] * N))
G_ineq = np.vstack(([g_ineq]*N))

# for all players
# f_sys * X - g_sys <= 0
F_sys = linalg.block_diag(*([F_ineq] * M))
G_sys = np.vstack(([G_ineq]*M))

### Augmented Lagrangian

$ L^v(X,U) = J^v + \mu^{v\top} D + \lambda^\top C + \frac{1}{2}C^\top I_\rho C
$