In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np; np.random.seed(0)
np.set_printoptions(precision=3)
import matplotlib.pyplot as plt
from plot_settings import savefig

from extquadcontrol import ExtendedQuadratic, dp_infinite, dp_finite
from system import InfiniteHorizonSystem
from policy import TimeInvariantAffinePolicy

# Jump LQR

In this example, the dynamics are linear and the cost is quadratic. The system has modes and randomly transitions between them.

The dynamics are
$$s=1; \quad x_{t+1} = 1.2x_t + 0.1u_t+0.2w_t$$
$$s=2; \quad x_{t+1} = 0.8x_t - 0.1u_t+0.1w_t$$
where $w_t \sim \mathcal{N}(0,1)$, and the Markov chain transition probabilities are
$$\Pi = \begin{bmatrix} .8 & .2 \\ .2 & .8 \end{bmatrix}.$$
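The goal is to minimize
$$\sum_{t=0}^\infty \|x_t\|_2^2 + \|u_t\|_2^2.$$

Note: as written, the sampling code below multiplies the noise term $w_t$ by zero, so the policies and costs reported in this notebook are for the noise-free dynamics; only the mode switching is random.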

In [3]:
# Problem dimensions.
n = 1    # state dimension (A is n-by-n in the sampler)
m = 1    # input dimension (B is n-by-m in the sampler)
K = 2    # number of modes of the Markov chain

# Arguments handed to dp_infinite.
N = 100  # presumably the number of samples requested from the sampler -- TODO confirm
T = 50   # presumably the number of DP iterations / horizon used -- TODO confirm

In [4]:
def get_sample(individual=False, noise_scales=(0.0, 0.0)):
    """Build a sampler for the two-mode jump-LQR problem.

    Parameters
    ----------
    individual : bool, optional
        When True the mode transition matrix is the identity, so a mode
        never changes.  When False (default) the modes switch according to
        the 0.8/0.2 transition matrix.
    noise_scales : sequence of two floats, optional
        Coefficient multiplying a standard-normal draw in the affine term
        ``c`` of mode 0 and mode 1 respectively.  The default
        ``(0.0, 0.0)`` reproduces the previously hard-coded behaviour (the
        draw is multiplied by zero); ``(0.2, 0.1)`` gives the noisy dynamics
        described in the notebook text.

    Returns
    -------
    callable
        ``sample(N)`` returning ``(A, B, c, g, Pi)`` where ``A`` has shape
        ``(N, K, n, n)``, ``B`` has shape ``(N, K, n, m)``, ``c`` has shape
        ``(N, K, n)``, ``g`` is an N-by-K nested list of
        ``ExtendedQuadratic`` objects and ``Pi`` is the mode transition
        matrix.

    Raises
    ------
    ValueError
        If ``noise_scales`` does not contain exactly two entries.

    Notes
    -----
    Reads the notebook-level globals ``K``, ``n`` and ``m``.  Only modes 0
    and 1 are populated, so ``K`` is expected to be 2.
    """
    if len(noise_scales) != 2:
        raise ValueError("noise_scales must have one entry per mode (2 entries)")

    # (state coefficient, input coefficient) for mode 0 and mode 1.
    mode_dynamics = ((1.2, .1), (0.8, -.1))

    def sample(N):
        A = np.zeros((N, K, n, n))
        B = np.zeros((N, K, n, m))
        c = np.zeros((N, K, n))

        for mode, ((a_coef, b_coef), scale) in enumerate(zip(mode_dynamics, noise_scales)):
            A[:, mode, :, :] = a_coef
            B[:, mode, :, :] = b_coef
            # The normal draw is made even when scale is zero so that the
            # global RNG stream is consumed exactly as before.
            c[:, mode, :] = scale * np.random.normal(size=(N, n))

        if individual:
            # Identity transitions: every mode stays where it is.
            Pi = np.eye(K)
        else:
            Pi = np.array([
                [.8, .2],
                [.2, .8]
            ])

        # Identity quadratic term over the stacked (x, u) vector with no
        # linear or constant part; presumably ExtendedQuadratic(P, q, r)
        # encodes the stage cost |x|^2 + |u|^2 -- TODO confirm.
        P = np.eye(n + m)
        q = np.zeros(n + m)
        r = 0
        g = [[ExtendedQuadratic(P, q, r) for _ in range(K)] for _ in range(N)]

        return A, B, c, g, Pi

    return sample

In [5]:
# Sampler for the switching model: get_sample() defaults to individual=False,
# so Pi is the 0.8/0.2 transition matrix.
sample = get_sample()
# presumably V and Q are the value / state-action cost functions produced by
# the DP, and policy is the resulting control law -- TODO confirm against extquadcontrol
V, Q, policy = dp_infinite(sample, T, N)
# Wrapped policy object; this is what gets passed to system.simulate later.
p = TimeInvariantAffinePolicy(policy)
# Show the raw policy: a list with one pair of arrays per mode
# (presumably (gain, offset) of an affine law -- TODO confirm).
policy

[(array([[-2.541]]), array([0.])), (array([[0.919]]), array([0.]))]

In [6]:
# Sampler with individual=True: Pi is replaced by the identity, so the DP sees
# a model in which the mode never changes.
# NOTE: this rebinds `sample`, `V` and `Q` from the previous cell.
sample = get_sample(True)
V, Q, policy_individual = dp_infinite(sample, T, N)
# Wrapped policy object used for the comparison simulation further down.
p_ind = TimeInvariantAffinePolicy(policy_individual)
# Show the raw policy: a list with one pair of arrays per mode.
policy_individual

[(array([[-3.844]]), array([0.])), (array([[0.207]]), array([0.]))]

In [7]:
# The simulator is built from get_sample() with its default individual=False,
# i.e. the switching model; both policies are later simulated on this system.
system = InfiniteHorizonSystem(get_sample(), K)

In [8]:
# Monte Carlo estimate of the cost incurred by the policy `p` (computed with
# the switching model).
n_trials = 100  # used for both the loop count and the averaging divisor
avg_cost = 0.
np.random.seed(0)  # same seed as the p_ind evaluation cell
for _ in range(n_trials):
    # Every rollout starts at x0 = 10 in mode 0.  T=100 is the keyword passed
    # to simulate and is separate from the notebook-level T.
    Xs, Us, Modes, cost = system.simulate(x0=[10],s0=0,T=100,policy=p)
    # Accumulate cost/n_trials term by term (running mean of the rollouts).
    avg_cost += cost/n_trials
avg_cost

16.162312795106406

In [9]:
# Monte Carlo estimate of the cost incurred by the policy `p_ind` (computed
# with identity mode transitions) when run on the switching system.
n_trials = 100  # used for both the loop count and the averaging divisor
avg_cost = 0.
np.random.seed(0)  # same seed as the p evaluation cell
for _ in range(n_trials):
    # Every rollout starts at x0 = 10 in mode 0.  T=100 is the keyword passed
    # to simulate and is separate from the notebook-level T.
    Xs, Us, Modes, cost = system.simulate(x0=[10],s0=0,T=100,policy=p_ind)
    # Accumulate cost/n_trials term by term (running mean of the rollouts).
    avg_cost += cost/n_trials
avg_cost

18.527510707874765