In [1]:
import torch
from torch.autograd import Variable
dtype = torch.FloatTensor

## Get a state of the double integrator

<p align="center">
  <img src="double_integrator_brick.svg" width="350"/>
</p>

In [2]:
### initial state and parameters

## parameters
width  = 10.0 # width of brick in pixels
height = 5.0
t_f = 100     # number of time steps

## state
x    = 30.0 # position to right in pixels
xdot = 0.0  # 

state_initial = torch.FloatTensor([x, xdot])
state_initial = Variable(state_initial, requires_grad=False)
print state_initial

Variable containing:
 30
  0
[torch.FloatTensor of size 2]



## Just render double integrator

- Convert state of double integrator to an AABB list of corners in img
- Find all pixels inside AABB

In [3]:
import numpy as np
pi = np.pi

def convert_world_to_img_coordinates(world_coordinates):
    return world_coordinates + 50

def torch_flip(two_element_tensor):
    return torch.FloatTensor([two_element_tensor[1],two_element_tensor[0]])
    
def torch_det(A,B):
    return A[0]*B[1] - A[1]*B[0]

def distance_pytorch(A, B, P):
    if (A == B).all() or (B == P).all():
        return 0
    if (torch.acos(torch.FloatTensor([torch.dot((P - A) / (P - A).norm(), 
                                                (B - A) / (B - A).norm())])) > pi/2).all():
        return (P - A).norm()
    
    if (torch.acos(torch.FloatTensor([torch.dot((P - B) / (P - B).norm(), 
                                                (A - B) / (A - B).norm())])) > pi/2).all():
        return (P - B).norm()
    
    return abs(torch_det(A-B, A-P))/(B-A).norm()

def inside_aabb(aabb_corners, P):
    # check x conditions
    if (P[0] < aabb_corners[0][0]).all() or (P[0] > aabb_corners[3][0]).all():
        return False
    
    # check y conditions
    if (P[1] < aabb_corners[0][1]).all() or (P[1] > aabb_corners[3][1]).all():
        return False
    
    return True

def distance_to_aabb(aabb_corners, P):
    # if inside aabb, return 0
    distance_to_left   = distance_pytorch(aabb_corners[0], aabb_corners[1], P)
    distance_to_top    = distance_pytorch(aabb_corners[1], aabb_corners[3], P)
    distance_to_right  = distance_pytorch(aabb_corners[0], aabb_corners[1], P)
    distance_to_bottom = distance_pytorch(aabb_corners[0], aabb_corners[1], P) 

In [4]:
%matplotlib notebook
import matplotlib.pyplot as plt

def where(cond, x_1, x_2):
    cond = cond.float()    
    return (cond * x_1) + ((1-cond) * x_2)

def vectorized_inside_aabb(aabb_corners):
    img = Variable(torch.ones((100,100)), requires_grad=False)
    img[0:int(aabb_corners[0][0]),:] = 0.0
    img[int(aabb_corners[3][0]):,:] = 0.0
    img[:,0:int(aabb_corners[0][1])] = 0.0
    img[:,int(aabb_corners[3][1]):] = 0.0
    return img
    
def double_integrator_state_to_img(state):
    center = Variable(torch.zeros(2), requires_grad = False)
    center[1] = state[0]
    center_in_img = convert_world_to_img_coordinates(center)

    lower_left  = center + Variable(torch.FloatTensor([0,      -width/2]), requires_grad=False)
    upper_left  = center + Variable(torch.FloatTensor([height, -width/2]), requires_grad=False)
    lower_right = center + Variable(torch.FloatTensor([0,       width/2]), requires_grad=False)
    upper_right = center + Variable(torch.FloatTensor([height,  width/2]), requires_grad=False)

    corners = [lower_left, upper_left, lower_right, upper_right]
    corners_in_img = [convert_world_to_img_coordinates(x) for x in corners]
    
    return vectorized_inside_aabb(corners_in_img)

import time; start = time.time()
img = double_integrator_state_to_img(state_initial)
print time.time() - start, " seconds for state to img"
plt.imshow(img.data, cmap=plt.get_cmap('gray_r'))
plt.show()

0.000482082366943  seconds for state to img


<IPython.core.display.Javascript object>

## State feedback: perform state feedback with just PD

In [5]:
def pd_origin_controller(state):
    x_desired = 0
    xdot_desired = 0
    diff_pos = (x_desired    - state[0])
    diff_vel = (xdot_desired - state[1])
    u = 1000*diff_pos + 100*diff_vel
    if (u > 500).all():
        u = u*0 + 500  # this trick mantains u as a Variable
    if (u < -500).all():
        u = u*0 -500
    return u

def double_integrator_next_state(state, u):
    deriv = Variable(torch.zeros(2), requires_grad = False)
    deriv[0] = state[1]
    deriv[1] = u
    dt = .01
    next_state = state + deriv*dt
    return next_state

state_tape = []
state_tape.append(state_initial)

for i in range(t_f):
    u = pd_origin_controller(state_tape[-1])
    next_state = double_integrator_next_state(state_tape[-1],u)
    state_tape.append(next_state)
    
print len(state_tape)

101


## Convert states to imgs

In [6]:
img_tape = []

for i in state_tape:
    img_tape.append(double_integrator_state_to_img(i))
    
print len(img_tape)

101


## Visualize

In [7]:
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import animation
from IPython.display import HTML

def get_animation(img_tape):
    fig = plt.figure()
    first_img = img_tape[0].data
    im = plt.imshow(first_img, cmap='gist_gray_r')

    def init():
        im.set_data(first_img)

    def animate(i):
        im.set_data(img_tape[i].data)
        return im

    animate = animation.FuncAnimation(fig, animate, init_func=init, frames=len(img_tape), interval=20, blit=True)
    plt.close(fig)
    return animate
    
# ani = get_animation(img_tape)
# HTML(ani.to_html5_video())

## Simultaneous search for policy and verifying Lyapunov function

$$\dot{x} = f(x,u)$$

$$ u = \pi_\theta(x)) $$

$$ V(x) = p.s.d. \text{by construction, but parameterized by parameters } \psi $$

$$ \dot{V} = \frac{dV}{dx} \dot{x}$$
$$ = \big[ \frac{dV}{dx}\big]^T \big[f(x,\pi_{\theta}(x) \big] $$

### Loss function

$X$ = {$x_1, x_2, ..., x_N$} many samples

$$ L(\theta) = \sum_{i} l(x_i, \theta) $$

\begin{equation}
  \mathcal{l}(x_i,\theta) =
  \begin{cases}
    \dot{V}(x_i, \theta) & \text{if $\dot{V}(x_i, \theta) > 0$} \\
    0 & \text{otherwise}
  \end{cases}
\end{equation}



In [25]:
x_i = Variable(torch.FloatTensor([1.0, 1.1]).type(dtype), requires_grad=True)
print x_i

P = Variable(torch.FloatTensor([1, 1]).type(dtype), requires_grad=True)
print P
### Step 1: compute V(x)
def compute_V(x):
    '''V(x) = x_1^2 + x_2^2'''
    x_squared = x.pow(2)
    return torch.dot(P, x_squared)

print x_i.shape
print compute_V(x_i)

many_x_i = Variable(torch.FloatTensor([[1.0, 1.1, 2.0],[1.1, 2.2, 3.0]]).type(dtype), requires_grad=True)

def compute_V_vectorized(x):
    x_squared = x.pow(2)
    P_transpose = torch.t(P.unsqueeze(1))
    return P_transpose.mm(x_squared)

print many_x_i
print compute_V_vectorized(many_x_i)

Variable containing:
 1.0000
 1.1000
[torch.FloatTensor of size 2]

Variable containing:
 1
 1
[torch.FloatTensor of size 2]

torch.Size([2])
Variable containing:
 2.2100
[torch.FloatTensor of size 1]

Variable containing:
 1.0000  1.1000  2.0000
 1.1000  2.2000  3.0000
[torch.FloatTensor of size 2x3]

Variable containing:
  2.2100   6.0500  13.0000
[torch.FloatTensor of size 1x3]



In [26]:
### Step 2: compute dV/dx

### Step 3: initialize policy parameters
K = Variable(torch.FloatTensor([1, 2]).type(dtype), requires_grad=True)
print K

### Step 4: define dynamics function
def dynamics(x):
    xdot = Variable(torch.zeros(2).type(dtype))
    xdot[0] = x[1]
    xdot[1] = -torch.dot(K,x)
    return xdot
    
xdot = dynamics(x_i)
print xdot

many_x_i = Variable(torch.FloatTensor([[1.0, 1.1, 2.0],[1.1, 2.2, 3.0]]).type(dtype), requires_grad=True)

def dynamics_vectorized(x):
    xdot = Variable(torch.zeros(2,x.shape[1]).type(dtype))
    xdot[0,:] = x[1,:]
    K_transpose = torch.t(K.unsqueeze(1))
    xdot[1,:] = -K_transpose.mm(x)
    return xdot
      
print many_x_i
print dynamics_vectorized(many_x_i)

Variable containing:
 1
 2
[torch.FloatTensor of size 2]

Variable containing:
 1.1000
-3.2000
[torch.FloatTensor of size 2]

Variable containing:
 1.0000  1.1000  2.0000
 1.1000  2.2000  3.0000
[torch.FloatTensor of size 2x3]

Variable containing:
 1.1000  2.2000  3.0000
-3.2000 -5.5000 -8.0000
[torch.FloatTensor of size 2x3]



In [45]:
def compute_Vdot(x):
    V = compute_V(x)
    print x.shape
    print V.shape
    print V
    V.backward(torch.ones(1).type(dtype),retain_graph=True)
    jacobian_x = Variable(x.grad.data)
    print "jacobian_x is", jacobian_x
    f = dynamics(x)
    Vdot = torch.dot(jacobian_x,f)
    return Vdot

print "x_i is", x_i
Vdot = compute_Vdot(x_i)
print Vdot
Vdot.backward()

def compute_Vdot_vectorized(x):
    V = compute_V_vectorized(x)
    print V
    V.backward(torch.ones(1,x.shape[1]).type(dtype),retain_graph=True)
    jacobian_x = Variable(x.grad.data)
    print "jacobian_x is", jacobian_x
    f = dynamics_vectorized(x)
    Vdot = (jacobian_x*f).sum(dim=0)
    return Vdot

many_x_i.grad.data.zero_()
print "many_x_i is", many_x_i
Vdot_vec = compute_Vdot_vectorized(many_x_i)
print Vdot_vec
#Vdot.backward(torch.ones(many_x_i.shape[1]).type(dtype))
cost = Vdot_vec.clamp(min=0).sum()
print cost
cost.backward()



x_i is Variable containing:
 1.0000
 1.1000
[torch.FloatTensor of size 2]

torch.Size([2])
torch.Size([1])
Variable containing:
 2.2100
[torch.FloatTensor of size 1]

jacobian_x is Variable containing:
 1.8000
 2.0000
[torch.FloatTensor of size 2]

Variable containing:
-4.4200
[torch.FloatTensor of size 1]

many_x_i is Variable containing:
 1.0000  1.1000  2.0000
 1.1000  2.2000  3.0000
[torch.FloatTensor of size 2x3]

Variable containing:
  2.2100   6.0500  13.0000
[torch.FloatTensor of size 1x3]

jacobian_x is Variable containing:
 2.0000  2.2000  4.0000
 2.2000  4.4000  6.0000
[torch.FloatTensor of size 2x3]

Variable containing:
 -4.8400
-19.3600
-36.0000
[torch.FloatTensor of size 3]

Variable containing:
 0
[torch.FloatTensor of size 1]



## First let's verify, no synthesis
 
K = [1, 2] and P = [1, 1] should be stable

In [11]:
K = Variable(torch.FloatTensor([1, 2]).type(dtype), requires_grad=True)
P = Variable(torch.FloatTensor([1, 1]).type(dtype), requires_grad=True)
for i in range(10000):
    x_i = Variable(torch.randn(2).type(dtype), requires_grad=True)
    Vdot = compute_Vdot(x_i)
    if Vdot.data[0] > 0:
        print "false, counterexample found: ", x_i
        break

## Now let's search for K, from an initialization not stable

In [12]:
#P = Variable(torch.rand(2).type(dtype), requires_grad=True)
K = Variable(torch.randn(2).type(dtype), requires_grad=True)

print "Initial P", P
print "Initial K", K


for i in range(10000):
    x_i = Variable(torch.randn(2).type(dtype), requires_grad=True)
    Vdot = compute_Vdot(x_i)
    if Vdot.data[0] > 0:
        print "false, counterexample found: ", x_i
        break

Initial P Variable containing:
 1
 1
[torch.FloatTensor of size 2]

Initial K Variable containing:
-0.7661
-0.0808
[torch.FloatTensor of size 2]

false, counterexample found:  Variable containing:
-0.8291
-1.0426
[torch.FloatTensor of size 2]



In [13]:
## optimization plotting tool

cost_current_iteration = 0
cost_history = []
cost_iteration_number_history = []

f, (cost_axis) = plt.subplots(1, 1)

cost_axis.plot(cost_iteration_number_history, cost_history)
cost_axis.set_title('Running cost')

plt.tight_layout()

<IPython.core.display.Javascript object>

In [14]:
## optimize

num_iterations = 1000
num_samples_per_iteration = 1000
step_rate = 1e-2

# K has already been initialized above, and initial policy visualized

import time
print "first P is", P
print "first K is", K

for cost_iteration in range(num_iterations):
    
    start = time.time()
    
    cost = 0
    
    many_x_i = Variable(torch.randn(2,1000).type(dtype), requires_grad=True)
    Vdot = compute_Vdot_vectorized(many_x_i)
    cost = Vdot.clamp(min=0).sum()
        
    ## Automatically differentiate
    cost.backward()

    # Update K via gradient descent
    K.data -= step_rate * K.grad.data
    #P.data -= step_rate * P.grad.data
    
    # Project P into feasible
    #P.data = torch.abs(P.data)
    # Make P numerically stable by normalizing
    #P.data = P.data/P.data.sum()
      
    # Manually zero the gradients after running the backward pass
    K.grad.data.zero_()
    #P.grad.data.zero_()
    
    print time.time() - start, "is time for one step of grad descent"
    print 
    print cost.data[0]
    
    # handle plotting
    cost_history.append(cost.data[0])
    cost_iteration_number_history.append(cost_iteration)
    
    if cost_iteration % 100 == 0:
        cost_axis.lines[0].set_xdata(cost_iteration_number_history)
        cost_axis.lines[0].set_ydata(cost_history)
        cost_axis.relim()
        cost_axis.autoscale_view()
        cost_axis.figure.canvas.draw()
        
    if cost.data[0] == 0:
        break
        
    print "P is", P
    print "K is", K

first P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

first K is Variable containing:
-0.7661
-0.0808
[torch.FloatTensor of size 2]

0.00166416168213 is time for one step of grad descent

550.430969238
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.6938
 3.2685
[torch.FloatTensor of size 2]

0.00119185447693 is time for one step of grad descent

1149.27099609
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.9201
 0.1539
[torch.FloatTensor of size 2]

0.00109791755676 is time for one step of grad descent

542.401855469
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 2.2733
 3.0919
[torch.FloatTensor of size 2]

0.00105786323547 is time for one step of grad descent

1159.84240723
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.5794
 0.6476
[torch.FloatTensor of size 2]

0.00107789039612 is time for one

K is Variable containing:
-0.1765
-0.6092
[torch.FloatTensor of size 2]

0.00126194953918 is time for one step of grad descent

640.58380127
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.3098
 3.3181
[torch.FloatTensor of size 2]

0.00105094909668 is time for one step of grad descent

998.332458496
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.3112
-0.9057
[torch.FloatTensor of size 2]

0.000980138778687 is time for one step of grad descent

772.333374023
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.0344
 3.0907
[torch.FloatTensor of size 2]

0.00098705291748 is time for one step of grad descent

855.392456055
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.1035
-0.9983
[torch.FloatTensor of size 2]

0.000972986221313 is time for one step of grad descent

795.447143555
P is Variable containing:
 1
 1
[torc

0.00318002700806 is time for one step of grad descent

1167.03979492
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.1631
 0.0341
[torch.FloatTensor of size 2]

0.000779151916504 is time for one step of grad descent

378.214660645
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.2341
 3.8827
[torch.FloatTensor of size 2]

0.000787019729614 is time for one step of grad descent

1132.76428223
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.2406
-0.0473
[torch.FloatTensor of size 2]

0.000815153121948 is time for one step of grad descent

451.586120605
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.0061
 3.9782
[torch.FloatTensor of size 2]

0.00079607963562 is time for one step of grad descent

1254.92297363
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.1682
 0.1613
[to

0.00312113761902 is time for one step of grad descent

1453.69641113
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.0361
 0.6543
[torch.FloatTensor of size 2]

0.000948905944824 is time for one step of grad descent

135.170288086
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.1663
 4.5436
[torch.FloatTensor of size 2]

0.000712871551514 is time for one step of grad descent

1469.87609863
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.1202
 0.3977
[torch.FloatTensor of size 2]

0.00081992149353 is time for one step of grad descent

232.666091919
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9159
 4.0537
[torch.FloatTensor of size 2]

0.000882863998413 is time for one step of grad descent

1227.4744873
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.1956
 0.3674
[tor

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.6018
 2.0707
[torch.FloatTensor of size 2]

0.00141191482544 is time for one step of grad descent

482.826629639
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.1401
-1.4596
[torch.FloatTensor of size 2]

0.00110507011414 is time for one step of grad descent

1108.56555176
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9721
 2.0684
[torch.FloatTensor of size 2]

0.000989198684692 is time for one step of grad descent

586.644897461
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.3787
-1.2836
[torch.FloatTensor of size 2]

0.00131702423096 is time for one step of grad descent

1071.08276367
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9774
 1.9842
[torch.FloatTensor of size 2]

0.00129985809326 is time for one step of gr

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.6976
 0.4576
[torch.FloatTensor of size 2]

0.00152921676636 is time for one step of grad descent

350.137512207
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 2.5570
 2.7268
[torch.FloatTensor of size 2]

0.00123691558838 is time for one step of grad descent

1192.5526123
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.9343
 0.9903
[torch.FloatTensor of size 2]

0.00126099586487 is time for one step of grad descent

352.061584473
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 2.8330
 1.0845
[torch.FloatTensor of size 2]

0.00102615356445 is time for one step of grad descent

1163.08850098
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.2617
 0.7244
[torch.FloatTensor of size 2]

0.00103497505188 is time for one step of grad

K is Variable containing:
-1.3100
-1.1455
[torch.FloatTensor of size 2]

0.00253200531006 is time for one step of grad descent

984.222106934
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.5862
 2.2842
[torch.FloatTensor of size 2]

0.00103902816772 is time for one step of grad descent

568.889038086
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.2240
-1.3195
[torch.FloatTensor of size 2]

0.00116991996765 is time for one step of grad descent

975.734985352
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.2463
 2.1112
[torch.FloatTensor of size 2]

0.00117301940918 is time for one step of grad descent

450.979248047
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.4467
-1.7936
[torch.FloatTensor of size 2]

0.00112700462341 is time for one step of grad descent

1182.30444336
P is Variable containing:
 1
 1
[torch

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.3247
 3.2809
[torch.FloatTensor of size 2]

0.00121092796326 is time for one step of grad descent

1087.33068848
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.7153
-0.3013
[torch.FloatTensor of size 2]

0.00102615356445 is time for one step of grad descent

579.768493652
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.3300
 3.0299
[torch.FloatTensor of size 2]

0.00117301940918 is time for one step of grad descent

972.743713379
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-0.7675
-0.3877
[torch.FloatTensor of size 2]

0.00116586685181 is time for one step of grad descent

637.426513672
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.3441
 3.0377
[torch.FloatTensor of size 2]

0.0012321472168 is time for one step of grad

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.7609
 2.0955
[torch.FloatTensor of size 2]

0.00106906890869 is time for one step of grad descent

504.32510376
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.3674
-1.0299
[torch.FloatTensor of size 2]

0.000960826873779 is time for one step of grad descent

931.194091797
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.8291
 2.0777
[torch.FloatTensor of size 2]

0.000930070877075 is time for one step of grad descent

549.844726562
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.6928
-1.0841
[torch.FloatTensor of size 2]

0.00094199180603 is time for one step of grad descent

1057.4855957
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9258
 1.8628
[torch.FloatTensor of size 2]

0.000915050506592 is time for one step of gr

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-2.9501
 2.3083
[torch.FloatTensor of size 2]

0.00109815597534 is time for one step of grad descent

1268.72802734
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.5955
 0.6056
[torch.FloatTensor of size 2]

0.000921964645386 is time for one step of grad descent

277.327728271
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-2.4487
 3.0407
[torch.FloatTensor of size 2]

0.00104403495789 is time for one step of grad descent

1276.59875488
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.6652
 0.5215
[torch.FloatTensor of size 2]

0.00107002258301 is time for one step of grad descent

330.686859131
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-2.6331
 2.8476
[torch.FloatTensor of size 2]

0.00103497505188 is time for one step of gr

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-2.0205
 2.2873
[torch.FloatTensor of size 2]

0.00103092193604 is time for one step of grad descent

861.397644043
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9334
 0.2323
[torch.FloatTensor of size 2]

0.000828981399536 is time for one step of grad descent

470.177886963
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-2.0892
 2.6817
[torch.FloatTensor of size 2]

0.000916957855225 is time for one step of grad descent

1200.64343262
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.2380
-0.3243
[torch.FloatTensor of size 2]

0.000983953475952 is time for one step of grad descent

731.106262207
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.2151
 2.9031
[torch.FloatTensor of size 2]

0.00126695632935 is time for one step of 

P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.6824
 3.5973
[torch.FloatTensor of size 2]

0.000933885574341 is time for one step of grad descent

1354.34240723
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 0.9083
 0.0609
[torch.FloatTensor of size 2]

0.000910997390747 is time for one step of grad descent

517.47467041
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.6606
 3.0868
[torch.FloatTensor of size 2]

0.000920057296753 is time for one step of grad descent

1132.99438477
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
 1.1888
-0.0751
[torch.FloatTensor of size 2]

0.000879049301147 is time for one step of grad descent

616.866577148
P is Variable containing:
 1
 1
[torch.FloatTensor of size 2]

K is Variable containing:
-1.8847
 2.2638
[torch.FloatTensor of size 2]

0.000823020935059 is time for one step of

In [14]:
print K
print P

Variable containing:
 0.0532
 0.9432
[torch.FloatTensor of size 2]

Variable containing:
 1
 1
[torch.FloatTensor of size 2]



## Analytical double integrator Lyapunov

$$x = 
       \begin{bmatrix} x_1 \\ x_2
        \end{bmatrix} $$

$$\dot{x} = f(x,u) =
       \begin{bmatrix} x_2 \\ u
        \end{bmatrix} $$
        
Policy

$$ u = \pi_K(x) = -K^Tx = - k_1x_1 - k_2x_2$$
        
Lyapunov

$$V(x) = P^Tx = p_1x_1^2 + p_2x_2^2 = \text{p.s.d. by construction}$$

$$\dot{V}(x) = \bigg[ \frac{\partial V}{\partial x}\bigg]^T \big[f(x,\pi_K(x) \big] $$

$$ = \begin{bmatrix} 2p_1x_1 \\ 2p_2x_2
        \end{bmatrix}^T \begin{bmatrix} x_2 \\ -k_1x_1 - k_2x_2
        \end{bmatrix} $$
        
$$ =  2p_1x_1x_2 + 2p_2x_2( - k_1x_1 - k_2x_2) $$

$$ =  2p_1x_1x_2 - 2p_2k_1x_1x_2 - 2p_2k_2x_2^2 $$

We need both terms to be negative semidefinite:

$$ =  2x_1x_2(p_1 - p_2k_1 ) - 2p_2k_2x_2^2 $$

Therefore conditions are:

a) $p_1 = p_2k_1$

b) $k_2 > 0$ ($p_2$ already must be $> 0$) 

In [15]:
# condition (a):  p_1 - p_2*k_1
print P[0] - P[1]*K[0]

# condition (b): k_2 > 0
print K[1]


Variable containing:
 0.9468
[torch.FloatTensor of size 1]

Variable containing:
 0.9432
[torch.FloatTensor of size 1]



In [16]:
for i in range(10000):
    x_i = Variable(torch.randn(2).type(dtype), requires_grad=True)
    Vdot = compute_Vdot(x_i)
    if Vdot.data[0] > 0:
        print "false, counterexample found: ", x_i
        break

false, counterexample found:  Variable containing:
 0.4748
 0.4160
[torch.FloatTensor of size 2]



In [17]:
# Note: first time I ended up with these values:

# print K
# print P
# Variable containing:
#   0.2410
#  16.2734
# [torch.FloatTensor of size 2]

# Variable containing:
#  0.3484
#  0.8831
# [torch.FloatTensor of size 2]

#Which gave only:

# # condition (a):  p_1 - p_2*k_1
# print P[0] - P[1]*K[0]

# # condition (b): k_2 > 0
# print K[1]

# Variable containing:
#  0.1356
# [torch.FloatTensor of size 1]

# Variable containing:
#  16.2734
# [torch.FloatTensor of size 1]

# But this was Vdot negative semidefinite over all samples, since k_2 was so large it could outweigh the first term

In [18]:
state_tape = []
state_tape.append(state_initial)

for i in range(1000):
    u = dynamics(state_tape[-1])[1]
    next_state = double_integrator_next_state(state_tape[-1],u)
    state_tape.append(next_state)
    
print len(state_tape)
img_tape = []

for i in state_tape:
    img_tape.append(double_integrator_state_to_img(i))
    
print len(img_tape)

1001
1001


In [None]:
    
ani = get_animation(img_tape)
HTML(ani.to_html5_video())