In [1]:

import os
import sys
import time
import pickle
from collections import defaultdict
from functools import partial
import torch
from torchviz import make_dot

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter

from safe_control_gym.experiments.ROA_cartpole.utilities import *
from lyapnov import LyapunovNN, Lyapunov, QuadraticFunction, GridWorld_pendulum
from utilities import balanced_class_weights, dlqr, \
                      get_discrete_linear_system_matrices, onestep_dynamics




## Train the NN to fit the Lyapnov function defined by LQR

In [2]:
# set random seed for reproducibility
seed = 1
torch.manual_seed(seed)
np.random.seed(seed)

np.set_printoptions(threshold=sys.maxsize) # np print full array
torch.autograd.set_detect_anomaly(True)

class Options(object):
    def __init__(self, **kwargs):
        super(Options, self).__init__()
        self.__dict__.update(kwargs)

OPTIONS = Options(np_dtype              = np.float32,
                  torch_dtype           = torch.float32,
                  eps                   = 1e-8,                            # numerical tolerance
                  saturate              = True,                            # apply saturation constraints to the control input
                  use_zero_threshold    = True,                            # assume the discretization is infinitely fine (i.e., tau = 0)
                  pre_train             = True,                            # pre-train the neural network to match a given candidate in a supervised approach
                  dpi                   = 150,
                  num_cores             = 4,
                  num_sockets           = 1,
                #   tf_checkpoint_path    = "./tmp/lyapunov_function_learning.ckpt"
                )

# detect torch device
myDevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
myDevice = torch.device("cpu")


In [3]:
#################################### Constants ####################################
dt = 0.01   # sampling time
g = 9.81    # gravity

# True system parameters
m = 0.15    # pendulum mass
L = 0.5     # pole length
b = 0.1     # rotational friction

# State and action normalizers
theta_max = np.deg2rad(180)                     # angular position [rad]
omega_max = np.deg2rad(360)                     # angular velocity [rad/s]
# u_max     = g * m * L * np.sin(np.deg2rad(60))  # torque [N.m], control action
u_max = 0.5

state_norm = (theta_max, omega_max)
action_norm = (u_max,)

# Dimensions and domains
state_dim     = 2
action_dim    = 1
state_limits  = np.array([[-1., 1.]] * state_dim)
action_limits = np.array([[-1., 1.]] * action_dim)

In [4]:
############################### System dynamics ################################
# Initialize system class and its linearization
pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])
A, B = pendulum.linearize()
# print("A\n ", A)
# print("B\n ", B)
# dynamics = pendulum.__call__
dynamics = pendulum.__call__

In [5]:
############################### Discretization ################################
state_constraints = np.array([[-theta_max, theta_max], [-omega_max, omega_max]])
# print('state_constraints: ', state_constraints)
num_states = 100

grid_limits = np.array([[-1., 1.], ] * state_dim)
# state_discretization = gridding(state_dim, state_constraints=None, num_states = 100)
state_discretization = GridWorld_pendulum(grid_limits, num_states)
# state_discretization = gridding(state_dim, state_constraints, num_states = 100)
# print('state_discretization.all_points.shape: ', state_discretization.all_points.shape)

# Discretization constant
if OPTIONS.use_zero_threshold:
    tau = 0.0
else:
    tau = np.sum(state_discretization.unit_maxes) / 2

print('Grid size: {}'.format(state_discretization.nindex))
print('Discretization constant (tau): {}'.format(tau))

# Set initial safe set as a ball around the origin (in normalized coordinates)
cutoff_radius    = 0.1
initial_safe_set = np.linalg.norm(state_discretization.all_points, ord=2, axis=1) <= cutoff_radius
# print('state_discretization.all_points.shape: ', state_discretization.all_points.shape)
# print('initial_safe_set.sum(): ', initial_safe_set.shape)


Grid size: 10000
Discretization constant (tau): 0.0


In [6]:
########################## define LQR policy ##############################
Q = np.identity(state_dim).astype(OPTIONS.np_dtype)     # state cost matrix
Q = np.diag([5, 1])
R = 1* np.identity(action_dim).astype(OPTIONS.np_dtype)    # action cost matrix
K, P_lqr = dlqr(A, B, Q, R) 

policy = lambda x: -K @ x
if OPTIONS.saturate:
    policy = lambda x: np.clip(-K @ x, -1, 1)

In [7]:
###############  closed-loop dynamics and Lipschitz constants ##############
    
cl_dynamics = lambda x: dynamics(np.concatenate([x, policy(x)]))
L_pol = lambda x: np.linalg.norm(-K, 1)
L_dyn = lambda x: np.linalg.norm(A, 1) + np.linalg.norm(B, 1) * L_pol(x)

In [8]:
########################## define Lyapunov LQR ##########################
lyapunov_function = QuadraticFunction(P_lqr)
# Approximate local Lipschitz constants with gradients
grad_lyapunov_function = lambda x: 2 * torch.tensor(P_lqr, dtype=torch.float32) @ x
L_v = lambda x: torch.norm(grad_lyapunov_function(x), p=1, dim=-1, keepdim=True)
# Initialize Lyapunov class
lyapunov_lqr = Lyapunov(state_discretization, lyapunov_function, cl_dynamics, L_dyn, L_v, tau, policy, initial_safe_set)
lyapunov_lqr.update_values()
lyapunov_lqr.update_safe_set()
# print('lyapunov_lqr.c_max\n', lyapunov_lqr.c_max)
print('lyapunov_lqr.safe_set.sum()\n', lyapunov_lqr.safe_set.sum())

lyapunov_lqr.safe_set.sum()
 2006


In [9]:
######################## define Lyapunov NN ########################
# initialize Lyapunov NN
layer_dim = [64, 64, 64]
# layer_dim = [128, 128, 128]
activations = [torch.nn.Tanh(), torch.nn.Tanh(), torch.nn.Tanh()]
nn = LyapunovNN(state_dim, layer_dim, activations)
print('nn\n', nn)
for name, param in nn.named_parameters():
    if param.requires_grad:
        print(name, param.data.shape)

# approximate local Lipschitz constant with gradient
grad_lyapunov_function = \
    lambda x: torch.autograd.grad(nn(x), x, \
                    torch.ones_like(nn(x)), allow_unused=True,)[0]
lyapunov_nn = Lyapunov(state_discretization, nn, \
                          cl_dynamics, L_dyn, L_v, tau, policy, \
                          initial_safe_set)
lyapunov_nn.update_values()
lyapunov_nn.update_safe_set()

nn
 LyapunovNN(
  (layers): ModuleList(
    (0): Linear(in_features=2, out_features=2, bias=False)
    (1): Linear(in_features=2, out_features=62, bias=False)
    (2): Linear(in_features=64, out_features=33, bias=False)
    (3): Linear(in_features=64, out_features=33, bias=False)
  )
)
layers.0.weight torch.Size([2, 2])
layers.1.weight torch.Size([62, 2])
layers.2.weight torch.Size([33, 64])
layers.3.weight torch.Size([33, 64])


In [10]:
#########################################################################
# train the parameteric Lyapunov candidate in order to expand the verifiable
# safe set toward the brute-force safe set
test_classfier_loss = []
test_decrease_loss   = []
roa_estimate         = np.copy(lyapunov_nn.safe_set)
roa = np.copy(lyapunov_lqr.safe_set) # train the nn to fit the lqr roa

# grid              = lyapunov_lqr.discretization
grid              = lyapunov_nn.discretization
c_max             = [lyapunov_nn.c_max, ]
safe_set_fraction = [lyapunov_nn.safe_set.sum() / grid.nindex, ]
print('safe_set_fraction', safe_set_fraction)
roa_estimate.shape

safe_set_fraction [0.0118]


(10000,)

In [11]:
######################### traning hyperparameters #######################
outer_iters = 5
inner_iters = 10
horizon     = 100
test_size   = int(1e4)

safe_level = 1
lagrange_multiplier = 5000
level_multiplier = 1.3
learning_rate = 5e-3
batch_size    = int(1e3)

optimizer = torch.optim.SGD(lyapunov_nn.lyapunov_function.parameters(), lr=learning_rate)
# print('optimizer\n', optimizer)
for name, param in lyapunov_nn.lyapunov_function.named_parameters():
    if param.requires_grad:
        print(name, param.data.shape)



layers.0.weight torch.Size([2, 2])
layers.1.weight torch.Size([62, 2])
layers.2.weight torch.Size([33, 64])
layers.3.weight torch.Size([33, 64])


In [36]:
# randomize 10 initial conditions and feed them into the neural network
batch_size = 10
x0 = torch.rand(batch_size, state_dim, dtype=OPTIONS.torch_dtype)
x0
x0.reshape(batch_size, -1, state_dim)
x0.shape

torch.Size([10, 2])

In [35]:
# feed the initial conditions into the neural network and get the output
y0 = lyapunov_nn.lyapunov_function(x0)
y0.shape

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x2 and 10x2)

In [29]:
x0

tensor([[ 0.8068, -0.7251],
        [-0.7214,  0.6148],
        [-0.2046, -0.6693],
        [ 0.8550, -0.3045],
        [ 0.5016,  0.4520],
        [ 0.7666,  0.2473],
        [ 0.5019, -0.3022],
        [-0.4601,  0.7918],
        [-0.1438,  0.9297],
        [ 0.3269,  0.2434]])