In [37]:
from torch import nn
from bound_propagation import BoundModelFactory, HyperRectangle
from bound_propagation.polynomial import Pow
import torch
import numpy as np

class NNDM(nn.Sequential):
    def __init__(self):
        super(NNDM, self).__init__(
            nn.Linear(28, 64),
            nn.Tanh(),
            nn.Linear(64, 24),
        )

    def forward(self, x):
        out = super().forward(x)
        return out + x[:,:24]

    
class HHead(nn.Sequential):
    def __init__(self):
        
        super().__init__(
            Pow(2),
            nn.Linear(24, 2)
        )
        # self[1].weight.data = torch.tensor([[-1/x_0_max**2, 0, 0, 0],
        #                                     [0, 0, -1/x_2_max**2, 0]])
        # self[1].bias.data = torch.tensor([1., 1.])

class CombinedModel(nn.Sequential):
    def __init__(self):
        super(CombinedModel, self).__init__()
        self.add_module('nndm', NNDM())
        self.add_module('hhead', HHead())
    

# thetanet = HThetaHead(pole_angel)
net = CombinedModel()

factory = BoundModelFactory()
boundnet = factory.build(net)

In [2]:
import torch.nn.functional as F

class Actor(nn.Module):

    def __init__(self, n_observations, n_actions):
        super(Actor, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = torch.tanh(self.layer3(x))
        return self.layer3(x)
    
class Critic(nn.Module):

    def __init__(self, no_state_actions):
        super(Critic, self).__init__()
        self.layer1 = nn.Linear(no_state_actions, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, 1)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)
    
actor_network = Actor(24, 4)
critic_network = Critic(28)

# TODO: when we have trained the model
# dqn_model.load_state_dict(torch.load(PATH_TO_MODEL_WEIGHTS))

In [5]:
x = torch.rand(1, 28)
epsilon = 0.1
input_bounds = HyperRectangle.from_eps(x, epsilon)

crown_bounds = boundnet.crown(input_bounds)
crown_ibp_bounds = boundnet.crown_ibp(input_bounds)

alpha_crown_bounds = boundnet.crown(input_bounds, alpha=True)
alpha_crown_ibp_bounds = boundnet.crown_ibp(input_bounds, alpha=True)

In [74]:
def get_lower_bounds(model, state, action, epsilon):
    """
    model: Pytorch neural network initialised with BoundModelFactory
        - model input size: [state_dimensionality + action_dimensionality]
        - model output size: [state_dimensionality]
    state: Tensor with shape [state_dimensionality]
    action: Tensor with the specified action partition
    ---
    output: [action_options, state_dimensionality] (2: upperbound & lowerbound)
    """

    state_action = torch.cat((state, action), dim=1).view(1, -1)
    input_bounds = HyperRectangle.from_eps(state_action, epsilon)
    crown_bounds = model.crown(input_bounds)

    lower_bound = crown_bounds.lower[0].unsqueeze(0)

    return lower_bound

In [75]:
# hfunction = HHead()
state = torch.rand(1, 24)
action = torch.rand(1, 4)
lower_bounds = get_lower_bounds(boundnet, state, action, 0.1)
lower_bounds

tensor([[[[-1.6283e-02,  6.2691e-03, -1.4915e-02,  1.6204e-03,  1.2873e-02,
           -8.1222e-03,  1.0502e-02, -4.8700e-03, -7.9150e-03, -1.9101e-02,
            6.7544e-03,  1.5642e-03, -9.1911e-03,  9.4562e-05, -5.1320e-03,
           -7.2536e-03, -1.3353e-02,  6.2870e-03,  3.2984e-03,  6.5543e-03,
            4.6554e-03, -6.0388e-03,  3.1865e-03, -2.4600e-03,  1.9490e-02,
           -9.5051e-03,  5.1683e-03, -3.0761e-03],
          [ 5.4977e-03,  1.7959e-02,  1.6638e-02, -9.4214e-03, -5.3678e-03,
            1.9432e-03, -1.7078e-02, -1.1867e-02,  1.7231e-02,  5.7204e-04,
           -3.8280e-03,  4.7540e-03,  3.3574e-03,  6.9865e-03,  8.1688e-03,
           -3.1000e-03,  2.9842e-03,  9.3601e-04,  2.2302e-03,  1.0668e-03,
           -3.1726e-04, -4.5712e-03,  5.5388e-03, -3.6725e-04, -3.2278e-03,
            8.2344e-03, -1.1634e-02,  1.2340e-02]]]],
       grad_fn=<UnsqueezeBackward0>)

In [63]:
class InfeasibilityError(Exception):
    """Exception raised if there are no actions that fulfill the safety criterions."""

    def __init__(self, message="No safe action to take"):
        self.message = message
        super().__init__(self.message)


In [76]:
"""WORK IN PROGRESS FOR BIPEDAL WALKER"""

import cvxpy as cp

def continuous_cbf(state, nominal_action, boundnet, hfunction, alpha, epsilon):
    # Ensure nominal_action is a CVXPY parameter with the correct shape
    nominal_action_cvx = cp.Parameter(nominal_action.shape[1])
    nominal_action_cvx.value = nominal_action.view(-1).numpy()

    # Define the optimization variable (control input)
    action = cp.Variable(nominal_action.shape[1])

    # Define the objective function
    objective = cp.Minimize(cp.norm(action - nominal_action_cvx, 2))

    # Obtain the linearized CBF coefficients
    lower_bound_matrix = get_lower_bounds(boundnet, state, action, epsilon)

    # Construct the linear CBF constraint using the coefficients
    # Assuming the matrix is in the form [A, B] where A multiplies the state and B multiplies the action
    A, B = torch.split(lower_bound_matrix, [state.size(1), action.size(0)], dim=1)
    cbf_constraint = A @ state + B @ action >= alpha * hfunction(state)

    # Add the CBF constraint to the problem
    constraints = [cbf_constraint]

    # Define and solve the optimization problem with the new constraint
    prob = cp.Problem(objective, constraints)
    prob.solve()

    return action.value


In [42]:
def h_function(output_from_get_lower_bounds):
    # Assuming output_from_get_lower_bounds is a 2D tensor where each row is a different bound
    # We can take a simple operation like summing across the dimensions
    # or any other operation that makes sense in your context
    return torch.sum(output_from_get_lower_bounds, dim=1)

In [77]:
# hfunction = HHead()
state = torch.rand(1, 24)
action = torch.rand(1, 4)
lower_bounds = get_lower_bounds(boundnet, state, action, 0.1)

new_action = continuous_cbf(state, action, boundnet, h_function, 0.01, 0.1)

TypeError: expected Tensor as element 1 in argument 0, but got Variable

In [67]:
action.shape

torch.Size([1, 4])

In [58]:
def discrete_cbf(action_space, state, nominal_action, hfunction, h_nndm, alpha):
    """
    action_space: A 1-dimensional tensor containing possible actions.
    state: The current state of the system.
    nominal_action: A pre-selected action by the nominal controller.
    hfunction: A function representing the barrier condition.
    h_nndm: A NNDM with the h function put on top.
    alpha: A scaling factor.
    """
    best_action = nominal_action
    res = []
    for action in action_space:
        h = h_nndm(torch.cat((state, action.unsqueeze(0))).view(1, -1))
        h_prev = hfunction(state)
        if torch.all(torch.ge(h, alpha * h_prev)):
            res += [(int(action!=nominal_action), h, action)]
    best_action_tuple = min(res, key = lambda x: x[0])
    
    if sum(best_action_tuple[0] == action_tuple[0] for action_tuple in res) > 1:
        best_action_tuple = min([action_tuple for action_tuple in res if action_tuple[0] == best_action_tuple[0]], key= lambda x: x[1])
    best_action = best_action_tuple[2]
    return best_action