In [29]:
from collections import OrderedDict
from torch import nn
from bound_propagation import BoundModelFactory, HyperRectangle
from bound_propagation.polynomial import Pow
from bound_propagation.bounds import LinearBounds
import torch
from scipy.stats import norm
import numpy as np
import cvxpy as cp
import gymnasium as gym
from gymnasium import spaces

In [30]:
def create_action_partitions(env, partitions):
    """
    Generates partitions of the action space of a reinforcement learning environment into smaller hyperrectangles.

    This function is useful for discretizing a continuous action space into smaller, manageable parts, allowing for more granular exploration or analysis.

    Parameters:
    - env (Environment): The reinforcement learning environment object, which should have an action space attribute.
    - partitions (int): The number of partitions to create along each dimension of the action space.

    Each dimension of the action space is divided into 'partitions' number of equal parts. For an action space with 'n' dimensions, this results in 'partitions^n' total hyperrectangles.

    Returns:
    - A list of HyperRectangle objects, each representing a partition of the action space. Each HyperRectangle object is defined by its lower and upper bounds in the action space.
    """
    action_space = env.action_space
    num_actions = action_space.shape[0]
    action_low = action_space.low
    action_high = action_space.high

    res = []

    def generate_partitions(dimensions, lower, upper, current_partition):
        if dimensions == num_actions:
            # If we've reached the number of dimensions, add the current partition
            res.append(HyperRectangle(torch.tensor(lower, dtype=torch.float32).unsqueeze(0), torch.tensor(upper, dtype=torch.float32).unsqueeze(0)))
        else:
            # Calculate the size of the partition for the current dimension
            partition_size = (action_high[dimensions] - action_low[dimensions]) / partitions

            for part in range(partitions):
                # Determine the lower and upper bounds for the current dimension
                dim_lower_bound = action_low[dimensions] + part * partition_size
                dim_upper_bound = dim_lower_bound + partition_size

                # Recursively generate partitions for the next dimension
                generate_partitions(dimensions + 1, lower + [dim_lower_bound], upper + [dim_upper_bound], current_partition)

    generate_partitions(0, [], [], [])

    return res

In [31]:
def get_lower_bound(model, state, action_partition, epsilon):
    """
    Computes the lower bound of the output for a given state-action pair using CROWN (Convex Relaxation for neural network Output bounNd) method.

    This function applies CROWN, a method for robustness verification of neural networks, to compute the lower bound of the neural network's output for a given state-action pair within a specified perturbation range. The perturbation range is defined by epsilon around the state-action pair.

    Parameters:
    - model (NN): The neural network model for which the bounds are computed.
    - state (Tensor): The current state of the system, represented as a tensor.
    - action (Tensor): The action to be evaluated, represented as a tensor.
    - epsilon (float): The perturbation range around the state-action pair within which the lower bound is computed.

    Returns:
    - A tensor representing the lower bound of the network's output for the given state-action pair within the specified perturbation range. The shape of the tensor matches the concatenated state-action input shape.

    Notes:
    - The method assumes that the input to 'model' is a concatenation of state and action.
    - The lower bound is computed using the CROWN method, which involves a convex relaxation technique to estimate bounds on neural network outputs.
    - HyperRectangle.from_eps is used to define the perturbation range around the input state-action pair.
    """
    state_partition = HyperRectangle.from_eps(state.view(1, -1), epsilon)

    input_bounds = HyperRectangle(
        torch.cat((state_partition.lower, action_partition.lower), dim=1),
        torch.cat((state_partition.upper, action_partition.upper), dim=1)
    )
    crown_bounds = model.crown(input_bounds, bound_upper=False)

    crown_bounds.lower # (A, b)

    return crown_bounds.lower

In [32]:
def create_bound_matrices(partitions, state, env, boundnet):
    """
    Computes bound matrices for action partitions in a given state using a bound network.

    Parameters:
    - partitions (Iterable): A collection of partitions of the action space, each representing a subset of potential actions.
    - state (Tensor): The current state of the system, represented as a tensor.
    - env: The environment object, containing information about the action space.
    - boundnet (NN): A neural network used to compute linear bounds for a given state and action.

    Returns:
    - A list of tuples. Each tuple contains:
        1. An action partition,
        2. A tensor representing the action-dependent component of the bound for that partition,
        3. A numpy array representing the state vector component of the bound.
    """
    action_space = env.action_space
    action_dimensionality = action_space.shape[0]
    res = []
    for action_partition in partitions:
        print("partition", action_partition.lower.shape)
        print("state", state.shape)
        (A, b) = get_lower_bound(boundnet, state, action_partition, 0.01)
        h_action_dependent = A[:, :, -action_dimensionality:]
        # State input region is a hyperrectangle with "radius" 0.01
        state_input_bounds = HyperRectangle.from_eps(state, 0.01)
        # State dependent part of the A matrix
        state_A = A[:, :, :-action_dimensionality]
        # Make this into a (lower) linear bounds (\underbar{A}_x x + b \leq ...)
        state_linear_bounds = LinearBounds(state_input_bounds, (state_A, b), None)
        # Convert to lower interval bounds (b \leq ...)
        state_interval_bounds = state_linear_bounds.concretize()
        # Select the lower bound
        h_vec = state_interval_bounds.lower

        vecs = (action_partition, h_action_dependent.squeeze().detach().numpy(), h_vec.squeeze().detach().numpy())
        res.append(vecs)

    return res

In [33]:
class InfeasibilityError(Exception):
    """Exception raised if there are no actions that fulfill the safety criterions."""

    def __init__(self, message="No safe action to take"):
        self.message = message
        super().__init__(self.message)

In [34]:
def continuous_cbf(bound_matrices, nominal_action, h_current, alpha):
    """
    Selects a safe action from bound matrices using continuous control barrier functions (CBF).

    Parameters:
    - bound_matrices (Iterable): An iterable of tuples, each containing the action partition, the action-dependent component of the barrier function h, and the state vector component of the bound of h.
    - nominal_action (Tensor): The preferred action in the current state, typically derived from an unconstrained policy.
    - h_current (Tensor): The current value of the barrier function.
    - alpha (float): A scaling factor used in the safety criterion. It scales the current barrier function value to set a threshold for the next state's barrier function value.

    Returns:
    - The selected safe action as a numpy array. If multiple safe actions are available, it returns the one closest to the nominal action.

    Raises:
    - InfeasibilityError: If no safe actions are found, indicating that the current state is infeasible under the given safety constraints.
    """
    safe_actions = []
    for action_partition, h_action_dependent, h_vec in bound_matrices:
        num_actions = nominal_action.shape[0]
        action = cp.Variable(num_actions)

        # Constraints
        action_lower_bound = (action_partition).lower.reshape((-1,))
        action_upper_bound = (action_partition).upper.reshape((-1,))
        constraints = [action_lower_bound <= action, action <= action_upper_bound, h_action_dependent @ action + h_vec >= alpha * h_current]

        # Objective
        objective = cp.Minimize(cp.norm(action - nominal_action, 2))

        # Solve the problem
        problem = cp.Problem(objective, constraints)
        problem.solve()

        if problem.status is cp.UNBOUNDED:
            print("something goes very wrong")
        elif problem.status is cp.INFEASIBLE:
           pass
        else:
            safe_actions.append((action.value, objective.value))

    if safe_actions and len(safe_actions) > 1:
      return min(safe_actions, key=lambda x: x[1], default=(None, None))[0]
    elif safe_actions:
      return safe_actions[0]
    else:
      raise InfeasibilityError()

In [97]:
class NNDM(nn.Sequential):
    """
    TODO: write docstring
    TODO: generalise class
    """
    def __init__(self):
        super(NNDM, self).__init__(
            nn.Linear(29, 64),
            nn.Tanh(),
            nn.Linear(64, 25),
        )

    def forward(self, x):
        out = super().forward(x)
        return out + x[:,:25]


class HHead(nn.Sequential):
    """
    TODO: implement h function
    TODO: write docstring
    TODO: generalise class
    """
    def __init__(self):

        super().__init__(
            Pow(2),
            nn.Linear(25, 2)
        )

        
class CombinedModel(nn.Sequential):
    def __init__(self):
        super(CombinedModel, self).__init__(OrderedDict([
            ('nndm', NNDM()),
            ('hhead', HHead())
        ]))

In [36]:
class BipedalWalkerWithPositionWrapper(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        # Adjust the shape of the observation space to include the additional state
        obs_space = env.observation_space
        low = np.append(obs_space.low.flatten(), -np.inf)
        high = np.append(obs_space.high.flatten(), np.inf)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

    def step(self, action):
        state, reward, terminated, truncated, info = self.env.step(action)
        hull_position = np.array([self.env.unwrapped.hull.position[0]])
        # Flatten the state and add the x-coordinate of the hull's position
        extended_state = np.concatenate((state.flatten(), hull_position))
        return extended_state, reward, terminated, truncated, info

    def reset(self, **kwargs):
        state = self.env.reset(**kwargs)
        hull_position = np.array([self.env.unwrapped.hull.position[0]])
        # Flatten the state and add the x-coordinate of the hull's position
        extended_state = np.concatenate((state[0].flatten(), hull_position))
        return extended_state

In [99]:
# Initialise the environment
env = gym.make("BipedalWalker-v3")
env = BipedalWalkerWithPositionWrapper(env)

# Initialise the barrier function h and the NNDM with the barrier function on top to compute linear bounds
hnet = HHead()
net = CombinedModel()
factory = BoundModelFactory()
boundnet = factory.build(net)

# Generate random state, h values, and nominal action for demonstration

# Generate the action partitions
partitions = create_action_partitions(env, 2)

state = env.reset()
state = torch.tensor(state, dtype=torch.float32)

while True:
    h_current = hnet(state).view(-1).detach().numpy()
    nominal_action = torch.rand(4)
    bound_matrices = create_bound_matrices(partitions, state, env, boundnet)
    safe_action = continuous_cbf(bound_matrices, nominal_action, h_current, -100000)
    state, reward, terminated, truncated, info = env.step(safe_action)
    state = torch.tensor(state, dtype=torch.float32)
    print(safe_action)

(2, 4) (2,)
[0.43678264 0.49589566 0.74225503 0.90159802]
(2, 4) (2,)
[0.63323735 0.97505343 0.66157214 0.44874566]
(2, 4) (2,)


    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


[0.08655416 0.26384049 0.90911073 0.56301212]
(2, 4) (2,)
[0.72607477 0.32788276 0.65659136 0.15113667]
(2, 4) (2,)
[0.20018184 0.90354902 0.85165268 0.34600842]
(2, 4) (2,)


SolverError: Solver 'ECOS' failed. Try another solver, or solve with verbose=True for more information.

In [38]:
def create_noise_partitions(ids, state_dimensionality, partitions, std):
    """
    Create partitions for specified dimensions in a multi-dimensional space.

    Parameters:
    ids (list): List of dimensions to be partitioned.
    state_dimensionality (int): The total number of dimensions in the space.
    partitions (int): Number of partitions for each dimension in ids.
    std (float): Standard deviation used to define the partition range.

    Returns:
    list: A list of HyperRectangle objects representing the partitioned space.
    """
    partition_lower, partition_upper = -6 * std, 6 * std

    res = []

    def generate_partitions(dimension, current_lower, current_upper):
        if dimension == state_dimensionality:
            # If we've reached the total number of dimensions, add the current partition
            res.append(HyperRectangle(torch.tensor(current_lower, dtype=torch.float32).unsqueeze(0),
                                      torch.tensor(current_upper, dtype=torch.float32).unsqueeze(0)))
        else:
            if dimension in ids:
                # Partition this dimension
                partition_size = (partition_upper - partition_lower) / partitions
                for part in range(partitions):
                    dim_lower_bound = partition_lower + part * partition_size
                    dim_upper_bound = dim_lower_bound + partition_size
                    generate_partitions(dimension + 1, current_lower + [dim_lower_bound], current_upper + [dim_upper_bound])
            else:
                # Set this dimension's bounds to 0 and move to the next dimension
                generate_partitions(dimension + 1, current_lower + [0], current_upper + [0])

    generate_partitions(0, [], [])

    return res

In [172]:
from collections.abc import Iterable
from itertools import product

def create_noise_partitions(h_ids, state_dimensionality, partitions, stds):
    assert isinstance(partitions, int)
    assert isinstance(stds, Iterable)
    assert len(h_ids) == len(stds)

    # Define the limits for partitioning
    partitions_lower = [-6 * std for std in stds]
    partitions_upper = [6 * std for std in stds]

    # Create the partition slices for each dimension in h_ids
    partition_slices = []
    for dim_num_slices, dim_min, dim_max in zip([partitions] * len(h_ids), partitions_lower, partitions_upper):
        dim = torch.linspace(dim_min, dim_max, dim_num_slices + 1)
        centers = (dim[:-1] + dim[1:]) / 2
        half_widths = (dim[1:] - dim[:-1]) / 2
        partition_slices.append(list(zip(centers, half_widths)))

    # Create all combinations of partitions across the dimensions in h_ids
    hyperrectangles = []
    for combination in product(*partition_slices):
        lower_bounds = torch.zeros((1, state_dimensionality))
        upper_bounds = torch.zeros((1, state_dimensionality))

        for (center, half_width), h_id in zip(combination, h_ids):
            lower_bounds[0, h_id] = center - half_width
            upper_bounds[0, h_id] = center + half_width

        hyperrectangles.append(HyperRectangle(lower_bounds, upper_bounds))

    return hyperrectangles

In [None]:
from scipy.stats import truncnorm
from scipy.special import erf, erfc
import numpy as np
import torch

def log_prob(x, y):
    x, y = x/np.sqrt(2), y/np.sqrt(2)
    if abs(x) <= 1/np.sqrt(2) and abs(y) <= 1/np.sqrt(2):
        return np.log((erf(y) - erf(x))/2)
    elif x >= 0 and y >= 0:
        return np.log((erfc(x) - erfc(y))/2)
    elif x <= 0 and y <= 0:
        return np.log((erfc(-y) - erfc(-x))/2)
    else:
        return np.log((erf(y) - erf(x))/2)

def truncated_normal_expectation(mean, std_dev, lower_bound, upper_bound):
    a, b = (lower_bound - mean) / std_dev, (upper_bound - mean) / std_dev
    return mean + std_dev * (truncnorm.expect(args=(a, b), loc=mean, scale=std_dev))

def weighted_noise_prob(ids, HR, sigma):
    res = torch.tensor(len(ids))
    HR_prob = HR_probability(ids, HR, sigma)
    for i in range(len(ids)):
        res[i] = HR_prob * truncated_normal_expectation(0, sigma[ids[i]], HR.lower[i], HR.upper[i])
    return res

def HR_probability(ids, HR, sigma):
    lower_list = []
    upper_list = []
    len_vector = len(ids)
    for i in range(len_vector):
        lower_list += [HR.lower[i]/sigma]
        upper_list += [HR.upper[i]/sigma]
    prob = 0
    for j in range(len_vector):
        prob += (log_prob(upper_list[j], lower_list[j]))
    return np.exp(prob)

In [135]:
import math

def create_noise_bounds(noise_partitions, action_partitions, h_ids, std, boundnet, state, x_inds, u_inds, w_inds):
    # h_ids are the dimensions of the state that are used in h
    h_dim = len(h_ids)
    res = []
    for action_partition in action_partitions:
        # state input region is a hyperrectangle with "radius" 0.01
        state_input_bounds = HyperRectangle.from_eps(state.view(1, -1), 0.01)
        # initialise the part of the bound on h that is dependend on the action
        h_action_dependend = torch.zeros(1, h_dim, len(u_inds))
        # initialise the part of the bound on h that is INdependend on the action
        h_vec = torch.zeros(1, h_dim)
        for noise_partition in noise_partitions:
            # input region is a hyperrectangle with the state bounds and the noise + action partitions
            input_bounds = HyperRectangle(torch.cat((state_input_bounds.lower, action_partition.lower, noise_partition.lower), dim=1),
                                          torch.cat((state_input_bounds.upper, action_partition.upper, noise_partition.upper), dim=1))
            crown_bounds = boundnet.crown(input_bounds, bound_upper=False)

            # Get the lower bounds
            (A, b) = crown_bounds.lower

            # State, action, and noise dependent part of the A matrix
            state_A = A[:, :, x_inds]
            action_A = A[:, :, u_inds]
            noise_A = A[:, :, w_inds]

            # compute the probability of the the noise falling in the given partition of the noise space
            noise_prob = HR_probability(h_ids, noise_partition, std)
            noise_prob = noise_prob.item()

            # Scale state_A and b corresponding to noise_prob
            state_A, b = noise_prob * state_A, noise_prob * b
            # Make this into a (lower) linear bounds (\underbar{A}_x x + b \leq ...)
            state_linear_bounds = LinearBounds(state_input_bounds, (state_A, b), None)
            # Convert to lower interval bounds (b \leq ...)
            state_interval_bounds = state_linear_bounds.concretize()
            # Select the lower bound
            h_vec_state = state_interval_bounds.lower.detach()

            # compute \int_{HR_{wi}} w \, p(w) \, dw
            # implement log exp trick
            weighted_noise_prob = 1/(math.sqrt(2*math.pi) * std) * (torch.exp(torch.square(noise_partition.upper / std)/-2) - torch.exp(torch.square(noise_partition.lower / std)/-2))
            # The part of the bound on h that is dependend on the noise
            h_vec_noise = noise_A @ weighted_noise_prob.squeeze(0)
            # the part of the bound on h that is independend on the action
            h_vec +=  h_vec_state + h_vec_noise

            # the weighted part of the bound on h that is dependend on the action
            h_action_dependend += noise_prob * action_A
        res.append((action_partition, h_action_dependend.squeeze().detach().numpy(), h_vec.squeeze().detach().numpy()))
    return res

In [42]:
from bound_propagation.bivariate import Add
from bound_propagation.reshape import Select

class NNDM_H(nn.Sequential):
    def __init__(self, xu_inds, nndm, w_inds, h):
        super().__init__(OrderedDict([
            ('nndm(x, y) + w', Add(
                nn.Sequential(
                    Select(xu_inds),  # Select (x, u) from input hyperrectangle)
                    nndm
                ),
                Select(w_inds)  # Select w from input hyperrectangle
            )),  # The output of Add is nndm(x, u) + w
            ('h', h)  # Feed through h
        ]))

In [142]:
state_dimensionality = 25
h_ids = [23, 24]
partitions = 2
std = 10/6
action_dimensionality = 4
x_inds = list(range(25))
u_inds = list(range(25, 29))
w_inds = list(range(29, 54))

nndm = NNDM()
h = HHead()
nndm_h = NNDM_H(x_inds+u_inds, nndm, w_inds, h)
factory = BoundModelFactory()
boundnet = factory.build(nndm_h)

action_partitions = create_action_partitions(env, 2)
noise_partitions = create_noise_partitions(h_ids, state_dimensionality, partitions, std)
bounds = create_noise_bounds(noise_partitions, action_partitions, h_ids, std, boundnet, state, x_inds, u_inds, w_inds)
hr, h_action, h_vec = bounds[0]
print("h_action", h_action.shape)
print("h_vec", h_vec.shape)
print(h_vec)

tensor([[[-4.3210e-03, -4.0310e-03,  3.3366e-03,  7.4847e-03, -1.0562e-02,
           3.6987e-05, -4.8630e-03, -4.0742e-03, -1.8127e-03,  1.9137e-03,
          -5.2954e-03,  4.7519e-03,  3.0685e-03, -3.9045e-03, -1.5499e-03,
          -2.1146e-03,  4.0732e-03, -1.4192e-03,  4.3465e-03, -9.2420e-03,
           4.6986e-03,  4.3599e-03, -7.3359e-03, -5.7132e-03,  1.3568e-04],
         [ 5.5296e-03, -2.3516e-03,  3.3057e-03,  8.9185e-03,  2.0609e-03,
           4.5111e-03,  8.2449e-04,  6.9347e-03, -1.0877e-02,  4.7246e-03,
          -1.4179e-02,  6.7858e-03,  4.3979e-03, -1.0002e-04, -5.6580e-03,
          -9.7998e-04,  6.0913e-03, -3.3571e-03,  6.6813e-03, -4.7234e-03,
           7.2049e-03,  5.0036e-03, -7.2266e-03, -1.1683e-02, -4.7289e-03]]],
       grad_fn=<MulBackward0>)
tensor([[[-3.4657e-03, -7.2942e-04,  1.9998e-04,  6.5842e-03,  1.5135e-03,
          -5.4986e-03,  4.1378e-03, -4.9373e-04, -5.3384e-03, -3.8845e-05,
          -3.8829e-03,  1.6007e-03,  7.8730e-03, -8.5771e-04, -3.

In [131]:
from torchrl.modules import TruncatedNormal

tnormal = TruncatedNormal(torch.zeros(1), torch.eye(1), min=1, max=10, tanh_loc=True)
tnormal.mean

tensor([[5.5000]])