In [105]:
from collections import OrderedDict
from torch import nn
from bound_propagation import BoundModelFactory, HyperRectangle
from bound_propagation.polynomial import Pow
from bound_propagation.bounds import LinearBounds
import torch
from scipy.stats import norm
import numpy as np
import cvxpy as cp
import gymnasium as gym
from gymnasium import spaces

In [62]:
def create_action_partitions(env, partitions):
    """
    Generates partitions of the action space of a reinforcement learning environment into smaller hyperrectangles.

    This function is useful for discretizing a continuous action space into smaller, manageable parts, allowing for more granular exploration or analysis.

    Parameters:
    - env (Environment): The reinforcement learning environment object, which should have an action space attribute.
    - partitions (int): The number of partitions to create along each dimension of the action space.

    Each dimension of the action space is divided into 'partitions' number of equal parts. For an action space with 'n' dimensions, this results in 'partitions^n' total hyperrectangles.

    Returns:
    - A list of HyperRectangle objects, each representing a partition of the action space. Each HyperRectangle object is defined by its lower and upper bounds in the action space.
    """
    action_space = env.action_space
    num_actions = action_space.shape[0]
    action_low = action_space.low
    action_high = action_space.high

    res = []

    def generate_partitions(dimensions, lower, upper, current_partition):
        if dimensions == num_actions:
            # If we've reached the number of dimensions, add the current partition
            res.append(HyperRectangle(torch.tensor(lower, dtype=torch.float32).unsqueeze(0), torch.tensor(upper, dtype=torch.float32).unsqueeze(0)))
        else:
            # Calculate the size of the partition for the current dimension
            partition_size = (action_high[dimensions] - action_low[dimensions]) / partitions

            for part in range(partitions):
                # Determine the lower and upper bounds for the current dimension
                dim_lower_bound = action_low[dimensions] + part * partition_size
                dim_upper_bound = dim_lower_bound + partition_size

                # Recursively generate partitions for the next dimension
                generate_partitions(dimensions + 1, lower + [dim_lower_bound], upper + [dim_upper_bound], current_partition)

    generate_partitions(0, [], [], [])

    return res

In [63]:
def get_lower_bound(model, state, action_partition, epsilon):
    """
    Computes the lower bound of the output for a given state-action pair using CROWN (Convex Relaxation for neural network Output bounNd) method.

    This function applies CROWN, a method for robustness verification of neural networks, to compute the lower bound of the neural network's output for a given state-action pair within a specified perturbation range. The perturbation range is defined by epsilon around the state-action pair.

    Parameters:
    - model (NN): The neural network model for which the bounds are computed.
    - state (Tensor): The current state of the system, represented as a tensor.
    - action (Tensor): The action to be evaluated, represented as a tensor.
    - epsilon (float): The perturbation range around the state-action pair within which the lower bound is computed.

    Returns:
    - A tensor representing the lower bound of the network's output for the given state-action pair within the specified perturbation range. The shape of the tensor matches the concatenated state-action input shape.

    Notes:
    - The method assumes that the input to 'model' is a concatenation of state and action.
    - The lower bound is computed using the CROWN method, which involves a convex relaxation technique to estimate bounds on neural network outputs.
    - HyperRectangle.from_eps is used to define the perturbation range around the input state-action pair.
    """
    state_partition = HyperRectangle.from_eps(state.view(1, -1), epsilon)

    input_bounds = HyperRectangle(
        torch.cat((state_partition.lower, action_partition.lower), dim=1),
        torch.cat((state_partition.upper, action_partition.upper), dim=1)
    )
    crown_bounds = model.crown(input_bounds, bound_upper=False)

    crown_bounds.lower # (A, b)

    return crown_bounds.lower

In [140]:
def create_bound_matrices(partitions, state, env, boundnet):
    """
    Computes bound matrices for action partitions in a given state using a bound network.

    Parameters:
    - partitions (Iterable): A collection of partitions of the action space, each representing a subset of potential actions.
    - state (Tensor): The current state of the system, represented as a tensor.
    - env: The environment object, containing information about the action space.
    - boundnet (NN): A neural network used to compute linear bounds for a given state and action.

    Returns:
    - A list of tuples. Each tuple contains:
        1. An action partition,
        2. A tensor representing the action-dependent component of the bound for that partition,
        3. A numpy array representing the state vector component of the bound.
    """
    action_space = env.action_space
    action_dimensionality = action_space.shape[0]
    res = []
    for action_partition in partitions:
        print("partition", action_partition.lower.shape)
        print("state", state.shape)
        (A, b) = get_lower_bound(boundnet, state, action_partition, 0.01)
        h_action_dependent = A[:, :, -action_dimensionality:]
        # State input region is a hyperrectangle with "radius" 0.01
        state_input_bounds = HyperRectangle.from_eps(state, 0.01)
        # State dependent part of the A matrix
        state_A = A[:, :, :-action_dimensionality]
        # Make this into a (lower) linear bounds (\underbar{A}_x x + b \leq ...)
        state_linear_bounds = LinearBounds(state_input_bounds, (state_A, b), None)
        # Convert to lower interval bounds (b \leq ...)
        state_interval_bounds = state_linear_bounds.concretize()
        # Select the lower bound
        h_vec = state_interval_bounds.lower

        vecs = (action_partition, h_action_dependent.squeeze().detach().numpy(), h_vec.squeeze().detach().numpy())
        res.append(vecs)

    return res

In [65]:
class InfeasibilityError(Exception):
    """Exception raised if there are no actions that fulfill the safety criterions."""

    def __init__(self, message="No safe action to take"):
        self.message = message
        super().__init__(self.message)

In [70]:
def continuous_cbf(bound_matrices, nominal_action, h_current, alpha):
    """
    Selects a safe action from bound matrices using continuous control barrier functions (CBF).

    Parameters:
    - bound_matrices (Iterable): An iterable of tuples, each containing the action partition, the action-dependent component of the barrier function h, and the state vector component of the bound of h.
    - nominal_action (Tensor): The preferred action in the current state, typically derived from an unconstrained policy.
    - h_current (Tensor): The current value of the barrier function.
    - alpha (float): A scaling factor used in the safety criterion. It scales the current barrier function value to set a threshold for the next state's barrier function value.

    Returns:
    - The selected safe action as a numpy array. If multiple safe actions are available, it returns the one closest to the nominal action.

    Raises:
    - InfeasibilityError: If no safe actions are found, indicating that the current state is infeasible under the given safety constraints.
    """
    safe_actions = []
    for action_partition, h_action_dependent, h_vec in bound_matrices:
        num_actions = nominal_action.shape[0]
        action = cp.Variable(num_actions)

        # Constraints
        action_lower_bound = (action_partition).lower.reshape((-1,))
        action_upper_bound = (action_partition).upper.reshape((-1,))
        constraints = [action_lower_bound <= action, action <= action_upper_bound, h_action_dependent @ action + h_vec >= alpha * h_current]

        # Objective
        objective = cp.Minimize(cp.norm(action - nominal_action, 2))

        # Solve the problem
        problem = cp.Problem(objective, constraints)
        problem.solve()

        if problem.status is cp.UNBOUNDED:
            print("something goes very wrong")
        elif problem.status is cp.INFEASIBLE:
           pass
        else:
            safe_actions.append((action.value, objective.value))

    if safe_actions and len(safe_actions) > 1:
      return min(safe_actions, key=lambda x: x[1], default=(None, None))[0]
    elif safe_actions:
      return safe_actions[0]
    else:
      raise InfeasibilityError()

In [67]:
class NNDM(nn.Sequential):
    """
    TODO: write docstring
    TODO: generalise class
    """
    def __init__(self):
        super(NNDM, self).__init__(
            nn.Linear(29, 64),
            nn.Tanh(),
            nn.Linear(64, 25),
        )

    def forward(self, x):
        out = super().forward(x)
        return out + x[:,:25]


class HHead(nn.Sequential):
    """
    TODO: implement h function
    TODO: write docstring
    TODO: generalise class
    """
    def __init__(self):

        super().__init__(
            Pow(2),
            nn.Linear(25, 2)
        )

class CombinedModel(nn.Sequential):
    def __init__(self):
        super(CombinedModel, self).__init__(OrderedDict([
            ('nndm', NNDM()),
            ('hhead', HHead())
        ]))

In [68]:
class BipedalWalkerWithPositionWrapper(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        # Adjust the shape of the observation space to include the additional state
        obs_space = env.observation_space
        low = np.append(obs_space.low.flatten(), -np.inf)
        high = np.append(obs_space.high.flatten(), np.inf)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

    def step(self, action):
        state, reward, terminated, truncated, info = self.env.step(action)
        hull_position = np.array([self.env.unwrapped.hull.position[0]])
        # Flatten the state and add the x-coordinate of the hull's position
        extended_state = np.concatenate((state.flatten(), hull_position))
        return extended_state, reward, terminated, truncated, info

    def reset(self, **kwargs):
        state = self.env.reset(**kwargs)
        hull_position = np.array([self.env.unwrapped.hull.position[0]])
        # Flatten the state and add the x-coordinate of the hull's position
        extended_state = np.concatenate((state[0].flatten(), hull_position))
        return extended_state

In [141]:
# Initialise the environment
env = gym.make("BipedalWalker-v3")
env = BipedalWalkerWithPositionWrapper(env)

# Initialise the barrier function h and the NNDM with the barrier function on top to compute linear bounds
hnet = HHead()
net = CombinedModel()
factory = BoundModelFactory()
boundnet = factory.build(net)

# Generate random state, h values, and nominal action for demonstration

# Generate the action partitions
partitions = create_action_partitions(env, 2)

state = env.reset()
state = torch.tensor(state, dtype=torch.float32)

while True:
    h_current = hnet(state).view(-1).detach().numpy()
    nominal_action = torch.rand(4)
    bound_matrices = create_bound_matrices(partitions, state, env, boundnet)
    safe_action = continuous_cbf(bound_matrices, nominal_action, h_current, -100000)
    state, reward, terminated, truncated, info = env.step(safe_action)
    state = torch.tensor(state, dtype=torch.float32)
    print(safe_action)

partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])
partition torch.Size([1, 4])
state torch.Size([25])


    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


InfeasibilityError: No safe action to take

In [84]:
print(env.observation_space.shape)

(25,)


In [87]:
def create_noise_partitions(ids, state_dimensionality, partitions, std):
    """
    Create partitions for specified dimensions in a multi-dimensional space.

    Parameters:
    ids (list): List of dimensions to be partitioned.
    state_dimensionality (int): The total number of dimensions in the space.
    partitions (int): Number of partitions for each dimension in ids.
    std (float): Standard deviation used to define the partition range.

    Returns:
    list: A list of HyperRectangle objects representing the partitioned space.
    """
    partition_lower, partition_upper = -6 * std, 6 * std

    res = []

    def generate_partitions(dimension, current_lower, current_upper):
        if dimension == state_dimensionality:
            # If we've reached the total number of dimensions, add the current partition
            res.append(HyperRectangle(torch.tensor(current_lower, dtype=torch.float32).unsqueeze(0),
                                      torch.tensor(current_upper, dtype=torch.float32).unsqueeze(0)))
        else:
            if dimension in ids:
                # Partition this dimension
                partition_size = (partition_upper - partition_lower) / partitions
                for part in range(partitions):
                    dim_lower_bound = partition_lower + part * partition_size
                    dim_upper_bound = dim_lower_bound + partition_size
                    generate_partitions(dimension + 1, current_lower + [dim_lower_bound], current_upper + [dim_upper_bound])
            else:
                # Set this dimension's bounds to 0 and move to the next dimension
                generate_partitions(dimension + 1, current_lower + [0], current_upper + [0])

    generate_partitions(0, [], [])

    return res


In [106]:
def HR_probability(ids, HR, sigma):
    lower_list = []
    upper_list = []
    len_vector = len(ids)
    for i in range(len_vector):
        lower_list += [HR.lower[i]/sigma]
        upper_list += [HR.upper[i]/sigma]
    prob = 1
    for j in range(len_vector):
        prob *= (norm.cdf(upper_list[j]) - norm.cdf(lower_list[j]))
    return prob


In [138]:
def create_noise_bounds(noise_partitions, ids, std, boundnet, action_dimensionality, state):
    """
    TODO: this is still wrong
    TODO: write docstring
    """
    res = []
    for noise_partition in noise_partitions:
        noise_prob = HR_probability(ids, noise_partition, std)
        # State input region is a hyperrectangle with
        state_input_region = HyperRectangle(noise_partition.lower + state, noise_partition.upper + state)
        print("partition", state_input_region.lower.shape)
        print("state", state.shape)
        # Get the lower bounds
        (A, b) = get_lower_bound(boundnet, state, state_input_region, 0.01)
        # State dependent part of the A matrix
        state_A = A[:, :, :-action_dimensionality]
        # Make this into a (lower) linear bounds (\underbar{A}_x x + b \leq ...)
        state_linear_bounds = LinearBounds(state_input_region, (state_A, b), None)
        # Convert to lower interval bounds (b \leq ...)
        state_interval_bounds = state_linear_bounds.concretize()
        # Select the lower bound
        h_vec = state_interval_bounds.lower
        res.append((h_vec * noise_prob))
    return sum(res)


In [142]:
state_dimensionality = 25
ids = [24]
partitions = 10
std = 10/6
action_dimensionality = 4

noise_partitions = create_noise_partitions(ids, state_dimensionality, partitions, std)
bound = create_noise_bounds(noise_partitions, ids, std, boundnet, action_dimensionality, state)


partition torch.Size([1, 25])
state torch.Size([25])


RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
  File "/Users/koentuin/Documents/Studie/Minor Engineering wit AI/CAI/venv/lib/python3.11/site-packages/bound_propagation/linear.py", line 38, in ibp_forward_linear_jit
    weight = weight.transpose(-1, -2).to(dtype)

    w_mid = center.matmul(weight) + (bias.to(dtype).unsqueeze(-2) if bias is not None else torch.tensor(0.0, device=device, dtype=dtype))
            ~~~~~~~~~~~~~ <--- HERE
    w_diff = diff.matmul(weight.abs())
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x50 and 29x64)
