In [44]:
from torch import nn
from bound_propagation import BoundModelFactory, HyperRectangle
from bound_propagation.polynomial import Pow
import torch

class NNDM(nn.Sequential):
    def __init__(self):
        super(NNDM, self).__init__(
            nn.Linear(5, 64),
            nn.Tanh(),
            nn.Linear(64, 4),
        )

    def forward(self, x):
        out = super().forward(x)
        return out + x[:,:4]

    
class HHead(nn.Sequential):
    def __init__(self, x_0_max, x_2_max):
        
        super().__init__(
            Pow(2),
            nn.Linear(4, 2)
        )
        self[1].weight.data = torch.tensor([[-1/x_0_max**2, 0, 0, 0],
                                            [0, 0, -1/x_2_max**2, 0]])
        self[1].bias.data = torch.tensor([1., 1.])

class CombinedModel(nn.Sequential):
    def __init__(self, x_0_max, x_2_max):
        super(CombinedModel, self).__init__()
        self.add_module('nndm', NNDM())
        self.add_module('hhead', HHead(x_0_max, x_2_max))
    
# Parameters for safety function
x_position = 2.4
pole_angel = 0.2095  # In radians

# thetanet = HThetaHead(pole_angel)
prenet = CombinedModel(x_position, pole_angel)

factory = BoundModelFactory()
net = factory.build(prenet)

In [47]:
x = torch.rand(1, 5)
epsilon = 0.1
input_bounds = HyperRectangle.from_eps(x, epsilon)

crown_bounds = net.crown(input_bounds)
crown_ibp_bounds = net.crown_ibp(input_bounds)

alpha_crown_bounds = net.crown(input_bounds, alpha=True)
alpha_crown_ibp_bounds = net.crown_ibp(input_bounds, alpha=True)

In [48]:
print("Final lower bounds: ", crown_bounds.lower[1])

Final lower bounds:  tensor([[0.9997, 0.8685]], grad_fn=<AddBackward0>)


In [49]:
def get_bounds(model, state, action_space_partition, epsilon):
    """
    model: Pytorch neural network initialised with BoundModelFactory
        - model input size: [state_dimensionality + action_dimensionality]
        - model output size: [state_dimensionality]
    state: Tensor with shape [state_dimensionality]
    action_space_partition: Tensor with shape: [action_options] -> in future: [action_dimensionality, action_partition]
    epsilon: Perturbation radius for input bounds
    ---
    output: [action_options, 2, state_dimensionality] (2: upperbound & lowerbound)
    """
    result = []

    for action in action_space_partition:
        state_action = torch.cat((state, action.unsqueeze(0))).view(1, 5)
        input_bounds = HyperRectangle.from_eps(state_action, epsilon)
        crown_bounds = model.crown(input_bounds)

        # Assuming the output dimension of the model is state_dimensionality
        lower_bound = crown_bounds.lower[0].unsqueeze(0)

        result.append(lower_bound)

    return torch.stack(result, dim=0)

In [50]:
import numpy as np

carpole_data = np.load('Cartpole_data.npy')
state = torch.Tensor(carpole_data[0, :4])
action_space = torch.Tensor([0, 1])
bounds_for_all_actions = get_bounds(net, state, action_space, 0.1)

In [51]:
desired_action_index = 0

guaranteed_next_state_set = bounds_for_all_actions[desired_action_index, :, :]
guaranteed_next_state_set
bounds_for_all_actions

tensor([[[[[ 0.0008, -0.0016, -0.0006,  0.0005,  0.0012],
           [ 0.3251, -0.3889, -0.0926, -0.4328,  0.1166]]]],



        [[[[-0.0059,  0.0106,  0.0041, -0.0029, -0.0082],
           [-0.0219,  0.0258,  0.0124,  0.0343, -0.0107]]]]],
       grad_fn=<StackBackward0>)

In [65]:
import torch.nn.functional as F

class DQN(nn.Module):

    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)
    
dqn_model = DQN(4, 2)

# TODO: when we have trained the model
# dqn_model.load_state_dict(torch.load(PATH_TO_MODEL_WEIGHTS))

In [69]:
dqn_model(state)

tensor([0.0233, 0.0100], grad_fn=<ViewBackward0>)

In [58]:
def discrete_cbf(action_space, state, nominal_action, hfunction, h_nndm, alpha):
    """
    action_space: A 1-dimensional tensor containing possible actions.
    state: The current state of the system.
    nominal_action: A pre-selected action by the nominal controller.
    hfunction: A function representing the barrier condition.
    h_nndm: A NNDM with the h function put on top.
    alpha: A scaling factor.
    """
    best_action = nominal_action
    res = []
    for action in action_space:
        h = h_nndm(torch.cat((state, action.unsqueeze(0))).view(1, -1))
        h_prev = hfunction(state)
        if torch.all(torch.ge(h, alpha * h_prev)):
            res += [(int(action!=nominal_action), h, action)]
    best_action_tuple = min(res, key = lambda x: x[0])
    
    if sum(best_action_tuple[0] == action_tuple[0] for action_tuple in res) > 1:
        best_action_tuple = min([action_tuple for action_tuple in res if action_tuple[0] == best_action_tuple[0]], key= lambda x: x[1])
    best_action = best_action_tuple[2]
    return best_action

In [60]:
hfunction = HHead(x_position, pole_angel)
new_action = discrete_cbf(action_space, state, torch.tensor(1), hfunction, prenet, 1)
new_action

tensor(1.)

In [19]:
"""WORK IN PROGRESS"""

import cvxpy as cp

# Parameters for safety function
x_0_max = 2.4
x_2_max = 0.2095  # In radians

# Define the safety function based on your system's requirements
def h(x):
    # Example safety function
    return 1 - (torch.abs(x[0])/x_0_max + torch.abs(x[2])/x_2_max)

# Define the optimization variable (control input)
u = cp.Variable(1) # 1 because action is described by a single integer 0/1

def knom(state_tensor):
    with torch.no_grad():
        q_values = dqn_model(state_tensor)
        # Assuming you want the action with the highest Q-value
        action = torch.argmax(q_values).item()
        return action

# Define the nominal control
k_nom = knom(state)


# Define the safety condition constraint
alpha = 0.1  # Define alpha based on your requirements
state_action = torch.cat((state, torch.Tensor(1))).view(1, 5)

# Compute the safety function values
h_state_action = h(net(state_action)).detach().numpy()
h_state = h(state).detach().numpy()

# Define the safety condition constraint
# This is a simplified example assuming h(x) can be linearized or is linear
safety_condition = cp.Constant(h_state_action) - alpha * cp.Constant(h_state) >= 0

# Define constraints
constraints = [safety_condition]  # Include other constraints if any

# Define the objective function (minimizing deviation from nominal control)
objective = cp.Minimize(cp.norm(u - k_nom, 2))

# Define constraints
constraints = [safety_condition]

# Set up and solve the optimization problem
problem = cp.Problem(objective, constraints)
problem.solve()

# Optimal control input
optimal_control_input = u.value if problem.status not in ["infeasible", "unbounded"] else None


IndexError: index 2 is out of bounds for dimension 0 with size 1