In [1]:
from torch import nn
from bound_propagation import BoundModelFactory, HyperRectangle
import torch

class NNDM(nn.Sequential):
    def __init__(self):
        super(NNDM, self).__init__(
            nn.Linear(5, 64),
            nn.Tanh(),
            nn.Linear(64, 64),
            nn.Tanh(),
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 32),
            nn.Tanh(),
            nn.Linear(32, 16),
            nn.Tanh(),
            nn.Linear(16, 4)
        )

    def forward(self, x):
        out = super().forward(x)
        return out + x[:,:4]

net = NNDM()

factory = BoundModelFactory()
net = factory.build(net)

In [2]:
x = torch.rand(1, 5)
epsilon = 0.1
input_bounds = HyperRectangle.from_eps(x, epsilon)

crown_bounds = net.crown(input_bounds)
crown_ibp_bounds = net.crown_ibp(input_bounds)

alpha_crown_bounds = net.crown(input_bounds, alpha=True)
alpha_crown_ibp_bounds = net.crown_ibp(input_bounds, alpha=True)

In [3]:
print("Final lower bounds: ", crown_bounds.lower[1])

Final lower bounds:  tensor([[ 0.3256, -0.2088, -0.1270, -0.2015]], grad_fn=<AddBackward0>)


In [4]:
def get_bounds(model, state, action_space_partition, epsilon):
    """
    model: Pytorch neural network initialised with BoundModelFactory
        - model input size: [state_dimensionality + action_dimensionality]
        - model output size: [state_dimensionality]
    state: Tensor with shape [state_dimensionality]
    action_space_partition: Tensor with shape: [action_options] -> in future: [action_dimensionality, action_partition]
    epsilon: Perturbation radius for input bounds
    ---
    output: [action_options, 2, state_dimensionality] (2: upperbound & lowerbound)
    """
    result = []

    for action in action_space_partition:
        state_action = torch.cat((state, action.unsqueeze(0))).view(1, 5)
        input_bounds = HyperRectangle.from_eps(state_action, epsilon)
        crown_bounds = model.crown(input_bounds)

        # Assuming the output dimension of the model is state_dimensionality
        lower_bound = crown_bounds.lower[0].unsqueeze(0)
        upper_bound = crown_bounds.upper[0].unsqueeze(0)

        result.append(torch.cat((lower_bound, upper_bound), dim=0))

    return torch.stack(result, dim=0)

In [7]:
import numpy as np

carpole_data = np.load('Cartpole_data.npy')
state = torch.Tensor(carpole_data[0, :4])
action_space = torch.Tensor([0, 1])
bounds_for_all_actions = get_bounds(net, state, action_space, 0.1)

In [9]:
desired_action_index = 0

guaranteed_next_state_set = bounds_for_all_actions[desired_action_index, :, :]
guaranteed_next_state_set

tensor([[[[-0.0031, -0.0158, -0.0013,  0.0196, -0.0068],
          [ 0.0120,  0.0188, -0.0001,  0.0222, -0.0045],
          [ 0.0012, -0.0014,  0.0071,  0.0035, -0.0011],
          [ 0.0033,  0.0264, -0.0149,  0.0342, -0.0054]]],


        [[[-0.0031, -0.0160, -0.0014,  0.0196, -0.0068],
          [ 0.0119,  0.0188, -0.0002,  0.0222, -0.0047],
          [ 0.0011, -0.0014,  0.0070,  0.0036, -0.0010],
          [ 0.0034,  0.0264, -0.0151,  0.0340, -0.0055]]]],
       grad_fn=<SliceBackward0>)

In [10]:
import torch.nn.functional as F

class DQN(nn.Module):

    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)
    
dqn_model = DQN(4, 1)

# TODO: when we have trained the model
# dqn_model.load_state_dict(torch.load(PATH_TO_MODEL_WEIGHTS))

In [19]:
"""WORK IN PROGRESS"""

import cvxpy as cp

# Parameters for safety function
x_0_max = 2.4
x_2_max = 0.2095  # In radians

# Define the safety function based on your system's requirements
def h(x):
    # Example safety function
    return 1 - (torch.abs(x[0])/x_0_max + torch.abs(x[2])/x_2_max)

# Define the optimization variable (control input)
u = cp.Variable(1) # 1 because action is described by a single integer 0/1

def knom(state_tensor):
    with torch.no_grad():
        q_values = dqn_model(state_tensor)
        # Assuming you want the action with the highest Q-value
        action = torch.argmax(q_values).item()
        return action

# Define the nominal control
k_nom = knom(state)


# Define the safety condition constraint
alpha = 0.1  # Define alpha based on your requirements
state_action = torch.cat((state, torch.Tensor(1))).view(1, 5)

# Compute the safety function values
h_state_action = h(net(state_action)).detach().numpy()
h_state = h(state).detach().numpy()

# Define the safety condition constraint
# This is a simplified example assuming h(x) can be linearized or is linear
safety_condition = cp.Constant(h_state_action) - alpha * cp.Constant(h_state) >= 0

# Define constraints
constraints = [safety_condition]  # Include other constraints if any

# Define the objective function (minimizing deviation from nominal control)
objective = cp.Minimize(cp.norm(u - k_nom, 2))

# Define constraints
constraints = [safety_condition]

# Set up and solve the optimization problem
problem = cp.Problem(objective, constraints)
problem.solve()

# Optimal control input
optimal_control_input = u.value if problem.status not in ["infeasible", "unbounded"] else None


IndexError: index 2 is out of bounds for dimension 0 with size 1