In [1]:
import torch
import numpy as np
import cvxpy as cp
import cvxpylayers

import pickle
import os

from src.neural_net import *
# from src.models import *
from trainer import *

## Data Preparation

##### Load training data

In [2]:
# Locate the pickled dataset relative to the parent of the working directory.
relative_path = os.path.abspath("..")
dataset_fn = os.path.join(relative_path, 'robot_nav', 'data', 'single.p')
prob_features = ['x0', 'xg']

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only point this at datasets from a trusted source.
# The context manager closes the handle even if unpickling fails.
with open(dataset_fn, 'rb') as data_file:
    all_data = pickle.load(data_file)[:100000]  # use only part of the dataset for quick testing
num_train = len(all_data)
print(f"Number of training samples: {num_train}")

# Stack the per-sample entries into arrays with one leading sample axis.
X0 = np.vstack([sample['x0'].T for sample in all_data])
XG = np.vstack([sample['xg'].T for sample in all_data])
# NOTE(review): OBS is built from the 'xg' key, so it is a copy of XG. This looks
# like a copy-paste slip; it is not used in the cells shown here. Confirm the
# intended key before relying on it.
OBS = np.vstack([sample['xg'].T for sample in all_data])
XX = np.array([sample['XX'] for sample in all_data])
UU = np.array([sample['UU'] for sample in all_data])
# Each sample's 'YY' is cast to int, joined along axis 1, then the first two axes are swapped.
YY = np.concatenate([sample['YY'].astype(int) for sample in all_data], axis=1).transpose(1, 0, 2)
# Layout: [problem parameters, continuous trajectories, discrete solutions].
train_data = [{'x0': X0, 'xg': XG}, {'XX': XX, 'UU': UU}, YY]


Number of training samples: 83904


In [3]:
# Overall shape of the discrete-solution array, then the shape of every
# row belonging to the first sample.
print(YY.shape)

first_sample_labels = YY[0]
for label_row in first_sample_labels:
    print(label_row.shape)

(83904, 3, 20)
(20,)
(20,)
(20,)


In [4]:
# Shape of every row of the first sample's UU entry.
first_sample_controls = UU[0]
for control_row in first_sample_controls:
    print(control_row.shape)

(20,)
(20,)


##### Obstacle Info

In [5]:
# One row per obstacle; the five values in each row are the same five numbers
# passed to `obstacle(...)` when the QP layer is built further down.
Obs_info = np.array([[1.0,  0.0, 0.4, 0.5, 0.0],
                     [0.7, -1.1, 0.5, 0.4, 0.0],
                     [0.4, -2.5, 0.4, 0.5, 0.0]])
# Derive the count from the table so the two can never disagree.
n_obs = Obs_info.shape[0]

##### Dataset Construction

In [6]:
n_features = 6  # expected width of the flattened feature matrix
n_bits = 4      # number of binary digits each integer label is expanded into

X_train = train_data[0]  # Problem parameters, will be inputs of the NNs
Y_train = train_data[2]  # Discrete solutions, will be outputs of the NNs
P_train = train_data[1]  # Continuous trajectories, will be used as parameters in training
num_train = Y_train.shape[0]
y_shape = Y_train.shape[1:]
n_y = int(np.prod(y_shape))

# Collect one (num_train, k) block per requested feature and join them column-wise.
feature_blocks = []
for feature in prob_features:
    if feature == "obstacles_map":
        continue
    values = X_train.get(feature)
    if values is None:
        print('Feature {} is unknown or missing'.format(feature))
        continue
    values = np.asarray(values)
    if values.shape[0] != num_train:
        raise ValueError(
            f"Feature '{feature}' has {values.shape[0]} samples, expected {num_train}"
        )
    feature_blocks.append(values.reshape(num_train, -1))

features = (
    np.concatenate(feature_blocks, axis=1)
    if feature_blocks
    else np.zeros((num_train, 0))
)
if features.shape[1] != n_features:
    # Report the mismatch instead of overwriting the expected width silently;
    # code that hard-codes the input size would otherwise break without a hint.
    print('Expected {} feature columns but built {}; using {}'.format(
        n_features, features.shape[1], features.shape[1]))
    n_features = features.shape[1]

# Flatten each label array and expand every integer into n_bits binary digits,
# most significant bit first. Bits above n_bits are dropped by the mask.
labels = Y_train.reshape(num_train, n_y)
labels_int = labels.astype(np.int64, copy=False)
bit_shifts = np.arange(n_bits - 1, -1, -1, dtype=np.int64)
outputs_bits = (labels_int[..., None] >> bit_shifts) & 1
outputs = outputs_bits.reshape(num_train, -1)

X_arr = features
Y_arr = outputs
P_arr = P_train['XX'][:, :, :]
Pu_arr = P_train['UU'][:, :, :]

# Everything is handed to torch as float32.
X_tensor = torch.from_numpy(X_arr).float()
Y_tensor = torch.from_numpy(Y_arr).float()
P_tensor = torch.from_numpy(P_arr).float()
Pu_tensor = torch.from_numpy(Pu_arr).float()

In [7]:
from torch.utils.data import random_split

batch_size = 128
split_seed = 0  # fixes which samples end up in the held-out set
dataset = torch.utils.data.TensorDataset(X_tensor, Y_tensor, P_tensor, Pu_tensor)

# 90/10 train/test split. A seeded generator keeps the split identical across
# kernel restarts, so evaluation numbers from different runs are comparable.
n_train = int(0.9 * len(dataset))
n_test = len(dataset) - n_train
train_set, test_set = random_split(
    dataset,
    [n_train, n_test],
    generator=torch.Generator().manual_seed(split_seed),
)

# Training batches are reshuffled every epoch; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

## QP Layer

In [8]:
import warnings

# Hide UserWarning messages whose originating module matches cvxpy's
# solving-chain utilities.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="cvxpy.reductions.solvers.solving_chain_utils",
)

In [9]:
from cvxpy_mpc_layer import *
from Robots import obstacle

# Problem constants. These names are passed to the layer builder below, so
# editing a value here actually changes the layer that gets built.
T = 0.25
H = 20
M = 1  # update if you have more robots
bounds = {
    "x_max": 2.00,
    "x_min": -0.5,
    "y_max": 0.5,
    "y_min": -3.0,
    "v_max": 0.50,
    "v_min": -0.50,
    "u_max": 0.50,
    "u_min": -0.50,
}
weights = (1.0, 1.0, 10.0)  # (Wu, Wp, Wpt)
d_min = 0.25

# Obstacles exactly as in the simulator
obstacles = [
    obstacle(1.0, 0.0, 0.4, 0.5, 0.0),
    obstacle(0.7, -1.1, 0.5, 0.4, 0.0),
    obstacle(0.40, -2.50, 0.4, 0.5, 0.0),
]

# NOTE: the sanity-check loop further down rebinds the name `p` to a batch tensor.
p = np.zeros((2, M))  # stack of robot positions; replace with actual state
d_prox = 2.0
# Pairs (m, n) with m < n whose positions lie within d_prox of each other.
# With M = 1 there are no pairs, so the list is empty.
coupling_pairs = [
    (m, n)
    for m in range(M)
    for n in range(m + 1, M)
    if np.linalg.norm(p[:, m] - p[:, n]) <= d_prox
]

cplayer, meta = build_mpc_cvxpy_layer(
    T=T,
    H=H,
    M=M,
    bounds=bounds,
    weights=weights,
    d_min=d_min,
    obstacles=obstacles,
    coupling_pairs=coupling_pairs,
)

cplayer.to(torch.device("cpu"))

    You didn't specify the order of the reshape expression. The default order
    used in CVXPY is Fortran ('F') order. This default will change to match NumPy's
    default order ('C') in a future version of CVXPY.
    


CvxpyLayer()

In [10]:

# Sanity check on the first training batch only: run the stored discrete
# labels through the cvxpy layer and compare its outputs with the stored
# trajectories.
for x, y, p, pu in train_loader:
    print(x.shape, y.shape, p.shape)

    # The three column pairs of x and the reshaped labels are the layer's inputs.
    layer_inputs = (x[:, 0:2], x[:, 2:4], x[:, 4:6], y.reshape(-1, 3, 1, 20, 4))
    u_opt, p_opt, v_opt, s_opt = cplayer(*layer_inputs)

    threshold = 1e-3  # absolute deviations below this are reported as zero

    # Deviation between the layer's first output and the stored `pu` batch.
    control_err = torch.abs(u_opt - pu)
    diff = torch.where(control_err < threshold, torch.zeros_like(control_err), control_err)
    print(diff.mean(dim=0))

    # Deviation between [p_opt; v_opt] stacked along dim 1 and the stored `p` batch.
    state_err = torch.abs(torch.cat([p_opt, v_opt], dim=1) - p)
    diff = torch.where(state_err < threshold, torch.zeros_like(state_err), state_err)
    print(diff.mean(dim=0))

    for tensor in (p_opt, v_opt, p):
        print(tensor.shape)

    break

torch.Size([128, 6]) torch.Size([128, 240]) torch.Size([128, 4, 21])
tensor([[0.0046, 0.0044, 0.0060, 0.0054, 0.0031, 0.0028, 0.0051, 0.0033, 0.0033,
         0.0030, 0.0032, 0.0020, 0.0015, 0.0017, 0.0007, 0.0007, 0.0008, 0.0008,
         0.0005, 0.0002],
        [0.0096, 0.0077, 0.0084, 0.0077, 0.0066, 0.0109, 0.0090, 0.0033, 0.0086,
         0.0076, 0.0066, 0.0064, 0.0063, 0.0066, 0.0051, 0.0042, 0.0029, 0.0019,
         0.0011, 0.0003]])
tensor([[0.0000, 0.0001, 0.0004, 0.0008, 0.0013, 0.0019, 0.0025, 0.0030, 0.0032,
         0.0034, 0.0034, 0.0032, 0.0029, 0.0025, 0.0022, 0.0020, 0.0018, 0.0016,
         0.0013, 0.0011, 0.0009],
        [0.0000, 0.0003, 0.0010, 0.0018, 0.0025, 0.0031, 0.0037, 0.0044, 0.0051,
         0.0058, 0.0062, 0.0063, 0.0062, 0.0060, 0.0058, 0.0056, 0.0051, 0.0045,
         0.0037, 0.0028, 0.0021],
        [0.0000, 0.0011, 0.0015, 0.0016, 0.0021, 0.0027, 0.0026, 0.0016, 0.0017,
         0.0014, 0.0010, 0.0011, 0.0015, 0.0013, 0.0011, 0.0010, 0.0010, 0.0010,


In [11]:
from cons_utils import *

# Reshape the flat label batch `y` left over from the sanity-check loop.
y_reshape = NNoutput_reshape_torch(y, 3)
print(y_reshape.shape)

# Only the first two rows along dim 1 of `p` are handed to the violation check.
leading_rows = p[:, :2]
print(constraint_violation_torch(y_reshape, leading_rows))

torch.Size([128, 3, 4, 20])
tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 5.9605e-08, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        2.9802e-08, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.9802e-08, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        5.9605e-08, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.9605e-08, 0.0000e+00,
        0.00

In [12]:
# print(obj)

## Training

In [16]:
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [14]:
# Take the layer widths from the tensors built in the dataset cell rather than
# repeating them as literals, so the network stays in sync with the data.
n_input = X_tensor.shape[1]
n_output = Y_tensor.shape[1]

# Four hidden layers of 128 units each.
nn_model = MLPWithSTE(
    insize=n_input,
    outsize=n_output,
    bias=True,
    linear_map=torch.nn.Linear,
    nonlin=nn.ReLU,
    hsizes=[128] * 4,
)

In [24]:
# Combine the network and the cvxpy layer in the project's trainer object.
# NOTE(review): the positional 6 and 4 presumably are the feature count and the
# bits per label used in the dataset cell; confirm against SSL_MIQP_incorporated.
Model = SSL_MIQP_incorporated(
    nn_model,
    cplayer,
    6,
    4,
    device=device,
)

In [18]:
import datetime

# Timestamp (YYYYMMDD_HHMMSS) used to make run names unique.
now = datetime.datetime.now()
dt_string = f"{now:%Y%m%d_%H%M%S}"

In [None]:
# Hyper-parameters and logging settings for the training runs.
training_params = {
    'TRAINING_EPOCHS': 5,
    'CHECKPOINT_AFTER': 50,
    'LEARNING_RATE': 1e-3,
    'WEIGHT_DECAY': 1e-5,
    'PATIENCE': 5,
    'WANDB_PROJECT': "l2o_ssl_miqp_robot_nav",
    'RUN_NAME': "experiment_1_" + dt_string,
}

# Loss weights; the first entry is set to 0.0.
slack_weight = 1e3
constraint_weight = 1e6
supervised_weight = 1e5
loss_weights = [0.0, slack_weight, constraint_weight, supervised_weight]

# Both training calls are left disabled; uncomment exactly one to train `Model`.
# Model.train_SL(
#     train_loader=train_loader,
#     test_loader=test_loader,
#     training_params=training_params,
#     # loss_weights=loss_weights,
#     wandb_log = True)
# Model.train_SSL(
#     train_loader=train_loader,
#     test_loader=test_loader,
#     training_params=training_params,
#     loss_weights=loss_weights,
#     wandb_log = True)

[34m[1mwandb[0m: Currently logged in as: [33mmux2001[0m ([33mmux2001-xxxxxlab-test1[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[epoch 1 | step 50] validation: loss = 7.3815, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 8.1074, supervised_loss = 0.1973, 
[epoch 1 | step 100] validation: loss = 4.8259, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 5.2941, supervised_loss = 0.1920, 
[epoch 1 | step 150] validation: loss = 5.3383, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 5.8582, supervised_loss = 0.1921, 
[epoch 1 | step 200] validation: loss = 4.2987, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 4.7138, supervised_loss = 0.1902, 
[epoch 1 | step 250] validation: loss = 4.1418, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 4.5412, supervised_loss = 0.1892, 
[epoch 1 | step 300] validation: loss = 3.6514, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000, y_penalty = 4.0014, supervised_loss = 0.1871, 
[epoch 1 | step 350] validation: loss = 1.6527, obj_val = 0.0000, opt_gap = nan %, slack_pen = 0.0000

In [None]:
# def evaluate_model(nn_model, test_loader, device):
#     nn_model.eval()
#     supervised_loss_fn = torch.nn.HuberLoss()
#     val_loss_total = 0.0
#     with torch.no_grad():
#         for val_theta_batch, val_y_gt_batch, _, _ in test_loader:
#             val_theta_batch = val_theta_batch.to(device)
#             val_y_gt_batch = val_y_gt_batch.to(device)
#             # ---- Predict y from theta ----
#             y_pred_test = nn_model(val_theta_batch).float() # (B, ny), hard {0,1}
#             val_loss_total += supervised_loss_fn(y_pred_test, val_y_gt_batch.float()).item()                           
#     avg_val_loss = val_loss_total / len(test_loader)

#     print(f"validation loss = {avg_val_loss:.4f}")


In [None]:
# evaluate_model(Model.nn_model, test_loader, device)

validation loss = 0.1506


In [19]:
# Fresh network with the same architecture, trained with the supervised loss only.
nn_model_1 = MLPWithSTE(
    insize=n_input,
    outsize=n_output,
    bias=True,
    linear_map=torch.nn.Linear,
    nonlin=nn.ReLU,
    hsizes=[128, 128, 128, 128],
)

ModelSL = SSL_MIQP_incorporated(nn_model_1, cplayer, 6, 4, device=device)

# Single-epoch configuration for the supervised run.
training_params = {
    'TRAINING_EPOCHS': 1,
    'CHECKPOINT_AFTER': 50,
    'LEARNING_RATE': 1e-3,
    'WEIGHT_DECAY': 1e-5,
    'PATIENCE': 5,
    'WANDB_PROJECT': "l2o_ssl_miqp_robot_nav",
    'RUN_NAME': "experiment_1_" + dt_string,
}

# Defined for parity with the SSL setup; not passed to train_SL below.
slack_weight, constraint_weight, supervised_weight = 1e3, 1e6, 1e5
loss_weights = [0.0, slack_weight, constraint_weight, supervised_weight]

ModelSL.train_SL(
    train_loader=train_loader,
    test_loader=test_loader,
    training_params=training_params,
    wandb_log=False,
)

[epoch 1 | step 1] training loss = 0.2570, validation loss = 0.2545
[epoch 1 | step 50] training loss = 0.0690, validation loss = 0.0690
[epoch 1 | step 100] training loss = 0.0392, validation loss = 0.0393
[epoch 1 | step 150] training loss = 0.0416, validation loss = 0.0376
[epoch 1 | step 200] training loss = 0.0382, validation loss = 0.0475
[epoch 1 | step 250] training loss = 0.0351, validation loss = 0.0354
[epoch 1 | step 300] training loss = 0.0349, validation loss = 0.0405
[epoch 1 | step 350] training loss = 0.0552, validation loss = 0.0429
[epoch 1 | step 400] training loss = 0.0475, validation loss = 0.0469
[epoch 1 | step 450] training loss = 0.0380, validation loss = 0.0363
Learning rate updated: 0.001000 -> 0.000500
[epoch 1 | step 500] training loss = 0.0282, validation loss = 0.0295
[epoch 1 | step 550] training loss = 0.0344, validation loss = 0.0328


In [20]:
# Evaluate the supervised baseline on the held-out loader and display the result.
evaluation_results = ModelSL.evaluate(test_loader)
evaluation_results

Evaluation Results: Avg Supervised Loss = 0.0398, Avg Constraint Violation = 1.9343, Avg Optimality Gap = 242.7286


{'avg_supervised_loss': 0.039840271809335914,
 'avg_constraint_violation': 1.9343116825277156,
 'avg_optimality_gap': 242.7285674124053}