In [1]:
import time

import numpy as np
import pandas as pd
import torch
from torch import nn
from tqdm import tqdm

# random seed
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

### Define Problem

In [2]:
class penaltyLoss(nn.Module):
    """
    Penalty loss function for Rosenbrock problem
    """
    def __init__(self, input_keys, steepness, num_blocks, penalty_weight=50, output_key="loss"):
        super().__init__()
        self.p_key, self.a_key, self.x_key = input_keys
        self.output_key = output_key
        self.steepness = steepness
        self.num_blocks = num_blocks
        self.penalty_weight = penalty_weight
        self.device = None
        # coef
        rng = np.random.RandomState(17)
        b = rng.normal(scale=1, size=(num_blocks))
        q = rng.normal(scale=1, size=(num_blocks))
        self.b = torch.from_numpy(b).float()
        self.q = torch.from_numpy(q).float()

    def forward(self, input_dict):
        """
        forward pass
        """
        # objective function
        obj = self.cal_obj(input_dict)
        # constraints violation
        viol = self.cal_constr_viol(input_dict)
        # penalized loss
        loss = obj + self.penalty_weight * viol
        input_dict[self.output_key] = torch.mean(loss)
        return input_dict

    def cal_obj(self, input_dict):
        """
        calculate objective function
        """
        # get values
        x, a = input_dict[self.x_key], input_dict[self.a_key]
        # x_2i
        x1 = x[:, ::2]
        # x_2i+1
        x2 = x[:, 1::2]
        # objective function
        f = torch.sum((a - x1) ** 2 + self.steepness * (x2 - x1 ** 2) ** 2, dim=1)
        return f

    def cal_constr_viol(self, input_dict):
        """
        calculate constraints violation
        """
        # get values
        x, p = input_dict[self.x_key], input_dict[self.p_key]
        # update device
        if self.device is None:
            self.device = x.device
            self.b = self.b.to(self.device)
            self.q = self.q.to(self.device)
        # inner constraint violation
        lhs_inner = torch.sum(x[:, 1::2], dim=1)
        rhs_inner = self.num_blocks * p[:, 0] / 2
        inner_violation = torch.relu(rhs_inner - lhs_inner)
        # outer constraint violation
        lhs_outer = torch.sum(x[:, ::2] ** 2, dim=1)
        rhs_outer = self.num_blocks * p[:, 0]
        outer_violation = torch.relu(lhs_outer - rhs_outer)
        # lear constraint violation
        lhs_1 = torch.matmul(x[:, 0::2], self.b)
        lhs_2 = torch.matmul(x[:, 1::2], self.q)
        linear_violation = torch.abs(lhs_1) + torch.abs(lhs_2)
        return inner_violation + outer_violation + linear_violation

In [3]:
import numpy as np
from pyomo import environ as pe
from src.problem.math_solver import abcParamSolver

class rosenbrock(abcParamSolver):
    def __init__(self, steepness, num_blocks, timelimit=None):
        super().__init__(timelimit=timelimit)
        # create model
        m = pe.ConcreteModel()
        # parameters
        m.p = pe.Param(default=1, mutable=True)
        m.a = pe.Param(pe.RangeSet(0, num_blocks-1), default=1, mutable=True)
        # variables
        m.x = pe.Var(pe.RangeSet(0, num_blocks*2-1), domain=pe.Reals)
        for i in range(num_blocks):
            # integer variables
            m.x[2*i+1].domain = pe.Integers
        # objective
        obj = sum((m.a[i] - m.x[2*i]) ** 2 + \
                   steepness * (m.x[2*i+1] - m.x[2*i] ** 2) ** 2 for i in range(num_blocks))
        m.obj = pe.Objective(sense=pe.minimize, expr=obj)
        # constraints
        m.cons = pe.ConstraintList()
        m.cons.add(sum(m.x[2*i+1] for i in range(num_blocks)) >= num_blocks * m.p / 2)
        m.cons.add(sum(m.x[2*i] ** 2 for i in range(num_blocks)) <= num_blocks * m.p)
        rng = np.random.RandomState(17)
        b = rng.normal(scale=1, size=(num_blocks))
        q = rng.normal(scale=1, size=(num_blocks))
        m.cons.add(sum(b[i] * m.x[2*i] for i in range(num_blocks)) == 0)
        m.cons.add(sum(q[i] * m.x[2*i+1] for i in range(num_blocks)) == 0)
        # attribute
        self.model = m
        self.params ={"p":m.p, "a":m.a}
        self.vars = {"x":m.x}
        self.cons = m.cons

### Problem Setting

In [4]:
# init
steepness = 50    # steepness factor
num_blocks = 10   # number of expression blocks
num_data = 9100   # number of data
test_size = 100   # number of test size
val_size = 1000   # number of validation size
train_size = num_data - test_size - val_size

In [5]:
# parameters as input data
p_low, p_high = 1.0, 8.0
a_low, a_high = 0.5, 4.5
p_train = np.random.uniform(p_low, p_high, (train_size, 1)).astype(np.float32)
p_test  = np.random.uniform(p_low, p_high, (test_size, 1)).astype(np.float32)
p_dev   = np.random.uniform(p_low, p_high, (val_size, 1)).astype(np.float32)
a_train = np.random.uniform(a_low, a_high, (train_size, num_blocks)).astype(np.float32)
a_test  = np.random.uniform(a_low, a_high, (test_size, num_blocks)).astype(np.float32)
a_dev   = np.random.uniform(a_low, a_high, (val_size, num_blocks)).astype(np.float32)

In [6]:
# nm datasets
from neuromancer.dataset import DictDataset
data_train = DictDataset({"p":p_train, "a":a_train}, name="train")
data_test = DictDataset({"p":p_test, "a":a_test}, name="test")
data_dev = DictDataset({"p":p_dev, "a":a_dev}, name="dev")
# torch dataloaders
from torch.utils.data import DataLoader
batch_size = 64
loader_train = DataLoader(data_train, batch_size, num_workers=0, collate_fn=data_train.collate_fn, shuffle=True)
loader_test = DataLoader(data_test, batch_size, num_workers=0, collate_fn=data_test.collate_fn, shuffle=False)
loader_dev = DataLoader(data_dev, batch_size, num_workers=0, collate_fn=data_dev.collate_fn, shuffle=True)

### Exact Solver

In [7]:
from src.problem import msRosenbrock
model = msRosenbrock(steepness, num_blocks, timelimit=60)

### Rounding Classification

In [8]:
# random seed
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [9]:
# hyperparameters
penalty_weight = 100  # weight of constraint violation penealty
hlayers_sol = 5       # number of hidden layers for solution mapping
hlayers_rnd = 4       # number of hidden layers for solution mapping
hsize = 16            # width of hidden layers for solution mapping
lr = 1e-3             # learning rate

In [10]:
# set problem
import neuromancer as nm
from src.problem import nmRosenbrock
from src.func.layer import netFC
from src.func import roundGumbelModel
# build neural architecture for the solution map
func = nm.modules.blocks.MLP(insize=num_blocks+1, outsize=2*num_blocks, bias=True,
                             linear_map=nm.slim.maps["linear"],
                             nonlin=nn.ReLU, hsizes=[hsize]*hlayers_sol)
smap = nm.system.Node(func, ["p", "a"], ["x"], name="smap")
# define rounding model
layers_rnd = netFC(input_dim=3*num_blocks+1, hidden_dims=[hsize]*hlayers_rnd, output_dim=2*num_blocks)
rnd = roundGumbelModel(layers=layers_rnd, param_keys=["p", "a"], var_keys=["x"],  output_keys=["x_rnd"], 
                       int_ind=model.int_ind, continuous_update=True, name="round")
# build neuromancer problem for rounding
components = nn.ModuleList([smap, rnd]).to("cuda")
loss_fn = nmRosenbrock(["p", "a", "x_rnd"], steepness, num_blocks, penalty_weight)

In [11]:
from src.problem.neuromancer.trainer import trainer
# training
epochs = 200                    # number of training epochs
warmup = 20                     # number of epochs to wait before enacting early stopping policy
patience = 20                   # number of epochs with no improvement in eval metric to allow before early stopping
optimizer = torch.optim.AdamW(components.parameters(), lr=lr)
# create a trainer for the problem
my_trainer = trainer(components, loss_fn, optimizer, epochs=epochs, patience=patience, warmup=warmup, device="cuda")
# training for the rounding problem
my_trainer.train(loader_train, loader_dev)

Epoch 0, Iters 0, Validation Loss: 2830.30
Epoch 0, Iters 125, Training Loss: 1941.64, Validation Loss: 647.80
Epoch 1, Iters 250, Training Loss: 505.68, Validation Loss: 287.80
Epoch 2, Iters 375, Training Loss: 294.45, Validation Loss: 210.76
Epoch 3, Iters 500, Training Loss: 252.17, Validation Loss: 176.59
Epoch 4, Iters 625, Training Loss: 220.55, Validation Loss: 165.01
Epoch 5, Iters 750, Training Loss: 202.93, Validation Loss: 151.86
Epoch 6, Iters 875, Training Loss: 191.25, Validation Loss: 144.81
Epoch 7, Iters 1000, Training Loss: 176.29, Validation Loss: 122.88
Epoch 8, Iters 1125, Training Loss: 161.03, Validation Loss: 115.69
Epoch 9, Iters 1250, Training Loss: 144.65, Validation Loss: 112.59
Epoch 10, Iters 1375, Training Loss: 137.93, Validation Loss: 105.83
Epoch 11, Iters 1500, Training Loss: 130.95, Validation Loss: 95.66
Epoch 12, Iters 1625, Training Loss: 121.30, Validation Loss: 90.68
Epoch 13, Iters 1750, Training Loss: 111.32, Validation Loss: 88.52
Epoch 14, 

In [14]:
params, sols, objvals, mean_viols, max_viols, num_viols, elapseds = [], [], [], [], [], [], []
for p, a in tqdm(list(zip(p_test, a_test))):
    # data point as tensor
    datapoints = {"p": torch.tensor(np.array([p]), dtype=torch.float32).to("cuda"), 
                  "a": torch.tensor(np.array([a]), dtype=torch.float32).to("cuda"),
                  "name": "test"}
    # infer
    components.eval()
    tick = time.time()
    with torch.no_grad():
        for comp in components:
            datapoints.update(comp(datapoints))
    tock = time.time()
    # assign params
    model.set_param_val({"p":p, "a":a})
    # assign vars
    x = datapoints["x_rnd"]
    for i in range(2*num_blocks):
        model.vars["x"][i].value = x[0,i].item()
    # get solutions
    xval, objval = model.get_val()    
    params.append(list(p)+list(a))
    sols.append(list(list(xval.values())[0].values()))
    objvals.append(objval)
    viol = model.cal_violation()
    mean_viols.append(np.mean(viol))
    max_viols.append(np.max(viol))
    num_viols.append(np.sum(viol > 1e-6))
    elapseds.append(tock - tick)
df = pd.DataFrame({"Param": params,
                    "Sol": sols,
                    "Obj Val": objvals,
                    "Mean Violation": mean_viols,
                    "Max Violation": max_viols,
                    "Num Violations": num_viols,
                    "Elapsed Time": elapseds})
time.sleep(1)
print(df.describe())
print("Number of infeasible solution: {}".format(np.sum(df["Num Violations"] > 0)))

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 199.06it/s]


          Obj Val  Mean Violation  Max Violation  Num Violations  Elapsed Time
count  100.000000      100.000000     100.000000          100.00    100.000000
mean    56.988932        0.000012       0.000046            0.01      0.002455
std     25.866054        0.000116       0.000464            0.10      0.000664
min     16.703403        0.000000       0.000000            0.00      0.001000
25%     39.617259        0.000000       0.000000            0.00      0.002001
50%     50.957301        0.000000       0.000000            0.00      0.002012
75%     66.363043        0.000000       0.000000            0.00      0.003000
max    145.902156        0.001160       0.004638            1.00      0.005068
Number of infeasible solution: 1
