# Model Parameters

In [49]:
import numpy as np

# population size
N = 2

act_dim = 2
state_dim = (N+1)**2
lam = 0.9
next_V = np.ones(state_dim)*10


# pernalty coefficients
k = np.ones(state_dim) 
K = np.ones((state_dim, state_dim))

# LDR coefficients
delta_s = np.ones(act_dim)
sigma_s = np.ones(act_dim)
rho_s = np.ones((act_dim, state_dim))

bar_r_s = 10
bar_eta_s = np.ones(state_dim)
bar_Sigma_s = np.ones((state_dim, state_dim))

# bound of actions
ub_a = np.asarray([1,N])

# MIP Formulation 1 -- penalty on both mean and variance

In [50]:
import gurobipy as gp
from gurobipy import GRB
import numpy as np

model1 = gp.Model()
a = [0 for i in range(act_dim)]
w = [0 for i in range(state_dim)]
u = [0 for i in range(state_dim)]
Q = [[0 for j in range(state_dim)] for i in range(state_dim)]
h = [0 for i in range(state_dim)]
m0 = [[0 for j in range(state_dim)] for i in range(act_dim)]
m1 = [[0 for j in range(state_dim)] for i in range(act_dim)]
m2 = [[[0 for j_ in range(state_dim)] for j in range(state_dim)] for i in range(act_dim)]
m3 = [[[[0 for j_ in  range(state_dim)] for j in range(state_dim)] for i_ in range(act_dim)] for i in range(act_dim)]
m4 = [[0 for j in range(state_dim)] for i in range(state_dim)]
m5 = [[0 for j in range(state_dim)] for i in range(state_dim)]


r = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'r')
q = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'q')
for i in range(act_dim):
    a[i] = model1.addVar(vtype=GRB.INTEGER, lb = 0.0, ub = ub_a[i], name = 'a%d' %i)
for i in range(state_dim):
    w[i] = model1.addVar(vtype=GRB.CONTINUOUS, lb = 0.0, ub = k[i], name = 'w%d' %i)
for i in range(state_dim):
    u[i] = model1.addVar(vtype=GRB.CONTINUOUS, lb = 0.0, ub = k[i], name = 'u%d' %i)
for i in range(state_dim):
    for j in range(state_dim):
        Q[i][j] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -np.sqrt(K[i][i]*K[j][j]), ub = K[i][j], name = 'Q%d%d' %(i,j))
for i in range(state_dim):
    h[i] = model1.addVar(vtype=GRB.CONTINUOUS, name = 'h%d' %i)
for i in range(act_dim):
    for j in range(state_dim):
        m0[i][j] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm0-%d%d' %(i,j))
for i in range(act_dim):
    for j in range(state_dim):
        m1[i][j] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm1-%d%d' %(i,j))
for i in range(act_dim):
    for j in range(state_dim):
        for j_ in range(state_dim):
            m2[i][j][j_] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm2-%d%d%d' %(i,j,j_))
for i in range(act_dim):
    for i_ in range(act_dim):
        for j in range(state_dim):
            for j_ in range(state_dim):
                m3[i][i_][j][j_] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm3-%d%d%d%d' %(i,i_,j,j_))
for i in range(state_dim):
    for j in range(state_dim):
        m4[i][j] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm4-%d%d' %(i,j))
for i in range(state_dim):
    for j in range(state_dim):
        m5[i][j] = model1.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm5-%d%d' %(i,j))
    
# Objective
obj = 1*r + bar_r_s
for i in range(act_dim):
    obj.addTerms(bar_r_s*delta_s[i], a[i])
for j in range(state_dim):
    obj.addTerms(-bar_eta_s[j], w[j])
for i in range(act_dim):
    for j in range(state_dim):
        obj.addTerms(-(bar_eta_s[j]*rho_s[i][j]), m0[i][j])
for j in range(state_dim):
    obj.addTerms(bar_eta_s[j], u[j])
for i in range(act_dim):
    for j in range(state_dim):
        obj.addTerms(bar_eta_s[j]*rho_s[i][j], m1[i][j])
for j in range(state_dim):
    for j_ in range(state_dim):
        obj.addTerms(bar_eta_s[j]*bar_eta_s[j_], Q[j][j_])
for j in range(state_dim):
    for j_ in range(state_dim):
        for i in range(act_dim):
            obj.addTerms(bar_eta_s[j]*bar_eta_s[j_]*(rho_s[i][j] + rho_s[i][j_]), m2[i][j][j_])
for j in range(state_dim):
    for j_ in range(state_dim):
        for i in range(act_dim):
            for i_ in range(act_dim):
                obj.addTerms(bar_eta_s[j]*bar_eta_s[j_]*rho_s[i][j]*rho_s[i_][j_], m3[i][i_][j][j_])
for j in range(state_dim):
    for j_ in range(state_dim):
        obj.addTerms(-bar_Sigma_s[j][j_], Q[j][j_])
for j in range(state_dim):
    for j_ in range(state_dim):
        for i in range(act_dim):
            obj.addTerms(-bar_Sigma_s[j][j_]*sigma_s[i], m2[i][j][j_])
model1.setObjective(obj, GRB.MAXIMIZE)

# Constraint 1
constraint1 = q - r
for i in range(state_dim):
    for j in range(state_dim):
        constraint1.addTerms(-1,m5[i][j])
model1.addConstr(constraint1, GRB.GREATER_EQUAL, 0, "c1")

# Constraint 2 -- j constraints
for j in range(state_dim):
    constraint = lam*next_V[j] + w[j] - u[j] - q
    for i in range(state_dim): 
        constraint.addTerms(1, m4[i][j])
    for j_ in range(state_dim):
        constraint.addTerms(-2*bar_eta_s[j_], Q[j_][j])
    for j_ in range(state_dim):
        for i in range(act_dim):
            constraint.addTerms(-2*bar_eta_s[j_]*rho_s[i][j_], m2[i][j_][j])
    model1.addConstr(constraint, GRB.GREATER_EQUAL, 0, "c2-%d" %j);

# PSD
for i in range(state_dim):
    model1.addConstr(Q[i][i] >= gp.quicksum(Q[i][j] for j in range(state_dim)), 'PSD diagonal dominance')
    model1.addConstr(Q[i][i] >= 0, "PSD positive diagonal")
        
# McCormick envelopes
# M0
for i in range(act_dim):
    for j in range(state_dim):
        model1.addConstr(m0[i][j] - 0*w[j] - a[i]*0 + 0*0 >= 0, "m0-1-%d%d" %(i,j))
        model1.addConstr(m0[i][j] - ub_a[i]*w[j] - a[i]*k[j] + ub_a[i]*k[j] >= 0, "m0-2-%d%d" %(i,j))
        model1.addConstr(m0[i][j] - ub_a[i]*w[j] - a[i]*0 + ub_a[i]*0 <= 0, "m0-3-%d%d" %(i,j))
        model1.addConstr(m0[i][j] - a[i]*k[j] - 0*w[j] + 0*k[j] <= 0, "m0-4-%d%d" %(i,j))
    
# M1
for i in range(act_dim):
    for j in range(state_dim):
        model1.addConstr(m1[i][j] - 0*u[j] - a[i]*0 + 0*0 >= 0, "m1-1-%d%d" %(i,j))
        model1.addConstr(m1[i][j] - ub_a[i]*u[j] - a[i]*k[j] + ub_a[i]*k[j] >= 0, "m1-2-%d%d" %(i,j))
        model1.addConstr(m1[i][j] - ub_a[i]*u[j] - a[i]*0 + ub_a[i]*0 <= 0, "m1-3-%d%d" %(i,j))
        model1.addConstr(m1[i][j] - a[i]*k[j] - 0*u[j] + 0*k[j] <= 0, "m1-4-%d%d" %(i,j))
# M2
for i in range(act_dim):
    for j in range(state_dim):
        for j_ in range(state_dim):
            model1.addConstr(m2[i][j][j_] - 0*Q[j][j_] - a[i]*(-np.sqrt(K[j][j]*K[j_][j_])) + 0*(-np.sqrt(K[j][j]*K[j_][j_])) >= 0, "m2-1-%d%d%d" %(i,j,j_))
            model1.addConstr(m2[i][j][j_] - ub_a[i]*Q[j][j_] - a[i]*K[j][j_] + ub_a[i]*K[j][j_] >= 0, "m2-2-%d%d%d" %(i,j,j_))
            model1.addConstr(m2[i][j][j_] - ub_a[i]*Q[j][j_] - a[i]*(-np.sqrt(K[j][j]*K[j_][j_])) + ub_a[i]*(-np.sqrt(K[j][j]*K[j_][j_])) <= 0, "m2-3-%d%d%d" %(i,j,j_))
            model1.addConstr(m2[i][j][j_] - a[i]*K[j][j_] - 0*Q[j][j_] + 0*K[j][j_] <= 0, "m2-4-%d%d%d" %(i,j,j_))

# ub-m2 -- ub_a[i]*K[j][j_]
# lb-m2 -- -ub_a[i]*(np.sqrt(K[j][j]*K[j_][j_]))

# M3
for i in range(act_dim):
    for i_ in range(act_dim):
        for j in range(state_dim):
            for j_ in range(state_dim):
                model1.addConstr(m3[i][i_][j][j_] - 0*m2[i][j][j_] - a[i_]*(-ub_a[i]*np.sqrt(K[j][j]*K[j_][j_])) + 0*(-ub_a[i]*np.sqrt(K[j][j]*K[j_][j_])) >= 0)
                model1.addConstr(m3[i][i_][j][j_] - ub_a[i_]*m2[i][j][j_] - a[i_]*(ub_a[i]*K[j][j_]) + ub_a[i_]*(ub_a[i]*K[j][j_]) >= 0)
                model1.addConstr(m3[i][i_][j][j_] - ub_a[i_]*m2[i][j][j_] - a[i_]*(-ub_a[i]*np.sqrt(K[j][j]*K[j_][j_])) + ub_a[i_]*(-ub_a[i]*np.sqrt(K[j][j]*K[j_][j_])) <= 0)
                model1.addConstr(m3[i][i_][j][j_] - a[i_]*(ub_a[i]*K[j][j_]) - 0*m2[i][j][j_] + 0*(ub_a[i]*K[j][j_]) <= 0)


for i in range(state_dim):
    for j in range(state_dim):
        model1.addConstr(m4[i][j] <= 1)
        model1.addConstr(m4[i][j] >= -1)
        model1.addConstr(m5[i][j] <= 1)
        model1.addConstr(m5[i][j] >= -1)
        
        

result = model1.optimize() 
for v in model1.getVars():
    print('%s %g' % (v.varName, v.x))

Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 2440 rows, 796 columns and 5987 nonzeros
Model fingerprint: 0x41affc89
Variable types: 794 continuous, 2 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+00]
  Objective range  [1e+00, 1e+01]
  Bounds range     [1e+00, 2e+00]
  RHS range        [1e+00, 9e+00]
Presolve removed 2388 rows and 775 columns
Presolve time: 0.01s
Presolved: 52 rows, 21 columns, 133 nonzeros
Variable types: 19 continuous, 2 integer (1 binary)

Root relaxation: objective 6.856667e+02, 20 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0  595.00000    0    2          -  595.00000      -     -    0s
H    0     0                     203.0000000  595.00000   193%     -    0s
H    0     0                     556.0000000  595.00000  7.01%     -    0s
     0     0     cutoff    0   

# MIP Formulation 2 -- penalty on mean deviation

In [51]:
import gurobipy as gp
from gurobipy import GRB
import numpy as np

model2 = gp.Model()
a = [0 for i in range(act_dim)]
w = [0 for i in range(state_dim)]
u = [0 for i in range(state_dim)]
m0 = [[0 for j in range(state_dim)] for i in range(act_dim)]
m1 = [[0 for j in range(state_dim)] for i in range(act_dim)]
r = model2.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'r')
q = model2.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'q')
for i in range(act_dim):
    a[i] = model2.addVar(vtype=GRB.INTEGER, lb = 0.0, ub = ub_a[i], name = 'a%d' %i)
for i in range(state_dim):
    w[i] = model2.addVar(vtype=GRB.CONTINUOUS, lb = 0.0, ub = k[i], name = 'w%d' %i)
for i in range(state_dim):
    u[i] = model2.addVar(vtype=GRB.CONTINUOUS, lb = 0.0, ub = k[i], name = 'u%d' %i)
for i in range(act_dim):
    for j in range(state_dim):
        m0[i][j] = model2.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm0-%d%d' %(i,j))
for i in range(act_dim):
    for j in range(state_dim):
        m1[i][j] = model2.addVar(vtype=GRB.CONTINUOUS, lb = -GRB.INFINITY, ub = GRB.INFINITY, name = 'm1-%d%d' %(i,j))

# Objective
obj = 1*r + bar_r_s
for i in range(act_dim):
    obj.addTerms(bar_r_s*delta_s[i], a[i])
for j in range(state_dim):
    obj.addTerms(-bar_eta_s[j], w[j])
for i in range(act_dim):
    for j in range(state_dim):
        obj.addTerms(-(bar_eta_s[j]*rho_s[i][j]), m0[i][j])
for j in range(state_dim):
    obj.addTerms(bar_eta_s[j], u[j])
for i in range(act_dim):
    for j in range(state_dim):
        obj.addTerms(bar_eta_s[j]*rho_s[i][j], m1[i][j])
model2.setObjective(obj, GRB.MAXIMIZE)   

# Constraint 1
model2.addConstr(q-r, GRB.GREATER_EQUAL, 0, "c1");

# Constraint 2
for j in range(state_dim):
    model2.addConstr(lam*next_V[j] + w[j] - u[j] - q >= 0)
    
# McCormick envelopes
# M0
for i in range(act_dim):
    for j in range(state_dim):
        model2.addConstr(m0[i][j] - 0*w[j] - a[i]*0 + 0*0 >= 0, "m0-1-%d%d" %(i,j))
        model2.addConstr(m0[i][j] - ub_a[i]*w[j] - a[i]*k[j] + ub_a[i]*k[j] >= 0, "m0-2-%d%d" %(i,j))
        model2.addConstr(m0[i][j] - ub_a[i]*w[j] - a[i]*0 + ub_a[i]*0 <= 0, "m0-3-%d%d" %(i,j))
        model2.addConstr(m0[i][j] - a[i]*k[j] - 0*w[j] + 0*k[j] <= 0, "m0-4-%d%d" %(i,j))
    
# M1
for i in range(act_dim):
    for j in range(state_dim):
        model2.addConstr(m1[i][j] - 0*u[j] - a[i]*0 + 0*0 >= 0, "m1-1-%d%d" %(i,j))
        model2.addConstr(m1[i][j] - ub_a[i]*u[j] - a[i]*k[j] + ub_a[i]*k[j] >= 0, "m1-2-%d%d" %(i,j))
        model2.addConstr(m1[i][j] - ub_a[i]*u[j] - a[i]*0 + ub_a[i]*0 <= 0, "m1-3-%d%d" %(i,j))
        model2.addConstr(m1[i][j] - a[i]*k[j] - 0*u[j] + 0*k[j] <= 0, "m1-4-%d%d" %(i,j))

result = model2.optimize() 
for v in model2.getVars():
    print('%s %g' % (v.varName, v.x))

Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 154 rows, 58 columns and 317 nonzeros
Model fingerprint: 0x5f1d3a0a
Variable types: 56 continuous, 2 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+00]
  Objective range  [1e+00, 1e+01]
  Bounds range     [1e+00, 2e+00]
  RHS range        [1e+00, 9e+00]
Found heuristic solution: objective 40.0000000
Presolve removed 141 rows and 49 columns
Presolve time: 0.00s
Presolved: 13 rows, 9 columns, 31 nonzeros
Found heuristic solution: objective 48.0000000
Variable types: 7 continuous, 2 integer (1 binary)

Root relaxation: objective 8.400000e+01, 1 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

*    0     0               0      84.0000000   84.00000  0.00%     -    0s

Explored 0 nodes (1 simplex iterations) in 0.02 seconds
Thread count was 8 (of 8 available proce