In [19]:
from gurobipy import *
import numpy as np

In [139]:
class RSP_conj:
    
    def __init__(self):
        return
    
    def prog(self):
        
        options = ['R', 'S', 'P']
        m = Model("RSP-Program")
        # we will maximize the slack vars in the strict inequalities
        m.setAttr("ModelSense", -1)
        # allow non-convex quadratic constraints
        m.setParam("NonConvex", 2)
        
        ## decision variables
        
        # the player's distribution
        p = m.addVars(options, vtype = GRB.CONTINUOUS, name = 'p')
        
        # the opponent distribution
        p_op = m.addVars(options, vtype = GRB.CONTINUOUS, name = 'p*')
        
        # vars for the strict inequalitys
        epsilon = m.addVars(['c','b'], vtype = GRB.CONTINUOUS, name = 'epsilon', obj = 1.0)
        
        # the strategy where the player picks an option randomly
        random_strategy = {o: 1/3 for o in options}

        ## constraints
        
        # prob dist constraints 
        m.addConstr(quicksum(p[i] for i in options) == 1)
        m.addConstr(quicksum(p_op[i] for i in options) == 1)
        
        # aux vars to handle abs value in the tv constr
        d_p = m.addVars(options, vtype = GRB.CONTINUOUS, lb = - GRB.INFINITY, name = 'd_p')
        d_op = m.addVars(options, vtype = GRB.CONTINUOUS, lb = - GRB.INFINITY, name = 'd_op')
        a_p = m.addVars(options, vtype = GRB.CONTINUOUS, lb = - GRB.INFINITY, name = 'a_p')
        a_op = m.addVars(options, vtype = GRB.CONTINUOUS, lb = - GRB.INFINITY, name = 'a_op')
        # the total variation distance between the player's strategy and the opponent's must be less the random strategy and the opponent's
        m.addConstrs(d_p[i] == p[i] - p_op[i] for i in options)
        m.addConstrs(a_p[i] == abs_(d_p[i]) for i in options)
        m.addConstrs(d_op[i] == random_strategy[i] - p_op[i] for i in options)
        m.addConstrs(a_op[i] == abs_(d_op[i]) for i in options)
        m.addConstr(quicksum(a_p[i] for i in options) <= quicksum(a_op[i] for i in options))
        
        # the utility for the cyclic response must be negative
        m.addQConstr(p['R']*(p_op['R']-p_op['S']) + p['S']*(p_op['S'] - p_op['P']) + p['S']*(p_op['P'] - p_op['R']) + epsilon['c'] <= 0)
        
        # the utility for best response must be negative 
        indic = m.addVars(options, vtype = GRB.BINARY, name = 'x')
        
        z = m.addVar(vtype = GRB.CONTINUOUS, name = 'z')
        m.addConstr(z >= p['S'] - p['P']) 
        m.addConstr(z >= p['P'] - p['R'])
        m.addConstr(z >= p['R'] - p['S'])
        m.addConstr((indic['R'] == 1) >> (z <= p['S'] - p['P']))
        m.addConstr((indic['S'] == 1) >> (z <= p['P'] - p['R']))
        m.addConstr((indic['P'] == 1) >> (z <= p['R'] - p['S']))
        
        m.addConstr(quicksum(indic[i] for i in options) == 1)
        
        m.addQConstr(indic['R']*(p_op['S'] - p_op['P']) + indic['S']*(p_op['P'] - p_op['R']) + indic['P']*(p_op['R'] - p_op['S']) + epsilon['b'] <= 0)
        
        ## solve
        m.optimize()
        
        self.solution = m
        return

In [140]:
example = RSP_conj()
example.prog()

Changed value of parameter NonConvex to 2
   Prev: -1  Min: -1  Max: 2  Default: -1
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (linux64)
Optimize a model with 13 rows, 25 columns and 39 nonzeros
Model fingerprint: 0xbf29d414
Model has 2 quadratic constraints
Model has 9 general constraints
Variable types: 22 continuous, 3 integer (3 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  QMatrix range    [1e+00, 1e+00]
  QLMatrix range   [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e-01, 1e+00]
Presolve added 15 rows and 2 columns
Presolve time: 0.00s
Presolved: 47 rows, 37 columns, 133 nonzeros
Presolved model has 4 bilinear constraint(s)
Variable types: 31 continuous, 6 integer (6 binary)

Root relaxation: objective 2.000000e+00, 25 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

   

In [141]:
for v in example.solution.getVars():
    print('%s = %g' % (v.varName, v.x))

p[R] = 0.666667
p[S] = 0.333333
p[P] = 0
p*[R] = 0
p*[S] = 1
p*[P] = 0
epsilon[c] = 0.333333
epsilon[b] = 1
d_p[R] = 0.666667
d_p[S] = -0.666667
d_p[P] = 0
d_op[R] = 0.333333
d_op[S] = -0.666667
d_op[P] = 0.333333
a_p[R] = 0.666667
a_p[S] = 0.666667
a_p[P] = 0
a_op[R] = 0.333333
a_op[S] = 0.666667
a_op[P] = 0.333333
x[R] = -0
x[S] = 0
x[P] = 1
z = 0.333333
delta = 0
