In [1]:
import sys
import matplotlib.pyplot as plt
import numpy as np
import cvxopt

## Friend Q

In [2]:
# Small 2x3 example table used to try out the arg-max lookup.
arr = np.array([[2,6,8],
         [-1,5,4]])
# np.argmax without an axis returns the index into the flattened array.
argmax = np.argmax(arr)
# Convert the flat index back to a (row, column) position. unravel_index
# works for any number of dimensions, unlike manual // and % arithmetic,
# which is only correct for 2-D arrays.
argmax_pos = np.unravel_index(argmax, arr.shape)
argmax_pos

(0, 2)

In [3]:
# Turn the values in `arr` into a sampling distribution: shift them so the
# smallest entry becomes zero, then divide by the total so they sum to one.
# The smallest entry therefore always receives probability zero.
shifted = arr - np.min(arr)
total = np.sum(shifted)
if total > 0:
    p = shifted / total
else:
    # Every entry equals the minimum, so the shifted values are all zero and
    # the division would produce NaNs (0/0). Fall back to a uniform
    # distribution so that the result is still a valid probability vector.
    p = np.ones(arr.shape) / arr.size
# Flatten so that p can be indexed by a single flat action index.
p = p.flatten()
p

array([ 0.1       ,  0.23333333,  0.3       ,  0.        ,  0.2       ,
        0.16666667])

In [4]:
# Sample one flat index according to the distribution p.
# A dedicated generator with a fixed seed keeps the draw reproducible on
# "Restart & Run All" without touching numpy's global random state.
rng = np.random.RandomState(0)
a = rng.choice(len(p), p=p)
a

2

## Foe Q

In [5]:
# 3x3 payoff table for the Foe-Q linear program built in the cells below.
# Its rows correspond to the strategy variables x[1:], its columns to the
# payoff constraints (one per column).
arr = np.array([
    [20, -10,   5],
    [ 5,  10, -10],
    [-5,   0,  10],
])
# Number of actions = number of rows of the payoff table.
NACTIONS = len(arr)

In [6]:
# Start the inequality matrix with one row per strategy variable: the
# (NACTIONS + 1)-identity without its first row, negated. Together with
# h = 0 in `G x <= h` these rows read  -x[i] <= 0  for i = 1..NACTIONS;
# x[0] gets no sign constraint.
G = -np.identity(NACTIONS + 1)[1:, :]
G

array([[-0., -1., -0., -0.],
       [-0., -0., -1., -0.],
       [-0., -0., -0., -1.]])

In [7]:
# Add one inequality row per column j of the payoff table:
#   [1, -arr[0, j], -arr[1, j], ...]
# i.e.  x[0] - sum_i arr[i, j] * x[i + 1] <= 0  once h is set to zero.
payoff_rows = np.insert(-arr.T, 0, 1, axis=1)
G = np.concatenate((G, payoff_rows), axis=0)
G

array([[ -0.,  -1.,  -0.,  -0.],
       [ -0.,  -0.,  -1.,  -0.],
       [ -0.,  -0.,  -0.,  -1.],
       [  1., -20.,  -5.,   5.],
       [  1.,  10., -10.,   0.],
       [  1.,  -5.,  10., -10.]])

In [8]:
# Transpose so that G.tolist() yields the columns of the constraint matrix;
# the later cvxopt.matrix(G.tolist()) call is fed that list of lists.
# Note: this cell is not idempotent - running it twice undoes the transpose.
G = np.transpose(G)
G

array([[ -0.,  -0.,  -0.,   1.,   1.,   1.],
       [ -1.,  -0.,  -0., -20.,  10.,  -5.],
       [ -0.,  -1.,  -0.,  -5., -10.,  10.],
       [ -0.,  -0.,  -1.,   5.,   0., -10.]])

In [9]:
# Right-hand side of `G x <= h`: one zero per inequality. G is transposed
# at this point, so the inequalities are counted along its second axis.
n_inequalities = G.shape[1]
h = np.zeros(n_inequalities)
h

array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [10]:
# Objective vector: the solver minimises c.x, so a single -1 on x[0]
# maximises x[0]; the remaining variables do not enter the objective.
# G is transposed here, so its first axis counts the LP variables.
n_variables = G.shape[0]
c = np.array([-1.0] + [0.0] * (n_variables - 1))
c

array([-1.,  0.,  0.,  0.])

In [11]:
# Equality constraint coefficients, stored as a column so that A.tolist()
# gives one single-element list per variable. The leading 0 leaves x[0] out;
# with b = 1 the constraint is  x[1] + ... + x[NACTIONS] = 1.
n_variables = G.shape[0]
A = np.array([[0.0]] + [[1.0]] * (n_variables - 1))
A

array([[ 0.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [12]:
# Right-hand side of the equality constraint `A x = b` (a single value, 1).
b = [[1.0]]

In [13]:
# Convert the numpy pieces of the LP to cvxopt matrices, going through plain
# Python lists. Note: this rebinds G, h, c and A to cvxopt objects, so the
# cell cannot be re-run without first re-running the cells that build them.
G, h, c, A = [cvxopt.matrix(dense.tolist()) for dense in (G, h, c, A)]
b = cvxopt.matrix(b)

In [14]:
# Solve  minimise c.x  subject to  G x <= h,  A x = b  with the GLPK backend.
solution = cvxopt.solvers.lp(c, G, h, A, b, solver='glpk')
# lp() signals failure through the 'status' entry instead of raising; in
# that case 'x' and the objective are None and later cells would fail with
# an unrelated-looking error. Stop here with a clear message instead.
if solution['status'] != 'optimal':
    raise RuntimeError("LP solver did not find an optimum (status: {})".format(solution['status']))
print("objective = {}".format(solution['primal objective']))
print("x = {}".format(solution['x']))

objective = -2.1428571428571432
x = [ 2.14e+00]
[ 1.43e-01]
[ 3.57e-01]
[ 5.00e-01]



In [15]:
# Drop x[0] (the maximised variable) and show the remaining entries, which
# the equality constraint forces to sum to one.
[entry for entry in solution['x']][1:]

[0.14285714285714288, 0.3571428571428572, 0.5000000000000001]

## Matrix multiplication

In [16]:
# Same 3x3 payoff table as in the Foe-Q section, plus a hand-picked weight
# per row (the weights sum to one).
arr = np.array([
    [20, -10,   5],
    [ 5,  10, -10],
    [-5,   0,  10],
])
p = [0.3, 0.2, 0.5]

In [17]:
# Scale every row i of the payoff table by its weight p[i]:
# w[i, j] = arr[i, j] * p[i].
row_weights = np.array(p)[:, np.newaxis]
w = arr * row_weights
w

array([[ 6. , -3. ,  1.5],
       [ 1. ,  2. , -2. ],
       [-2.5,  0. ,  5. ]])

In [18]:
# Sum the weighted rows: sw[j] is the total of column j of w.
sw = w.sum(axis=0)
sw

array([ 4.5, -1. ,  4.5])

In [19]:
# Smallest of the column totals.
sw.min()

-1.0

## Env test

In [20]:
from envs import Soccer

In [21]:
# Create the Soccer environment (imported from the local `envs` module) with
# its debug flag switched on.
# NOTE(review): debug=True presumably enables the board printout shown after
# step() - confirm in envs.Soccer.
env = Soccer(debug=True)

In [22]:
# Reset the environment; whatever reset() returns is not captured here.
env.reset()

In [23]:
# Advance the environment by one step with the action list [3, 2].
# NOTE(review): presumably one action index per player - confirm against
# Soccer.step.
env.step([3,2])

0 :  [3, 2]
#Ba#
####


#Ba#
####

## Correlated equilibrium (CE) LP

In [24]:
# Payoff tables of a two-player, two-action game. Qs[0] is used for the row
# player's constraints and Qs[1] for the column player's; both are indexed
# as [row action, column action].
row_player_Q = np.array([[6., 2.],
                         [7., 0.]])
col_player_Q = np.array([[6., 7.],
                         [2., 0.]])
Qs = [row_player_Q, col_player_Q]
# Number of actions available to each player.
NACTIONS = len(row_player_Q)

In [25]:
# One LP variable per joint action (NACTIONS * NACTIONS of them). Start the
# inequality matrix with the negated identity, i.e.  -x[k] <= 0  for every
# variable once h is set to zero.
n_joint_actions = NACTIONS * NACTIONS
G = -np.identity(n_joint_actions)
G

array([[-1., -0., -0., -0.],
       [-0., -1., -0., -0.],
       [-0., -0., -1., -0.],
       [-0., -0., -0., -1.]])

### constraints for row player

In [26]:
# Constraints for the row player, one per ordered pair (chosen, deviation)
# of distinct row actions:
#   sum_col x[chosen, col] * (Q[chosen, col] - Q[deviation, col]) >= 0
# negated to fit the `G x <= 0` form. The joint action (row, col) is stored
# at flat index row * NACTIONS + col.
#
# Descriptive loop names are used on purpose: short names such as `p` or `c`
# would overwrite the notebook-level probability and objective variables.
row_payoff = Qs[0]
row_constraints = []
for chosen in range(NACTIONS):
    for deviation in range(NACTIONS):
        if chosen == deviation:
            continue
        gain = row_payoff[chosen] - row_payoff[deviation]
        coeffs = np.zeros(NACTIONS * NACTIONS)
        # Entries of row `chosen` occupy one contiguous slice of the flat vector.
        coeffs[chosen * NACTIONS:(chosen + 1) * NACTIONS] = gain
        row_constraints.append(coeffs * -1)
# Stack all new rows under G in one go instead of re-allocating per row.
G = np.vstack([G] + row_constraints)

### constraints for column player

In [27]:
# Constraints for the column player, one per ordered pair (chosen, deviation)
# of distinct column actions:
#   sum_row x[row, chosen] * (Q[row, chosen] - Q[row, deviation]) >= 0
# negated to fit the `G x <= 0` form. The joint action (row, col) is stored
# at flat index row * NACTIONS + col.
#
# Descriptive loop names are used on purpose: short names such as `p` or `c`
# would overwrite the notebook-level probability and objective variables.
col_payoff = Qs[1].T  # col_payoff[a] = payoffs of column action a, per row action
col_constraints = []
for chosen in range(NACTIONS):
    for deviation in range(NACTIONS):
        if chosen == deviation:
            continue
        gain = col_payoff[chosen] - col_payoff[deviation]
        coeffs = np.zeros(NACTIONS * NACTIONS)
        # Entries of column `chosen` sit NACTIONS apart in the flat vector.
        coeffs[chosen::NACTIONS] = gain
        col_constraints.append(coeffs * -1)
# Stack all new rows under G in one go instead of re-allocating per row.
G = np.vstack([G] + col_constraints)
G

array([[-1., -0., -0., -0.],
       [-0., -1., -0., -0.],
       [-0., -0., -1., -0.],
       [-0., -0., -0., -1.],
       [ 1., -2., -0., -0.],
       [-0., -0., -1.,  2.],
       [ 1., -0., -2., -0.],
       [-0., -1., -0.,  2.]])

In [28]:
# Transpose so that G.tolist() yields the columns of the constraint matrix;
# the later cvxopt.matrix(G.tolist()) call is fed that list of lists.
# Note: this cell is not idempotent - running it twice undoes the transpose.
G = np.transpose(G)
G

array([[-1., -0., -0., -0.,  1., -0.,  1., -0.],
       [-0., -1., -0., -0., -2., -0., -0., -1.],
       [-0., -0., -1., -0., -0., -1., -2., -0.],
       [-0., -0., -0., -1., -0.,  2., -0.,  2.]])

In [29]:
# Right-hand side of `G x <= h`: one zero per inequality. G is transposed
# at this point, so the inequalities are counted along its second axis.
n_inequalities = G.shape[1]
h = np.zeros(n_inequalities)
h

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [30]:
# Objective: the solver minimises c.x, so negating the element-wise sum of
# both payoff tables maximises the combined payoff of the two players.
combined_payoff = (Qs[0] + Qs[1]).flatten()
c = -combined_payoff
c.tolist()

[-12.0, -9.0, -9.0, -0.0]

In [31]:
# Equality constraint coefficients, stored as a column so that A.tolist()
# gives one single-element list per variable. All ones: with b = 1 the
# constraint says that the variables sum to one.
A = np.ones(G.shape[0])[:, np.newaxis]
A

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [32]:
# Right-hand side of the equality constraint `A x = b` (a single value, 1).
b = [[1.0]]

In [33]:
# Convert the numpy pieces of the LP to cvxopt matrices, going through plain
# Python lists. Note: this rebinds G, h, c and A to cvxopt objects, so the
# cell cannot be re-run without first re-running the cells that build them.
G, h, c, A = [cvxopt.matrix(dense.tolist()) for dense in (G, h, c, A)]
b = cvxopt.matrix(b)

In [34]:
# Solve  minimise c.x  subject to  G x <= h,  A x = b  with the GLPK backend.
solution = cvxopt.solvers.lp(c, G, h, A, b, solver='glpk')
# lp() signals failure through the 'status' entry instead of raising; in
# that case 'x' and the objective are None and later cells would fail with
# an unrelated-looking error. Stop here with a clear message instead.
if solution['status'] != 'optimal':
    raise RuntimeError("LP solver did not find an optimum (status: {})".format(solution['status']))
print("objective = {}".format(solution['primal objective']))
print("x = {}".format(solution['x']))

objective = -10.5
x = [ 5.00e-01]
[ 2.50e-01]
[ 2.50e-01]
[ 0.00e+00]



In [35]:
# Arrange the flat solution vector as a NACTIONS x NACTIONS table so that
# p[row, col] is the weight of the joint action (row, col).
joint_weights = np.array(list(solution['x']))
p = joint_weights.reshape((NACTIONS, NACTIONS))
p

array([[ 0.5 ,  0.25],
       [ 0.25,  0.  ]])

In [36]:
# Objective value at the optimum. c holds the negated sum of both payoff
# tables, so this is minus the combined payoff under the solution.
solution['primal objective']

-10.5