In [12]:
import numpy as np

test = np.load('test.npy')

In [13]:
test = test.reshape(18, -1)
print(test.shape)

(18, 18)


In [14]:
import sys
sys.path.append("..")
from mars.equilibrium_solver import NashEquilibriumECOSSolver, NashEquilibriumMWUSolver, NashEquilibriumParallelMWUSolver

all_dists, all_ne_values = NashEquilibriumParallelMWUSolver([test])
print(all_dists, all_ne_values)

[[[0.00372962 0.00596329 0.01129506 0.00949955 0.00793032 0.00495278
   0.07511731 0.0023361  0.82028966 0.00551273 0.00343987 0.02953115
   0.00319817 0.0044526  0.00281389 0.00400827 0.00304228 0.00288733]
  [0.06468436 0.08485117 0.04993061 0.0731729  0.04430808 0.0537998
   0.03803814 0.03923462 0.05098566 0.05272793 0.03855566 0.02669147
   0.06491771 0.10029328 0.02242828 0.05713011 0.08398317 0.05426651]]] [10.0696301]


In [15]:
all_dists, all_ne_values = NashEquilibriumECOSSolver(test)
print(all_dists, all_ne_values)

(array([3.68420245e-11, 8.70780568e-11, 1.34364502e-10, 1.73996001e-10,
       1.41773746e-10, 5.78266602e-11, 8.65573113e-10, 1.90947802e-11,
       9.99999998e-01, 7.38973225e-11, 1.85573915e-11, 2.68601907e-10,
       6.05324137e-12, 4.72596831e-11, 1.23540629e-11, 3.35665454e-11,
       1.25879486e-11, 7.67281956e-12]), array([7.91304485e-09, 2.33073186e-09, 4.57582617e-09, 5.39447349e-09,
       3.98977170e-09, 6.81164853e-09, 2.49367279e-09, 2.52734253e-09,
       3.79759084e-09, 5.76069426e-09, 2.75630817e-09, 1.21474592e-09,
       5.30544777e-09, 9.99999927e-01, 1.13202839e-09, 5.72141246e-09,
       6.82444074e-09, 3.95812496e-09])) 10.003111842652576


  warn("Converting G to a CSC matrix; may take a while.")
  warn("Converting A to a CSC matrix; may take a while.")


In [16]:
all_dists, all_ne_values = NashEquilibriumMWUSolver(test)
print(all_dists, all_ne_values)

[[0.00161157 0.00261999 0.00502514 0.00421547 0.00350759 0.002164
  0.03402338 0.00098113 0.91992777 0.00241669 0.00148061 0.01325051
  0.00137138 0.00193805 0.00119753 0.00173749 0.00130088 0.00123076]
 [0.0739167  0.11869727 0.0379308  0.10193174 0.03162168 0.04990728
  0.02194402 0.02259455 0.0355025  0.04519605 0.02265265 0.01240888
  0.05521229 0.17743388 0.01028354 0.04908657 0.09465985 0.03901952]] 9.976699830810343


In [17]:
import copy

def NashEquilibriumParallelMWUSolver(A, Itr=5000, verbose=False):
    """ Solve mulitple Nash equilibrium with multiplicative weights udpate."""
    EPS = 1e-7 # prevent numerical problem
    A = np.array(A)
    matrix_num = A.shape[0]
    row_action_num = A.shape[1]
    col_action_num = A.shape[2]
    learning_rate = np.sqrt(np.log(row_action_num)/Itr)  # sqrt(log |A| / T)

    row_policy = np.ones(row_action_num)/row_action_num
    col_policy = np.ones(col_action_num)/col_action_num
    policies = np.array(matrix_num*[[row_policy, col_policy]])
    final_policy = copy.deepcopy(policies)

    for i in range(Itr):
        # for row player, maximizer
        policies_ = copy.deepcopy(policies)  # track old value before update (update is inplace)
        payoff_vec = np.einsum('nb,nab->na', policies_[:, 1], A) 
        policies[:, 0] = policies[:, 0] * np.exp(learning_rate*payoff_vec)

        # for col player, minimizer
        payoff_vec = np.einsum('na,nab->nb', policies_[:, 0], A) 
        policies[:, 1] = policies[:, 1] * np.exp(-learning_rate*payoff_vec)


        # above is unnormalized, normalize it to be distribution
        policies = policies/np.expand_dims(EPS+np.sum(policies, axis=-1), -1)
        # MWU is average-iterate coverging, so accumulate polices
        final_policy += policies

    final_policy = final_policy / (Itr+1)

    if verbose:
        print(f'For row player, strategy is {final_policy[:, 0]}')
        print(f'For column player, strategy is {final_policy[:, 1]}')
        print(learning_rate)
    
    nash_value = np.einsum('nb,nb->n', np.einsum('na,nab->nb', policies[:, 0], A), final_policy[:, 1])

    return final_policy, nash_value

all_dists, all_ne_values = NashEquilibriumParallelMWUSolver([test])
print(all_dists, all_ne_values)

[[[4.98451100e-04 8.18024637e-04 1.58012176e-03 1.32360372e-03
   1.09929940e-03 6.73543091e-04 1.07702293e-02 2.98566218e-04
   9.74833614e-01 7.53606379e-04 4.56938250e-04 4.18643654e-03
   4.22318211e-04 6.01910960e-04 3.67191054e-04 5.38370793e-04
   3.99965950e-04 3.77730465e-04]
  [5.20287984e-02 1.35215508e-01 1.42089288e-02 1.23702172e-01
   1.13354754e-02 2.39619668e-02 7.12724594e-03 7.33720067e-03
   1.24830220e-02 1.92213963e-02 7.41009006e-03 3.90878657e-03
   2.27921525e-02 4.60971303e-01 3.23877813e-03 2.08185517e-02
   6.02951682e-02 1.39433286e-02]]] [10.0260578]


In [20]:
# test nash q and two side q
import numpy as np

action_dim = 4
q1 = np.random.uniform(0,1, action_dim)
q2 = np.random.uniform(0,1, action_dim)
a1 = np.argmax(q1)
a2 = np.argmax(q2)
print(q1, a1)
print(q2, a2)

[0.02641117 0.47058416 0.73503864 0.10125202] 2
[0.95882296 0.69388345 0.23338964 0.54529173] 0


In [21]:
merged_q = 0.5*(q1[:,  None] - q2[None])
print(merged_q)

dist, _ = NashEquilibriumParallelMWUSolver([merged_q])

[[-0.4662059  -0.33373614 -0.10348924 -0.25944028]
 [-0.2441194  -0.11164965  0.11859726 -0.03735379]
 [-0.11189216  0.02057759  0.2508245   0.09487345]
 [-0.42878547 -0.29631572 -0.06606881 -0.22201986]]


In [22]:
print(dist)

[[[0.01356672 0.05192131 0.91874831 0.01576355]
  [0.91107859 0.04951452 0.01255505 0.02685174]]]


In [None]:
import sys
sys.path.append("..")
from mars.equilibrium_solver import NashEquilibriumECOSSolver, NashEquilibriumMWUSolver, NashEquilibriumParallelMWUSolver

q1 = np.random.uniform(0,1, action_dim*action_dim)
dist, _ = NashEquilibriumParallelMWUSolver([q1])

In [32]:
import math

eps_start = 1.
eps_final = 1e-3
eps_decay = 1e5
delta_frame_idx = 300*20000

epsilon = eps_final + (eps_start - eps_final) * math.exp(-1. * delta_frame_idx / eps_decay)
print(epsilon)


0.0034762734244896923
