In [1]:
import numpy as np
from typing import Literal

In [2]:
# Kinds of payoff feedback a player can be given; consumed by
# Game.get_feedbacks / Game.update_values via their `feedback_type` argument.
FeedbackType = Literal[
    'mixed_vector',
    'pure_vector',
    'bandit',
]

In [77]:
class Game:
    """Two-player matrix game that produces payoff feedback for learning dynamics.

    ``payoffs[k][i, j]`` is the payoff to player ``k`` when player 0 plays
    action ``i`` and player 1 plays action ``j`` (this is the convention
    implied by the ``'mixed_vector'`` feedback: ``payoffs[0] @ s1`` and
    ``payoffs[1].T @ s0``).

    Attributes:
        payoffs: Indexable pair of 2-D payoff matrices, one per player.
        feedback_type: Default feedback model used when a method is called
            without an explicit ``feedback_type``.
    """

    def __init__(self, payoffs, feedback_type: "FeedbackType" = 'mixed_vector'):
        self.payoffs = payoffs
        self.feedback_type = feedback_type

    def __str__(self):
        """Return a short human-readable description of the game."""
        # __str__ must return a string; returning None makes str(game) raise.
        return (f"{type(self).__name__}(feedback_type={self.feedback_type!r}, "
                f"payoffs={np.asarray(self.payoffs).tolist()})")

    def update_values(self,
                      strategies,
                      values,
                      learning_rate=0.1,
                      feedback_type: "FeedbackType | None" = None):
        """Take one learning step: ``values[i] + learning_rate * feedback[i]``.

        Args:
            strategies: Current mixed strategies, one probability vector per player.
            values: Current cumulative scores, one vector per player.
            learning_rate: Step size applied to the feedback.
            feedback_type: Feedback model; defaults to ``self.feedback_type``.

        Returns:
            A new list holding the updated score vectors as float arrays.
            The ``values`` argument is left untouched so that re-running a
            notebook cell always starts from the same initial state.

        Raises:
            ValueError: If the feedback type is not a supported one.
        """
        if feedback_type is None:
            feedback_type = self.feedback_type

        feedbacks = self.get_feedbacks(strategies, feedback_type)

        # Shallow-copy the outer container, then rebind (never mutate) entries.
        updated = list(values)
        for player, feedback in enumerate(feedbacks):
            updated[player] = (np.asarray(values[player], dtype=float)
                               + learning_rate * np.asarray(feedback, dtype=float))
        return updated

    def get_strategies(self, values, dynamics="exponential-weight"):
        """Map score vectors to mixed strategies with exponential weights (softmax).

        Args:
            values: One score vector per player.
            dynamics: Currently ignored; exponential weights is the only
                dynamics implemented.

        Returns:
            A list with one probability vector (list of floats) per player.
        """
        strategies = []
        for value in values:
            scores = np.asarray(value, dtype=float)
            if scores.size == 0:
                strategies.append([])
                continue
            # Softmax is invariant to a constant shift; subtracting the max
            # keeps np.exp from overflowing to inf (and the ratio from NaN).
            weights = np.exp(scores - np.max(scores))
            strategies.append((weights / np.sum(weights)).tolist())
        return strategies

    def get_feedbacks(self, strategies, feedback_type: "FeedbackType | None" = None):
        """Compute the payoff feedback each player observes.

        Args:
            strategies: Mixed strategies ``[s0, s1]`` of the two players.
            feedback_type: One of
                ``'mixed_vector'`` - expected payoff of every own action
                against the opponent's mixed strategy;
                ``'pure_vector'`` - payoff of every own action against the
                opponent's sampled action;
                ``'bandit'`` - importance-weighted payoff of the sampled
                action only (zero for every other action).
                Defaults to ``self.feedback_type``.

        Returns:
            ``[feedback_0, feedback_1]``, one array per player.

        Raises:
            ValueError: If ``feedback_type`` is not one of the values above.
        """
        if feedback_type is None:
            feedback_type = self.feedback_type

        payoff_0 = np.asarray(self.payoffs[0])
        payoff_1 = np.asarray(self.payoffs[1])
        strategy_0 = np.asarray(strategies[0], dtype=float)
        strategy_1 = np.asarray(strategies[1], dtype=float)

        if feedback_type == 'mixed_vector':
            return [np.matmul(payoff_0, strategy_1),
                    np.matmul(payoff_1.T, strategy_0)]

        # Validate before sampling so an invalid call neither consumes
        # random numbers nor silently returns None.
        if feedback_type not in ('pure_vector', 'bandit'):
            raise ValueError(
                f"Unknown feedback_type {feedback_type!r}; expected "
                "'mixed_vector', 'pure_vector' or 'bandit'."
            )

        action_0 = Game._sample_from_distribution(strategy_0)
        action_1 = Game._sample_from_distribution(strategy_1)

        if feedback_type == 'pure_vector':
            # Player 0 chooses the row, so its payoff vector against the
            # sampled column is a column of its matrix; player 1 chooses the
            # column, so its vector is the sampled row of its matrix.
            return [payoff_0[:, action_1].copy(),
                    payoff_1[action_0, :].copy()]

        # 'bandit': only the realised payoff is observed. Dividing it by the
        # probability of the sampled action makes the estimate unbiased; that
        # probability is positive because the action was actually drawn.
        feedback_0 = np.zeros(len(strategy_0))
        feedback_0[action_0] = payoff_0[action_0, action_1] / strategy_0[action_0]
        feedback_1 = np.zeros(len(strategy_1))
        feedback_1[action_1] = payoff_1[action_0, action_1] / strategy_1[action_1]
        return [feedback_0, feedback_1]

    @staticmethod
    def _sample_from_distribution(distribution):
        """Draw one action index according to the probability vector ``distribution``."""
        return np.random.choice(len(distribution), p=distribution)

In [78]:
# Payoff matrices: A belongs to player 0, B to player 1.
A = np.array([
    [-1, -5],
    [0, -3],
])
B = np.array([
    [-1, 0],
    [-5, -3],
])

# Starting mixed strategies (one probability vector per player) and the
# starting cumulative scores that the exponential-weight update accumulates.
strategies = [[0.9, 0.1], [0.7, 0.3]]
values = [[0, 0], [0, 0]]

game = Game(payoffs=np.stack([A, B]), feedback_type='mixed_vector')

In [79]:
# Strategy trajectory of each player: history[p][t] is player p's mixed
# strategy after iteration t.
history = [[], []]
new_strategies = strategies
# Work on float copies so the initial `values` stay untouched and this cell
# gives the same result every time it is re-run.
new_values = [np.array(value, dtype=float) for value in values]

for i in range(100):
    new_values = game.update_values(new_strategies, values=new_values, learning_rate=0.1)
    new_strategies = game.get_strategies(new_values)

    history[0].append(new_strategies[0])
    # Record player 1's own strategy (index 1), not a second copy of player 0's.
    history[1].append(new_strategies[1])

[array([-0.22, -0.09]), array([-0.14, -0.03])]


In [80]:
# Player 0's mixed strategy recorded at the last iteration of the loop.
history[0][-1]

[3.286822933877988e-09, 0.999999996713177]