In [3]:
import numpy as np
from typing import Literal

In [18]:
FeedbackType = Literal['mixed_vector', 'pure_vector', 'bandit']

In [30]:
class Game:
    def __init__(self, payoffs, feedback_type: FeedbackType = 'mixed_vector'):
        self.payoffs = payoffs
        self.feedback_type = feedback_type

    def __str__(self):
        pass
    
    def update_strategies(self, strategies, values, learning_rate = 0.1, feedback_type: FeedbackType = None):
      if feedback_type is None:
          feedback_type = self.feedback_type

      print(f"Feedback Type: {feedback_type}")
      
      feedbacks = self.get_feedbacks(strategies, feedback_type)
      
      values += learning_rate * feedbacks

      exponential_values = [[np.exp(action) for action in value] for value in values]
      exponential_sums = [np.sum(exponential_value) for exponential_value in exponential_values]

      updated_strategies = [[exponential_value/exponential_sum 
                            for exponential_value in exponential_values] 
                           for exponential_sum in exponential_sums]
      return updated_strategies

    
    def get_feedbacks(self, strategies, feedback_type: FeedbackType = None):
      if feedback_type is None:
          feedback_type = self.feedback_type



      if feedback_type == 'mixed_vector':
          feedback_0 = np.matmul(self.payoffs, strategies[1])
          feedback_1 = np.matmul(self.payoffs.T, strategies[0])
          return [feedback_0, feedback_1]
      
      action_0 = Game._sample_from_distribution(strategies[0])
      action_1 = Game._sample_from_distribution(strategies[1])
      
      if feedback_type == 'pure_vector':
          feedback_0 = [self.payoffs[0][action_1], self.payoffs[1][action_1]]
          feedback_1 = [self.payoffs[action_0][0], self.payoffs[action_0][1]]
          return [feedback_0, feedback_1]
      
      if feedback_type == 'bandit':
          feedback_0 = [self.payoffs[0][action_1]*(action_1 == 0)/strategies[0][0], 
                        self.payoffs[1][action_1]*(action_1 == 1)/strategies[0][1]]
          feedback_1 = [self.payoffs[action_0][0]*(action_0 == 0)/strategies[1][0], 
                        self.payoffs[action_0][1]*(action_0 == 1)/strategies[1][1]]
          return [feedback_0, feedback_1]
      

    @staticmethod
    def _sample_from_distribution(distribution):
        return np.random.choice(len(distribution), p=distribution)
    

      

In [31]:
A = np.array([[-1,-5], [0, -3]])
B = np.array([[-1,0], [-5, -3]])  

strategies = [[0.9, 0.1], [0.7, 0.3]]
values = [[0, 0], [0, 0]]

game = Game(A, B)

In [32]:
game.update_strategies(strategies, values = values)

Feedback Type: [[-1  0]
 [-5 -3]]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
arr = [0, 1, 2]

for 