In [5]:
import numpy as np
from typing import Literal

In [6]:
FeedbackType = Literal['mixed_vector', 'pure_vector', 'bandit']

In [None]:
class Game:
    def __init__(self, payoffs, feedback_type: FeedbackType = 'mixed_vector'):
        self.payoffs = payoffs
        self.feedback_type = feedback_type

    def __str__(self):
        pass
    
    def run_dynamic(self, init_strategies, feedback_type:FeedbackType="mixed_vector",
                    iter=100):
      history = np.array([[init_strategies]])
      init_values = [[0, 0], [0, 0]]
      for _ in range(iter):
          new_strategies = self.get_strategies()
          history.append(self.upda)
          
        

    def update_values(self, 
                      strategies,
                      values, 
                      learning_rate = 0.1,
                      feedback_type: FeedbackType = None):
      if feedback_type is None:
          feedback_type = self.feedback_type
      
      feedbacks = self.get_feedbacks(strategies, feedback_type)
      for i in range(len(feedbacks)):
          
          values[i] += learning_rate * feedbacks[i]

      return values

    def get_strategies(self, values):
        exponential_weights = [[np.exp(strategy_value) for strategy_value in value] for value in values]  
        sum_exponential_weights = [np.sum(exponential_weight) for exponential_weight in exponential_weights]
        updated_strategies = [[strategy_weight/sum_exponential_weights[player] \
                                   for strategy_weight in exponential_weight] \
                                   for player, exponential_weight in enumerate(exponential_weights)]
        return updated_strategies
    
    def get_feedbacks(self, strategies, feedback_type: FeedbackType = "mixed-vector"):

      if feedback_type == 'mixed_vector':
          feedback_0 = np.matmul(self.payoffs[0], strategies[1])
          feedback_1 = np.matmul(self.payoffs[1].T, strategies[0])
          return np.array([feedback_0, feedback_1])
      
      action_0 = Game._sample_from_distribution(strategies[0])
      action_1 = Game._sample_from_distribution(strategies[1])
      
      if feedback_type == 'pure_vector':
          feedback_0 = np.array(self.payoffs[0][action_1])
          feedback_1 = np.array(self.payoffs[1][action_0])

          return np.array([feedback_0, feedback_1])
      
      if feedback_type == 'bandit':
          feedback_0 = np.array([self.payoffs[0][action_1][0]*(action_0 == 0)/strategies[0][0], 
                        self.payoffs[0][action_1][0]*(action_0 == 1)/strategies[0][1]])
          feedback_1 = np.array([self.payoffs[1][action_0][0]*(action_1 == 0)/strategies[1][0], 
                        self.payoffs[1][action_0][1]*(action_1 == 1)/strategies[1][1]])
          return np.array([feedback_0, feedback_1])
      

    @staticmethod
    def _sample_from_distribution(distribution):
        return np.random.choice(len(distribution), p=distribution)

In [None]:
A = np.array([[-1,-5], [0, -3]])
B = np.array([[-1,0], [-5, -3]])  

strategies = [[0.9, 0.1], [0.7, 0.3]]
values = [[0, 0], [0, 0]]

game = Game(np.array([A,B]), feedback_type='mixed_vector')

In [None]:
history = [[[0.9, 0.1], [0.7, 0.3]]]
new_strategies = strategies
new_values = values

for i in range(100):
  new_values = game.update_values(new_strategies, values = new_values, learning_rate=0.1)
  new_strategies = game.get_strategies(new_values)
  
  history[0].append(new_strategies[0])
  history[1].append(new_strategies[0])

  break

In [None]:
strategy_1 = history[0]


In [12]:
history[0]

[[np.float64(2.8861442016395954e-09), np.float64(0.9999999971138558)],
 [np.float64(2.3629750176425764e-09), np.float64(0.9999999976370249)],
 [np.float64(1.9346403170010873e-09), np.float64(0.9999999980653597)],
 [np.float64(1.5839495245461617e-09), np.float64(0.9999999984160504)],
 [np.float64(1.2968281876542692e-09), np.float64(0.9999999987031718)],
 [np.float64(1.0617531190831237e-09), np.float64(0.9999999989382469)],
 [np.float64(8.692899310327192e-10), np.float64(0.9999999991307101)],
 [np.float64(7.117144000537497e-10), np.float64(0.9999999992882856)],
 [np.float64(5.827024668505783e-10), np.float64(0.9999999994172976)],
 [np.float64(4.770764295841537e-10), np.float64(0.9999999995229236)],
 [np.float64(3.905971445222676e-10), np.float64(0.9999999996094029)],
 [np.float64(3.197938943204053e-10), np.float64(0.999999999680206)],
 [np.float64(2.61825095950536e-10), np.float64(0.9999999997381749)],
 [np.float64(2.1436425799828441e-10), np.float64(0.9999999997856358)],
 [np.float64(1.