In [128]:
from typing import Optional, Tuple

import numpy as np
import numpy.typing as npt
import pytest

In [129]:
class NormalFormGameCalculator:
    """Expected-utility and best-response helper for a two-player normal-form game.

    One utility matrix is stored per player. ``calculate_utilities`` reads both
    matrices as ``[row action, column action]``.
    """

    # non zero sum game constructor
    def __init__(self,
                 row_player_utility_matrix: npt.NDArray[np.float64],
                 column_player_utility_matrix: Optional[npt.NDArray[np.float64]] = None) -> None:
        """Store the utility matrices of both players.

        Args:
            row_player_utility_matrix: Utilities of the row player.
            column_player_utility_matrix: Utilities of the column player. When
                ``None`` (the default) the game is treated as zero-sum and the
                negated row matrix is used instead.
        """
        # np.asarray returns an ndarray input unchanged, and lets nested lists work too.
        row_player_utility_matrix = np.asarray(row_player_utility_matrix)

        # if col player utility is not provided, we consider this a zero sum game
        if column_player_utility_matrix is None:
            column_player_utility_matrix = -row_player_utility_matrix
        else:
            column_player_utility_matrix = np.asarray(column_player_utility_matrix)

        self.row_player_utility_matrix = row_player_utility_matrix
        self.column_player_utility_matrix = column_player_utility_matrix


    # calculation section

    def calculate_utilities(self,
                            row_player_strategy: npt.NDArray[np.float64],
                            column_player_strategy: npt.NDArray[np.float64]) -> Tuple[np.float64, np.float64]:
        """Return the expected utilities as ``(row player, column player)``.

        Args:
            row_player_strategy: Row player's mixed strategy, expected as a
                column vector ``(n_row_actions, 1)``.
            column_player_strategy: Column player's mixed strategy, expected as
                a row vector ``(1, n_column_actions)``.

        Raises:
            AssertionError: If the product of the two strategies does not have
                the shape of the utility matrix, or does not sum to 1.
        """
        # With a column vector times a row vector this is the outer product:
        # entry [i, j] is the probability of the joint action (i, j).
        action_probabilities = row_player_strategy @ column_player_strategy

        # Without this check a (1, n) @ (n, 1) mix-up yields a (1, 1) array that
        # would silently broadcast against the utility matrices below.
        assert action_probabilities.shape == self.row_player_utility_matrix.shape, \
            "strategies must be a column vector (row player) and a row vector (column player)"
        # Tolerances mirror pytest.approx(1) without importing a test framework here.
        assert np.isclose(action_probabilities.sum(), 1.0, rtol=1e-6, atol=1e-12), \
            "joint action probabilities must sum to 1"

        row_player_utility = action_probabilities * self.row_player_utility_matrix
        column_player_utility = action_probabilities * self.column_player_utility_matrix

        return row_player_utility.sum(), column_player_utility.sum()

    @staticmethod
    def _pure_strategy(utilities: npt.NDArray[np.float64],
                       shape: Tuple[int, int]) -> npt.NDArray[np.float64]:
        """Return a one-hot vector on the first maximal entry of ``utilities``, reshaped to ``shape``."""
        pure_strategy = np.zeros(np.size(utilities))
        pure_strategy[np.argmax(utilities)] = 1
        # Shape is passed positionally: the ``newshape`` keyword is deprecated in recent NumPy.
        return np.reshape(pure_strategy, shape)

    def get_best_response_strategy_against_row_player(self,
                                                  row_player_strategy: npt.NDArray[np.float64],
                                                  column_utility_matrix: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Return a pure best response of the column player as a ``(1, k)`` row vector.

        The scores are ``column_utility_matrix @ row_player_strategy``, so entry
        ``k`` pairs row ``k`` of the matrix with the strategy; the matrix is
        therefore read as ``[responder action, opponent action]``.

        NOTE(review): that is the transpose of the ``[row action, column action]``
        layout used by ``calculate_utilities``. The two agree only when the
        matrix passed in equals the transpose of the column player's payoff
        matrix. Behaviour is kept as-is for existing callers - confirm the
        intended orientation.

        Args:
            row_player_strategy: Row player's mixed strategy as a column vector.
            column_utility_matrix: Matrix scored against the strategy (see above).

        Returns:
            One-hot row vector selecting the first highest-scoring action.
        """
        utilities = column_utility_matrix @ row_player_strategy
        return self._pure_strategy(utilities, (1, -1))

    def get_best_response_strategy_against_column_player(self,
                                                  column_player_strategy: npt.NDArray[np.float64],
                                                  row_utility_matrix: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Return a pure best response of the row player as a ``(k, 1)`` column vector.

        The scores are ``column_player_strategy @ row_utility_matrix``, so entry
        ``k`` pairs the strategy with column ``k`` of the matrix; the matrix is
        therefore read as ``[opponent action, responder action]``.

        NOTE(review): that is the transpose of the ``[row action, column action]``
        layout used by ``calculate_utilities``. The two agree only when the
        matrix passed in equals the transpose of the row player's payoff matrix.
        Behaviour is kept as-is for existing callers - confirm the intended
        orientation.

        Args:
            column_player_strategy: Column player's mixed strategy as a row vector.
            row_utility_matrix: Matrix scored against the strategy (see above).

        Returns:
            One-hot column vector selecting the first highest-scoring action.
        """
        utilities = column_player_strategy @ row_utility_matrix
        return self._pure_strategy(utilities, (-1, 1))
    




In [130]:
# Rock-paper-scissors payoffs; passed below as the row player's utility matrix.
rock_paper_scissors__utility_matrix = np.array([
    [0, 1, -1],
    [-1, 0, 1],
    [1, -1, 0],
])

# Mixed strategies: the column player's is a (1, 3) row vector,
# the row player's a (3, 1) column vector.
column_strategy = np.array([[0.3, 0.2, 0.5]])
row_strategy = np.array([[0.1, 0.2, 0.7]]).T

# Passing None as the column player's matrix makes the calculator treat the game as zero-sum.
normal_game_calculator = NormalFormGameCalculator(
    rock_paper_scissors__utility_matrix,
    None,
)

row_util, _ = normal_game_calculator.calculate_utilities(
    row_player_strategy=row_strategy,
    column_player_strategy=column_strategy,
)

# Expected utility of the row player for the strategy pair above.
assert row_util == pytest.approx(0.08)

row_util

0.08000000000000002

In [131]:


# Pure best responses to the fixed mixed strategies.
# The best-response helpers score `matrix @ strategy` / `strategy @ matrix`, so they are
# given the rock-paper-scissors matrix and its negation here; the matrix is antisymmetric,
# so each equals the transpose of the responder's own payoff matrix.
best_col = normal_game_calculator.get_best_response_strategy_against_row_player(
    row_player_strategy=row_strategy,
    column_utility_matrix=rock_paper_scissors__utility_matrix,
)
best_row = normal_game_calculator.get_best_response_strategy_against_column_player(
    column_player_strategy=column_strategy,
    row_utility_matrix=-rock_paper_scissors__utility_matrix,
)

# values when facing best responding opponent
rowvalue2, _ = normal_game_calculator.calculate_utilities(
    row_player_strategy=row_strategy,
    column_player_strategy=best_col,
)
_, colvalue1 = normal_game_calculator.calculate_utilities(
    row_player_strategy=best_row,
    column_player_strategy=column_strategy,
)

assert rowvalue2 == pytest.approx(-0.6)
assert colvalue1 == pytest.approx(-0.2)

rowvalue2, colvalue1

(-0.6, -0.2)