### Regret-Matching Rock-Paper-Scissors

From Section 2.5 of [An Introduction to Counterfactual Regret Minimization](http://modelai.gettysburg.edu/2013/cfr/cfr.pdf) by T. Neller and M. Lanctot.

#### 0 - Imports

In [1]:
import numpy as np

#### 1 - Players

In [2]:
# Number of players; used as the first dimension of the per-player state arrays.
N_PLAYERS = 2

#### 2 - Actions

In [3]:
# Action indices; they double as positions in every per-action array.
ROCK, PAPER, SCISSORS = range(3)
# Total number of distinct actions.
N_ACTIONS = 3

In [4]:
# Sample an action index from a mixed strategy
def get_action(strategy):
    """Draw one random action index according to a mixed strategy.

    Args:
        strategy: 1-D sequence of action probabilities. It is passed as the
            ``p`` argument of ``np.random.choice``, so it must sum to 1.

    Returns:
        The sampled action index, an integer in ``range(len(strategy))``.
    """
    # Size the action set from the probability vector itself so the sampler
    # can never disagree with the length of the strategy it is given.
    return np.random.choice(len(strategy), p=strategy)

#### 3 - Strategy

In [5]:
# Per-player, per-action cumulative regrets (start at zero).
regret_sums = np.zeros((N_PLAYERS, N_ACTIONS))
# Per-player mixed strategies, initialised to the uniform distribution.
strategies = np.full((N_PLAYERS, N_ACTIONS), 1 / N_ACTIONS)
# Per-player running sums of the strategies, same shape as regret_sums.
strategy_sums = np.zeros_like(regret_sums)

In [6]:
# Get current mixed strategy through regret-matching
def get_strategy(regret_sum, strategy_sum):
    """Compute the regret-matching strategy and accumulate it.

    Args:
        regret_sum: 1-D array of cumulative regrets, one entry per action.
        strategy_sum: 1-D float array of the same length. It is updated
            IN PLACE: the returned strategy is added to it.

    Returns:
        1-D array of action probabilities. Actions are weighted in
        proportion to their positive cumulative regret; if no action has
        positive regret the uniform distribution is returned.
    """
    # Only positive regrets contribute; negative ones are clipped to zero.
    positive_regrets = np.maximum(regret_sum, 0)
    # Compute the normalizer once, with numpy's own reduction.
    normalizer = positive_regrets.sum()
    if normalizer == 0:
        # No positive regret: fall back to uniform play over however many
        # actions the regret vector describes.
        n_actions = len(positive_regrets)
        strategy = np.full(n_actions, 1 / n_actions)
    else:
        strategy = positive_regrets / normalizer
    # Add to the cumulative strategy (mutates the caller's array).
    strategy_sum += strategy
    return strategy

#### 4 - Utilities

In [7]:
def get_utility(op_action):
    """Build the utility of each own action against one opponent action.

    Args:
        op_action: index of the action the opponent played.

    Returns:
        Array of length N_ACTIONS holding 1 at the index following
        ``op_action`` (wrapping from the last index to 0), -1 at the index
        preceding it (wrapping from 0 to the last index), and 0 elsewhere.
    """
    last_action = N_ACTIONS - 1

    # Index that beats the opponent: the next one, wrapping at the end.
    if op_action == last_action:
        winning_action = 0
    else:
        winning_action = op_action + 1

    # Index that loses to the opponent: the previous one, wrapping at 0.
    if op_action == 0:
        losing_action = last_action
    else:
        losing_action = op_action - 1

    payoffs = np.zeros(N_ACTIONS)
    payoffs[winning_action] = 1
    payoffs[losing_action] = -1
    return payoffs

#### 5 - Train

In [8]:
# Number of training iterations (simulated rounds of play).
N_ITERS = 1_000_000

In [9]:
# Report progress about every 10% of the run. The step is derived from
# N_ITERS so the percentages stay correct if N_ITERS is changed; max() keeps
# the modulus non-zero when N_ITERS < 10.
progress_step = max(1, N_ITERS // 10)
for i in range(N_ITERS):
    if i % progress_step == 0:
        print(f"{100 * i / N_ITERS}%")
    # Get regret-matched mixed-strategy actions. get_strategy also adds each
    # player's current strategy to strategy_sums in place.
    actions = (get_action(get_strategy(regret_sums[0], strategy_sums[0])),
               get_action(get_strategy(regret_sums[1], strategy_sums[1])))
    # Action utility vectors: each player's utilities are computed against
    # the action the other player just took.
    utilities = (get_utility(actions[1]), get_utility(actions[0]))
    # Accumulate action regrets: utility of every action minus the utility of
    # the action that was actually played.
    regret_sums[0] += utilities[0] - utilities[0][actions[0]]
    regret_sums[1] += utilities[1] - utilities[1][actions[1]]

0.0%
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%


#### 6 - Average Strategy

In [10]:
def average_strategy(strategy_sum):
    """Normalize a cumulative strategy into an average mixed strategy.

    Args:
        strategy_sum: 1-D sequence of accumulated strategy weights, one
            entry per action.

    Returns:
        1-D array ``strategy_sum / total`` where ``total`` is the sum of the
        entries, or the uniform distribution over ``len(strategy_sum)``
        actions when that total is zero.
    """
    weights = np.asarray(strategy_sum)
    # Compute the normalizer once, with numpy's own reduction.
    total = weights.sum()
    if total == 0:
        # Nothing accumulated yet: fall back to uniform play.
        n_actions = len(weights)
        return np.full(n_actions, 1 / n_actions)
    return weights / total

In [17]:
# Average strategy of each player over the whole training run.
avg_strats = tuple(average_strategy(strategy_sums[player]) for player in (0, 1))
# One report line per player, with percentages rounded to one decimal place.
for player_number, mix in enumerate(avg_strats, start=1):
    rock, paper, scissors = (round(100*mix[k], 1) for k in (0, 1, 2))
    print(f"Player {player_number}: R: {rock}%   P: {paper}%   S: {scissors}%")

Player 1: R: 33.3%   P: 33.3%   S: 33.3%
Player 2: R: 33.2%   P: 33.3%   S: 33.4%
