In [None]:
import numpy as np

# Actions are encoded as consecutive integers so they can double as indices
# into the regret/strategy arrays below.
R, P, S = range(3)
ACTIONS = [R, P, S]
ACTION_LOOKUP = {R: 'R', P: 'P', S: 'S'}  # integer code -> printable label
NUM_ACTIONS = len(ACTIONS)

# Running per-action totals for player 1; both are mutated in place by the
# functions defined in later cells.
regret_sum, strategy_sum = np.zeros(NUM_ACTIONS), np.zeros(NUM_ACTIONS)

# Cumulative score as [player 1, player 2].
score = [0, 0]

In [None]:
# Fixed mixed strategy for player 2: probability of playing each action.
p2_strat = dict(zip((R, P, S), (.1, .8, .1)))
# Same probabilities as a plain list, in the order the actions were inserted
# above, so it can be passed as `p=` to np.random.choice.
p2_probs = [p2_strat[action] for action in p2_strat]

In [None]:
def calculate_result(a1, a2):
  """Score one round from the point of view of the player who chose `a1`.

  Returns 0 for a tie, 1 when `a1` beats `a2`, and -1 otherwise.
  """
  if a1 == a2:
    return 0

  # (winner, loser) pairs: rock beats scissors, paper beats rock,
  # scissors beats paper.
  winning_pairs = ((R, S), (P, R), (S, P))
  return 1 if (a1, a2) in winning_pairs else -1


In [None]:
def choose_action(player):
  """Sample an action index for the given player.

  Player 2 draws from the fixed distribution `p2_probs`. Any other player
  value uses regret matching: the positive part of `regret_sum` is normalized
  into a probability vector (uniform when no regret is positive), that vector
  is added to the global `strategy_sum`, and an action is sampled from it.
  """
  global strategy_sum

  if player == 2:
    return np.random.choice(NUM_ACTIONS, p=p2_probs)

  # Only actions we regret *not* having played get any probability mass.
  positive_regrets = np.maximum(regret_sum, 0)
  total = np.sum(positive_regrets)

  if total > 0:
    strategy = positive_regrets / total
  else:
    # No positive regret anywhere: fall back to a uniform distribution.
    strategy = positive_regrets + np.repeat(1/NUM_ACTIONS, NUM_ACTIONS)

  # Keep a running sum of every strategy that was played.
  strategy_sum += strategy
  return np.random.choice(NUM_ACTIONS, p=strategy)



In [None]:
def update_regrets(a1, opponent_action, actual_result):
  """Accumulate player 1's regret for every action after one round.

  For each action `a`, the regret is the payoff `a` would have earned against
  `opponent_action` minus the payoff actually earned (`actual_result`). The
  values are added in place to the global `regret_sum`.

  Args:
    a1: action index player 1 actually played.
    opponent_action: action index the opponent played.
    actual_result: player 1's payoff for the round (1 win, 0 tie, -1 loss).
  """
  for a in range(NUM_ACTIONS):
    result_if_taken = actual_result if a == a1 else calculate_result(a, opponent_action)
    # Regret = counterfactual payoff - realized payoff. It is positive when
    # `a` would have done better than `a1`, negative when it would have done
    # worse, and exactly zero for the action that was actually played.
    regret_sum[a] += (result_if_taken - actual_result)

With this, we're asking: "What if I had taken action `a` instead of `a1`? Would the result have been better or worse?"

If `result_if_taken` is greater than `actual_result`, action `a` would have given a better result, so the regret for not choosing `a` increases. If `result_if_taken` is less than `actual_result`, the action actually played (`a1`) was better, so the regret for not choosing `a` decreases.

In [None]:
def play_game(num_rounds=10000, log_every=1):
  """Play `num_rounds` rounds of player 1 (regret matching) vs. player 2.

  Each round samples both actions, scores the round from player 1's point of
  view, updates the global `score` in place and feeds the outcome to
  `update_regrets`.

  Args:
    num_rounds: number of rounds to play.
    log_every: print a status line every `log_every` rounds (the final round
      is always printed as well). The default of 1 prints every round; pass
      0 or None to print nothing.
  """
  for i in range(num_rounds):
    a1, a2 = choose_action(1), choose_action(2)
    result = calculate_result(a1, a2)
    score[0] += result
    score[1] -= result
    update_regrets(a1, a2, result)
    # Throttle logging so long runs do not flood the notebook output.
    if log_every and (i % log_every == 0 or i == num_rounds - 1):
      print(f"Round {i}: {ACTION_LOOKUP[a1]} {ACTION_LOOKUP[a2]} -- SCORE: {score[0]} {score[1]} -- {regret_sum}")


# Log only every 1000th round; printing all 10,000 rounds gets truncated anyway.
play_game(log_every=1000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Round 7500: S P -- SCORE: 3639 -3639 -- [-1558.  3657.  8818.]
[1.00000000e+00 2.21972503e+03 5.28027497e+03]
Round 7501: S P -- SCORE: 3640 -3640 -- [-1558.  3658.  8820.]
[1.00000000e+00 2.22001818e+03 5.28098182e+03]
Round 7502: S P -- SCORE: 3641 -3641 -- [-1558.  3659.  8822.]
[1.00000000e+00 2.22031133e+03 5.28168867e+03]
Round 7503: P P -- SCORE: 3641 -3641 -- [-1559.  3659.  8823.]
[1.0000000e+00 2.2206045e+03 5.2823955e+03]
Round 7504: P R -- SCORE: 3642 -3642 -- [-1558.  3661.  8823.]
[1.00000000e+00 2.22089764e+03 5.28310236e+03]
Round 7505: S P -- SCORE: 3643 -3643 -- [-1558.  3662.  8825.]
[1.0000000e+00 2.2211909e+03 5.2838091e+03]
Round 7506: S P -- SCORE: 3644 -3644 -- [-1558.  3663.  8827.]
[1.00000000e+00 2.22148416e+03 5.28451584e+03]
Round 7507: S S -- SCORE: 3644 -3644 -- [-1557.  3662.  8827.]
[1.00000000e+00 2.22177744e+03 5.28522256e+03]
Round 7508: S P -- SCORE: 3645 -3645 -- [-1557.  3663.  8829.