# Rock Paper Scissor strategy

Some strategy in playing RPS vs opponent that have some sort of tendency/favor a choice

# 1. Both play completely random

In [2]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import random

In [24]:
def player(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [25]:
def opponent(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [37]:
def num_to_pick(number):
    return ['rock', 'paper', 'scissor'][number-1]

The reason for player/opponent pick is a number instead of string is to avoid a lot of and/or clause to determine who win.

If player/opponent's pick returns a string like 'rock', 'paper', 'scissor', The code would look like:

`if player/opponent same choice -> draw`

`elif player_rock and opponent_paper OR player_paper and opponent scissor OR player_scissor and opponent_rock -> lose`

`else win`

on top of that, opperating on comparing string is low

In [112]:
# if map choice to int:
# rock = 1, paper = 2, scissor = 3

# each play is player_choice - opponent_choice

# lose scenario: player_choice - opponent_choice in (-1,2):
### rock - paper = 1-2 = -1
### paper - scis = 2-3 = -1
### scis - rock  = 3-1 = 2

# win scenario: player_choice - opponent_choice in (1,-2)
### rock - scis = 1-3 = -2
### paper - roc = 2-1 = 1
### scis - papr = 3-2 = 1

# else Draw

# storing int to df is more memory efficient (and faster?)

In [114]:
def play(player_choice, opponent_choice):
    versus = player_choice - opponent_choice
    # print(f'{num_to_pick(player_choice)} vs {num_to_pick(opponent_choice)}')
    if versus in (-1,2):
        # print('Lose')
        return player_choice, opponent_choice, -1
    elif versus in (-2,1):
        # print('Win')
        return player_choice, opponent_choice, 1
    else:
        # print('Draw')
        return player_choice, opponent_choice, 0

In [106]:
play(player(), opponent())

('scissor', 'rock', -1)

In [140]:
def play_n_times(times):
    results = pd.DataFrame(columns=['Player', 'Opponent', 'Result'], dtype = np.int8)

    for i in range(times):
        turn = play(player(), opponent())
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)

    # Convert to text
    results.Player = results.Player.apply(lambda x: num_to_pick(x))
    results.Opponent = results.Opponent.apply(lambda x: num_to_pick(x))
    return results

In [142]:
%%time
result = play_n_times(100_000)

CPU times: total: 53.5 s
Wall time: 1min 1s


In [143]:
result.Result.value_counts(normalize=True)

-1    0.33436
 0    0.33303
 1    0.33261
Name: Result, dtype: float64

# 2. Opponent has a favorite pick, player is oblivious and play at random

In [1]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import random

In [2]:
def player(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [3]:
def opponent(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [4]:
def num_to_pick(number):
    return ['rock', 'paper', 'scissor'][number-1]

In [5]:
def play(player_choice, opponent_choice):
    versus = player_choice - opponent_choice
    # print(f'{num_to_pick(player_choice)} vs {num_to_pick(opponent_choice)}')
    if versus in (-1,2):
        # print('Lose')
        return player_choice, opponent_choice, -1
    elif versus in (-2,1):
        # print('Win')
        return player_choice, opponent_choice, 1
    else:
        # print('Draw')
        return player_choice, opponent_choice, 0

In [6]:
def play_n_times(times):
    results = pd.DataFrame(columns=['Player', 'Opponent', 'Result'], dtype = np.int8)

    for i in range(times):
                              # opponent  prefer rock than others
        turn = play(player(), opponent((1,.1,.1)))
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)

    # Convert to text
    results.Player = results.Player.apply(lambda x: num_to_pick(x))
    results.Opponent = results.Opponent.apply(lambda x: num_to_pick(x))
    return results

In [7]:
%%time
result = play_n_times(10_000)

CPU times: total: 93.8 ms
Wall time: 111 ms


In [17]:
result.Player.value_counts()

scissor    3391
paper      3328
rock       3281
Name: Player, dtype: int64

In [18]:
result.Opponent.value_counts()

rock       8384
scissor     827
paper       789
Name: Opponent, dtype: int64

In [19]:
result.Result.value_counts(normalize=True)

-1    0.3369
 0    0.3344
 1    0.3287
Name: Result, dtype: float64

Doesn't matter what opponent prefer, if player picks at random, win/lose/draw ratio will be equal.

# 3. Player adapts, pick the option with the highest net win in the last 15 turns

- Opponent slightly favors a choice than the others
- Player suspects opponent does favor a choice and not random, but does not know what that favored choice is.
- Player look back at the last X turns, pick the option that resulted in highest net wins.
> (Note that this is different from the choice that WOULD HAVE resulted in highest net wins).

- Net win of a choice = (times pick CHOICE and win) - (times pick CHOICE and lose)

> for eg, in the last 15 turns, pick rock 8 times, won 5 times, lost 3 times => net win = 2

> (for now, avoid divide by sum of total picks to not have to deal with divide by 0)

In [35]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import random
from tqdm.notebook import tqdm

In [36]:
def player(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [37]:
def opponent(weights=(1,1,1)):
    # weights = [rock, paper, scis]
    # choices ('rock', 'paper', 'scissor') = 1,2,3
    return random.choices((1,2,3), weights)[0]

In [38]:
def num_to_pick(number):
    return ['rock', 'paper', 'scissor'][number-1]

In [39]:
def play(player_choice, opponent_choice):
    versus = player_choice - opponent_choice
    # print(f'{num_to_pick(player_choice)} vs {num_to_pick(opponent_choice)}')
    if versus in (-1,2):
        # print('Lose')
        return player_choice, opponent_choice, -1
    elif versus in (-2,1):
        # print('Win')
        return player_choice, opponent_choice, 1
    else:
        # print('Draw')
        return player_choice, opponent_choice, 0

In [40]:
def play_n_times(times, rounds_before_adapting = 15, opponent_weights = (1,1,1)):
    results = pd.DataFrame(columns=['Player', 'Opponent', 'Result'], dtype = np.int8)
    
    # first X rounds_before_adapting, random:
    for i in range(rounds_before_adapting):
                              # opponent  prefer rock than others
        turn = play(player(), opponent(opponent_weights))
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)
    
    # starting from round rounds_before_adapting, begin strategy:
    for i in np.arange(rounds_before_adapting, times):
        # calculate weights from previous x round
        # groupby would not work if lacking value
        # weights = results.iloc[-rounds_before_adapting:].groupby('Player').sum()['Result']
        # weights = (weights - weights.min() + 0.1).tolist()
        last_x_turns = results.iloc[-rounds_before_adapting:]
        
        rock_weight = last_x_turns.Result[last_x_turns.Player == 1].sum()
        paper_weight = last_x_turns.Result[last_x_turns.Player == 2].sum()
        scissor_weight = last_x_turns.Result[last_x_turns.Player == 3].sum()
        # min normalization + add small adjusting constant to avoid converge to 0 for draw-choice
        rock_weight = rock_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        paper_weight = paper_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        scissor_weight = scissor_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        
        weights = [rock_weight, paper_weight, scissor_weight]
        try:
        # play next round with adjusted weights
            turn = play(player(weights), opponent(opponent_weights))
        except ValueError:
            print(results.iloc[-15:].groupby('Player').sum()['Result'])
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)
    
    # Convert to text
    results.Player = results.Player.apply(lambda x: num_to_pick(x))
    results.Opponent = results.Opponent.apply(lambda x: num_to_pick(x))
    return results

In [7]:
%%time
# repeat the experiment 100 times, each time play 1000 rounds
games = 100
rounds_per_game = 1000

net_wins = []
for experiment_n in tqdm(np.arange(0,games)):
    result = play_n_times(rounds_per_game, rounds_before_adapting = 15, opponent_weights = (4,3,3))
    net_wins.append(result.Result.sum())

  0%|          | 0/100 [00:00<?, ?it/s]

CPU times: total: 1min 39s
Wall time: 1min 52s


In [8]:
np.mean(net_wins)

28.54

Remember the last 15 turns and adjust weights by net win => mean 28.84 net wins after 1000 round

**Square the weights**

In [41]:
def play_n_times(times, rounds_before_adapting = 15, opponent_weights = (1,1,1)):
    results = pd.DataFrame(columns=['Player', 'Opponent', 'Result'], dtype = np.int8)
    
    # first X rounds_before_adapting, random:
    for i in range(rounds_before_adapting):
                              # opponent  prefer rock than others
        turn = play(player(), opponent(opponent_weights))
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)
    
    # starting from round rounds_before_adapting, begin strategy:
    for i in np.arange(rounds_before_adapting, times):
        # calculate weights from previous x round
        last_x_turns = results.iloc[-rounds_before_adapting:]
        rock_weight = last_x_turns.Result[last_x_turns.Player == 1].sum()
        paper_weight = last_x_turns.Result[last_x_turns.Player == 2].sum()
        scissor_weight = last_x_turns.Result[last_x_turns.Player == 3].sum()
        # min normalization + add small adjusting constant to avoid converge to 0 for draw-choice
        rock_weight = rock_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        paper_weight = paper_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        scissor_weight = scissor_weight - min([rock_weight, paper_weight, scissor_weight])+.1
        # SQUARE THE weights
        weights = [rock_weight**2, paper_weight**2, scissor_weight**2]
        try:
        # play next round with adjusted weights
            turn = play(player(weights), opponent(opponent_weights))
        except ValueError:
            print(results.iloc[-15:].groupby('Player').sum()['Result'])
        newturn = pd.DataFrame({'Player': [turn[0]], 'Opponent': [turn[1]], 'Result': [turn[2]]})
        results = pd.concat([results, newturn], axis=0, ignore_index=True)
    
    # Convert to text
    results.Player = results.Player.apply(lambda x: num_to_pick(x))
    results.Opponent = results.Opponent.apply(lambda x: num_to_pick(x))
    return results, weights

In [44]:
%%time
# repeat the experiment 100 times, each time play 1000 rounds
games = 25
rounds_per_game = 1000

net_wins = []
weight_list = []

for experiment_n in tqdm(np.arange(0,games)):
    result, weights = play_n_times(rounds_per_game, rounds_before_adapting = 15, opponent_weights = (4,3,3))
    net_wins.append(result.Result.sum())
    weight_list.append(weights)

  0%|          | 0/25 [00:00<?, ?it/s]

CPU times: total: 24.9 s
Wall time: 28.4 s


In [45]:
np.mean(net_wins)

36.12

In [46]:
weight_list

[[0.010000000000000002, 16.81, 0.010000000000000002],
 [4.41, 0.010000000000000002, 0.010000000000000002],
 [0.010000000000000002, 4.41, 0.010000000000000002],
 [0.010000000000000002, 26.009999999999998, 0.010000000000000002],
 [0.010000000000000002, 50.41, 0.010000000000000002],
 [1.2100000000000002, 0.010000000000000002, 0.010000000000000002],
 [16.81, 0.010000000000000002, 1.0],
 [0.010000000000000002, 37.209999999999994, 0.010000000000000002],
 [4.41, 16.81, 0.010000000000000002],
 [0.010000000000000002, 1.2100000000000002, 0.010000000000000002],
 [1.2100000000000002, 0.010000000000000002, 0.010000000000000002],
 [16.81, 0.010000000000000002, 0.010000000000000002],
 [0.010000000000000002, 0.010000000000000002, 0.010000000000000002],
 [0.010000000000000002, 37.209999999999994, 0.010000000000000002],
 [1.2100000000000002, 4.41, 0.010000000000000002],
 [0.010000000000000002, 0.010000000000000002, 1.0],
 [1.2100000000000002, 0.010000000000000002, 0.010000000000000002],
 [0.010000000000

Squaring the weights resulted in slightly higher mean net wins after 1000 round.

# Before explore different strategy, see if there are ways to run multiple experiments at the same time

In [7]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import random
from tqdm.notebook import tqdm
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)


In [8]:
def play(versus):
    # print(f'{num_to_pick(player_choice)} vs {num_to_pick(opponent_choice)}')
    if versus in (-1,2):
        # print('Lose')
        return -1
    elif versus in (-2,1):
        # print('Win')
        return 1
    else:
        # print('Draw')
        return 0

In [9]:
#apply to a dataframe of 3 columns, 1,2,3 = choice for rock, paper, scissor
def make_choice(row):
    weights = row.values
    return np.random.choice([1,2,3], p=weights)

In [10]:
# normalize weights df
def normalize(weights_df):
    weights_df = weights_df.apply(lambda x: x/weights_df.sum(axis=1))
    return weights_df

In [11]:
# show summary stats of players
def show_choices(player_df):
    total_choices = player_df.shape[0]*player_df.shape[1]
    print(f'Rock: {(player_df==1).sum().sum()}, {(player_df==1).sum().sum() / total_choices:.2f}')
    print(f'Paper: {(player_df==2).sum().sum()}, {(player_df==2).sum().sum() / total_choices:.2f}')
    print(f'Scissor: {(player_df==3).sum().sum()}, {(player_df==3).sum().sum() / total_choices:.2f}')

In [12]:
# main functions for running games x rounds
def play_n_times(games, rounds_per_game, rounds_before_adapting, 
                 player_weights_initial = [1,1,1], 
                 opponent_weights_initial = [1,1,1]):
    # games: the number of rows of dataframes
    # rounds_per_games: columns
    player = pd.DataFrame()
    player_weights = normalize(pd.DataFrame(np.tile(player_weights_initial, (games, 1))))
    opponent = pd.DataFrame()
    opponent_weights = normalize(pd.DataFrame(np.tile(opponent_weights_initial, (games, 1))))

    results = pd.DataFrame()
    
    # before adapting
    for round_th in range(0, rounds_before_adapting): 
        # player choices
        player['round'+str(round_th)] = player_weights.apply(make_choice, axis=1)
        # opponent choices
        opponent['round'+str(round_th)] = opponent_weights.apply(make_choice, axis=1)
        # calculate, apply result mapping
        results['round'+str(round_th)] = (player['round'+str(round_th)] - opponent['round'+str(round_th)])\
        .apply(lambda x: play(x))
        
    # start adapting
    for round_th in tqdm(range(rounds_before_adapting, rounds_per_game)):
        # calculate weights from previous round
        current_weights = pd.DataFrame({
            'w1': results.iloc[:,-rounds_before_adapting:][player==1].sum(axis=1),
            'w2': results.iloc[:,-rounds_before_adapting:][player==2].sum(axis=1),
            'w3': results.iloc[:,-rounds_before_adapting:][player==3].sum(axis=1),
        })
        # eliminate negative + add small constant
        current_weights = current_weights.apply(lambda x: x-current_weights.min(axis=1)+0.1)
        # square
        current_weights = current_weights**2
        # normalize
        current_weights = normalize(current_weights)
        
        # player make choice base on current weights
        player['round'+str(round_th)] = current_weights.apply(make_choice, axis=1)
        # opponent choices
        opponent['round'+str(round_th)] = opponent_weights.apply(make_choice, axis=1)
         # calculate, apply result mapping
        results['round'+str(round_th)] = (player['round'+str(round_th)] - opponent['round'+str(round_th)])\
        .apply(lambda x: play(x))
        
    return player, opponent, results


In [13]:
%%time
# repeat the experiment 100 times, each time play 1000 rounds like last time
# last time: Wall time: 1min 48s
# new code: Wall time: 50.7 s, more than 2x faster

player, opponent, results = play_n_times(games = 100, rounds_per_game=1000, 
                                         rounds_before_adapting = 15,
                                         opponent_weights_initial = [4,3,3]
                                         )

  0%|          | 0/985 [00:00<?, ?it/s]

CPU times: total: 41.9 s
Wall time: 50.7 s


In [33]:
results.sum(axis=1).mean()

27.99

In [28]:
show_choices(player)

Rock: 30879, 0.31
Paper: 46858, 0.47
Scissor: 22263, 0.22


In [27]:
show_choices(opponent)

Rock: 40123, 0.40
Paper: 29771, 0.30
Scissor: 30106, 0.30


Double the games:

In [34]:
%%time
# repeat the experiment 200 times, each time play 1000 rounds
# Wall time: 59.2 s, barely increase from 100 times

player, opponent, results = play_n_times(games = 200, rounds_per_game=1000, 
                                         rounds_before_adapting = 15,
                                         opponent_weights_initial = [4,3,3]
                                         )

  0%|          | 0/985 [00:00<?, ?it/s]

CPU times: total: 46.8 s
Wall time: 59.2 s


In [35]:
results.sum(axis=1).mean()

25.25

In [36]:
show_choices(player)

Rock: 63092, 0.32
Paper: 91686, 0.46
Scissor: 45222, 0.23


In [37]:
show_choices(opponent)

Rock: 79880, 0.40
Paper: 60001, 0.30
Scissor: 60119, 0.30


# 4. Player adapts based on opponent:

- Opponent slightly favors a choice than the others
- Player suspects opponent does favor a choice and not random, but does not know what that favored choice is.
- Player look back at the last X rounds made by opponent, pick the option that WOULD HAVE resulted in highest net wins.

eg: In the last 15 rounds, opponent:
- picked rock 7 times, paper 5 times, scissor 3 times:
>=> prioritize paper(beat rock): 7, scissor(beat paper): 5, rock (beat scissor): 3



In [1]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import random
from tqdm.notebook import tqdm
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)


In [2]:
def play(versus):
    # print(f'{num_to_pick(player_choice)} vs {num_to_pick(opponent_choice)}')
    if versus in (-1,2):
        # print('Lose')
        return -1
    elif versus in (-2,1):
        # print('Win')
        return 1
    else:
        # print('Draw')
        return 0

In [3]:
#apply to a dataframe of 3 columns, 1,2,3 = choice for rock, paper, scissor
def make_choice(row):
    weights = row.values
    return np.random.choice([1,2,3], p=weights)

In [4]:
# normalize weights df
def normalize(weights_df):
    weights_df = weights_df.apply(lambda x: x/weights_df.sum(axis=1))
    return weights_df

In [60]:
# show summary stats of players
def show_choices(player_df):
    total_choices = player_df.shape[0]*player_df.shape[1]
    print(f'- Rock: {(player_df==1).sum().sum()}, {(player_df==1).sum().sum() / total_choices:.2f}')
    print(f'- Paper: {(player_df==2).sum().sum()}, {(player_df==2).sum().sum() / total_choices:.2f}')
    print(f'- Scissor: {(player_df==3).sum().sum()}, {(player_df==3).sum().sum() / total_choices:.2f}')

In [59]:
# main functions for running games x rounds
def play_n_times(games, rounds_per_game, rounds_before_adapting, 
                 player_weights_initial = [1,1,1], 
                 opponent_weights_initial = [1,1,1]):
    # games: the number of rows of dataframes
    # rounds_per_games: columns
    player = pd.DataFrame()
    player_weights = normalize(pd.DataFrame(np.tile(player_weights_initial, (games, 1))))
    opponent = pd.DataFrame()
    opponent_weights = normalize(pd.DataFrame(np.tile(opponent_weights_initial, (games, 1))))

    results = pd.DataFrame()
    
    # before adapting
    for round_th in range(0, rounds_before_adapting): 
        # player choices
        player['round'+str(round_th)] = player_weights.apply(make_choice, axis=1)
        # opponent choices
        opponent['round'+str(round_th)] = opponent_weights.apply(make_choice, axis=1)
        # calculate, apply result mapping
        results['round'+str(round_th)] = (player['round'+str(round_th)] - opponent['round'+str(round_th)])\
        .apply(lambda x: play(x))
        
    # start adapting
    for round_th in tqdm(range(rounds_before_adapting, rounds_per_game)):
        # calculate weights from previous round
        current_weights = pd.DataFrame({
            'w1': (opponent.iloc[:,-rounds_before_adapting:]==3).sum(axis=1),
            'w2': (opponent.iloc[:,-rounds_before_adapting:]==1).sum(axis=1),
            'w3': (opponent.iloc[:,-rounds_before_adapting:]==2).sum(axis=1),
        })
        # eliminate negative + add small constant
        current_weights = current_weights.apply(lambda x: x-current_weights.min(axis=1)+0.1)
        # square
        current_weights = current_weights**2
        # normalize
        current_weights = normalize(current_weights)
        
        # player make choice base on current weights
        player['round'+str(round_th)] = current_weights.apply(make_choice, axis=1)
        # opponent choices
        opponent['round'+str(round_th)] = opponent_weights.apply(make_choice, axis=1)
         # calculate, apply result mapping
        results['round'+str(round_th)] = (player['round'+str(round_th)] - opponent['round'+str(round_th)])\
        .apply(lambda x: play(x))
    
    print(f'Average net wins after {rounds_per_game} rounds in {games} games:')
    print(results.sum(axis=1).mean())
    print('-'*50)
    print('Player\'s choices:')
    show_choices(player)
    print('-'*50)
    print('Opponent\'s choices:')
    show_choices(opponent)
    print('-'*50)
    return player, opponent, results

In [61]:
%%time
player, opponent, results = play_n_times(games = 100, rounds_per_game=1000, 
                                         rounds_before_adapting = 15,
                                         opponent_weights_initial = [4,3,3]
                                         )

  0%|          | 0/985 [00:00<?, ?it/s]

Average net wins after 1000 rounds in 100 games:
26.96
--------------------------------------------------
Player's choices:
- Rock: 25076, 0.25
- Paper: 50227, 0.50
- Scissor: 24697, 0.25
--------------------------------------------------
Opponent's choices:
- Rock: 40066, 0.40
- Paper: 29852, 0.30
- Scissor: 30082, 0.30
--------------------------------------------------
CPU times: total: 13 s
Wall time: 16.5 s


Remember last 20 picks instead of 15:

In [62]:
%%time
player, opponent, results = play_n_times(games = 100, rounds_per_game=1000, 
                                         rounds_before_adapting = 20,
                                         opponent_weights_initial = [4,3,3]
                                         )

  0%|          | 0/980 [00:00<?, ?it/s]

Average net wins after 1000 rounds in 100 games:
31.75
--------------------------------------------------
Player's choices:
- Rock: 23745, 0.24
- Paper: 52936, 0.53
- Scissor: 23319, 0.23
--------------------------------------------------
Opponent's choices:
- Rock: 40085, 0.40
- Paper: 29905, 0.30
- Scissor: 30010, 0.30
--------------------------------------------------
CPU times: total: 13.5 s
Wall time: 17.1 s


Consider opponent had a most favorite choice and least favorite choice

In [63]:
%%time
player, opponent, results = play_n_times(games = 100, rounds_per_game=1000, 
                                         rounds_before_adapting = 20,
                                         opponent_weights_initial = [4,3,2]
                                         )

  0%|          | 0/980 [00:00<?, ?it/s]

Average net wins after 1000 rounds in 100 games:
93.67
--------------------------------------------------
Player's choices:
- Rock: 8058, 0.08
- Paper: 62039, 0.62
- Scissor: 29903, 0.30
--------------------------------------------------
Opponent's choices:
- Rock: 44541, 0.45
- Paper: 33249, 0.33
- Scissor: 22210, 0.22
--------------------------------------------------
CPU times: total: 13.4 s
Wall time: 17.2 s


Reduce the disparity in the percentage

In [64]:
%%time
player, opponent, results = play_n_times(games = 100, rounds_per_game=1000, 
                                         rounds_before_adapting = 20,
                                         opponent_weights_initial = [3.5,3.3,3.1]
                                         )

  0%|          | 0/980 [00:00<?, ?it/s]

Average net wins after 1000 rounds in 100 games:
5.52
--------------------------------------------------
Player's choices:
- Rock: 26577, 0.27
- Paper: 39440, 0.39
- Scissor: 33983, 0.34
--------------------------------------------------
Opponent's choices:
- Rock: 35486, 0.35
- Paper: 33589, 0.34
- Scissor: 30925, 0.31
--------------------------------------------------
CPU times: total: 13.7 s
Wall time: 17.1 s
