In [57]:
import pickle
import sys
from pathlib import Path

sys.path.append("..")

In [58]:
from games.abstract_poker import AbstractPokerRootChanceGameState
from games.algorithms import ChanceSamplingCFR, VanillaCFR
from common.constants import card_dealing, results_map
import numpy as np
import pandas as pd

from games.competition_abstract_poker import CompetitionAbstractPoker
from copy import deepcopy

In [59]:
# Hyperparameters
# Number of CFR iterations used to train each bot in the train_bots_* functions.
_NUM_TRAIN_ITERS = 100_000
# Number of rounds played per bot pairing in the competition cells.
_COMPETITION_ROUNDS = 100_000
# When True, the single-example cell trains two small bots and plays one reported round.
_RUN_SINGLE_EXAMPLE = True

# Try single example

In [60]:
if _RUN_SINGLE_EXAMPLE:
    # Quick smoke run: train two briefly-trained bots (max hand strength 5,
    # then 3) and let them play a single round with reporting switched on.
    bots = []

    for _example_strength in (5, 3):
        root = AbstractPokerRootChanceGameState(
            max_hand_strength = _example_strength,
            max_turns = 2,
            ante = 0.1,
            report = False,
        )
        chance_sampling_cfr = ChanceSamplingCFR(root)
        chance_sampling_cfr.run(iterations = 100)
        chance_sampling_cfr.compute_nash_equilibrium()
        bots.append(chance_sampling_cfr)

    results = []
    rounds_per_competition = 1

    # The competition receives copies, not the trained bot objects themselves.
    c = CompetitionAbstractPoker(
        deepcopy(bots[0]),
        deepcopy(bots[1]),
        max_turns = 2,
        ante = 0.1,
        report = True,
    )
    result = c.compete(rounds = rounds_per_competition)
    print(f"Result is {result}")

-------------------------------------------------------------------
Player 0 with card 393 and self-estimated strength 2/5.
Information set is .2.
Action distribution was {'BET': 0.19775784769203267, 'CHECK_INITIATE': 0.8022421523079674}. Player chose BET.
Current turn log is ['BET']. Current action log is [['BET']].
['BET' 'FOLD']: 393.786 and 0.1
Result is 0.1


# Granularity and depth competitions

In [61]:
def train_bots_fixed_granularity_advantage(ante_list, granularity = [3,10], depth = 2, report = True, iterations = None):
    """Train a weak and a strong bot per ante that differ only in granularity.

    For every ante two chance-sampling CFR bots are trained at the same
    depth: one with ``max_hand_strength = granularity[0]`` (weak) and one
    with ``max_hand_strength = granularity[1]`` (strong).

    Args:
        ante_list: Iterable of ante values; one weak/strong pair is trained
            per entry, in order.
        granularity: Two-element sequence ``[weak, strong]`` of
            ``max_hand_strength`` values. The default list is only read,
            never mutated.
        depth: ``max_turns`` shared by both bots.
        report: If True, print ``value_of_the_game()`` for each weak bot
            once all training is done (strong bots are not reported).
        iterations: CFR iterations per bot. ``None`` (the default) uses the
            notebook-level ``_NUM_TRAIN_ITERS``, looked up at call time.

    Returns:
        Tuple ``(bots_weak, bots_strong)`` of equally long lists of trained
        ``ChanceSamplingCFR`` objects, ordered like ``ante_list``.
    """

    def _train_bot(max_hand_strength, ante):
        """Build the game root for one configuration and solve it with CFR."""
        num_iterations = _NUM_TRAIN_ITERS if iterations is None else iterations
        root = AbstractPokerRootChanceGameState(max_hand_strength = max_hand_strength,
                                                max_turns = depth,
                                                ante = ante,
                                                report = False)
        # Only chance-sampling CFR is trained; a VanillaCFR variant used to
        # sit here as commented-out code and was never run.
        solver = ChanceSamplingCFR(root)
        solver.run(iterations = num_iterations)
        solver.compute_nash_equilibrium()
        return solver

    bots_weak = []
    bots_strong = []

    for a in ante_list:
        # Weak before strong for each ante, matching the original training order.
        bots_weak.append(_train_bot(granularity[0], a))
        bots_strong.append(_train_bot(granularity[1], a))

    if report:
        print("------------------------------------------------------------")
        for b in bots_weak:
            print(f"Value of game for ante {b.root.ante}: {b.value_of_the_game():.5f}")

    return bots_weak, bots_strong

In [62]:
def train_bots_fixed_depth_advantage(ante_list, granularity = 5, depth = [1,3], report = True, iterations = None):
    """Train a weak and a strong bot per ante that differ only in depth.

    For every ante two chance-sampling CFR bots are trained at the same
    granularity: one with ``max_turns = depth[0]`` (weak) and one with
    ``max_turns = depth[1]`` (strong).

    Args:
        ante_list: Iterable of ante values; one weak/strong pair is trained
            per entry, in order.
        granularity: ``max_hand_strength`` shared by both bots.
        depth: Two-element sequence ``[weak, strong]`` of ``max_turns``
            values. The default list is only read, never mutated.
        report: If True, print ``value_of_the_game()`` for each weak bot
            once all training is done (strong bots are not reported).
        iterations: CFR iterations per bot. ``None`` (the default) uses the
            notebook-level ``_NUM_TRAIN_ITERS``, looked up at call time.

    Returns:
        Tuple ``(bots_weak, bots_strong)`` of equally long lists of trained
        ``ChanceSamplingCFR`` objects, ordered like ``ante_list``.
    """

    def _train_bot(max_turns, ante):
        """Build the game root for one configuration and solve it with CFR."""
        num_iterations = _NUM_TRAIN_ITERS if iterations is None else iterations
        root = AbstractPokerRootChanceGameState(max_hand_strength = granularity,
                                                max_turns = max_turns,
                                                ante = ante,
                                                report = False)
        # Only chance-sampling CFR is trained; a VanillaCFR variant used to
        # sit here as commented-out code and was never run.
        solver = ChanceSamplingCFR(root)
        solver.run(iterations = num_iterations)
        solver.compute_nash_equilibrium()
        return solver

    bots_weak = []
    bots_strong = []

    for a in ante_list:
        # Weak before strong for each ante, matching the original training order.
        bots_weak.append(_train_bot(depth[0], a))
        bots_strong.append(_train_bot(depth[1], a))

    if report:
        print("------------------------------------------------------------")
        for b in bots_weak:
            print(f"Value of game for ante {b.root.ante}: {b.value_of_the_game():.5f}")

    return bots_weak, bots_strong

## Train bots

In [63]:
# Antes 0.1, 0.2, ..., 2.0; rounding to one decimal strips floating-point
# noise left by the multiplication (e.g. 0.1 * 3).
ante_list = list(np.round(0.1 * np.arange(1, 21), 1))
ante_list

[0.1,
 0.2,
 0.3,
 0.4,
 0.5,
 0.6,
 0.7,
 0.8,
 0.9,
 1.0,
 1.1,
 1.2,
 1.3,
 1.4,
 1.5,
 1.6,
 1.7,
 1.8,
 1.9,
 2.0]

In [64]:
# Same depth for both sides; only the hand-strength granularity differs (3 vs 10).
weak_granularity_bots, strong_granularity_bots = train_bots_fixed_granularity_advantage(
    ante_list = ante_list,
    granularity = [3,10],
    depth = 2,
)

------------------------------------------------------------
Value of game for ante 0.1: 0.00002
Value of game for ante 0.2: 0.00005
Value of game for ante 0.3: -0.00003
Value of game for ante 0.4: -0.01358
Value of game for ante 0.5: -0.02901
Value of game for ante 0.6: -0.03240
Value of game for ante 0.7: -0.02847
Value of game for ante 0.8: -0.02223
Value of game for ante 0.9: -0.02092
Value of game for ante 1.0: -0.02245
Value of game for ante 1.1: -0.02840
Value of game for ante 1.2: -0.03619
Value of game for ante 1.3: -0.03868
Value of game for ante 1.4: -0.04051
Value of game for ante 1.5: -0.04296
Value of game for ante 1.6: -0.04561
Value of game for ante 1.7: -0.04661
Value of game for ante 1.8: -0.04375
Value of game for ante 1.9: -0.04331
Value of game for ante 2.0: -0.04067


In [65]:
# Same granularity for both sides; only the training depth differs (1 vs 3).
weak_depth_bots, strong_depth_bots = train_bots_fixed_depth_advantage(
    ante_list = ante_list,
    granularity = 5,
    depth = [1,3],
)

------------------------------------------------------------
Value of game for ante 0.1: -0.00005
Value of game for ante 0.2: -0.01191
Value of game for ante 0.3: -0.02141
Value of game for ante 0.4: -0.02416
Value of game for ante 0.5: -0.03120
Value of game for ante 0.6: -0.04015
Value of game for ante 0.7: -0.04863
Value of game for ante 0.8: -0.04529
Value of game for ante 0.9: -0.04444
Value of game for ante 1.0: -0.04371
Value of game for ante 1.1: -0.04404
Value of game for ante 1.2: -0.04504
Value of game for ante 1.3: -0.04693
Value of game for ante 1.4: -0.04853
Value of game for ante 1.5: -0.04995
Value of game for ante 1.6: -0.05217
Value of game for ante 1.7: -0.05051
Value of game for ante 1.8: -0.04881
Value of game for ante 1.9: -0.04617
Value of game for ante 2.0: -0.04425


In [66]:
# low_granularity_bots = train_bots_fixed_granularity(depth = [1,2], granularity = 3)
# high_granularity_bots = train_bots_fixed_granularity(depth = [1,2], granularity = 10)

## Competition

In [67]:
def run_competition(weak_bots, strong_bots, rounds, report):
    """Play each weak bot against the strong bot at the same list position.

    Bots are paired by index over ``weak_bots``. Each game is played at the
    larger of the two bots' ``max_turns`` and at the weak bot's ante (the
    strong bot's ante is not consulted).

    Args:
        weak_bots: Sequence of trained bots exposing ``root.max_hand_strength``,
            ``root.max_turns`` and ``root.ante``.
        strong_bots: Sequence of opponents, indexed in parallel with
            ``weak_bots``.
        rounds: Number of rounds passed to ``CompetitionAbstractPoker.compete``.
        report: If True, print one summary line per pairing.

    Returns:
        List with one row per pairing:
        ``[weak_granularity, strong_granularity, weak_depth, strong_depth,
        game_depth, ante, result]``.
    """
    results = []

    for i, weak_bot in enumerate(weak_bots):
        strong_bot = strong_bots[i]

        weak_granularity = weak_bot.root.max_hand_strength
        strong_granularity = strong_bot.root.max_hand_strength

        weak_depth = weak_bot.root.max_turns
        strong_depth = strong_bot.root.max_turns
        game_depth = max(weak_depth, strong_depth)

        ante = weak_bot.root.ante

        # The competition gets deep copies rather than the trained bots
        # themselves (presumably so play cannot alter them -- TODO confirm).
        competition = CompetitionAbstractPoker(deepcopy(weak_bot), deepcopy(strong_bot),
                                               max_turns = game_depth,
                                               ante = ante, report = False)
        result = competition.compete(rounds = rounds)
        results.append([weak_granularity, strong_granularity,
                        weak_depth, strong_depth, game_depth, ante, result])

        if report:
            # Label both dimensions: the old line printed the depths unlabeled
            # and only the weak bot's granularity, so a 3-vs-10 granularity
            # match was logged as "2 vs 2. Game granularity 3".
            print(f"Granularity {weak_granularity} vs {strong_granularity}. "
                  f"Depth {weak_depth} vs {strong_depth}. "
                  f"Ante {ante}. Result is {result}")

    return results

In [68]:
# Weak-granularity bots against strong-granularity bots, paired by ante.
granularity_advantage_competition = run_competition(
    weak_granularity_bots,
    strong_granularity_bots,
    rounds = _COMPETITION_ROUNDS,
    report = True,
)

2 vs 2. Game granularity 3. Ante 0.1. Result is -0.0052910000000015705
2 vs 2. Game granularity 3. Ante 0.2. Result is -0.019254000000006082
2 vs 2. Game granularity 3. Ante 0.3. Result is -0.0220459999999944
2 vs 2. Game granularity 3. Ante 0.4. Result is -0.028324000000012242
2 vs 2. Game granularity 3. Ante 0.5. Result is -0.04387
2 vs 2. Game granularity 3. Ante 0.6. Result is -0.053396000000002934
2 vs 2. Game granularity 3. Ante 0.7. Result is -0.057911999999994926
2 vs 2. Game granularity 3. Ante 0.8. Result is -0.06291600000000705
2 vs 2. Game granularity 3. Ante 0.9. Result is -0.05492399999999568
2 vs 2. Game granularity 3. Ante 1.0. Result is -0.06381
2 vs 2. Game granularity 3. Ante 1.1. Result is -0.07807400000001702
2 vs 2. Game granularity 3. Ante 1.2. Result is -0.057497999999991216
2 vs 2. Game granularity 3. Ante 1.3. Result is -0.07498700000000666
2 vs 2. Game granularity 3. Ante 1.4. Result is -0.07405599999999359
2 vs 2. Game granularity 3. Ante 1.5. Result is -0.0

In [69]:
# Weak-depth bots against strong-depth bots, paired by ante.
depth_advantage_competition = run_competition(
    weak_depth_bots,
    strong_depth_bots,
    rounds = _COMPETITION_ROUNDS,
    report = True,
)

1 vs 3. Game granularity 5. Ante 0.1. Result is -0.0003740000000000029
1 vs 3. Game granularity 5. Ante 0.2. Result is -0.010862000000003088
1 vs 3. Game granularity 5. Ante 0.3. Result is -0.026597999999998338
1 vs 3. Game granularity 5. Ante 0.4. Result is -0.03168400000000173
1 vs 3. Game granularity 5. Ante 0.5. Result is -0.059865
1 vs 3. Game granularity 5. Ante 0.6. Result is -0.06166000000000754
1 vs 3. Game granularity 5. Ante 0.7. Result is -0.08969099999999611
1 vs 3. Game granularity 5. Ante 0.8. Result is -0.06395199999999102
1 vs 3. Game granularity 5. Ante 0.9. Result is -0.07479800000000611
1 vs 3. Game granularity 5. Ante 1.0. Result is -0.08866
1 vs 3. Game granularity 5. Ante 1.1. Result is -0.10196399999999715
1 vs 3. Game granularity 5. Ante 1.2. Result is -0.08876600000000237
1 vs 3. Game granularity 5. Ante 1.3. Result is -0.1114529999999996
1 vs 3. Game granularity 5. Ante 1.4. Result is -0.09224999999999764
1 vs 3. Game granularity 5. Ante 1.5. Result is -0.085

# Export results 

In [70]:
# Both competitions return rows in the same layout, so one column list serves both frames.
_result_columns = ['player1_granularity', 'player2_granularity',
                   'player1_depth', 'player2_depth',
                   'game_depth', 'ante', 'result']

granularity_advantage_df = pd.DataFrame(granularity_advantage_competition, columns = _result_columns)
depth_advantage_df = pd.DataFrame(depth_advantage_competition, columns = _result_columns)

In [71]:
# Stack the granularity rows on top of the depth rows; ignore_index renumbers the combined rows from 0.
results_df = pd.concat([granularity_advantage_df, depth_advantage_df], ignore_index = True)

In [72]:
# Preview the first three rows of the combined results.
results_df.head(3)

Unnamed: 0,player1_granularity,player2_granularity,player1_depth,player2_depth,game_depth,ante,result
0,3,10,2,2,2,0.1,-0.005291
1,3,10,2,2,2,0.2,-0.019254
2,3,10,2,2,2,0.3,-0.022046


In [73]:
# Create the target directory first: to_csv does not create missing parent
# directories, so a checkout without "output/" would otherwise fail here.
output_path = Path("output") / "advantages_by_ante.csv"
output_path.parent.mkdir(parents = True, exist_ok = True)
results_df.to_csv(output_path, index = False)