In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

from pypokerengine.players import BasePokerPlayer
from pypokerengine.utils.card_utils import Card, Deck
from pypokerengine.api.game import setup_config, start_poker

import pickle
import tensorflow as tf
import random

import sys
sys.path.insert(0, '../scripts/')

import PlayerModels as pm
from MyEmulator import MyEmulator
from DQNPlayer import DQNPlayer
from util import *

## Initialization

In [2]:
# Hidden-layer width handed to DQNPlayer as `h_size` when the agent is built.
# NOTE(review): presumably has to match the architecture of the checkpoint being
# restored — confirm against the training notebook/script.
h_size = 128

In [3]:
%time main_wp = DQNPlayer(h_size=h_size, is_restore=True, is_train=False, debug=True, is_double=True)

INFO:tensorflow:Restoring parameters from ../cache/models/DQN/model_4999.ckpt
CPU times: user 1.43 s, sys: 350 ms, total: 1.78 s
Wall time: 1.6 s


## Testing

In [4]:
# Smoke test: a two-round game with the restored agent ("wp") seated against
# eight CallPlayer opponents named f2..f9, run with verbose=1.
config = setup_config(max_round=2, initial_stack=1500, small_blind_amount=15)
config.register_player(name="wp", algorithm=main_wp)
for seat in range(2, 10):
    # Every seat gets its own CallPlayer instance, registered in f2..f9 order.
    config.register_player(name="f{}".format(seat), algorithm=pm.CallPlayer())

game_result = start_poker(config, verbose=1)

(['Hole:', ['H3', 'CK']],)
(['Start stack:', 1500],)
(['Estimation:', 0.0909],)
Started the round 1
Street "preflop" started. (community card = [])
"f4" declared "call:30"
"f5" declared "call:30"
"f6" declared "call:30"
"f7" declared "call:30"
"f8" declared "call:30"
"f9" declared "call:30"
(array([-1.06974602,  2.80618668,  1.05212402, -5.54151344, -1.16742325], dtype=float32),)
"wp" declared "call:30"
"f2" declared "call:30"
"f3" declared "call:30"
Street "flop" started. (community card = ['S8', 'D4', 'CA'])
"f2" declared "call:0"
"f3" declared "call:0"
"f4" declared "call:0"
"f5" declared "call:0"
"f6" declared "call:0"
"f7" declared "call:0"
"f8" declared "call:0"
"f9" declared "call:0"
(array([-2.65549469,  0.70095634, -0.29030037, -5.77394676, -0.88914108], dtype=float32),)
"wp" declared "call:0"
Street "turn" started. (community card = ['S8', 'D4', 'CA', 'DT'])
"f2" declared "call:0"
"f3" declared "call:0"
"f4" declared "call:0"
"f5" declared "call:0"
"f6" declared "call:0"
"f7"

## Metric

In [5]:
%time main_wp = DQNPlayer(h_size=h_size, is_restore=True, is_train=False, debug=False, is_double=True)

INFO:tensorflow:Restoring parameters from ../cache/models/DQN/model_4999.ckpt
CPU times: user 820 ms, sys: 155 ms, total: 975 ms
Wall time: 775 ms


In [6]:
# Evaluation table: the agent ("wp") plus two instances of each baseline
# opponent type, for games of up to 50 rounds.
config = setup_config(max_round=50, initial_stack=1500, small_blind_amount=15)
config.register_player(name="wp", algorithm=main_wp)

# (label, constructor) pairs, listed in the order the opponents are seated.
opponent_kinds = (
    ("CallPlayer", pm.CallPlayer),
    ("FoldPlayer", pm.FoldPlayer),
    ("HeuristicPlayer", pm.HeuristicPlayer),
    ("RandomPlayer", pm.RandomPlayer),
)
for label, make_player in opponent_kinds:
    for copy_no in (1, 2):
        # Names come out as CallPlayer1, CallPlayer2, FoldPlayer1, ...
        config.register_player(
            name="{}{}".format(label, copy_no),
            algorithm=make_player(),
        )

In [7]:
%%time
d = None
for i in range(100):
    game_result = start_poker(config, verbose=0)
    t = pd.DataFrame(game_result['players'])
    t['round'] = i
    print(i)
    if d is None:
        d = t
    else:
        d = pd.concat((d, t))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
CPU times: user 17min 1s, sys: 624 ms, total: 17min 2s
Wall time: 16min 38s


### After training against CallPlayer only for 3 hours

In [8]:
# Mean final stack per player over all games in `d`, lowest first.
# `stack` is selected before aggregating so that only that column is averaged;
# averaging the whole frame first is wasted work and, on pandas >= 2.0, raises
# TypeError if `d` holds any non-numeric player field (verify the schema of
# game_result['players']).
# NOTE(review): this cell reads the same `d` as the other summary cells in this
# notebook, so its output reflects whichever checkpoint was evaluated last, not
# necessarily the training duration named in the header above.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        748.66
RandomPlayer2        786.99
FoldPlayer1         1083.45
FoldPlayer2         1094.10
HeuristicPlayer1    1284.91
HeuristicPlayer2    1448.99
wp                  2213.15
CallPlayer1         2316.79
CallPlayer2         2510.93
Name: stack, dtype: float64

### After training against different players for 4 hours

In [9]:
# Mean final stack per player over all games in `d`, lowest first.
# `stack` is selected before aggregating so that only that column is averaged;
# averaging the whole frame first is wasted work and, on pandas >= 2.0, raises
# TypeError if `d` holds any non-numeric player field (verify the schema of
# game_result['players']).
# NOTE(review): this cell reads the same `d` as the other summary cells in this
# notebook, so its output reflects whichever checkpoint was evaluated last, not
# necessarily the training duration named in the header above.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        748.66
RandomPlayer2        786.99
FoldPlayer1         1083.45
FoldPlayer2         1094.10
HeuristicPlayer1    1284.91
HeuristicPlayer2    1448.99
wp                  2213.15
CallPlayer1         2316.79
CallPlayer2         2510.93
Name: stack, dtype: float64

### After training against different players for 8 hours

In [10]:
# Mean final stack per player over all games in `d`, lowest first.
# `stack` is selected before aggregating so that only that column is averaged;
# averaging the whole frame first is wasted work and, on pandas >= 2.0, raises
# TypeError if `d` holds any non-numeric player field (verify the schema of
# game_result['players']).
# NOTE(review): this cell reads the same `d` as the other summary cells in this
# notebook, so its output reflects whichever checkpoint was evaluated last, not
# necessarily the training duration named in the header above.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        748.66
RandomPlayer2        786.99
FoldPlayer1         1083.45
FoldPlayer2         1094.10
HeuristicPlayer1    1284.91
HeuristicPlayer2    1448.99
wp                  2213.15
CallPlayer1         2316.79
CallPlayer2         2510.93
Name: stack, dtype: float64

### After training against different players for 15 hours

In [11]:
# Mean final stack per player over all games in `d`, lowest first.
# `stack` is selected before aggregating so that only that column is averaged;
# averaging the whole frame first is wasted work and, on pandas >= 2.0, raises
# TypeError if `d` holds any non-numeric player field (verify the schema of
# game_result['players']).
# NOTE(review): this cell reads the same `d` as the other summary cells in this
# notebook, so its output reflects whichever checkpoint was evaluated last, not
# necessarily the training duration named in the header above.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        748.66
RandomPlayer2        786.99
FoldPlayer1         1083.45
FoldPlayer2         1094.10
HeuristicPlayer1    1284.91
HeuristicPlayer2    1448.99
wp                  2213.15
CallPlayer1         2316.79
CallPlayer2         2510.93
Name: stack, dtype: float64

### After training against different players for 26 hours

In [12]:
# Mean final stack per player over all games in `d`, lowest first.
# `stack` is selected before aggregating so that only that column is averaged;
# averaging the whole frame first is wasted work and, on pandas >= 2.0, raises
# TypeError if `d` holds any non-numeric player field (verify the schema of
# game_result['players']).
# NOTE(review): this cell reads the same `d` as the other summary cells in this
# notebook, so its output reflects whichever checkpoint was evaluated last, not
# necessarily the training duration named in the header above.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        748.66
RandomPlayer2        786.99
FoldPlayer1         1083.45
FoldPlayer2         1094.10
HeuristicPlayer1    1284.91
HeuristicPlayer2    1448.99
wp                  2213.15
CallPlayer1         2316.79
CallPlayer2         2510.93
Name: stack, dtype: float64