In [1]:
from pypokerengine.api.game import setup_config, start_poker
from pypokerengine.utils.card_utils import gen_cards
from pypokerengine.players import BasePokerPlayer
from baseline_players import RandomPlayer
# from bots.honest_player import HonestPlayer
from bots.honest_player import HonestPlayer
from bots.fish_player import FishPlayer
from bots.fold_player import FoldPlayer
from copy import deepcopy
import scipy.stats as sps
import keras.backend as K
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

Using TensorFlow backend.


In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

In [3]:
N_RANDOM_PLAYERS = 6

# Game settings: max_round=50, every player starts with a stack of 1500,
# small blind is 15.
config = setup_config(small_blind_amount=15, initial_stack=1500, max_round=50)

# The three scripted bots are registered first, in this order.
config.register_player(algorithm=FoldPlayer(), name="foldman")
config.register_player(algorithm=HonestPlayer(20), name="honest")
config.register_player(algorithm=FishPlayer(), name="fishman")

# The remaining players act randomly and are named random0 .. random5.
for i in range(N_RANDOM_PLAYERS):
    config.register_player(algorithm=RandomPlayer(), name="random{}".format(i))

In [4]:
%%time

def get_all_results(config, n_games=1):
    """Play ``n_games`` games with ``config`` and collect the final stacks.

    Parameters
    ----------
    config : game configuration with the players already registered; its
        ``players_info`` entries provide the player names.
    n_games : int, number of times ``start_poker`` is run.

    Returns
    -------
    pandas.DataFrame with one column per player name and one row per game,
    holding the ``stack`` reported for that player in the game result.
    """
    # Start from an empty list of stacks for every registered player.
    stacks_by_player = {info['name']: [] for info in config.players_info}

    for _ in tqdm(range(n_games)):
        game_result = start_poker(config, verbose=0)
        for seat in game_result['players']:
            stacks_by_player[seat['name']].append(seat['stack'])

    return pd.DataFrame(stacks_by_player)

# Run 30 games with the configured players; `res` is the DataFrame returned by
# get_all_results (one row per game, one column per player name).
res = get_all_results(config, n_games=30)
# Timing notes from earlier runs, apparently seconds per game and total elapsed
# time. The "my" / "base" labels presumably compare two bot implementations —
# TODO confirm what each label refers to.
#3.4s/it 5:40 - my
#5.9s/it 9:59 - base

100%|██████████| 30/30 [01:00<00:00,  2.23s/it]

CPU times: user 59.5 s, sys: 875 ms, total: 1min
Wall time: 1min





In [6]:
res

Unnamed: 0,fishman,foldman,honest,random0,random1,random2,random3,random4,random5
0,11082,855,1515,0,0,0,0,0,0
1,11202,870,1356,0,0,0,0,0,0
2,11213,840,1350,0,0,0,0,0,0
3,11305,870,1230,0,0,0,0,0,0
4,11446,870,1110,0,0,0,0,0,0
5,11427,840,1170,0,0,0,0,0,0
6,11154,855,1407,0,0,0,0,0,0
7,10810,840,1827,0,0,0,0,0,0
8,11102,840,1500,0,0,0,0,0,0
9,11010,855,1582,0,0,0,0,0,0


In [4]:
res.describe()

Unnamed: 0,fishman,foldman,honest,random0,random1,random2,random3,random4,random5
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,9869.15,786.0,265.85,631.15,0.0,0.0,625.95,0.0,1256.1
std,5074.870341,113.758747,309.170584,2822.588608,0.0,0.0,2799.333501,0.0,3866.200228
min,0.0,555.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,11892.0,787.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,12044.0,817.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,12633.5,855.0,600.0,0.0,0.0,0.0,0.0,0.0,0.0
max,12859.0,915.0,750.0,12623.0,0.0,0.0,12519.0,0.0,12582.0


In [16]:
class RLPokerPlayer(BasePokerPlayer):
    """Poker player whose action is chosen by a neural network.

    The network receives the feature vector built by
    ``process_current_data`` and must return one score per action index:

    * 0 - fold
    * 1 - call
    * 2 - raise by the minimum allowed amount
    * 3 and above - raise by twice the minimum, capped at the maximum

    Every encoded state and every chosen action index is appended to
    ``self.history`` so that the games can later be used for training.
    """

    # Card encoding: 7 slots (2 hole cards + up to 5 community cards), each
    # slot holding [rank, suit one-hot x 4]. 7 * 5 == 35 features.
    N_CARD_SLOTS = 7
    CARD_SLOT_WIDTH = 5

    def __init__(self, nnet, alpha=0.1, lambda_agr=0.1, lambda_blef=0.1):
        """Create a player around a private copy of ``nnet``.

        Parameters
        ----------
        nnet : model exposing ``predict``; it is deep-copied so the caller's
            instance is not shared with this player.
        alpha, lambda_agr, lambda_blef : float hyper-parameters. They are
            only stored here; no method of this class reads them yet.

        Raises
        ------
        ValueError
            If ``nnet`` is ``None``.
        """
        super().__init__()
        if nnet is None:
            raise ValueError('Try to use some nnet!')
        self.nnet = deepcopy(nnet)
        self.alpha = alpha
        self.lambda_agr = lambda_agr
        self.lambda_blef = lambda_blef
        self.full_agr = []
        self.last_agr = []
        self.full_blef = []
        self.last_blef = []
        self.history = {'moves': [], 'states': [], 'prev_stats': [], 'rewards': []}

    def process_current_data(self, hole_card, round_state):
        """Encode the current observation as a flat feature vector.

        Layout of the returned 1-D float array (length ``35 + 2 * n_players``):

        * 35 card features: for each of the 7 card slots, the rank followed by
          a 4-way suit one-hot; slots of cards not dealt yet stay zero;
        * ``n_players`` flags, 1 if the seat has not folded, else 0;
        * ``n_players`` stacks.

        Seat-wise features are rotated so that the seat given by
        ``round_state['next_player']`` comes first (assumed to be this
        player's own seat while it is asked to act — TODO confirm).

        The vector is appended to ``self.history['states']`` and returned.
        """
        seats = round_state['seats']
        my_pos = round_state['next_player']
        community_card = round_state['community_card'] or []
        all_card = hole_card + community_card

        cards_raw = np.zeros(self.N_CARD_SLOTS * self.CARD_SLOT_WIDTH)
        for i, card in enumerate(gen_cards(all_card)):
            base = i * self.CARD_SLOT_WIDTH
            cards_raw[base] = card.rank
            # Suits are assumed to be the power-of-two flags 2, 4, 8, 16
            # (TODO confirm against the engine's Card class), so log2 maps
            # them to offsets 1..4. Offset 0 is the rank slot and must not be
            # used for the suit, otherwise the rank would be overwritten.
            cards_raw[base + int(np.log2(card.suit))] = 1

        actives = np.array([int(player['state'] != 'folded') for player in seats])
        stacks = np.array([player['stack'] for player in seats])

        state = np.concatenate([
            cards_raw,
            np.roll(actives, -my_pos),
            np.roll(stacks, -my_pos),
        ]).astype(float)

        self.history['states'].append(state)
        return state

    def declare_action(self, valid_actions, hole_card, round_state):
        """Pick the action with the highest network score.

        ``valid_actions`` is indexed as [fold, call, raise]; the raise entry
        carries ``{'min': ..., 'max': ...}`` under ``'amount'`` and a ``min``
        of -1 is treated as "raising is not allowed", in which case the
        player calls instead.

        Returns
        -------
        (action, amount) tuple for the engine. The index of the action that
        was actually played is appended to ``self.history['moves']``.
        """
        cur_data = self.process_current_data(hole_card, round_state)
        # predict returns a (1, n_actions) batch; flatten it so that argmax
        # ranges over the actions rather than over the batch axis.
        probs = np.asarray(self.nnet.predict(cur_data.reshape((1, -1)))).reshape(-1)
        move = int(np.argmax(probs))

        if move < 2:
            chosen = valid_actions[move]
            action, amount = chosen["action"], chosen["amount"]
        else:
            raise_info = valid_actions[2] if len(valid_actions) > 2 else None
            if raise_info is not None and raise_info["amount"]["min"] != -1:
                limits = raise_info["amount"]
                action = raise_info["action"]
                if move == 2:
                    amount = limits["min"]
                else:
                    amount = min(2 * limits["min"], limits["max"])
            else:
                # Raising is impossible: fall back to a call and record that.
                move = 1
                chosen = valid_actions[1]
                action, amount = chosen["action"], chosen["amount"]

        self.history['moves'].append(move)
        return action, amount

    def receive_game_start_message(self, game_info):
        """Remember the number of players and the rules of the game."""
        self.n_player = game_info["player_num"]
        self.game_rule = game_info['rule']

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Engine callback; nothing to do at round start."""
        pass

    def receive_street_start_message(self, street, round_state):
        """Engine callback; nothing to do at street start."""
        pass

    def receive_game_update_message(self, action, round_state):
        """Engine callback invoked after an action is played.

        TODO: opponent statistics are not implemented yet, so this callback
        deliberately does nothing.
        """
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Engine callback; rewards are not recorded yet."""
        pass

In [38]:
def baseline_model(input_dim=8, hidden_units=10, n_classes=3):
    """Build and compile a one-hidden-layer softmax classifier.

    Parameters
    ----------
    input_dim : int, number of input features (default 8).
    hidden_units : int, width of the ReLU hidden layer (default 10).
    n_classes : int, number of softmax outputs (default 3).

    With the defaults the network is 8 -> 10 -> 3.

    Returns
    -------
    A compiled ``Sequential`` model using categorical cross-entropy loss, the
    Adam optimizer and the accuracy metric.
    """
    # create model
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [39]:
m = baseline_model()

In [40]:
X = sps.bernoulli.rvs(size=(32,8),p=0.5)

In [43]:
# Draw 32 random class labels from {0, 1, 2} and one-hot encode them:
# row k of y has a single 1 in column Z[k].
Z = sps.randint.rvs(low=0, high=3, size=32)
y = np.zeros((32, 3))
y[np.arange(len(y)), Z] = 1

In [45]:
m.fit(X,y,batch_size=32, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1187b4e48>

In [54]:
m.predict(np.ones(8).reshape(1,-1))

array([[ 0.33000377,  0.34456059,  0.3254357 ]], dtype=float32)

In [77]:
a = [[1,2],[5,-1],[5,2],[-1,110]]

In [78]:
sorted(a)

[[-1, 110], [1, 2], [5, -1], [5, 2]]

In [73]:
a = np.arange(8).reshape((4,2))

In [76]:
sorted(a)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [9]:
# DataFrame.from_csv has been removed from pandas; read_csv is its replacement.
# The files have no header row. A default integer index is used so that every
# column of the file stays available as data (from_csv defaulted to
# index_col=0 and parse_dates=True, i.e. it consumed the first column as a
# date-parsed index).
hands_train = pd.read_csv('poker-hand-training-true.data.txt', header=None)
hands_test = pd.read_csv('poker-hand-testing.data.txt', header=None)

In [12]:
import xgboost as xgb

ImportError: cannot import name 'xgb'