In [1]:
from pypokerengine.api.game import setup_config, start_poker
from pypokerengine.utils.card_utils import gen_cards
from pypokerengine.players import BasePokerPlayer
from baseline_players import RandomPlayer
from bots.honest_player import HonestPlayer
# from bots.honest_player import HonestPlayer
from bots.fish_player import FishPlayer
from bots.fold_player import FoldPlayer
from copy import deepcopy
import scipy.stats as sps
import keras.backend as K
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

Using TensorFlow backend.


In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

In [3]:
N_RANDOM_PLAYERS = 3

config = setup_config(max_round=50, initial_stack=1500, small_blind_amount=15)
config.register_player(name="foldman", algorithm=FishPlayer())
config.register_player(name="honest", algorithm=HonestPlayer(10))
config.register_player(name="fishman", algorithm=FishPlayer())
for i in range(N_RANDOM_PLAYERS):
    config.register_player(name="random" + str(i), algorithm=RandomPlayer())
# config.register_player(name="honest", algorithm=HonestPlayer(10))

In [4]:
%%time

def get_all_results(config, n_games=1):
    players_stack_dict = {}
    for player_info in config.players_info:
        players_stack_dict[player_info['name']] = []
        
    for i in tqdm(range(n_games)):
        for player_info in start_poker(config, verbose=0)['players']:
            players_stack_dict[player_info['name']].append(player_info['stack'])
            
    return pd.DataFrame(players_stack_dict)

res = get_all_results(config, n_games=1)
#3.4s/it 5:40 - my
#5.9s/it 9:59 - base

  0%|          | 0/1 [00:00<?, ?it/s]


TypeError: list indices must be integers or slices, not str

In [5]:
res

Unnamed: 0,fishman,foldman,honest,random0,random1,random2,random3,random4,random5
0,11260,795,1425,0,0,0,0,0,0
1,0,1005,0,0,0,0,12460,0,0
2,0,960,0,0,0,12496,0,0,0
3,0,1020,6543,0,5913,0,0,0,0
4,10886,825,1717,0,0,0,0,0,0
5,11375,900,1222,0,0,0,0,0,0
6,11174,825,1462,0,0,0,0,0,0
7,12902,555,0,0,0,0,0,0,0
8,0,855,0,0,0,0,12590,0,0
9,10698,870,1915,0,0,0,0,0,0


In [6]:
res.describe()

Unnamed: 0,fishman,foldman,honest,random0,random1,random2,random3,random4,random5
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,4158.633333,887.0,2133.866667,24.433333,1032.233333,1666.566667,1490.666667,1260.933333,805.866667
std,5569.237944,106.338756,3427.695272,133.826878,3305.520614,4321.575652,3961.730529,3847.46824,2675.882474
min,0.0,555.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,825.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,862.5,1155.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,11102.75,960.0,1682.0,0.0,0.0,0.0,0.0,0.0,0.0
max,12902.0,1110.0,12523.0,733.0,12621.0,12542.0,12590.0,12616.0,12634.0


In [16]:
class RLPokerPlayer(BasePokerPlayer):
    def __init__(self, nnet, alpha=0.1, lambda_agr=0.1, lambda_blef=0.1):
        super().__init__()
        if nnet is None:
            raise Exception('Try to use some nnet!')
        self.nnet = deepcopy(nnet)
        self.alpha = alpha
        self.history = {'moves': [], 'states': [], 'prev_stats': [], 'rewards': []}
        
    def process_current_data(self, hole_card, round_state):
        my_pos = round_state['next_player']
        n_players = len(round_state['seats'])
        community_card = round_state['community_card']
        if community_card is None:
            community_card = []
        cur_round = len(community_card)
        all_card = hole_card + community_card
        converted_cards = [(x.suit, x.rank) for x in gen_cards(all_card)]
        cards_raw = np.zeros(35)
        suit_class = (lambda x: int(np.log2(x)) - 1)
        for i in range(len(all_card)):
            cards_raw[i*5] = converted_cards[i][1]
            cards_raw[i*5 + suit_class(converted_cards[i][0])] = 1
        real_seat = np.roll(np.arange(n_players), n_players - my_pos)
        actives = np.array([int(player['state'] != 'folded') for player in round_state['seats']])
        stacks = np.array([player['stack'] for player in round_state['seats']])
        
        self.history['states'].append
        
    def declare_action(self, valid_actions, hole_card, round_state):
        cur_data = self.process_current_data(hole_card, round_state)
        probs = self.nnet.predict(cur_data.reshape((1,-1)))
        probs = [(probs[i], i) for i in range(len(probs))]
        probs = sorted(probs)
        best = probs[-1]
        if best[1] < 2:
            call_action_info = valid_actions[best[1]]
            action, amount = call_action_info["action"], call_action_info["amount"]
        if best[1] >= 2:
            if valid_actions[2]['min'] != -1:
                call_action_info = valid_actions[2]
                action, amount = call_action_info["action"], call_action_info["amount"]["min"] \
                    if best[1] else min(2 * call_action_info["amount"]["min"], call_action_info["amount"]["max"])
            else:
                best[1] = 1
                call_action_info = valid_actions[1]
                action, amount = call_action_info["action"], call_action_info["amount"]
        
        self.history['moves'].append(best[1])
        return action, amount
        
    def receive_game_start_message(self, game_info):
        self.n_player = game_info["player_num"]
        self.game_rule = game_info['rule']

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        if sps.bernoulli

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass

In [38]:
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(10, input_dim=8, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [39]:
m = baseline_model()

In [40]:
X = sps.bernoulli.rvs(size=(32,8),p=0.5)

In [43]:
y = np.zeros((32,3))
Z = sps.randint.rvs(size=32,low=0,high=3)
for i in range(len(y)):
    y[i][Z[i]] = 1

In [45]:
m.fit(X,y,batch_size=32, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1187b4e48>

In [54]:
m.predict(np.ones(8).reshape(1,-1))

array([[ 0.33000377,  0.34456059,  0.3254357 ]], dtype=float32)

In [77]:
a = [[1,2],[5,-1],[5,2],[-1,110]]

In [78]:
sorted(a)

[[-1, 110], [1, 2], [5, -1], [5, 2]]

In [73]:
a = np.arange(8).reshape((4,2))

In [76]:
sorted(a)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [38]:
hands_train = pd.DataFrame.from_csv('poker-hand-training-true.data.txt', header=None)
hands_test = pd.DataFrame.from_csv('poker-hand-testing.data.txt', header=None)

In [39]:
hands_train.shape

(25010, 10)

In [42]:
y_train = hands_train[10]
y_test = hands_test[10]
X_train = hands_train.drop(10,axis=1)
X_test = hands_test.drop(10,axis=1)

(25010, 9)

In [13]:
import xgboost as xgb

In [48]:
X_train.shape, y_train.shape

((25010, 9), (25010,))

In [56]:
%%time
model = xgb.XGBClassifier(
                            n_estimators=200, 
                            max_depth=3, 
                            learning_rate=1e-2,
                            colsample_bytree=0.5,
                            sublample=0.5,
                            random_seed=1,
                            n_threads=4,
                            tree_method='hist',
                            objective='multi:softmax',
                        ).fit(X_train, y_train, verbose=2,                    
                             )

CPU times: user 14.2 s, sys: 64.9 ms, total: 14.2 s
Wall time: 14.8 s


In [57]:
from sklearn.metrics import accuracy_score

In [58]:
accuracy_score(model.predict(X_test), y_test)

0.52325900000000003

In [60]:
p = model.predict(X_test)

In [63]:
np.sum(p), p.shape

(43154, (1000000,))