In [1]:
from keras.layers import Input, Dense, Conv2D,concatenate,Flatten
from keras.models import Model

input_cards = Input(shape=(4,13,4), name="cards_input")
input_actions = Input(shape=(2,6,4), name="actions_input")
input_position = Input(shape=(1,),name="position_input")

x1 = Conv2D(32,(2,2),activation='relu')(input_cards)
x2 = Conv2D(32,(2,2),activation='relu')(input_actions)
x3 = Dense(1,activation='relu')(input_position)

d1 = Dense(128,activation='relu')(x1)
d1 = Flatten()(d1)
d2 = Dense(128,activation='relu')(x2)
d2 = Flatten()(d2)
x = concatenate([d1,d2,x3])
x = Dense(128)(x)
x = Dense(32)(x)
out = Dense(4)(x)

model = Model(inputs=[input_cards, input_actions,input_position], outputs=out)
model.compile(optimizer='rmsprop', loss='mse')


Using TensorFlow backend.


In [2]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
cards_input (InputLayer)        (None, 4, 13, 4)     0                                            
__________________________________________________________________________________________________
actions_input (InputLayer)      (None, 2, 6, 4)      0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 3, 12, 32)    544         cards_input[0][0]                
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 1, 5, 32)     544         actions_input[0][0]              
__________________________________________________________________________________________________
dense_2 (D

In [3]:
def get_card_X(card):
    suit = card[0]
    if(suit == 'S'):
        return 0
    elif(suit == 'H'):
        return 1
    elif(suit=='D'):
        return 2
    elif(suit=='C'):
        return 3

def get_card_Y(card):
    index = card[1]
    if(index=='A'):
        return 12
    elif(index=='K'):
        return 11
    elif(index=='Q'):
        return 10    
    elif(index=='J'):
        return 9
    elif(index=='T'):
        return 8
    else:
        return int(index)-2


def get_street_grid(cards):
    grid = np.zeros((4,13))
    for card in cards:
        grid[get_card_X(card),get_card_Y(card)] = 1
    return grid

def convert_street_to_image(eff_stack,round_state,street):
    image = np.zeros((2,6))
    actions = round_state["action_histories"][street]
    index = 0
    turns = 0
    for action in actions:
        #max of 12actions per street
        if('amount' in action and turns < 6):
            image[index,turns] = action['amount'] / eff_stack
        index += 1
        if(index%2 == 0):
            index=0
            turns +=1
    return image




In [4]:
from pypokerengine.api.emulator import Emulator
import numpy as np

#Prepare game state
#25 BB Deep RPZ SPIN AND GO
starting_stack = 100
small_blind = 10

emulator = Emulator()
emulator.set_game_rule(player_num=2, max_round=10, small_blind_amount=small_blind, ante_amount=0)

players_info = {
  "sb_player": { "name": "sb_player", "stack": starting_stack },
  "bb_player": { "name": "bb_player", "stack": starting_stack },
}

#ready the game brother
initial_game_state = emulator.generate_initial_game_state(players_info)


In [5]:
def is_round_finished(events):
    for e in events:
        if e['type'] == 'event_round_finish':
            return True
    return False

def new_street(events):
    for e in events:
        if e['type'] == 'event_new_street':
            return e['street']
        else:
            return ''
def get_flop(event):
    return event['round_state']['community_card']
def get_turn(event):
    return event['round_state']['community_card'][2]
def get_river(event):
    return event['round_state']['community_card'][3]

def get_minraise_amount(events):
    for e in events:
        if(e['type'] == 'event_ask_player'):
            return e['valid_actions'][2]['amount']['min']
        
def get_all_in_amount(events):
    for e in events:
        if(e['type'] == 'event_ask_player'):
            return e['valid_actions'][2]['amount']['max']    

def get_call_amount(events):
    for e in events:
        if(e['type'] == 'event_ask_player'):
            return e['valid_actions'][1]['amount']  
        
def get_reward(events):
    for e in events:
        if(e['type'] == 'event_round_finish'):
            if(e['winners'][0]['uuid'] == 'sb_player'):
                return (e['winners'][0]['stack'] - starting_stack)
            else:
                return -(e['winners'][0]['stack'] - starting_stack)
    


In [6]:
#both the players have the same network

from collections import deque
max_replay_size = 50

num_hands = 1
experience_state = []
experience_reward = []
total_reward = 0
y = .99
e = 0.1


for i in range(num_hands):
    reward_sb = 0
    reward_bb = 0
    #Starts with dealer
    is_sb_turn = True
    
    sb_has_played = False
    bb_has_played = False
    #start new hand
    initial_game_state = emulator.generate_initial_game_state(players_info)
    game_state, events = emulator.start_new_round(initial_game_state)
    bb_cards = [game_state['table'].seats.players[0].hole_card[0].__str__(),
                game_state['table'].seats.players[0].hole_card[1].__str__()]

    sb_cards = [game_state['table'].seats.players[1].hole_card[0].__str__(),
                game_state['table'].seats.players[1].hole_card[1].__str__()]
    
    bb_cards_img = get_street_grid(bb_cards)
    sb_cards_img = get_street_grid(sb_cards)
    
    flop_cards_img = np.zeros((4,13))
    turn_cards_img = np.zeros((4,13))
    river_cards_img = np.zeros((4,13))
    
    flop  = []
    turn  = []
    river = []
   
    #while hand is still at play
    while(is_round_finished(events) == False):
        street = new_street(events)
        if(street == 'flop'):
            flop = get_flop(events[0])
            flop_cards_img = get_street_grid(flop)
        elif(street == 'turn'):
            turn = get_turn(events[0])
            
            turn_cards_img = get_street_grid([turn])
        elif(street == 'river'):
            river = get_river(events[0])
            river_cards_img = get_street_grid([river])
            #print('river:',river)
 
        #Compute all the features
    
        #position(ez)
        sb_position = 1
        bb_position = 0
        
        #action matrix
        preflop_actions = convert_street_to_image(starting_stack,events[0]['round_state'],'preflop')
        flop_actions = np.zeros((2,6))
        turn_actions = np.zeros((2,6))
        river_actions = np.ones((2,6))
        
        if(len(flop) > 0):
            flop_actions = convert_street_to_image(starting_stack,events[0]['round_state'],'flop')
            if(len(turn) >0):
                turn_actions = convert_street_to_image(starting_stack,events[0]['round_state'],'turn')
                if(len(river) >0):
                    turn_actions = convert_street_to_image(starting_stack,events[0]['round_state'],'river')
        
        
                    
        actions_feature = np.stack([preflop_actions,flop_actions,turn_actions,river_actions],axis=2).reshape((1,2,6,4))
        
        if(is_sb_turn == True):
            #save old state to update it asap with new rew
            if(sb_has_played == True):
                old_state = sb_features
                targetQ = allQ_sb
                oldAction = action_sb
                
            #current state
            sb_cards_feature = np.stack([sb_cards_img,flop_cards_img,turn_cards_img,river_cards_img],
                                    axis=2).reshape((1,4,13,4))
            sb_features = [sb_cards_feature,actions_feature,np.array([sb_position]).reshape((1,1))]
            
            #run model to choose action
            allQ_sb = model.predict(sb_features)
            action_sb = np.argmax(allQ_sb)
            #print(allQ_sb)
            
            #randomly discover new line
            if(np.random.rand(1) < e):
                action_sb = np.random.randint(0,4)
            
             
            #perform action
            if(action_sb == 0):
                
                game_state,events = emulator.apply_action(game_state,'fold',0)
            elif(action_sb == 1):
               # print('yey')
                game_state,events = emulator.apply_action(game_state,'call',get_call_amount(events))
            elif(action_sb == 2):
                #print('yey')
                game_state,events = emulator.apply_action(game_state,'raise',get_minraise_amount(events))
            else:
                #print('yey')
                game_state,events = emulator.apply_action(game_state,'raise',get_all_in_amount(events))
                
            
            
            if(sb_has_played == True):
                reward_sb += y*np.max(allQ_sb)
                targetQ[0,action_sb] = reward_sb
                ##model.fit(old_state,targetQ,verbose=0)
                experience_state.append(old_state)
                experience_reward.append(targetQ)
                if(len(experience_state) > max_replay_size):
                    del experience_state[0]
                    del experience_reward[0]
                
            sb_has_played = True
            is_sb_turn = False
        else:
            #save old state to update it asap with new rew
            if(bb_has_played == True):
                old_state = bb_features
                targetQ = allQ_bb
                oldAction = action_bb
                
            
            #current state
            bb_cards_feature = np.stack([bb_cards_img,flop_cards_img,turn_cards_img,river_cards_img],
                                    axis=2).reshape((1,4,13,4))
            bb_features = [sb_cards_feature,actions_feature,np.array([bb_position]).reshape((1,1))]
            
            #run model to choose action
            allQ_bb = model.predict(bb_features)
            action_bb = np.argmax(allQ_bb)
            
            #randomly discover new line
            if(np.random.rand(1) < e):
                action_bb = np.random.randint(0,4)
                
            #perform action
            if(action_bb == 0):
                
                game_state,events = emulator.apply_action(game_state,'fold',0)
            elif(action_bb == 1):
                game_state,events = emulator.apply_action(game_state,'call',get_call_amount(events))
            elif(action_bb == 2):
                game_state,events = emulator.apply_action(game_state,'raise',get_minraise_amount(events))
            else:
                game_state,events = emulator.apply_action(game_state,'raise',get_all_in_amount(events))
                
            
            
            if(bb_has_played == True):
                reward_bb += y*np.max(allQ_bb)
                targetQ[0,action_bb] = reward_bb
                #model.fit(old_state,targetQ,verbose=0)
                experience_state.append(old_state)
                experience_reward.append(targetQ)
                if(len(experience_state) > max_replay_size):
                    del experience_state[0]
                    del experience_reward[0]

            bb_has_played = True
            is_sb_turn = True
     
    r  = get_reward(events)
    total_reward +=r
    
    if(i%50 == 0):
        print('round',i)

        print('total won (SB):',total_reward)
        print(sb_cards)
        print(allQ_sb)
        print(events[0]['round_state']['action_histories'])
        if(bb_has_played):
            print(allQ_bb)
    
    #update sb with ACTUAL value
    allQ_sb[0,action_sb] = r
    experience_state.append(sb_features)
    experience_reward.append(allQ_sb)

#    model.fit(sb_features,allQ_sb,verbose=0)
    

    if(bb_has_played):   
        allQ_bb[0,action_bb] = -r
        experience_state.append(bb_features)
        experience_reward.append(allQ_bb)

 #       model.fit(bb_features,allQ_bb,verbose=0)
  
        
    if(len(experience_state) > max_replay_size):
        del experience_state[0]
        del experience_reward[0]

    for e in range(len(experience_state)):
        model.fit(experience_state[e],experience_reward[e],verbose = 0)

round 0
total won (SB): 20
['C2', 'H2']
[[-0.02546011  0.06943804 -0.13784268 -0.03538672]]
{'preflop': [{'action': 'SMALLBLIND', 'amount': 10, 'add_amount': 10, 'uuid': 'bb_player'}, {'action': 'BIGBLIND', 'amount': 20, 'add_amount': 10, 'uuid': 'sb_player'}, {'action': 'CALL', 'amount': 20, 'paid': 10, 'uuid': 'bb_player'}, {'action': 'CALL', 'amount': 20, 'paid': 0, 'uuid': 'sb_player'}], 'flop': [{'action': 'CALL', 'amount': 0, 'paid': 0, 'uuid': 'bb_player'}, {'action': 'CALL', 'amount': 0, 'paid': 0, 'uuid': 'sb_player'}], 'turn': [{'action': 'CALL', 'amount': 0, 'paid': 0, 'uuid': 'bb_player'}, {'action': 'CALL', 'amount': 0, 'paid': 0, 'uuid': 'sb_player'}], 'river': [{'action': 'FOLD', 'uuid': 'bb_player'}]}
[[-0.02805644  0.05535145 -0.15495068 -0.07144513]]


In [81]:
len(experience_state)

672

In [68]:
events

[{'round_state': {'action_histories': {'flop': [],
    'preflop': [{'action': 'SMALLBLIND',
      'add_amount': 10,
      'amount': 10,
      'uuid': 'sb_player'},
     {'action': 'BIGBLIND',
      'add_amount': 10,
      'amount': 20,
      'uuid': 'bb_player'},
     {'action': 'FOLD', 'uuid': 'sb_player'}]},
   'big_blind_pos': 0,
   'community_card': ['C3', 'D2', 'CA', 'SJ', 'H2'],
   'dealer_btn': 0,
   'next_player': 0,
   'pot': {'main': {'amount': 30}, 'side': []},
   'round_count': 1,
   'seats': [{'name': 'bb_player',
     'stack': 510,
     'state': 'participating',
     'uuid': 'bb_player'},
    {'name': 'sb_player',
     'stack': 490,
     'state': 'folded',
     'uuid': 'sb_player'}],
   'small_blind_amount': 10,
   'small_blind_pos': 1,
   'street': 'showdown'},
  'type': 'event_round_finish',
  'winners': [{'stack': 510, 'uuid': 'bb_player'}]}]