# Poker AI Setup

## Imports

In [130]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pokerenv.obs_indices as indices
from pokerenv.table import Table
from treys import Deck, Evaluator, Card
from pokerenv.common import GameState, PlayerState, PlayerAction, TablePosition, Action, action_list
from pokerenv.player import Player
from pokerenv.utils import pretty_print_hand, approx_gt, approx_lte
import types

## Create environment

In [131]:

def mod_int_to_str(card_int: int) -> str:
    """Return the short string form of a card integer.

    The result is the rank character looked up in ``Card.STR_RANKS`` followed
    by the suit character looked up in ``Card.INT_SUIT_TO_CHAR_SUIT``.
    """
    rank_idx, suit_idx = Card.get_rank_int(card_int), Card.get_suit_int(card_int)
    rank_char = Card.STR_RANKS[rank_idx]
    suit_char = Card.INT_SUIT_TO_CHAR_SUIT[suit_idx]
    return rank_char + suit_char

def mod_get_rank_int(card_int: int) -> int:
    """Extract the 4-bit rank field (bits 8-11) from a card integer.

    Args:
        card_int: Either a single card integer, or an indexable container
            whose first element is the card integer (only element 0 is used).

    Returns:
        The value of ``(card >> 8) & 0xF``.
    """
    # ``card_int is int`` would compare against the ``int`` type object and is
    # never true for an actual number; isinstance is the correct type test.
    if isinstance(card_int, (int, np.integer)):
        return (card_int >> 8) & 0xF
    return (card_int[0] >> 8) & 0xF

def mod_get_suit_int(card_int: int) -> int:
    """Extract the 4-bit suit field (bits 12-15) from a card integer.

    Args:
        card_int: Either a single card integer, or an indexable container
            whose first element is the card integer (only element 0 is used).

    Returns:
        The value of ``(card >> 12) & 0xF``.
    """
    # ``card_int is int`` would compare against the ``int`` type object and is
    # never true for an actual number; isinstance is the correct type test.
    if isinstance(card_int, (int, np.integer)):
        return (card_int >> 12) & 0xF
    return (card_int[0] >> 12) & 0xF

def mod_get_bitrank_int(card_int: int) -> int:
    """Extract the 13-bit bit-rank field (bits 16-28) from a card integer.

    Args:
        card_int: Either a single card integer, or an indexable container
            whose first element is the card integer (only element 0 is used).

    Returns:
        The value of ``(card >> 16) & 0x1FFF``.
    """
    # ``card_int is int`` would compare against the ``int`` type object and is
    # never true for an actual number; isinstance is the correct type test.
    if isinstance(card_int, (int, np.integer)):
        return (card_int >> 16) & 0x1FFF
    return (card_int[0] >> 16) & 0x1FFF

def mod_get_prime(card_int: int) -> int:
    """Extract the low 6-bit prime field (bits 0-5) from a card integer.

    Args:
        card_int: Either a single card integer, or an indexable container
            whose first element is the card integer (only element 0 is used).

    Returns:
        The value of ``card & 0x3F``.
    """
    # ``card_int is int`` would compare against the ``int`` type object and is
    # never true for an actual number; isinstance is the correct type test.
    if isinstance(card_int, (int, np.integer)):
        return card_int & 0x3F
    return card_int[0] & 0x3F

def mod_street_transition(self, transition_to_end=False):
    """Advance the table one street, dealing community cards along the way.

    Written to be bound to a table instance in place of its
    ``_street_transition`` method. The result of ``self.deck.draw(1)`` is
    indexed with ``[0]``, i.e. a single-card draw is treated as a list.

    Args:
        transition_to_end: When true, a single call keeps advancing through
            every remaining street instead of stopping after one step.
    """
    card_str = Card.int_to_str
    advanced = False

    # Pre-flop -> flop: three community cards replace the board.
    if self.street == GameState.PREFLOP:
        self.cards = self.deck.draw(3)
        flop = (card_str(self.cards[0]), card_str(self.cards[1]), card_str(self.cards[2]))
        self._write_event("*** FLOP *** [%s %s %s]" % flop)
        self.street = GameState.FLOP
        advanced = True

    # Flop -> turn: only if this call has not advanced yet, or we run to the end.
    if self.street == GameState.FLOP and (transition_to_end or not advanced):
        turn_card = self.deck.draw(1)[0]
        self.cards.append(turn_card)
        board = (
            card_str(self.cards[0]),
            card_str(self.cards[1]),
            card_str(self.cards[2]),
            card_str(self.cards[3]),
        )
        self._write_event("*** TURN *** [%s %s %s] [%s]" % board)
        self.street = GameState.TURN
        advanced = True

    # Turn -> river: same gating as above.
    if self.street == GameState.TURN and (transition_to_end or not advanced):
        river_card = self.deck.draw(1)[0]
        self.cards.append(river_card)
        board = (
            card_str(self.cards[0]),
            card_str(self.cards[1]),
            card_str(self.cards[2]),
            card_str(self.cards[3]),
            card_str(self.cards[4]),
        )
        self._write_event("*** RIVER *** [%s %s %s %s] [%s]" % board)
        self.street = GameState.RIVER
        advanced = True

    # River -> showdown: write the showdown once, then mark the hand finished.
    if self.street == GameState.RIVER and (transition_to_end or not advanced):
        if not self.hand_is_over and self.hand_history_enabled:
            self._write_show_down()
        self.hand_is_over = True

    # Reset the per-street betting state regardless of which branch ran.
    self.street_finished = False
    self.last_bet_placed_by = None
    self.first_to_act = None
    self.bet_to_match = 0
    self.minimum_raise = 0
    for seat in self.players:
        seat.finish_street()

# Create the environment:
def createEnviorment(active_players, agents, player_names, low_stack_bbs, high_stack_bbs, hand_history_location, invalid_action_penalty, track_single_player=False, seed=1):
    """Build a seeded ``Table`` and attach the modified helpers to it.

    Args:
        active_players: Passed to ``Table`` as its first positional argument.
        agents: Accepted for call-site compatibility; not used by this function.
        player_names: Forwarded as ``player_names``.
        low_stack_bbs: Forwarded as ``stack_low``.
        high_stack_bbs: Forwarded as ``stack_high``.
        hand_history_location: Forwarded as ``hand_history_location``.
        invalid_action_penalty: Forwarded as ``invalid_action_penalty``.
        track_single_player: Forwarded as ``track_single_player``.
        seed: Value handed to ``table.seed``. Defaults to 1, the value that
            was previously hard-coded.

    Returns:
        The configured ``Table`` instance.
    """
    table = Table(
        active_players,
        player_names=player_names,
        track_single_player=track_single_player,
        stack_low=low_stack_bbs,
        stack_high=high_stack_bbs,
        hand_history_location=hand_history_location,
        invalid_action_penalty=invalid_action_penalty,
    )
    table.seed(seed)

    # NOTE(review): these card helpers are stored as plain attributes on the
    # table instance; whether Table ever reads them is not visible here --
    # confirm they have the intended effect.
    table.int_to_str = mod_int_to_str
    table.get_rank_int = mod_get_rank_int
    table.get_suit_int = mod_get_suit_int
    table.get_bitrank_int = mod_get_bitrank_int
    table.get_prime = mod_get_prime
    # Bind the replacement as a method so it receives the table as ``self``.
    table._street_transition = types.MethodType(mod_street_transition, table)
    return table

## Env Modifications

## Learning Loop

In [132]:
def learningLoop(table, agents, active_players, n_iterations):
    """Play ``n_iterations`` hands on ``table`` and hand rewards to the agents.

    Every agent is reset at the start of each hand, so after the call each
    agent only holds the data recorded during the last hand played.

    The previous counter started at 1 and stopped once it reached
    ``n_iterations``, which played one hand fewer than requested; this version
    plays exactly ``n_iterations`` hands (none when ``n_iterations <= 0``).

    Args:
        table: Environment exposing ``reset()``, ``step(action)``,
            ``n_players`` and ``hand_history_enabled``.
        agents: Sequence of agents indexed by seat; each needs
            ``get_action(obs)``, ``reset()`` and a ``rewards`` list.
        active_players: Number of seats in play; written to ``table.n_players``.
        n_iterations: Number of hands to play.

    Returns:
        The number of hands played.
    """
    iteration = 0
    while iteration < n_iterations:
        iteration += 1  # 1-based number of the hand about to be played
        # Hand history is switched on for every 50th hand only.
        if iteration % 50 == 0:
            table.hand_history_enabled = True
        table.n_players = active_players
        obs = table.reset()
        for agent in agents:
            agent.reset()
        acting_player = int(obs[indices.ACTING_PLAYER])
        while True:
            action = agents[acting_player].get_action(obs)
            obs, reward, done, _ = table.step(action)
            if done:
                # Distribute final rewards
                for seat in range(active_players):
                    agents[seat].rewards.append(reward[seat])
                break
            # This step can be skipped unless invalid action penalty is enabled,
            # since we only get a reward when the pot is distributed, and the done flag is set
            agents[acting_player].rewards.append(reward[acting_player])
            acting_player = int(obs[indices.ACTING_PLAYER])
        table.hand_history_enabled = False

    return iteration

# Agents

## Random Agent

In [133]:
# Agent that makes random actions
class RandomAgent:
    """Agent that draws its action at random from the valid-action entries."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Start fresh, empty action / observation / reward histories."""
        self.actions, self.observations, self.rewards = [], [], []

    def get_action(self, observation):
        """Record ``observation`` and return a randomly chosen ``Action``.

        The action type is drawn with ``np.random.choice`` from the positions
        where ``observation[indices.VALID_ACTIONS]`` equals 1. A bet size is
        drawn between the reported low and high bounds only for ``BET``;
        every other action carries a size of 0.
        """
        self.observations.append(observation)
        valid_mask = observation[indices.VALID_ACTIONS] == 1
        candidates = np.argwhere(valid_mask).flatten()
        low_bound = observation[indices.VALID_BET_LOW]
        high_bound = observation[indices.VALID_BET_HIGH]
        picked = PlayerAction(np.random.choice(candidates))
        if picked is PlayerAction.BET:
            amount = np.random.uniform(low_bound, high_bound)
        else:
            amount = 0
        decision = Action(picked, amount)
        self.actions.append(decision)
        return decision

## Fold Agent

In [None]:
# Agent that always folds
class FoldAgent:
    """Agent that answers every observation with ``PlayerAction.FOLD``."""

    def __init__(self):
        self.actions = []
        self.observations = []
        self.rewards = []

    def get_action(self, observation):
        """Record ``observation`` and return a fold ``Action`` with size 0.

        The observation is stored but not inspected: the decision never
        depends on it, so the previously computed (and unused) list of valid
        actions has been dropped.
        """
        self.observations.append(observation)
        table_action = Action(PlayerAction.FOLD, 0)
        self.actions.append(table_action)
        return table_action

    def reset(self):
        """Discard the recorded actions, observations and rewards."""
        self.actions = []
        self.observations = []
        self.rewards = []

## Determined Agent

In [None]:
# Agent that never folds
class DeterminedAgent:
    """Agent that answers every observation with ``PlayerAction.CALL``."""

    def __init__(self):
        self.actions = []
        self.observations = []
        self.rewards = []

    def get_action(self, observation):
        """Record ``observation`` and return a call ``Action`` with size 0.

        The observation is stored but not inspected; the unused lookups of
        the valid actions and bet bounds have been removed.
        NOTE(review): CALL is returned even when the observation may not list
        it as valid -- confirm how the table treats that case.
        """
        self.observations.append(observation)
        table_action = Action(PlayerAction.CALL, 0)
        self.actions.append(table_action)
        return table_action

    def reset(self):
        """Discard the recorded actions, observations and rewards."""
        self.actions = []
        self.observations = []
        self.rewards = []

## Aggressive agent

In [None]:
# Agent that always raises
class AggressiveAgent:
    """Agent that always bets the highest amount the observation reports."""

    def __init__(self):
        self.actions = []
        self.observations = []
        self.rewards = []

    def get_action(self, observation):
        """Record ``observation`` and return a ``BET`` of the upper bet bound.

        The bet size is ``observation[indices.VALID_BET_HIGH]``. The unused
        lookups of the valid actions and the lower bet bound have been removed.
        """
        self.observations.append(observation)
        valid_bet_high = observation[indices.VALID_BET_HIGH]
        table_action = Action(PlayerAction.BET, valid_bet_high)
        self.actions.append(table_action)
        return table_action

    def reset(self):
        """Discard the recorded actions, observations and rewards."""
        self.actions = []
        self.observations = []
        self.rewards = []

## Chance agent

In [None]:
# Agent that uses encoded hand values to make decisions
class ValueAgent:
    """Agent whose decision is driven by a caller-supplied ``value_model``."""

    def __init__(self, value_model):
        self.actions = []
        self.observations = []
        self.rewards = []
        # Object exposing ``predict``; called with a 1x2 array in get_action.
        self.value_model = value_model

    def get_action(self, observation):
        """Record ``observation`` and return the model-driven ``Action``.

        The model is queried with ``[[hand_value, pot_size]]`` and the element
        at ``[0][0]`` of its prediction is used as the score:

        * score > 0.5 -> ``BET`` with a size drawn between the reported low
          and high bet bounds;
        * otherwise, if the upper bet bound is 0.0 -> ``CALL``;
        * otherwise -> ``FOLD``.

        Non-bet actions carry a bet size of 0. Previously ``bet_size`` was
        only assigned on the ``BET`` path, so the other two paths raised
        ``UnboundLocalError`` when building the ``Action``.
        """
        self.observations.append(observation)
        valid_bet_low = observation[indices.VALID_BET_LOW]
        valid_bet_high = observation[indices.VALID_BET_HIGH]
        # NOTE(review): named "hand_value" but read from the acting player's
        # stack-size slot -- confirm this is the intended model input.
        hand_value = observation[indices.ACTING_PLAYER_STACK_SIZE]
        pot_size = observation[indices.POT_SIZE]
        bet_value = self.value_model.predict(np.array([[hand_value, pot_size]]))[0][0]

        bet_size = 0  # default for CALL / FOLD, which carry no bet amount
        if bet_value > 0.5:
            chosen_action = PlayerAction.BET
            bet_size = np.random.uniform(valid_bet_low, valid_bet_high)
        elif valid_bet_high == 0.0:
            chosen_action = PlayerAction.CALL
        else:
            chosen_action = PlayerAction.FOLD
        table_action = Action(chosen_action, bet_size)
        self.actions.append(table_action)
        return table_action

    def reset(self):
        """Discard the recorded actions, observations and rewards.

        ``value_model`` is left untouched.
        """
        self.actions = []
        self.observations = []
        self.rewards = []

# Run Throughs

## Random Run Through

In [126]:
# Number of seats at the table for this run.
active_players = 6
# One agent per seat; sized from active_players so the two cannot drift apart.
agents = [RandomAgent() for _ in range(active_players)]
player_names = {0: 'TrackedAgent1', 1: 'Agent2'} # Rest are defaulted to player3, player4...
# Should we only log the 0th players (here TrackedAgent1) private cards to hand history files
track_single_player = True 
# Bounds for randomizing player stack sizes in reset()
low_stack_bbs = 50
high_stack_bbs = 200
hand_history_location = 'hands/'
invalid_action_penalty = 0

table = createEnviorment(active_players, agents, player_names, low_stack_bbs, high_stack_bbs, hand_history_location, invalid_action_penalty, track_single_player)
# Seed the table explicitly for this run.
table.seed(1)


In [127]:
# Iteration limit handed to the loop for this run.
n_iterations = 1000
# Last expression of the cell, so the loop's returned counter is displayed.
learningLoop(table, agents, active_players, n_iterations)

1000

In [129]:
# Gather each agent's reward list. learningLoop resets every agent at the
# start of each hand, so these lists only cover the last hand played.
agent_rewards = [agent.rewards for agent in agents]
print("Agent rewards: ", agent_rewards)

Agent rewards:  [[0, 0, 0, 0, 0], [0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0], [0, 0]]
