# Single Play Poker Environment

This notebook contains an environment that plays poker with a single person.

## Install required packages

In [4]:
# Shell escape: install the `gym` package with pip.
# NOTE(review): none of the cells shown in this notebook import gym - confirm it is still needed.
!pip install gym

Collecting gym
[?25l  Downloading https://files.pythonhosted.org/packages/0c/c4/307107c687f75267d645415d57db8c0a6e29e20ac30d8f4a10e8030b6737/gym-0.12.5.tar.gz (1.5MB)
[K    100% |████████████████████████████████| 1.5MB 19.0MB/s 
Collecting pyglet>=1.2.0 (from gym)
[?25l  Downloading https://files.pythonhosted.org/packages/1c/fc/dad5eaaab68f0c21e2f906a94ddb98175662cc5a654eee404d59554ce0fa/pyglet-1.3.2-py2.py3-none-any.whl (1.0MB)
[K    100% |████████████████████████████████| 1.0MB 29.2MB/s 
Building wheels for collected packages: gym
  Building wheel for gym (setup.py) ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/cf/a5/c9/87967963aa32540d543e51bcf0d0fc19c5d68b8f49598d3b98
Successfully built gym
Installing collected packages: pyglet, gym
Successfully installed gym-0.12.5 pyglet-1.3.2
[33mYou are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## Environment classes

### imports

In [1]:
from enum import Enum
import random
import numpy as np
import sys
from typing import Tuple

### Suit Class(Enum)

* This class defines a card's suit: Clover, Diamond, Heart, or Spade.

In [2]:
class Suit(Enum):
    """Suit of a playing card.

    The integer values are used elsewhere in this notebook: as the row index
    of the card map, as the tie-breaker when ordering cards of equal number,
    and in the card serial number.
    """
    Clover    = 0
    Diamond   = 1
    Heart     = 2
    Spade     = 3

### Helper variables that convert Suit -> string and number -> string

In [3]:
# Display name for every suit, used when a card is rendered as text.
suit_to_str = dict(
    zip(
        (Suit.Heart, Suit.Spade, Suit.Clover, Suit.Diamond),
        ("Heart", "Spade", "Clover", "Diamond"),
    )
)

# Display name for every card number. Index 0 is a placeholder so that a
# card's number can be used directly as the index.
num_to_string = [0] + [
    "Ace", "Two", "Three", "Four", "Five", "Six", "Seven",
    "Eight", "Nine", "Ten", "Jack", "Queen", "King",
]

### Card class

* This class implements a single card.

In [4]:
class Card:
    """A single playing card, identified by its suit and its number.

    Cards are ordered by number first and by the suit's integer value second.
    Comparing a card with something that is not a Card yields ``False`` for
    ``==`` and raises ``TypeError`` for the ordering operators, instead of
    failing with an AttributeError inside the comparison.
    """

    def __init__(self, suit: 'Suit', num: int):
        """Store the suit and the number (1 = Ace ... 13 = King)."""
        self.suit = suit  # type: Suit
        self.num  = num   # type: int

    def __repr__(self) -> str:
        return "{} of {}".format(num_to_string[self.num], suit_to_str[self.suit])

    def _sort_key(self) -> Tuple[int, int]:
        """Return the (number, suit value) pair that all ordering operators compare."""
        return (self.num, self.suit.value)

    def __eq__(self, other: object) -> bool:
        # NotImplemented lets Python fall back to its default (identity)
        # comparison, so `card == None` is simply False.
        if not isinstance(other, Card):
            return NotImplemented
        return self.num == other.num and self.suit == other.suit

    def __lt__(self, other: 'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other: 'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other: 'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other: 'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__(self) -> int:
        # Built from the same fields as __eq__, so equal cards hash equally.
        return hash((self.suit.value, self.num))

    def clone(self) -> 'Card':
        """Return a new Card with the same suit and number."""
        return Card(self.suit, self.num)

    def get_serial(self) -> int:
        """Return ``suit.value * 13 + num``, a single integer identifying the card."""
        return self.suit.value * 13 + self.num

### Player class

* This class implements a player's status.

In [5]:
class Player:
    """Player record holding the current hand, the current bid and the total rewards."""

    def __init__(self):
        # The hand list must exist before reset() empties it.
        self.hand          = []
        self.total_rewards = 0
        self.reset()

    def reset(self) -> None:
        """Zero the bid and empty the hand in place; total_rewards is left untouched."""
        self.bid = 0
        del self.hand[:]

### Poker class

* This class lists the poker hands and provides helper methods that judge which poker hand your cards make.

In [58]:
class Poker(Enum):
    """Poker hand ranks, plus helpers that classify a set of cards.

    A smaller value is a stronger hand, so ``a < b`` means "a beats b".

    Every ``is*`` helper takes the 4 x 14 count matrix built by
    ``createCardMap`` (row = suit value, column = card number, column 0 is
    never used) and returns either the rank it tests for or ``Nothing``.
    """
    RoyalStraightFlash = 0
    StraightFlash      = 1
    FourCard           = 2
    FullHouse          = 3
    Flash              = 4
    Straight           = 5
    ThreeCard          = 6
    TwoPair            = 7
    OnePair            = 8
    Nothing            = 9
    Fold               = 10

    def __lt__(self, other):
        return self.value < other.value

    def __gt__(self, other):
        return self.value > other.value

    @classmethod
    def createCardMap(cls, cards: 'List[Card]'):
        """Count the cards into a 4 x 14 integer array indexed [suit value][number]."""
        card_map = np.zeros((4, 14), dtype=int)
        for card in cards:
            card_map[card.suit.value][card.num] += 1
        return card_map

    @classmethod
    def getPoker(cls, hand: 'List[Card]', public: 'List[Card]') -> 'Poker':
        """Return the strongest rank made by the hand and public cards together.

        The judgements run from strongest to weakest and the first one that
        matches wins. The card map does not depend on card order, so the
        cards are not sorted first.
        """
        card_map = cls.createCardMap(hand + public)
        for judgement in (cls.isRoyalStraightFlash, cls.isStraightFlash, cls.isFourCard,
                          cls.isFullHouse, cls.isFlash, cls.isStraight, cls.isThreeCard,
                          cls.isNPair):
            judge = judgement(card_map)
            if judge is not cls.Nothing:
                return judge
        return cls.Nothing

    @classmethod
    def isRoyalStraightFlash(cls, card_map) -> 'Poker':
        """Ace, Ten, Jack, Queen and King all present in one suit."""
        possibility = [0,1,0,0,0,0,0,0,0,0,1,1,1,1]
        # The dot product counts, per suit, how many of the five royal cards are held.
        if np.any(np.dot(card_map, possibility) == 5):
            return cls.RoyalStraightFlash
        return cls.Nothing

    @classmethod
    def isStraightFlash(cls, card_map) -> 'Poker':
        """Five consecutive numbers in one suit, from A-2-3-4-5 up to 9-10-J-Q-K.

        The ten-to-ace run is deliberately left to isRoyalStraightFlash.
        """
        for line in card_map:
            for i in range(1, 10):
                if np.all(line[i:i+5]):
                    return cls.StraightFlash
        return cls.Nothing

    @classmethod
    def isFourCard(cls, card_map) -> 'Poker':
        """One number held in all four suits."""
        if np.any(np.all(card_map == 1, axis=0)):
            return cls.FourCard
        return cls.Nothing

    @classmethod
    def isFullHouse(cls, card_map) -> 'Poker':
        """A triple plus a pair, or two triples (possible with seven cards)."""
        hist = np.sum(card_map, axis=0)
        pair = np.count_nonzero(hist == 2)
        triple = np.count_nonzero(hist == 3)
        if (pair >= 1 and triple >= 1) or triple >= 2:
            return cls.FullHouse
        return cls.Nothing

    @classmethod
    def isFlash(cls, card_map) -> 'Poker':
        """Five or more cards of one suit.

        With seven cards a suit can hold six or seven of them, so the test is
        ``>= 5`` rather than ``== 5``.
        """
        per_suit = np.sum(card_map, axis=1)
        if np.any(per_suit >= 5):
            return cls.Flash
        return cls.Nothing

    @classmethod
    def isStraight(cls, card_map) -> 'Poker':
        """Five consecutive numbers in any suits, including the ace-high 10-J-Q-K-A."""
        present = np.sum(card_map, axis=0) > 0
        # Lay the numbers out as A,2,...,K,A so the ace can also close a
        # ten-to-ace run, while runs that wrap around (Q-K-A-2-3) still do
        # not count because the layout is linear.
        ladder = np.append(present[1:], present[1])
        for start in range(10):
            if np.all(ladder[start:start + 5]):
                return cls.Straight
        return cls.Nothing

    @classmethod
    def isThreeCard(cls, card_map) -> 'Poker':
        """At least three cards of one number."""
        hist = np.sum(card_map, axis=0)
        if np.any(hist >= 3):
            return cls.ThreeCard
        return cls.Nothing

    @classmethod
    def isNPair(cls, card_map) -> 'Poker':
        """TwoPair, OnePair or Nothing depending on how many numbers are held twice or more.

        Seven cards can contain three pairs; that still ranks as TwoPair, so
        the count is clamped before it is used as an index.
        """
        hist = np.sum(card_map, axis=0)
        pair = min(int(np.count_nonzero(hist >= 2)), 2)
        return (cls.Nothing, cls.OnePair, cls.TwoPair)[pair]

### Test code for each poker judgement 

- Reference: [遊びかた：ポーカー](https://www.nintendo.co.jp/others/playing_cards/howtoplay/poker/index.html)

In [59]:
from collections import OrderedDict

# For every expected judgement, a list of sample seven-card deals. Each deal
# is written as (suit, number) pairs and turned into Card objects here.
cards = OrderedDict(
    (expected, [[Card(suit, num) for suit, num in deal]])
    for expected, deal in (
        ("RoyalStraightFlash", ((Suit.Spade, 1), (Suit.Spade, 10), (Suit.Spade, 11), (Suit.Spade, 12),
                                (Suit.Spade, 13), (Suit.Heart, 1), (Suit.Diamond, 7))),
        ("StraightFlash",      ((Suit.Spade, 9), (Suit.Spade, 10), (Suit.Spade, 11), (Suit.Spade, 12),
                                (Suit.Spade, 13), (Suit.Heart, 1), (Suit.Diamond, 7))),
        ("FourCard",           ((Suit.Spade, 1), (Suit.Heart, 1), (Suit.Diamond, 1), (Suit.Clover, 1),
                                (Suit.Spade, 13), (Suit.Heart, 3), (Suit.Diamond, 7))),
        ("FullHouse",          ((Suit.Spade, 1), (Suit.Heart, 1), (Suit.Diamond, 3), (Suit.Clover, 3),
                                (Suit.Spade, 3), (Suit.Heart, 1), (Suit.Diamond, 10))),
        ("Flash",              ((Suit.Spade, 1), (Suit.Spade, 3), (Suit.Spade, 8), (Suit.Spade, 10),
                                (Suit.Spade, 13), (Suit.Heart, 10), (Suit.Diamond, 13))),
        ("Straight",           ((Suit.Spade, 1), (Suit.Heart, 2), (Suit.Clover, 3), (Suit.Diamond, 4),
                                (Suit.Spade, 5), (Suit.Heart, 10), (Suit.Diamond, 13))),
        ("ThreeCard",          ((Suit.Spade, 1), (Suit.Heart, 1), (Suit.Clover, 1), (Suit.Diamond, 5),
                                (Suit.Spade, 12), (Suit.Clover, 3), (Suit.Spade, 4))),
        ("TwoPair",            ((Suit.Spade, 1), (Suit.Heart, 1), (Suit.Clover, 2), (Suit.Diamond, 2),
                                (Suit.Spade, 12), (Suit.Clover, 7), (Suit.Spade, 9))),
        ("OnePair",            ((Suit.Spade, 1), (Suit.Clover, 1), (Suit.Spade, 3), (Suit.Spade, 4),
                                (Suit.Diamond, 5), (Suit.Clover, 7), (Suit.Spade, 9))),
        ("NoPoker",            ((Suit.Spade, 1), (Suit.Clover, 3), (Suit.Heart, 5), (Suit.Diamond, 7),
                                (Suit.Spade, 9), (Suit.Clover, 2), (Suit.Heart, 8))),
    )
)

for key, deals in cards.items():
    print("Expected {}.".format(key))
    for c in deals:
        # Shuffle in place so the split into a two-card hand and five public
        # cards below is arbitrary.
        random.shuffle(c)
        print(" - judgement is {}.".format(Poker.getPoker(c[0:2], c[2:])))
    print()

Expected RoyalStraightFlash.
 - judgement is Poker.RoyalStraightFlash.

Expected StraightFlash.
 - judgement is Poker.StraightFlash.

Expected FourCard.
 - judgement is Poker.FourCard.

Expected FullHouse.
 - judgement is Poker.FullHouse.

Expected Flash.
 - judgement is Poker.Flash.

Expected Straight.
 - judgement is Poker.Straight.

Expected ThreeCard.
 - judgement is Poker.ThreeCard.

Expected TwoPair.
 - judgement is Poker.TwoPair.

Expected OnePair.
 - judgement is Poker.OnePair.

Expected NoPoker.
 - judgement is Poker.Nothing.



### TexusHoldemRounds class (rounds of one game)

In [127]:
class TexusHoldemRounds(Enum):
    """Rounds of one game, in playing order.

    The values must stay consecutive: TexusHoldemEnv.step moves to the next
    round by constructing the member whose value is the current value + 1.
    """
    PREFLOP     = 1
    FLOP        = 2
    TURN        = 3
    RIVER       = 4
    SHOW_DOWN   = 5

### Poker Action Class

- This class (Enum) lists the actions a player can take.

In [61]:
class Action(Enum):
    """Actions a player can choose on their turn.

    The values must stay 0, 1, 2: AbstractPlayer.resetByRound rebuilds the
    list of available actions with Action(0), Action(1) and Action(2).
    """
    CALL   = 0
    DOUBLE = 1
    FOLD   = 2

In [137]:
class AbstractPlayer:
    """Base class for players; subclasses must provide `policy`.

    State is reset at three granularities: per game (hand, fold flag, bid),
    per round (the actions still available) and for the whole session
    (the `resetData` hook).
    """

    def __init__(self):
        self.total_rewards = 0
        # Same order as always: game state, round state, then the session hook.
        for reset in (self.resetByGame, self.resetByRound, self.resetData):
            reset()

    def resetByGame(self) -> None:
        """Drop the hand and fold flag; the bid is 0 while the hook runs and 1 afterwards."""
        self.hand, self.is_fold, self.bid = [], False, 0
        self.resetTempolaryData()
        self.bid = 1

    def resetByRound(self) -> None:
        """Make every action (values 0, 1 and 2) available again."""
        self.rest_actions = [Action(value) for value in range(3)]

    def policy(self) -> Action:
        """Choose the next action. Subclasses must override this."""
        raise NotImplementedError()

    def resetData(self):
        """Hook: override to initialise data kept across all games."""
        pass

    def resetTempolaryData(self):
        """Hook: override to initialise data kept for one game only."""
        pass

class TexusHoldemEnv:
    """Two-player Texas Hold'em style environment driven by Action choices.

    A game starts in the FLOP round. Within a round the players alternate;
    a CALL made after at least one earlier action in that round closes the
    round, and a FOLD ends the game at once. `evaluate` settles the game and
    `do` runs whole games using each player's `policy`.
    """

    def __init__(self, players:[AbstractPlayer], default_bid:int=10):
        """Store the players and deal the first game.

        players     -- the two players; only the first two receive cards.
        default_bid -- accepted for compatibility; the environment does not
                       use it (bids start from the players' own reset value).
        """
        super().__init__()
        self.decks          = []
        self.public_cards   = []
        self.players        = players

        self.resetByGame()

    def resetByGame(self) -> None:
        """Shuffle a fresh deck, deal two cards to each player and five public cards."""
        self.round_pointer = 0
        self.whose_turn = 0
        self.max_bid_in_game = 1
        self.round = TexusHoldemRounds.FLOP

        # Both lists are emptied in place; without clearing public_cards the
        # previous game's five cards would stay in front of the new ones.
        self.decks.clear()
        self.public_cards.clear()

        for suit in (Suit.Heart, Suit.Spade, Suit.Clover, Suit.Diamond):
            for i in range(1, 14):
                self.decks.append(Card(suit, i))
        random.shuffle(self.decks)

        for p in self.players:
            p.resetByGame()

        # Deck positions 0-1 and 2-3 are the two hands, 4-8 the public cards.
        for i in range(2):
            self.players[i].hand.extend(self.decks[i*2:i*2 + 2])
        self.public_cards.extend(self.decks[4:9])

        self.resetByRound()

    def resetByRound(self):
        """Forget the actions of the finished round and refill every player's actions."""
        self.action_in_round = []
        for p in self.players:
            p.resetByRound()

    def _next_round(self):
        self.round_pointer = 0

    def observe_all(self) -> dict:
        """Return a snapshot of the game with cloned cards.

        Keys: "public_cards" (only the cards open in the current round),
        "hands" (one list per player), "bids" and "round".
        """
        open_by_round = {
            TexusHoldemRounds.FLOP: 3,
            TexusHoldemRounds.TURN: 4,
            TexusHoldemRounds.RIVER: 5,
            TexusHoldemRounds.SHOW_DOWN: 5,
        }
        open_card = open_by_round.get(self.round, 0)

        return {
            "public_cards": [card.clone() for card in self.public_cards[:open_card]],
            "hands": [[card.clone() for card in p.hand] for p in self.players],
            "bids": [p.bid for p in self.players],
            "round": self.round
        }

    def pre(self):
        """Reset every player before play.

        Players that offer a `reset()` method are reset through it; players
        built on AbstractPlayer have none, so their per-game reset is used.
        NOTE(review): the intended reset depth is not visible here - confirm.
        """
        for p in self.players:
            reset = getattr(p, "reset", None)
            if callable(reset):
                reset()
            else:
                p.resetByGame()

    def step(self, a: Action) -> dict:
        """Apply action `a` for the player whose turn it is and return the new observation.

        CALL   -- match the highest bid; if another action was already taken
                  in this round, the round closes and the next one begins.
        DOUBLE -- raise to twice the highest bid.
        FOLD   -- give up; the game jumps straight to SHOW_DOWN.

        Outside FLOP/TURN/RIVER nothing changes and the current observation
        is returned. Raises ValueError for anything that is not an Action.
        """
        p = self.players[self.whose_turn]

        if self.round not in (TexusHoldemRounds.FLOP, TexusHoldemRounds.TURN, TexusHoldemRounds.RIVER):
            return self.observe_all()

        if a == Action.CALL:
            p.bid = self.max_bid_in_game
            if self.action_in_round:
                self.round = TexusHoldemRounds(self.round.value + 1)
                self.resetByRound()
                # The closing call belongs to the finished round. Recording it
                # in the new round would make the next opening CALL close
                # that round immediately.
                return self.observe_all()
        elif a == Action.DOUBLE:
            p.bid = self.max_bid_in_game * 2
            self.max_bid_in_game = p.bid
        elif a == Action.FOLD:
            p.is_fold = True
            # Set the round before observing so the caller sees SHOW_DOWN in
            # the observation of the fold itself.
            self.round = TexusHoldemRounds.SHOW_DOWN
        else:
            raise ValueError("unknown action: {!r}".format(a))

        self.action_in_round.append(a)
        return self.observe_all()

    def evaluate(self):
        """Settle the game and return a result dictionary.

        "winner" is the winning player's index, or -1 for a draw (both
        players hold the same kind of poker hand). "detail" holds each
        player's hand rank and reward; the winner gains the loser's bid and
        the loser loses it. When a player folded, the other player wins and
        the hand ranks are left as Nothing.
        """
        result = {
            "winner": None,
            "detail":{
                "player0": {
                    "poker": Poker.Nothing,
                    "reward": 0
                },
                "player1": {
                    "poker": Poker.Nothing,
                    "reward": 0
                }
            }
        }

        looser_num = -1
        for i, p in enumerate(self.players):
            if p.is_fold:
                looser_num = i

        if looser_num != -1:
            winner_num = (looser_num + 1) % 2
            stake = self.players[looser_num].bid
            result["winner"] = winner_num
            result["detail"]["player{}".format(winner_num)]["reward"] = stake
            result["detail"]["player{}".format(looser_num)]["reward"] = -stake
            return result

        comp = [
            Poker.getPoker(self.players[0].hand, self.public_cards),
            Poker.getPoker(self.players[1].hand, self.public_cards)
        ]
        result["detail"]["player0"]["poker"] = comp[0]
        result["detail"]["player1"]["poker"] = comp[1]

        if comp[0] == comp[1]:
            # Hands of the same kind are treated as a draw.
            result["winner"] = -1
            return result

        # The smaller Poker value is the stronger hand.
        winner_num = 0 if comp[0] < comp[1] else 1
        looser_num = (winner_num + 1) % 2
        stake = self.players[looser_num].bid
        result["winner"] = winner_num
        result["detail"]["player{}".format(winner_num)]["reward"] = stake
        result["detail"]["player{}".format(looser_num)]["reward"] = -stake

        return result

    def do(self, max_episodes=1):
        """Play `max_episodes` games with the players' policies, printing each step.

        The first episode continues from the current state; every later
        episode starts with a fresh deal.
        """
        for episode in range(max_episodes):
            if episode > 0:
                self.resetByGame()
            done = False
            while not done:
                print("Round: {}".format(self.round))
                print("Turn: player{}".format(self.whose_turn))
                p = self.players[self.whose_turn]
                a = p.policy()
                print("Choice: {}".format(a))
                # Use up the action before stepping: a step that closes the
                # round refills every player's actions, and removing
                # afterwards would strike it from the new round's list.
                p.rest_actions.remove(a)
                observation = self.step(a)
                print("observation: {}".format(observation))
                self.whose_turn = (self.whose_turn + 1) % 2
                if observation["round"] == TexusHoldemRounds.SHOW_DOWN:
                    done = True
                    evaluation = self.evaluate()
                    print()
                    print("Results")
                    print(evaluation)
                print()

            print("The Game is end")
                    

In [138]:
class RandomPlayer(AbstractPlayer):
    """Player that picks uniformly at random among its remaining non-FOLD actions."""

    def policy(self):
        """Return a random remaining action other than FOLD.

        FOLD is returned only when no other action is left. The earlier
        retry-until-not-FOLD loop never terminated in that situation.
        """
        print(self.rest_actions)
        candidates = [action for action in self.rest_actions if action != Action.FOLD]
        if not candidates:
            return Action.FOLD
        return random.choice(candidates)

In [139]:
# Seat two random players; constructing the environment already deals the first game.
p = [RandomPlayer(), RandomPlayer()]
env = TexusHoldemEnv(players=p)

In [140]:
# Play one game (max_episodes defaults to 1), then deal a fresh game so the
# environment is ready for the next run.
env.do()
env.resetByGame()

Round: TexusHoldemRounds.FLOP
Turn: player0
[<Action.CALL: 0>, <Action.DOUBLE: 1>, <Action.FOLD: 2>]
Choice: Action.DOUBLE
observation: {'hands': [[Ace of Spade, Queen of Clover], [Jack of Heart, Eight of Diamond]], 'public_cards': [Nine of Diamond, King of Clover, Four of Diamond], 'bids': [2, 1], 'round': <TexusHoldemRounds.FLOP: 2>}

Round: TexusHoldemRounds.FLOP
Turn: player1
[<Action.CALL: 0>, <Action.DOUBLE: 1>, <Action.FOLD: 2>]
Choice: Action.CALL
observation: {'hands': [[Ace of Spade, Queen of Clover], [Jack of Heart, Eight of Diamond]], 'public_cards': [Nine of Diamond, King of Clover, Four of Diamond, Five of Diamond], 'bids': [2, 2], 'round': <TexusHoldemRounds.TURN: 3>}

Round: TexusHoldemRounds.TURN
Turn: player0
[<Action.CALL: 0>, <Action.DOUBLE: 1>, <Action.FOLD: 2>]
Choice: Action.DOUBLE
observation: {'hands': [[Ace of Spade, Queen of Clover], [Jack of Heart, Eight of Diamond]], 'public_cards': [Nine of Diamond, King of Clover, Four of Diamond, Five of Diamond], 'bids'