# Single Play Poker Environment

This notebook contains an environment in which a single player plays poker.

## Install required packages

In [1]:
!pip install gym

[33mYou are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## Environment classes

### imports

In [2]:
import random
import sys
from enum import Enum
from io import StringIO
from typing import List, Tuple

import gym
import numpy as np

### Suit Class(Enum)

* This class defines a card's suit: Clover, Diamond, Heart, or Spade.

In [3]:
class Suit(Enum):
    """The four card suits; the integer value is used for tie-breaks and serials."""

    Clover = 0
    Diamond = 1
    Heart = 2
    Spade = 3

### Helper variable that converts Suit -> String or number -> String

In [4]:
# Display name for every suit, used when a card is printed.
suit_to_str = dict([
    (Suit.Heart, "Heart"),
    (Suit.Spade, "Spade"),
    (Suit.Clover, "Clover"),
    (Suit.Diamond, "Diamond"),
])

# Display name for every rank. Index 0 is a placeholder so that rank ``n``
# is found at index ``n``.
num_to_string = [
    0,
    "Ace", "Two", "Three", "Four", "Five", "Six", "Seven",
    "Eight", "Nine", "Ten", "Jack", "Queen", "King",
]

### Card class

* this class implements one card

In [5]:
class Card:
    """A single playing card, identified by its suit and its rank number.

    Cards are ordered by rank first and by ``suit.value`` second. Two cards
    are equal when both rank and suit value match, and equal cards share the
    same hash, so clones behave correctly in sets, dict keys and ``list.remove``.
    """

    def __init__(self, suit: 'Suit', num: int):
        """Store the suit and the rank number (1 = Ace ... 13 = King)."""
        self.suit = suit  # type: Suit
        self.num  = num   # type: int

    def _sort_key(self) -> tuple:
        """Return ``(rank, suit value)``, the key used by every comparison."""
        return (self.num, self.suit.value)

    def __repr__(self) -> str:
        return "{} of {}".format(num_to_string[self.num], suit_to_str[self.suit])

    def __eq__(self, other) -> bool:
        # Without __eq__ a clone would hash like the original but never compare
        # equal to it, which breaks the hash/eq contract.
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() == other._sort_key()

    def __lt__(self, other:'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other:'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other:'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other:'Card') -> bool:
        if not isinstance(other, Card):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__(self) -> int:
        return hash((self.suit.value, self.num))

    def clone(self) -> 'Card':
        """Return a new ``Card`` with the same suit and rank."""
        return Card(self.suit, self.num)

    def get_serial(self) -> int:
        """Return ``suit.value * 13 + num``, a single integer encoding of the card."""
        return self.suit.value * 13 + self.num

### Player class

* this class implements player status.

In [6]:
class Player:
    """Holds the player's hand together with the fixed bid amount.

    ``initial_coin`` is only stored here; this class never changes it.
    """

    def __init__(self, initial_coin:int=100, bid_amount:int=1):
        self.bid_amount = bid_amount
        self.hand = []  # type: List[Card]
        self.initial_coin = initial_coin  # type: int

    def reset(self) -> None:
        """Empty the hand in place (the list object itself is kept)."""
        del self.hand[:]

    def draw_card(self, card:Card) -> None:
        """Add a copy of ``card`` to the hand and keep the hand sorted."""
        self.hand += [card.clone()]
        self.hand.sort()

    def bid(self) -> int:
        """Return the amount the player bids each time."""
        return self.bid_amount

### Poker Action Class

- this class(Enum) shows player's actions.

In [7]:
class SinglePlayPokerActions(Enum):
    """Actions available to the player on each step."""

    DRAW = 0
    DOUBLE = 1

### UserAction class

* This class represents the action taken by the user.

In [8]:
class UserAction:
    """Bundles the chosen action with the per-position drop flags.

    ``drop_cards`` is stored as given; the environment treats an entry equal
    to 1 as "drop the card at this position".
    """

    def __init__(self, action:SinglePlayPokerActions, drop_cards:[int]=None):
        self.action, self.drop_cards = action, drop_cards

### SinglePlayPokerStates class

In [9]:
class SinglePlayPokerStates(Enum):
    """Lifecycle states of one episode."""

    PLAYABLE = 0
    DONE_EPISODE = 1

### Poker class

* This class enumerates the poker hands and provides helper methods that judge which hand you hold.

In [10]:
class Poker(Enum):
    """Poker hand ranks plus classmethod predicates that recognise them.

    Members run from the strongest hand (``RoyalStraightFlash`` = 0) to
    ``Nothing`` = 9. Every ``isXxx`` hand predicate returns the matching
    member, or ``Poker.Nothing`` when the hand does not qualify.

    The predicates only read ``card.num`` and ``card.suit``. They work on rank
    counts and sorted ranks, so the hand does not have to be pre-sorted, and an
    empty hand is always ``Nothing``.
    """

    RoyalStraightFlash = 0
    StraightFlash      = 1
    FourCard           = 2
    FullHouse          = 3
    Flash              = 4
    Straight           = 5
    ThreeCard          = 6
    TwoPair            = 7
    OnePair            = 8
    Nothing            = 9

    @classmethod
    def _rankCounts(cls, cards: List['Card']) -> dict:
        """Return a dict mapping each rank number to how often it occurs."""
        counts = {}
        for card in cards:
            counts[card.num] = counts.get(card.num, 0) + 1
        return counts

    @classmethod
    def _isAceHighSequence(cls, cards: List['Card']) -> bool:
        """Return True when the ranks are exactly Ace, 10, Jack, Queen, King."""
        return sorted(card.num for card in cards) == [1, 10, 11, 12, 13]

    @classmethod
    def isSameNumber(cls, cards: List['Card']) -> bool:
        """Return True when ``cards`` is non-empty and all ranks are equal."""
        return len(cards) > 0 and len({card.num for card in cards}) == 1

    @classmethod
    def isSameSuit(cls, cards: List['Card']) -> bool:
        """Return True when ``cards`` is non-empty and all suits are equal."""
        return len(cards) > 0 and len({card.suit for card in cards}) == 1

    @classmethod
    def isSequential(cls, cards: List['Card']) -> bool:
        """Return True when the ranks form one run of consecutive numbers.

        Every card passed in is checked (the length is not fixed at five), and
        the Ace only counts as 1 here; the Ace-high run is handled separately.
        """
        nums = sorted(card.num for card in cards)
        if not nums:
            return False
        return all(high - low == 1 for low, high in zip(nums, nums[1:]))

    @classmethod
    def isRoyalStraightFlash(cls, hand: List['Card']) -> 'Poker':
        """Ace, 10, Jack, Queen, King of a single suit."""
        if cls._isAceHighSequence(hand) and cls.isSameSuit(hand):
            return cls.RoyalStraightFlash
        return cls.Nothing

    @classmethod
    def isStraightFlash(cls, hand: List['Card']) -> 'Poker':
        """Consecutive ranks of a single suit.

        The Ace-high run is deliberately not matched here because it has its
        own rank, ``RoyalStraightFlash``.
        """
        if cls.isSequential(hand) and cls.isSameSuit(hand):
            return cls.StraightFlash
        return cls.Nothing

    @classmethod
    def isFourCard(cls, hand: List['Card']) -> 'Poker':
        """At least four cards share one rank."""
        if any(count >= 4 for count in cls._rankCounts(hand).values()):
            return cls.FourCard
        return cls.Nothing

    @classmethod
    def isFullHouse(cls, hand: List['Card']) -> 'Poker':
        """Exactly one rank held three times and another held twice."""
        if sorted(cls._rankCounts(hand).values()) == [2, 3]:
            return cls.FullHouse
        return cls.Nothing

    @classmethod
    def isFlash(cls, hand: List['Card']) -> 'Poker':
        """All cards share one suit."""
        if cls.isSameSuit(hand):
            return cls.Flash
        return cls.Nothing

    @classmethod
    def isStraight(cls, hand: List['Card']) -> 'Poker':
        """Consecutive ranks, including the Ace-high run 10-J-Q-K-A."""
        if cls.isSequential(hand) or cls._isAceHighSequence(hand):
            return cls.Straight
        return cls.Nothing

    @classmethod
    def isThreeCard(cls, hand: List['Card']) -> 'Poker':
        """At least three cards share one rank."""
        if any(count >= 3 for count in cls._rankCounts(hand).values()):
            return cls.ThreeCard
        return cls.Nothing

    @classmethod
    def isNPair(cls, hand: List['Card']) -> 'Poker':
        """Return ``TwoPair``, ``OnePair`` or ``Nothing``.

        Counts the distinct ranks that are held at least twice. Counting ranks
        (rather than adjacent equal neighbours) keeps the result inside the
        three possible answers even for four-of-a-kind or full-house hands.
        """
        paired_ranks = sum(1 for count in cls._rankCounts(hand).values() if count >= 2)
        return [cls.Nothing, cls.OnePair, cls.TwoPair][min(paired_ranks, 2)]

### UserState class

* this class implements UserState, such as amount of bid in this game and current hand.

In [11]:
class UserState:
    """Snapshot of what the player can observe: the current bid and the hand.

    The hand is copied card by card with ``clone()``, so later changes to the
    caller's list or cards do not affect the snapshot.
    """

    def __init__(self, current_bid:int, current_hand:List['Card']):
        self.current_bid = current_bid
        self.current_hand = [card.clone() for card in current_hand]

    def __eq__(self, other) -> bool:
        # Defined together with __hash__ so that equal snapshots hash alike;
        # hands are compared element-wise using the cards' own equality.
        if not isinstance(other, UserState):
            return NotImplemented
        return (self.current_bid == other.current_bid
                and self.current_hand == other.current_hand)

    def __hash__(self) -> int:
        # Hash the bid followed by every card, whatever the hand size is.
        return hash((self.current_bid,) + tuple(self.current_hand))

    def __repr__(self) -> str:
        return repr((self.current_bid, self.current_hand))

### SimplePokerEnv

* You have finally reached the main class.
* This class implements the environment on top of OpenAI Gym.

In [12]:
class SimplePokerEnv(gym.Env):
    """Single-player draw poker as a Gym environment.

    An episode deals five cards from a freshly shuffled 52-card deck. Each step
    the player may double the bid and/or replace some cards. The episode ends
    when no card is dropped or when ``max_draw`` draws have been made, and the
    reward is ``bids * reward_rate_map[hand]``.

    The logic lives in the underscore hooks (``_reset``, ``_step``, ...). The
    public ``reset`` / ``step`` / ``render`` / ``seed`` methods forward to them
    so the environment also works through the regular Gym interface.
    """

    # Reward multiplier applied to the accumulated bid for each final hand.
    reward_rate_map = {
        Poker.RoyalStraightFlash: 100,
        Poker.StraightFlash:       25,
        Poker.FourCard:            15,
        Poker.FullHouse:            7,
        Poker.Flash:                5,
        Poker.Straight:             4,
        Poker.ThreeCard:            3,
        Poker.TwoPair:              2,
        Poker.OnePair:              1,
        Poker.Nothing:             -1
    }

    def __init__(self, max_draw:int=1, initial_coin:int=100):
        """Create the environment and deal the first hand.

        Args:
            max_draw: Number of card exchanges allowed per episode.
            initial_coin: Starting coin count stored on the env and the player.
        """
        super().__init__()
        # NOTE(review): 31 is kept from the original; `_step` takes a UserAction
        # rather than an integer, so confirm the intended action encoding.
        self.action_space = gym.spaces.Discrete(31)
        self.decks        = [] # type: List[Card]
        self.max_draw     = max_draw # type: int
        self.initial_coin = initial_coin # type: int
        self.player       = Player(initial_coin=self.initial_coin) # type: Player
        self.states       = [SinglePlayPokerStates.PLAYABLE,
                             SinglePlayPokerStates.DONE_EPISODE] # type: List[SinglePlayPokerStates]
        self.reward_range = [
            SimplePokerEnv.reward_rate_map[Poker.Nothing] * self.max_draw,
            SimplePokerEnv.reward_rate_map[Poker.RoyalStraightFlash] * self.max_draw
        ]
        self.actions = [SinglePlayPokerActions.DRAW, SinglePlayPokerActions.DOUBLE]
        self._reset()

    # -- public Gym interface, forwarding to the legacy underscore hooks -----

    def reset(self) -> UserState:
        """Start a new episode; see ``_reset``."""
        return self._reset()

    def step(self, action: UserAction) -> Tuple[UserState, int, bool, dict]:
        """Apply one action; see ``_step``."""
        return self._step(action)

    def render(self, mode='human', close=False):
        """Write the current observation; see ``_render``."""
        return self._render(mode=mode, close=close)

    def seed(self, seed=None):
        """Seed the shuffle; see ``_seed``."""
        return self._seed(seed)

    # -- implementation -------------------------------------------------------

    def _reset(self) -> UserState:
        """Rebuild and shuffle the deck, deal five cards and place the first bid."""
        self.draw_count   = 0 # type: int
        self.state        = self.states[0] # type: SinglePlayPokerStates
        self.player_coin  = self.initial_coin # type: int
        self.card_pointer = 0
        self.decks.clear()
        for suit in (Suit.Heart, Suit.Spade, Suit.Clover, Suit.Diamond):
            for i in range(1, 14):
                self.decks.append(Card(suit, i))
        random.shuffle(self.decks)

        self.player.reset()
        for i in range(5):
            self.player.draw_card(self.decks[i])
        # The next card to be dealt is the one right after the initial hand.
        self.card_pointer = 5
        self.bids = self.player.bid()
        return self._observe()

    def _observe(self) -> UserState:
        """Return a snapshot of the current bid and a copy of the hand."""
        return UserState(self.bids, self.player.hand)

    def _step(self, action: UserAction) -> Tuple[UserState, int, bool, dict]:
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        ``DOUBLE`` adds one more bid before the exchange. Positions whose flag
        in ``action.drop_cards`` equals 1 are replaced from the deck; a missing
        (``None``) or all-zero flag list ends the episode. When the episode is
        done, ``info["poker"]`` holds the final hand and ``reward`` its payout.

        Raises:
            IndexError: if the deck does not hold enough cards for the exchange.
        """
        done_state = SinglePlayPokerStates.DONE_EPISODE
        reward = 0 # type: int
        done = False # type: bool
        info = {} # type: dict
        if self.state == SinglePlayPokerStates.PLAYABLE:
            if action.action == SinglePlayPokerActions.DOUBLE:
                self.bids += self.player.bid()
                if self.draw_count >= self.max_draw:
                    self.state = done_state
            if self.state != done_state:
                hand = self.player.hand
                flags = [] if action.drop_cards is None else action.drop_cards
                drop_positions = [i for i, flag in enumerate(flags[:len(hand)]) if flag == 1]
                if not drop_positions:
                    self.state = done_state
                else:
                    n = len(drop_positions)
                    # Check before touching the hand so a failed exchange
                    # cannot leave it with fewer than five cards.
                    if self.card_pointer + n > len(self.decks):
                        raise IndexError("not enough cards left in the deck to draw {}".format(n))
                    # Delete by position, highest index first, so the remaining
                    # indices stay valid.
                    for i in reversed(drop_positions):
                        del hand[i]
                    for offset in range(n):
                        self.player.draw_card(self.decks[self.card_pointer + offset])
                    self.card_pointer += n
                    self.draw_count += 1
            if self.draw_count >= self.max_draw:
                self.state = done_state

        if self.state == done_state:
            poker, reward = self._get_reward()
            done = True
            info["poker"] = poker
        observation = self._observe()
        return observation, reward, done, info

    def _get_reward(self) -> Tuple[Poker, int]:
        """Return the best hand found and ``bids`` times its reward rate."""
        poker = Poker.Nothing
        # Predicates are tried from the strongest hand down; the first hit wins.
        judgements = (
            Poker.isRoyalStraightFlash, Poker.isStraightFlash, Poker.isFourCard,
            Poker.isFullHouse, Poker.isFlash, Poker.isStraight,
            Poker.isThreeCard, Poker.isNPair,
        )
        for judgement in judgements:
            judge = judgement(self.player.hand)
            if judge != Poker.Nothing:
                poker = judge
                break

        return poker, self.bids * SimplePokerEnv.reward_rate_map[poker]

    def _close(self):
        """Nothing to release."""
        pass

    def _seed(self, seed=None):
        """Seed the module-level ``random`` generator that shuffles the deck.

        Returns the list ``[seed]``. A ``None`` seed leaves the generator as is.
        """
        if seed is not None:
            random.seed(seed)
        return [seed]

    def _render(self, mode='human', close=False):
        """Write the current observation to stdout, or to a ``StringIO`` for ``'ansi'``."""
        outfile = StringIO() if mode == 'ansi' else sys.stdout
        # The observation is a UserState, so convert it before concatenating.
        outfile.write(str(self._observe()) + '\n')
        return outfile

## Test code for each poker judgement 

- Reference: [遊びかた：ポーカー](https://www.nintendo.co.jp/others/playing_cards/howtoplay/poker/index.html)

### RoyalStraightFlash

In [13]:
# Ace, 10, Jack, Queen, King, all Spades.
rsf = [
    Card(Suit.Spade, 1),
    Card(Suit.Spade, 10),
    Card(Suit.Spade, 11),
    Card(Suit.Spade, 12),
    Card(Suit.Spade, 13),
]
# Same ranks, but the 10 is a Heart.
not_rsf = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 10),
    Card(Suit.Spade, 11),
    Card(Suit.Spade, 12),
    Card(Suit.Spade, 13),
]
for hand in (rsf, not_rsf):
    print(Poker.isRoyalStraightFlash(hand))

Poker.RoyalStraightFlash
Poker.Nothing


### StraightFlash

In [14]:
# Ace to Five, all Spades.
sf = [
    Card(Suit.Spade, 1),
    Card(Suit.Spade, 2),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
# Same ranks, but the Two is a Clover.
not_sf = [
    Card(Suit.Spade, 1),
    Card(Suit.Clover, 2),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
for hand in (sf, not_sf):
    print(Poker.isStraightFlash(hand))

Poker.StraightFlash
Poker.Nothing


### FourCard

In [15]:
# Four Aces plus a Five.
fc = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Diamond, 1),
    Card(Suit.Spade, 5),
]
# Five different ranks.
not_fc = [
    Card(Suit.Spade, 1),
    Card(Suit.Clover, 2),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
for hand in (fc, not_fc):
    print(Poker.isFourCard(hand))

Poker.FourCard
Poker.Nothing


### FullHouse

In [16]:
# Three Aces and two Threes.
fh = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Diamond, 3),
    Card(Suit.Spade, 3),
]
# Three Aces, but the last two ranks differ.
not_fh = [
    Card(Suit.Spade, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Spade, 1),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
for hand in (fh, not_fh):
    print(Poker.isFullHouse(hand))

Poker.FullHouse
Poker.Nothing


### Flash

In [17]:
# Five Spades of unrelated ranks.
fl = [
    Card(Suit.Spade, 1),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 8),
    Card(Suit.Spade, 10),
    Card(Suit.Spade, 13),
]
# All four suits appear.
not_fl = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Diamond, 1),
    Card(Suit.Spade, 5),
]
for hand in (fl, not_fl):
    print(Poker.isFlash(hand))

Poker.Flash
Poker.Nothing


### Straight

In [18]:
# Ace to Five in mixed suits.
st = [
    Card(Suit.Spade, 1),
    Card(Suit.Clover, 2),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
# Four Aces and a Five: ranks are not consecutive.
not_st = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Diamond, 1),
    Card(Suit.Spade, 5),
]
for hand in (st, not_st):
    print(Poker.isStraight(hand))

Poker.Straight
Poker.Nothing


### ThreeCard

In [19]:
# Three Aces plus two unrelated cards.
tc = [
    Card(Suit.Spade, 1),
    Card(Suit.Heart, 1),
    Card(Suit.Clover, 1),
    Card(Suit.Diamond, 5),
    Card(Suit.Spade, 12),
]
# Five different ranks.
not_tc = [
    Card(Suit.Spade, 1),
    Card(Suit.Clover, 2),
    Card(Suit.Spade, 3),
    Card(Suit.Spade, 4),
    Card(Suit.Spade, 5),
]
for hand in (tc, not_tc):
    print(Poker.isThreeCard(hand))

Poker.ThreeCard
Poker.Nothing


### Two or One Pair

In [20]:
# Two pairs, one pair and no pair. The no-pair hand is called `no_pair`
# instead of `np`, so the `numpy as np` import is not overwritten.
tp = [Card(Suit.Spade,1), Card(Suit.Heart,1), Card(Suit.Clover,2), Card(Suit.Diamond,2), Card(Suit.Spade,12)]
op = [Card(Suit.Spade,1), Card(Suit.Clover,1), Card(Suit.Spade,3), Card(Suit.Spade,4), Card(Suit.Spade,5)]
no_pair = [Card(Suit.Spade,1), Card(Suit.Clover,3), Card(Suit.Spade,5), Card(Suit.Spade,7), Card(Suit.Spade,9)]
print(Poker.isNPair(tp))
print(Poker.isNPair(op))
print(Poker.isNPair(no_pair))

Poker.TwoPair
Poker.OnePair
Poker.Nothing


## Create Environment instance

In [21]:
# Build an environment with the constructor defaults (max_draw=1, initial_coin=100).
env = SimplePokerEnv()

## Solve with Random policy

### Define choose action policy and choose drop card policy.

In [22]:
import random
def choose_drop_card_nums():
    """Return five 0/1 flags marking a random subset of hand positions to drop.

    The subset size is drawn uniformly from 0..5 first, then that many
    distinct positions are sampled.
    """
    drop_count = random.randint(0,5)
    dropped = set(random.sample([0,1,2,3,4], drop_count))
    return [1 if position in dropped else 0 for position in range(5)]

def choose_action():
    """Pick DRAW or DOUBLE uniformly at random."""
    return random.choice((SinglePlayPokerActions.DRAW, SinglePlayPokerActions.DOUBLE))

### Let's try with Random policy

In [23]:
# Number of episodes to play with the random policy.
n_times = 100
# Seed the global RNG before the environment shuffles its first deck, so the
# generated dataset is the same after Restart & Run All.
SEED = 0
random.seed(SEED)
env = SimplePokerEnv()
rewards = []
pokers = []
# One row per finished episode; turned into a DataFrame further below.
results = []

def judge(cards):
    """Return the first hand rank matched by ``cards``, or ``Poker.Nothing``.

    The predicates are tried in order from the strongest hand to the weakest.
    """
    checks = (
        Poker.isRoyalStraightFlash,
        Poker.isStraightFlash,
        Poker.isFourCard,
        Poker.isFullHouse,
        Poker.isFlash,
        Poker.isStraight,
        Poker.isThreeCard,
        Poker.isNPair,
    )
    for check in checks:
        outcome = check(cards)
        if outcome != Poker.Nothing:
            return outcome
    return Poker.Nothing

for i in range(n_times):
    env._reset()
    done = False
    reward = 0
    info = None
    while not done:
        # Snapshot the hand before acting so the "pre" columns can be filled in.
        p_state = env._observe()
        action = choose_action()
        drop_cards = choose_drop_card_nums()
        a = UserAction(action, drop_cards)
        n_state, reward, done, info = env._step(a)
        if not done:
            continue

        # Row layout: bid, hand before, hand rank before, action, drop flags,
        # hand after, final hand rank, reward.
        line = [n_state.current_bid]
        line.extend(card.get_serial() for card in p_state.current_hand)
        line.append(judge(p_state.current_hand).value)
        line.append(action.value)
        line.extend(drop_cards)
        line.extend(card.get_serial() for card in n_state.current_hand)
        line.append(info["poker"].value)
        line.append(reward)
        results.append(line)

In [24]:
import pandas as pd
# Column names, in the same order in which each result row was assembled.
result_columns = [
    "amount_bid",
    "pre_card1", "pre_card2", "pre_card3", "pre_card4", "pre_card5",
    "current_poker",
    "action",
    "drop_card1", "drop_card2", "drop_card3", "drop_card4", "drop_card5",
    "result_card1", "result_card2", "result_card3", "result_card4", "result_card5",
    "result_poker",
    "reward",
]
df = pd.DataFrame(results, columns=result_columns)

In [25]:
# Save the recorded episodes; the file name carries the number of tries.
pickle_path = 'random-{}try.pickle'.format(n_times)
df.to_pickle(pickle_path)

In [26]:
# Preview the first rows of the recorded episodes.
df.head()

Unnamed: 0,amount_bid,pre_card1,pre_card2,pre_card3,pre_card4,pre_card5,current_poker,action,drop_card1,drop_card2,drop_card3,drop_card4,drop_card5,result_card1,result_card2,result_card3,result_card4,result_card5,result_poker,reward
0,1,19,7,20,35,12,8,0,1,0,1,1,1,7,46,48,11,51,8,1
1,2,42,30,5,6,12,9,1,1,1,1,1,0,14,15,46,34,12,9,-2
2,2,28,30,46,9,26,9,1,0,1,1,0,1,28,3,31,9,10,9,-2
3,2,27,4,5,31,26,8,1,0,0,0,1,0,27,29,4,5,26,9,-2
4,2,17,18,23,49,11,8,1,1,1,1,1,1,41,16,21,34,51,8,2
