In [99]:
import numpy as np

from collections import defaultdict
from plotting import *

# Thirteen card values to draw from: 1 (the ace) through 9, plus four cards valued 10.
deck = list(range(1, 10)) + [10] * 4

class BlackjackEnv(object):
    """Simplified Blackjack environment: one player against the dealer.

    Attributes:
        dealer (list): card values currently held by the dealer.
        player (list): card values currently held by the player.
        done (bool): True once the current game is over, False otherwise.

    An ace is stored as 1 and is counted as 11 whenever that does not push
    the hand above 21 (a "usable" ace).

    The environment is meant to be driven through three methods only:
        act(hit) - PARAMS : 1 (truthy) to hit, 0 (falsy) to stick
                   RETURN : (state), done_status, reward
        reset()  - PARAMS : None
                   RETURN : None
        state()  - PARAMS : None
                   RETURN : (state)
    where (state) is the tuple (player score, dealer score, usable ace flag).

    Blackjack references:
    [1] https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node51.html (Example 5.1)
    [2] http://www.bicyclecards.com/how-to-play/blackjack/
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Start a new game: deal one card to the dealer, then one to the player."""
        self.dealer = [self.draw()]
        self.player = [self.draw()]
        self.done = False

    def natural(self, hand):
        """Return True if ``hand`` is exactly an ace plus one ten-valued card."""
        return sorted(hand) == [1, 10]

    def draw(self):
        """Return one card value picked at random from the module-level ``deck``."""
        return np.random.choice(deck)

    def usable(self, hand):
        """Return a truthy value if ``hand`` holds an ace that can count as 11."""
        return 1 in hand and sum(hand) + 10 <= 21

    def busted(self, hand):
        """Return True if the best score of ``hand`` exceeds 21."""
        return self.sum_hand(hand) > 21

    def sum_hand(self, hand):
        """Return the score of ``hand``, counting a usable ace as 11."""
        total = sum(hand)
        if self.usable(hand):
            return total + 10
        return total

    def state(self):
        """Return (player score, dealer score, player's usable-ace flag)."""
        return (self.sum_hand(self.player), self.sum_hand(self.dealer),
                self.usable(self.player))

    def act(self, hit):
        """Apply one player action and return ((state), done, reward).

        PARAMS : hit - truthy to draw another card, falsy to stick.
        RETURN : the new state tuple, the done flag, and the reward
                 (-1 on a player bust, 0 for a non-terminal hit, otherwise
                 the result computed by stick()).

        If the game is already over, nothing changes and the terminal state
        is returned with a reward of 0. Previously this case fell through
        and returned None, breaking the documented return shape.
        """
        if self.done:
            return self.state(), self.done, 0

        if not hit:
            return self.stick()

        self.hit()
        if self.busted(self.player):
            self.done = True
            return self.state(), self.done, -1
        return self.state(), self.done, 0

    def hit(self):
        """Add one freshly drawn card to the player's hand."""
        self.player.append(self.draw())

    def stick(self):
        """End the player's turn, play out the dealer and score the game.

        RETURN : (state), done_status, reward where reward is
                 1.5 if only the player holds a natural,
                 0   if both hold a natural (a draw, see reference [1]),
                 1 / -1 / 0 for a higher / lower / equal score otherwise.
        """
        self.done = True

        # The dealer keeps hitting while his score is below 17, see reference [2].
        while self.sum_hand(self.dealer) < 17:
            self.dealer.append(self.draw())

        # The player cannot be busted here (act() ends the game on a bust),
        # so the plain hand score is enough.
        player_score = self.sum_hand(self.player)

        # A busted dealer is scored as -1 so that any player score beats it.
        dealer_score = -1 if self.busted(self.dealer) else self.sum_hand(self.dealer)

        player_natural = self.natural(self.player)
        dealer_natural = self.natural(self.dealer)

        if player_natural and dealer_natural:
            # Both naturals is a draw per reference [1]; this used to pay out 1.
            reward = 0
        elif player_natural:
            reward = 1.5
        elif dealer_score > player_score:
            reward = -1
        elif dealer_score < player_score:
            reward = 1
        else:
            reward = 0

        return self.state(), self.done, reward

def print_state(recvdState):
    """Print a one-line summary of a (player sum, dealer sum, usable ace) tuple."""
    player_total, dealer_total, has_usable_ace = recvdState
    summary = "Player sum : {}, Dealer sum: {}, Usable Ace : {}".format(
        player_total, dealer_total, has_usable_ace
    )
    print(summary)
    
def take_action(recvdState):
    """Fixed policy: stick (0) once the player's sum reaches 20, otherwise hit (1).

    ``recvdState`` must be a (player sum, dealer sum, usable ace) tuple; only
    the player sum influences the decision.
    """
    player_total, _dealer_total, _has_usable_ace = recvdState
    return 0 if int(player_total) >= 20 else 1
        

In [100]:
# Build the environment; its constructor already calls reset() and deals the first cards.
env = BlackjackEnv()
# Start a fresh game explicitly (this deals again, replacing the cards dealt above).
env.reset()
# Opening (player sum, dealer sum, usable ace) tuple of that game.
init_state = env.state()

In [101]:
# A state is the tuple the environment returns to describe the agent's current situation:
# (score of the player, score of the dealer, whether the player holds a usable ace).
# Usable ace - an ace that can be counted as 11 without the hand going over 21.
#print(init_state)

In [129]:
# Let's play some games: run 20 episodes with the fixed take_action() policy
# and print every state, action and final reward.
for episodes in range(20):
    print("New Game Starting!")
    env.reset()
    new_state = env.state()
    # 100 is only an upper bound on steps; the loop breaks as soon as the game is done.
    for one_step in range(100):
        print_state(new_state)
        action = take_action(new_state)
        # action is 0 or 1, so it doubles as an index into the label list.
        print("Taking action: {}".format( ["Stick", "Hit"][action]))
        # env.act returns ((state), done, reward).
        new_state_vals = env.act(action)
        new_state = new_state_vals[0]
        #print(new_state_vals)
        
        # Index 1 is the done flag, index 2 the reward for this step.
        if new_state_vals[1] == True:
            print_state(new_state)
            print("End of game, reward is {}".format(new_state_vals[2]))
            break
        

New Game Starting!
Player sum : 10, Dealer sum: 10, Usable Ace : False
Taking action: Hit
Player sum : 17, Dealer sum: 10, Usable Ace : False
Taking action: Hit
Player sum : 23, Dealer sum: 10, Usable Ace : False
End of game, reward is -1
New Game Starting!
Player sum : 2, Dealer sum: 10, Usable Ace : False
Taking action: Hit
Player sum : 11, Dealer sum: 10, Usable Ace : False
Taking action: Hit
Player sum : 20, Dealer sum: 10, Usable Ace : False
Taking action: Stick
Player sum : 20, Dealer sum: 20, Usable Ace : False
End of game, reward is 0
New Game Starting!
Player sum : 10, Dealer sum: 5, Usable Ace : False
Taking action: Hit
Player sum : 16, Dealer sum: 5, Usable Ace : False
Taking action: Hit
Player sum : 23, Dealer sum: 5, Usable Ace : False
End of game, reward is -1
New Game Starting!
Player sum : 10, Dealer sum: 5, Usable Ace : False
Taking action: Hit
Player sum : 18, Dealer sum: 5, Usable Ace : False
Taking action: Hit
Player sum : 23, Dealer sum: 5, Usable Ace : False
End o

In [104]:
# Scratch check of np.random.choice: picks one element at random from the given list.
# NOTE(review): the last element is 67 - possibly a typo for "6, 7"; confirm.
np.random.choice([1,2,3,4,5,6,67])

1