In [1]:
import numpy as np

In [2]:
class BlackJack_PolicyEval():
    """Simulate single rounds of a simplified blackjack game with fixed threshold policies.

    Rules as implemented here:

    * Cards are drawn with replacement, uniformly from the values 1..10.
    * An ace is the value 1; it counts as 11 ("usable") whenever doing so
      keeps the hand total at or below 21.
    * The player hits until the hand total reaches ``player_thres``; the
      dealer then hits until the hand total reaches ``dealer_thres``.
    * If the player busts, the dealer does not play and the player loses.

    After ``GameBegins()`` the instance exposes ``player_hand`` / ``dealer_hand``
    (lists of card values), ``track_player`` / ``track_dealer`` (tuples mixing
    card values and the action strings 'Hit' / 'Stick' / 'Bust') and ``score``
    (1 win, 0 draw, -1 loss, from the player's point of view).
    """

    def __init__(self):
        # Assume the deck is infinitely large: every draw is independent and
        # uniform over the card values 1..10.
        # NOTE(review): standard blackjack has four ten-valued ranks (10, J, Q, K),
        # which would make 10 four times as likely as any other value -- confirm
        # that the uniform distribution here is an intended simplification.
        self.decks = list(range(1, 11))

        # Candidate state / action spaces: integers 2..33, integers 1..11 and two
        # action labels. None of these are read by the simulation in this class;
        # presumably they are reserved for a later policy-evaluation step
        # (TODO confirm, and re-check the ranges against the intended state space).
        self.player_sum_states = list(range(2, 34))
        self.dealer_face_up = list(range(1, 12))
        self.actions = ['hit', 'stick']

        # Hand totals at which each side stops hitting.
        self.player_thres = 18
        self.dealer_thres = 17

    def draw_hands(self):
        """Deal a starting hand: a list of two independently drawn card values."""
        # int(...) turns NumPy scalars into built-in ints so traces display as plain numbers.
        return [int(np.random.choice(self.decks)) for _ in range(2)]

    def draw_card(self):
        """Draw a single card value from ``self.decks``."""
        return int(np.random.choice(self.decks))

    def Ace(self, cards):
        """Return True if ``cards`` holds an ace (1) that can count as 11 without exceeding 21."""
        return (1 in cards) and (sum(cards) + 10 <= 21)

    def _hand_total(self, cards):
        """Return the value of ``cards``, counting one ace as 11 when it is usable."""
        total = sum(cards)
        return total + 10 if self.Ace(cards) else total

    def CuurentSum(self, cards, thres):
        """Decide the next move for a hand under a threshold policy.

        Parameters
        ----------
        cards : sequence of int
            Card values currently in the hand.
        thres : int
            Hand total at or above which the hand stops hitting.

        Returns
        -------
        str
            'Bust' if the hand total exceeds 21, 'Stick' if it is at least
            ``thres``, otherwise 'Hit'. The total includes a usable ace as 11.
        """
        total = self._hand_total(cards)
        if total > 21:
            return 'Bust'
        if total >= thres:
            return 'Stick'
        return 'Hit'

    def Score(self, player_track, dealer_track, player_sum, dealer_sum):
        """Return the player's result for a finished round.

        Parameters
        ----------
        player_track, dealer_track : sequence
            Traces whose last element is inspected for the string 'Bust'.
        player_sum, dealer_sum : int
            Final hand totals, compared only when neither side bust.

        Returns
        -------
        int
            -1 if the player bust, 1 if the dealer bust, 0 on equal totals,
            otherwise 1 when the player's total is higher and -1 when it is lower.
        """
        if player_track[-1] == 'Bust':
            return -1
        if dealer_track[-1] == 'Bust':
            return 1
        if player_sum == dealer_sum:
            return 0
        return 1 if player_sum > dealer_sum else -1

    def _play_hand(self, hand, track, thres):
        """Hit on ``hand`` until it sticks or busts; record actions and draws in ``track``.

        Both lists are mutated in place. Returns the terminal action,
        either 'Stick' or 'Bust'.
        """
        while True:
            action = self.CuurentSum(hand, thres)
            track.append(action)
            if action != 'Hit':
                return action
            new_draw = self.draw_card()
            hand.append(new_draw)
            track.append(new_draw)

    def GameBegins(self):
        """Play one complete round, print the outcome, and store the results on the instance.

        Sets ``player_hand``, ``dealer_hand``, ``track_player``, ``track_dealer``
        and ``score``. Returns None.
        """
        # 1. Game starts: two cards each, player dealt first.
        player_start, dealer_start = self.draw_hands(), self.draw_hands()

        self.player_hand = list(player_start)
        self.dealer_hand = list(dealer_start)

        self.track_player = list(player_start)
        self.track_dealer = list(dealer_start)

        # 2. The player acts first, following the threshold policy.
        player_action = self._play_hand(self.player_hand,
                                        self.track_player,
                                        self.player_thres)

        # 3. The dealer only plays when the player has not already bust.
        if player_action != 'Bust':
            self._play_hand(self.dealer_hand,
                            self.track_dealer,
                            self.dealer_thres)

        self.track_player = tuple(self.track_player)
        self.track_dealer = tuple(self.track_dealer)

        # 4. Score with the same ace-adjusted totals that drove the Stick/Hit
        #    decisions; a raw sum would undervalue a hand holding a usable ace
        #    (e.g. [1, 9] is 20, not 10).
        self.score = self.Score(self.track_player,
                                self.track_dealer,
                                self._hand_total(self.player_hand),
                                self._hand_total(self.dealer_hand))

        outcome = {1: 'you win', -1: 'you lost', 0: 'draw'}.get(self.score)
        if outcome is not None:
            print(outcome)

In [3]:
# Create a simulator; __init__ sets the stopping thresholds (player 18, dealer 17).
game = BlackJack_PolicyEval()

In [4]:
# Play one full round: prints the outcome and stores hands, traces and score on `game`.
game.GameBegins()

you win


In [5]:
# Player's trace for the round: the two starting cards, then each action followed by the card drawn on a 'Hit'.
game.track_player

(9, 6, 'Hit', 6, 'Stick')

In [6]:
# Dealer's trace for the round: the two starting cards, then each action followed by the card drawn on a 'Hit'.
game.track_dealer

(1, 5, 'Hit', 2, 'Stick')