# Black Jack

In [23]:
import matplotlib.pyplot as plt
import pprint
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

In [24]:
class Card:
    
    def __init__(self, rank, face, value, suit):
        self.rank = rank
        self.face = face
        self.value = value
        self.suit = suit

    def get_rank(self):
        return self.rank
    
    def get_face(self):
        return self.face
    
    def get_value(self):
        return self.value
    
    def get_suit(self):
        return self.suit
        
    def __str__(self):
        return f"{self.value}"
    
    def __repr__(self):
        if self.face is not None:
            return f"{self.face} of {self.suit}, Value: {self.value}"
        return f"{self.rank} of {self.suit}, Value: {self.value}"

In [25]:
class Deck:
    
    def __init__(self):
        self.value = 11
        self.faces = {1: "Ace", 11: "Jack", 12: "Queen", 13: "King"}
        self.ranks = [i for i in range(1, 14)]
        self.suits = ["Hearts","Diamonds", "Clubs", "Spades"]
            
    def card(self):
        """
        Create and return card from infinite deck with replacement
        """
        import random as rn
        
        rank = rn.choice(self.ranks)
        face = None
        value = rank
        suit = rn.choice(self.suits)
        
        if rank == 1:
            value = self.value
            face = self.faces[rank]
        
        elif rank > 10:
            value = self.value - 1
            face = self.faces[rank]
        
        return Card(rank, face, value, suit)

In [26]:
class Dealer:
    
    def __init__(self):
        self.deck = Deck()
        self.hand = self.deal()
        self.ace_positions = []
        self.current_score = 0
        self.ace_in_hand = False
        self.soft = True
        self.player_wins = False
        self.player_busts = False
        self.player_sticks = False
        self.player_loses = self.player_busts == True
        
    def deal(self):
        """
        First deal of game, returns agent's hand
        """
        # deal dealer's hand
        self.hand = [self.deck.card() for i in range(2)]
        
        # deal agent's hand
        player = [self.deck.card() for i in range(2)]
        
        return player
    
    def get_hand(self):
        """
        Return's dealer's hand
        """
        
        return self.hand
        
    def show(self):
        """
        Returns dealer's show card
        """
        
        return self.hand[0]
    
    def reveal(self):
        """
        Reveals hidden card
        """
        
        return self.hand[1]
    
    def reset(self):
        """
        Resets hand after game
        """
        
        self.hand.clear()
    
    def hit(self):
        """
        Hit and deal card to self
        """
        
        self.hand.append(self.deck.card())
        
    def stick(self):
        """
        Returns boolean to stay
        """
        
        return self.score() >= 17
        
    def hit_player(self):
        """
        Deal one card to player
        """
        
        return self.deck.card()
    
    def useable(self):
        """
        Return whether ace is useable
        """
        return self.soft and self.current_score < 17
    
    def score(self):
        """
        Calculate dealer's card sum
        """
        
        # reset to sum hand
        total = 0
        
        # iterate through hand
        for i in range(len(self.hand)):
            
            # sum hand
            total += self.hand[i].get_value()
            
            # check if ace in hand
            if self.hand[i].get_value() == 11:
                
                # ace found
                self.ace_in_hand = True
                
                # keep track of position in hand
                self.ace_positions.append(i)
                
        # keep track of running total
        self.current_score = total
        
        # ace makes sum too large
        if self.ace_in_hand and self.useable() and total > 21:
            
            # check if aces left
            if len(self.ace_positions) > 0:
                
                # change ace value from 11 to 1
                self.hand[self.ace_positions.pop()].value = 1
            
                # no lonrger soft/useable
                self.soft = False
            
        return total
    
    def strategy(self):
        """
        Dealer's game strategy
        """
        
        # player wins return to start a new episode
        if self.player_wins:
            return False
        
        # dealer busts return to start a new episode
        if self.score() > 21:
            self.player_wins = True
            return False
        
        # dealer's turn
        elif self.player_sticks:
            
            # hand sum 17 - 20
            if self.stick():
                return False
            
            # ace is useable/soft
            elif self.useable():
                return True
            
            # otherwise hit until at least 17
            elif self.score() < 17:
                return True


In [27]:
class Agent:
    
    def __init__(self):
        self.hand = []
        self.ace_in_hand = False
        self.useable = False
        self.action = 0
        self.ace_positions = []
        self.policy = self.create_policy()
        
    def reset(self):
        """
        Reset hand after game
        """
        self.hand.clear()
        
    def create_policy(self):
        """
        Creates agent's arbitrary policy 
        """
        
        # tabular solutions method
        policy = {}
        
        # agent hit for all sums under 20
        for hand in range(12, 20):
          
            # create policy
            for show in range(2, 12):
                
                # Non-Useable ace
                policy[(hand, show, False)] = 1
                
                # Useable ace
                policy[(hand, show, True)] = 1
         
                    
                # Non-Useable ace
                policy[(20, show, False)] = 0

                # Useable ace
                policy[(20, show, True)] = 0

                # Non-Useable ace
                policy[(21, show, False)] = 0

                # Useable ace
                policy[(21, show, True)] = 0
            
        return policy
    
    def get_policy(self):
        """
        Returns agent's policy
        """
        
        return self.policy
    
    def get_hand(self):
        """
        Returns agent's hand
        """
        
        return self.hand
    
    def set_hand(self, hand):
        """
        Dealer's initial deal
        """
        
        self.hand = hand
    
    def hit(self, card):
        """
        Add's hit card to hand
        """
        
        self.hand.append(card)
    
    def score(self):
        """
        Calculate and return agent's sum of cards
        """
        
        # reset to keep track of current sum 
        total = 0
        
        # sum hand and keep track of aces
        for i in range(len(self.hand)):
            
            # sum hand
            total += self.hand[i].get_value()
            
            # track aces in hand
            if self.hand[i].get_value() == 11:
                
                # ace found
                self.ace_in_hand = True
                self.useable = True
                
                # keep track of position in hand
                self.ace_positions.append(i)
        
        # utility condition for agent's policy
        if self.ace_in_hand and self.useable and total > 21:
            
            # check if aces left
            if len(self.ace_positions) > 0:
                
                # change ace value from 11 to 1
                self.hand[self.ace_positions.pop(0)].value = 1
            
                # ace is no longer useable
                self.useable = False
              
        if total > 22: total = 22
        
        return total 
    
    def is_useable(self):
        """
        Checks and returns if Ace is useable
        """
        
        return self.ace_in_hand and self.useable
    
    def decision(self, state):
        """
        Returns agent's decision: score, show card 
        """
        
        return self.policy[state]

In [28]:
class Game:
    
    def __init__(self):
        self.player_turn = True
        self.dealer_turn = False
        self.episode = []
        self.state = []
        self.reward = 0
        self.score = 0
        self.win = 21        
        self.agent = Agent()
        self.dealer = Dealer()
        
    def on(self, initial_state = None):
        """
        Game Environment
        Returns Episode: (state, action, reward)
        """
        
        # MC First Visit - first deal
        self.agent.set_hand(self.dealer.deal())

        # Check for MC Exploration Starts
        if initial_state is not None:

            # first deal
            self.agent.set_hand(initial_state)

        # Check if natural
        if self.agent.score() == self.win:
            
            # Return Natural Episode
            return [((self.agent.score(),
                      self.dealer.show().get_value(),
                      self.agent.is_useable()), 0, 
                     self.terminal(self.dealer.score(), self.score))]

        # Keep hitting until in score is greater 12
        while self.agent.score() < 12: self.agent.hit(self.dealer.hit_player())
        
        # Start state
        self.state = (self.agent.score(),
                      self.dealer.show().get_value(),
                      self.agent.is_useable())
        
        # Agent's turn
        while self.player_turn:

            # Current score St-1
            self.score = self.agent.score()

            # Take action per policy At-1
            self.agent.action = self.agent.decision((self.score,
                                               self.dealer.show().get_value(),
                                               self.agent.is_useable()))
            # Check policy for Agent's decision
            if self.agent.action == 1: self.agent.hit(self.dealer.hit_player())

            # Otherwise Agent sticks Agent's turn over
            elif self.agent.action == 0: self.dealer.player_sticks = True

            # Check if switch players
            if self.agent.score() == self.win or self.dealer.player_sticks:

                # Agent's turn over
                self.player_turn = False

                # Dealer's turn
                self.dealer_turn = True

            # check if agent busts; Game Over
            elif self.agent.score() > self.win: self.player_turn = self.dealer_turn = False
                
            # Episode following pi: S0, A0, R1, ..., St-1, At-1, Rt 
            self.episode.append(((self.score, 
                                  self.dealer.show().get_value(),
                                  self.agent.is_useable()), 
                                 self.agent.action, 
                                 self.reward))
        # Dealer's turn
        while self.dealer_turn:

            # Dealer's strategy
            if self.dealer.strategy(): self.dealer.hit()

            # Dealer's turn over; game ends
            else: self.dealer_turn = False
        
        # Take action per policy At-1
        self.agent.action = self.agent.decision((self.agent.score(),
                                           self.dealer.show().get_value(),
                                           self.agent.is_useable()))
        
        # Episode following pi: S0, A0, R1, ..., St-1, At-1, Rt 
        self.episode.append(((self.agent.score(), 
                              self.dealer.show().get_value(),
                              self.agent.is_useable()), 
                             self.agent.action, 
                             self.terminal(self.dealer.score(), self.agent.score())))
        # GAME OVER
        return self.episode[::-1]
        
    def terminal(self, dealer_score, agent_score):
        """
        Terminal state
        Returns reward: [-1, 0, 1]
        """
        
        # Win
        if dealer_score < agent_score and agent_score <= self.win:
            return 1
        
        # Lose
        if dealer_score > agent_score and dealer_score <= self.win or agent_score > self.win:
            return -1
        
        # Draw 
        if dealer_score == agent_score:
            return 0


In [29]:
def average(returns):
    """
    Returns average(Returns(St, At))
    """

    return sum(returns)/len(returns)

In [30]:
def first_visit(episodes):
    """
    Returns MC Prediction
    """

    #########
    # Input #
    #########
    V = {}; returns = {}; appears = []

    # game
    game = Game()

    # Input Policy
    policy = game.agent.get_policy()

    ##############
    # Initialize #
    ##############

    # Initialize V(s) and Returns(s) arbitrarily
    for state in policy: 

        # State-Value Function V(s)
        V[state] = 0.0

        # Returns(St)
        returns[state] = []

    ################
    # Loop Forever #
    ################

    # Loop for each episode
    for i in range(episodes):

        # Generate an episode following p⇡: S0, A0, R1, ... ,ST-1, AT-1, RT
        game = Game(); episode = game.on()

        # G <- 0
        G = 0

        # Loop for each step of episode, t = T-1, T-2, ..., 0:
        for S, A, R in episode:

            # G <- Gamma*G + Rt+1
            G = G + R

            if S not in appears:

                # Append G to Returns(St)
                returns[S].append(G)

                # V(St) <- average(Returns(St))
                V[S] = average(returns[S])

                # state appeared
                appears.append(S)
    
    # V = v_pi
    return V

In [31]:

    
# create terminal printer instance
pp = pprint.PrettyPrinter(width=160, compact=True)

# pp.pprint(first_visit(10000))
game = Game()
print(game.on())


KeyError: (22, 8, False)

In [None]:
# setup the figure and axes
fig = plt.figure(figsize=(8, 3))
ax1 = fig.add_subplot(121, projection='3d')
ax2 = fig.add_subplot(122, projection='3d')

# fake data
ax2.xlabel = "A   Dealer Showing   10"
ax2.ylabel = "2 Player Score 12"
_x = np.arange(4)
_y = np.arange(5)
_xx, _yy = np.meshgrid(_x, _y)
x, y = _xx.ravel(), _yy.ravel()

top = x + y
bottom = np.zeros_like(top)
width = depth = 1

ax1.bar3d(x, y, bottom, width, depth, top, shade=True)
ax1.set_title('After 10,000 episodes')

ax2.bar3d(x, y, bottom, width, depth, top, shade=True)
ax2.set_title('After 500,000 episodes')

plt.show()