In [None]:
from random import randrange

In [None]:
class BlackJack:
    """Simplified one-player blackjack against a dealer with a fixed policy.

    Cards are represented by their point value only: an ace is stored as 1
    and every ten-valued card as 10.  Cards are drawn with replacement, so
    the composition of the deck never changes during a game.

    ``game_state`` encodes the outcome from the player's point of view:

    * ``-2`` -- game still in progress
    * ``-1`` -- player lost
    * ``0``  -- draw
    * ``1``  -- player won

    ``distribute()`` must be called before ``hit()`` / ``stick()`` so that
    both hands exist.
    """

    # One 52-card deck by value: per suit an ace (1), 2-9 and four ten-valued cards.
    cards = 4 * (list(range(1, 10)) + [10, 10, 10, 10])

    def __init__(self):
        self.game_state = -2

    @classmethod
    def draw_card(cls):
        """Return the value of one card drawn uniformly, with replacement.

        As in a real deck: p(ace) = 4/52, p(10) = 16/52 and 4/52 for every
        other value.
        """
        return cls.cards[randrange(len(cls.cards))]

    def distribute(self, log=False):
        """Deal two cards to the dealer and the player and start a new game.

        A player natural (21 with the two initial cards) ends the game
        immediately: a draw if the dealer also holds 21, a win otherwise.

        Args:
            log: when true, print the dealer's first card and the player's hand.

        Returns:
            The resulting ``game_state``.
        """
        self.game_state = -2
        self.dealer_cards = [self.draw_card(), self.draw_card()]
        self.player_cards = [self.draw_card(), self.draw_card()]
        if log:
            print("Dealer:", self.dealer_cards[0])
            print("My cards:", self.player_cards)
        if self.my_sum() == 21:
            self.game_state = 0 if self.dealer_sum() == 21 else 1
        return self.game_state

    def has_usable_as(self):
        """Return 1 if the player holds an ace currently counted as 11, else 0.

        An ace is "usable" only while counting it as 11 does not bust the
        hand; in that case the best total differs from the raw card sum.
        """
        return 1 if self.my_sum() != self.my_current_sum() else 0

    def hit(self, log=False):
        """Draw one more card for the player.

        Going over 21 loses the game; reaching exactly 21 automatically
        sticks and lets the dealer play.

        Args:
            log: when true, print the player's hand (and the final scores if
                the hit leads to an automatic stick).

        Returns:
            The resulting ``game_state``.
        """
        if self.game_state != -2:
            print("Game already over")
            return self.game_state
        self.player_cards.append(self.draw_card())
        if log:
            print("My cards:", self.player_cards)
        total = self.my_sum()
        if total > 21:
            self.game_state = -1
        elif total == 21:
            # Nothing can improve on 21: hand over to the dealer, keeping
            # the caller's logging preference.
            return self.stick(log=log)
        return self.game_state

    def stick(self, log=False):
        """Stop drawing, let the dealer play, and settle the game.

        The dealer draws until reaching at least 17.  The player wins if the
        dealer busts or has the lower total, draws on equal totals and loses
        otherwise.

        Args:
            log: when true, print both final totals.

        Returns:
            The resulting ``game_state``.
        """
        if self.game_state != -2:
            print("Game already over")
            return self.game_state
        # Dealer policy: hit below 17, stick otherwise.
        while self.dealer_sum() < 17:
            self.dealer_cards.append(self.draw_card())
        my_sum = self.my_sum()
        dealer_sum = self.dealer_sum()
        if log:
            print("Dealer score:", dealer_sum)
            print("My sum:", my_sum)
        if dealer_sum > 21 or my_sum > dealer_sum:
            self.game_state = 1
        elif my_sum == dealer_sum:
            self.game_state = 0
        else:
            self.game_state = -1
        return self.game_state

    @staticmethod
    def compute_sum(cards):
        """Return the best blackjack total for ``cards``.

        Aces are stored as 1.  At most one ace can be promoted to 11 without
        busting, so 10 is added once when the hand contains an ace and the
        promoted total stays at or below 21.
        """
        value = sum(cards)
        if 1 in cards and value + 10 <= 21:
            value += 10
        return value

    def my_current_sum(self):
        """Return the raw sum of the player's cards, every ace counted as 1."""
        return sum(self.player_cards)

    def my_sum(self):
        """Return the player's best total (see ``compute_sum``)."""
        return self.compute_sum(self.player_cards)

    def dealer_sum(self):
        """Return the dealer's best total (see ``compute_sum``)."""
        return self.compute_sum(self.dealer_cards)

    def interactive_play(self, action):
        """Apply one player action with logging and report a finished game.

        Args:
            action: 0 to hit, 1 to stick.  Any other value performs no move.

        Returns:
            The resulting ``game_state``.
        """
        if self.game_state == -2:
            if action == 0:
                print("Hit")
                self.hit(log=True)
            elif action == 1:
                print("Stick")
                self.stick(log=True)
        if self.game_state != -2:
            print("Game over", self.game_state)
            print("Dealer:", self.dealer_cards)
            print("My cards:", self.player_cards)
        return self.game_state

In [None]:
# Start a fresh game and deal the opening hands, printing the dealer's
# first card and the player's cards.
blackJack = BlackJack()
blackJack.distribute(log=True)

In [None]:
# Take one "hit" (action 0) in the game dealt above.
blackJack.interactive_play(action=0)

In [None]:
import numpy as np
# state: (dealer_showing - 1, my_sum - 2, usable_ace) -- zero-based indices into Q
# action: 0 = hit, 1 = stick

def basic_hit_policy(state, limit):
    """Threshold policy: stick (1) once ``state[1]`` reaches ``limit``, else hit (0).

    NOTE: callers in this file encode ``state[1]`` as ``my_sum - 2``, so
    ``limit`` is compared against that offset value, not the raw hand total.
    """
    return 1 if state[1] >= limit else 0

def play_policy(policy):
    """Play one complete game of blackjack, choosing every move with ``policy``.

    ``policy`` receives the encoded state
    ``(dealer_cards[0] - 1, my_sum - 2, has_usable_as)`` and must return
    0 (hit) or 1 (stick); any other value raises ``IndexError``.

    Returns:
        ``(states, game_state)``: the encoded states visited, in order, and
        the final game state.  If the game is already over right after the
        deal, the single dealt state is still recorded.
    """
    game = BlackJack()
    game.distribute()

    def encode():
        # Shift the dealer card and the player total to zero-based indices.
        return (game.dealer_cards[0] - 1, game.my_sum() - 2, game.has_usable_as())

    visited = []
    if game.game_state != -2:
        visited.append(encode())
    while game.game_state == -2:
        current = encode()
        visited.append(current)
        move = policy(current)
        if move == 0:
            game.hit()
        elif move == 1:
            game.stick()
        else:
            raise IndexError
    return visited, game.game_state

# Q axes: 10 possible dealer cards / 20 possible player sums (2-21) / 2 usable-ace flags (False, True) / 2 possible actions
def optimal_policy(Q, state):
    """Return the greedy action for ``state``: the index of its largest Q value."""
    action_values = Q[state[0], state[1], state[2]]
    return action_values.argmax()

def update_policy_scores(Q, Returns, states, game_state, policy):
    """Fold the outcome of one episode into the action-value table.

    For every visited state the final ``game_state`` is appended to the
    returns recorded for the (state, action) pair, and the matching ``Q``
    entry is set to the mean of those returns.

    Args:
        Q: action-value table indexed as ``Q[dealer, player_sum, ace, action]``;
            updated in place.
        Returns: dict mapping ``(state, action)`` to the list of returns seen
            so far; updated in place.
        states: encoded states visited during the episode.  They are assumed
            never to repeat within one episode.
        game_state: final outcome of the episode, used as the return of every
            visited state.
        policy: the policy that generated the episode.  It is queried again to
            recover the action taken in each state, so it must be
            deterministic.
    """
    for state in states:
        # Credit the action the behaviour policy chose, not the greedy action
        # under the current Q; the two differ whenever the episode was played
        # with a non-greedy policy.
        action = policy(state)
        history = Returns.setdefault((state, action), [])
        history.append(game_state)
        Q[state[0], state[1], state[2], action] = sum(history) / len(history)

In [None]:
# Learning state: Q holds one value per (dealer card, player sum, usable ace,
# action) and starts at zero; Returns collects the outcomes observed for each
# (state, action) pair.
Q = np.zeros(shape=(10, 20, 2, 2))
Returns = dict()

In [None]:
def _bootstrap_policy(state):
    # Fixed-threshold policy for the very first episode.  The threshold is
    # compared against state[1], which play_policy encodes as my_sum - 2.
    return basic_hit_policy(state, 20)


def _greedy_policy(state):
    # Always follow the action currently rated best by Q.
    return optimal_policy(Q, state)


# Seed the tables with a single episode played under the threshold policy.
running_policy = _bootstrap_policy
states, game_state = play_policy(running_policy)
update_policy_scores(Q, Returns, states, game_state, running_policy)

# Then keep playing greedily with respect to Q, updating after every episode.
running_policy = _greedy_policy
for i in range(500000):
    states, game_state = play_policy(running_policy)
    update_policy_scores(Q, Returns, states, game_state, running_policy)

In [None]:
def play_blackJack():
    """Play one logged game using the greedy policy derived from the global ``Q``.

    Before every move the Q values of the current state are printed.

    Returns:
        The final game state.
    """
    game = BlackJack()
    outcome = game.distribute(log=True)

    while outcome == -2:
        dealer_idx = game.dealer_cards[0] - 1
        sum_idx = game.my_sum() - 2
        state = (dealer_idx, sum_idx, game.has_usable_as())
        print(Q[state[0], state[1], state[2]])
        # Swap in basic_hit_policy(state, 17) here to compare with a fixed threshold.
        chosen = optimal_policy(Q, state)
        outcome = game.interactive_play(chosen)
    return outcome

# Evaluate the learned policy over 1000 logged games and accumulate the
# outcomes (+1 win, 0 draw, -1 loss) into a single score.
total_score = 0
for i in range(1000):
    game_state = play_blackJack()
    total_score = total_score + game_state
    print(
        "*** Win!***" if game_state == 1
        else "*** Draw :(***" if game_state == 0
        else "*** Lose XD ***"
    )
print("Total score:", total_score)