<a href="https://colab.research.google.com/github/tarod13/CardGames/blob/main/BlackJack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import numpy as np
import random

In [92]:
def number2card(number):
  invalid_flag = False
  
  rank = (number % 13) + 1
  if rank < 1 or rank > 13:
    invalid_flag = True
  elif rank == 1:
    rank = 'A'
  elif rank == 11:
    rank = 'J'
  elif rank == 12:
    rank = 'Q'
  elif rank == 13:
    rank = 'K'
  else:
    rank = str(int(rank))
  
  symbol = number // 13
  if symbol == 0:
    symbol = 'clubs'
  elif symbol == 1:
    symbol = 'diamonds'
  elif symbol == 2:
    symbol = 'hearts'
  elif symbol == 3:
    symbol = 'spades'
  else:
    invalid_flag = True

  assert not invalid_flag, 'Invalid number'

  return (rank, symbol)


def card2value(card):
  rank = card[0]
  
  if rank == 'A':
    value = 1    
  elif rank in ['J', 'Q', 'K']:
    value = 10
  elif int(rank) > 1 and int(rank) <= 10:
    value = int(rank)
  else:
    print(rank)
    raise NameError('Invalid card')
  
  return value

In [124]:
class BlackJackEnv():
  def __init__(self, seed=0):    
    self.seed(seed)
    
  def step(self, action):
    state = [list(self.player_hand).copy(), self.visible_dealer_card]

    if action == 0 or action == '0' or action == 'stand':
      sum_dealer, n_aces_dealer = self.calculate_sum(who='dealer')
      less_than_17 = (sum_dealer + int(n_aces_dealer>0) * 10) < 17
      more_than_21 = False
      while less_than_17 and not more_than_21:
        new_dealer_card = set(random.sample(self.deck, 1))        
        self.deck = self.deck - new_dealer_card
        self.dealer_hand = self.dealer_hand.union(new_dealer_card)

        new_dealer_card = new_dealer_card.pop()
        sum_dealer += card2value(new_dealer_card)
        rank = new_dealer_card[0]
        if rank == 'A':
          n_aces_dealer += 1
        less_than_17 = (sum_dealer + int(n_aces_dealer>0) * 10) < 17   
        more_than_21 = sum_dealer > 21
      done = True
      score_dealer = self.calculate_score(sum_dealer, n_aces_dealer)
      if score_dealer > 21:
        reward = 1.0
      else:
        sum_player, n_aces_player = self.calculate_sum()
        score_player = self.calculate_score(sum_player, n_aces_player)
        if score_player > score_dealer:
          reward = 1.0
        elif score_player == score_dealer:
          reward = 0.0
        else:
          reward = -1.0

    elif action == 1 or action == '1' or action == 'hit':
      new_player_card = set(random.sample(self.deck, 1))
      self.deck = self.deck - new_player_card
      self.player_hand = self.player_hand.union(new_player_card)
      sum_player, n_aces_player = self.calculate_sum()
      went_bust = sum_player > 21      
      if went_bust:
        reward = -1
        done = True        
      else:
        reward = 0
        done = False        

    else:
      raise NameError('Invalid action')
    
    if done:    
      next_state = [list(self.player_hand).copy(), list(self.dealer_hand).copy()]
    else:
      next_state = [list(self.player_hand).copy(), self.visible_dealer_card]
    return state, next_state, reward, done 

  def seed(self, seed=None):
    random.seed(seed)

  
  def reset(self):
    self.deck = set(range(0,52))
    self.deck = set([number2card(x) for x in self.deck])

    self.player_hand = set(random.sample(self.deck, 2))
    self.deck = self.deck - self.player_hand
    self.dealer_hand = set(random.sample(self.deck, 2))
    self.deck = self.deck - self.dealer_hand

    self.visible_dealer_card = list(self.dealer_hand)[0]
    state = [list(self.player_hand).copy(), self.visible_dealer_card]
    return state


  @staticmethod
  def calculate_score(sum_no_aces, n_aces):
    score = sum_no_aces
    soft_hand = (21 - sum_no_aces) >= 10
    at_least_one_ace = n_aces > 0
    if soft_hand and at_least_one_ace:
      score += 10
    return score

  
  def calculate_sum(self, who='player'):
    n_aces = 0
    sum_ = 0
    
    if who == 'player':
      hand = self.player_hand
    else:
      hand = self.dealer_hand
    
    for card in hand:  
      rank = card[0]
      if rank == 'A':
        n_aces += 1          
      sum_ += card2value(card)
    
    return sum_, n_aces

In [125]:
seed = 0
juego = BlackJackEnv(seed=seed)
mano_inicial = juego.reset()

In [126]:
mano_inicial

[[('A', 'spades'), ('5', 'hearts')], ('4', 'hearts')]

In [127]:
m, nm, r, d = juego.step('hit')

In [128]:
m

[[('A', 'spades'), ('5', 'hearts')], ('4', 'hearts')]

In [129]:
nm

[[('A', 'spades'), ('5', 'hearts'), ('5', 'diamonds')], ('4', 'hearts')]

In [130]:
r

0

In [131]:
d

False

In [132]:
m, nm, r, d = juego.step('stand')

In [133]:
m

[[('A', 'spades'), ('5', 'hearts'), ('5', 'diamonds')], ('4', 'hearts')]

In [134]:
nm

[[('A', 'spades'), ('5', 'hearts'), ('5', 'diamonds')],
 [('4', 'hearts'), ('7', 'hearts'), ('5', 'spades'), ('10', 'diamonds')]]

In [135]:
r

1.0

In [136]:
d

True

In [138]:
N_episodes = 10000
N_actions = 2
epsilon = 0.1
beta = 1.0
np.random.seed(seed)

In [119]:
  def state2code(state):
    n_aces = 0
    sum_ = 0
    
    player_hand, dealer_card = state
    
    for card in player_hand:  
      rank = card[0]
      if rank == 'A':
        n_aces += 1          
      sum_ += card2value(card)
    
    visible_score_dealer = card2value(dealer_card)
    aces = n_aces > 0
    return (sum_, visible_score_dealer, aces)


def sample_epsilon_policy(hand_sum, dealer_score, aces, q_function):
  q_values = q_function[int(hand_sum-12), int(dealer_score-1), int(aces),:]
  if np.random.rand() > epsilon:
    action = q_values.a

In [None]:
q_function = np.zeros(10,10,2,2)

for episode in range(0, N_episodes):
  done = False
  episode_reward = 0
  state = juego.reset()

  while not done:
    hand_sum, dealer_score, aces = state2code(state)
    if hand_sum < 12:
      action = 'hit'
    else:
      action = sample_epsilon_policy() 