In [1]:
from dataclasses import dataclass, field
import copy
from enum import Enum, auto
import random
import pandas as pd

In [2]:
# cards are numbers from 1 to 13
# the score is capped at 10

## Define a Hand class and functions on it

In [3]:
@dataclass
class Hand:
    """Class for representing a blackjack hand."""
    score: int = 0
    soft: bool = False
    cards: [int] = field(default_factory=list)
    doubled: bool = False


In [4]:
def add_card(hand, card):
    if card != 1:  
        if (not hand.soft) or (hand.score <= 11): # simple case
            new_score = min(hand.score+min(10, card), 22) # cap busted hands at 22
        else: # make a soft hand hard
            new_score = hand.score+min(10, card) - 10 
            hand.soft = False
    else: # card is an ace
        if hand.score >= 11: # 11s and up count an ace as 1 (hard or soft)
            new_score = min(hand.score+min(10, card), 22) # cap busted hands at 22
        else: # soft ace
            new_score = hand.score+11
            hand.soft = True

    hand.score = new_score
    hand.cards += [card]
    return hand

In [5]:
def is_busted(hand):
    return hand.score > 21

In [6]:
def is_blackjack(hand):
    return hand.score==21 and len(hand.cards)==2

In [7]:
h = Hand()
h

Hand(score=0, soft=False, cards=[], doubled=False)

In [8]:
add_card(h, 6) # start a hand with a 6

Hand(score=6, soft=False, cards=[6], doubled=False)

In [9]:
add_card(h, 11) # show that J (11) counts as 10 points

Hand(score=16, soft=False, cards=[6, 11], doubled=False)

In [10]:
add_card(h, 7), is_busted(h) # bust (show that 23 is counted as 22)

(Hand(score=22, soft=False, cards=[6, 11, 7], doubled=False), True)

In [11]:
# OK, now let's try another hand with aces
h = Hand()
add_card(h, 1) # should be a soft 11

Hand(score=11, soft=True, cards=[1], doubled=False)

In [12]:
add_card(h, 1) # two aces: should be a soft 12

Hand(score=12, soft=True, cards=[1, 1], doubled=False)

In [13]:
add_card(h, 11) # two aces and a J: should be a hard 12

Hand(score=12, soft=False, cards=[1, 1, 11], doubled=False)

In [14]:
# OK, now let's try another hand with a blackjack
h = Hand()
add_card(h, 1) # should be a soft 11
add_card(h, 10)
h, is_blackjack(h)

(Hand(score=21, soft=True, cards=[1, 10], doubled=False), True)

## Now define gameplay and strategy

In [15]:

# TODO I might want a Flag class later, to provide a set of possible Actions
class Action(Enum):
    STAND = auto()
    HIT = auto()
    DOUBLE = auto()
    #SPLIT = auto()
    
    

In [16]:
# Most simple/conservative strategy imaginable:
def strat_nobust(hand, dealer):
    if hand.score > 11:
        return Action.STAND
    else:
        return Action.HIT
        

In [17]:
# Dealer strategy
def strat_dealer(hand, dealer):
    if hand.score < 17:
        return Action.HIT
    # TODO handle soft hands
    else:
        return Action.STAND
        

In [18]:
class HandOutcome(Enum):
    WIN = 1
    LOSE = -1
    WIN_DOUBLE = 2
    LOSE_DOUBLE = -2
    PUSH = 0
    BLACKJACK = 1.5

In [19]:
# Deck; completely random (i.e., infinite) for now


def deal_card():
    return random.randrange(13)+1

In [20]:
[deal_card() for _ in range(10)]

[4, 9, 3, 8, 4, 8, 2, 10, 7, 1]

In [21]:
# return the final hand after playing
def player_play_hand(strategy, hand, dealer, deck): 
    while True:
        decision = strategy(hand, dealer)
        if decision == Action.STAND:
            return hand
        if decision == Action.HIT:
            add_card(hand, deck())
            if is_busted(hand):
                return hand
        if decision == Action.DOUBLE:
            hand.doubled = True
            add_card(hand, deck())
            return hand



In [22]:
def player_hand_outcome(player_hand, dealer_hand):
    # First compute the initial outcome, then double it if necessary for a double-down
    def initial_outcome():
        if is_blackjack(player_hand):
            if is_blackjack(dealer_hand):
                return HandOutcome.PUSH
            else:
                return HandOutcome.BLACKJACK
        if is_busted(player_hand) or is_blackjack(dealer_hand):
            return HandOutcome.LOSE
        if is_busted(dealer_hand):
            return HandOutcome.WIN
        if player_hand.score > dealer_hand.score:
            return HandOutcome.WIN
        if player_hand.score == dealer_hand.score:
            return HandOutcome.PUSH
        if player_hand.score < dealer_hand.score:
            return HandOutcome.LOSE

    outcome = initial_outcome()

    outcome_doubler = {HandOutcome.WIN: HandOutcome.WIN_DOUBLE, HandOutcome.LOSE: HandOutcome.LOSE_DOUBLE}

    if player_hand.doubled:
        outcome = outcome_doubler.get(outcome) or outcome
    return outcome
        


In [41]:
# One player and a dealer
# Player has a strategy
# Each gets dealt cards and plays according to strategy
# Emit all the data, and figure the rest out later

def complete_one_hand(strat, player_hand, dealer_hand, dealer_hole_card):

    hand_p = copy.deepcopy(player_hand)
    hand_d = copy.deepcopy(dealer_hand)
    

    
    # player
    player_play_hand(strat, hand_p, hand_d, deal_card)
    # dealer
    player_play_hand(strat_dealer, add_card(hand_d, dealer_hole_card), None, deal_card)
    
    return (hand_p, hand_d, player_hand_outcome(hand_p, hand_d))



In [43]:
# One player and a dealer
# Player has a strategy
# Each gets dealt cards and plays according to strategy
# Emit all the data, and figure the rest out later

def deal_one_hand():
    hand_p = Hand()
    hand_d = Hand()

    add_card(hand_p, deal_card())
    add_card(hand_d, deal_card())
    add_card(hand_p, deal_card())
    
    return hand_p, hand_d, dealer_hole_card

    dealer_hole_card = deal_card()
def play_one_hand(strat):
    hand_p, hand_d, dealer_hole_card = deal_one_hand()
    return complete_one_hand(strat, hand_p, hand_d, dealer_hole_card)

play_one_hand(strat_nobust)

(Hand(score=14, soft=False, cards=[8, 6], doubled=False),
 Hand(score=22, soft=False, cards=[13, 2, 1, 11], doubled=False),
 <HandOutcome.WIN: 1>)

## Aggregate and summarize the data from the simulations

In [25]:

def generate_row_from_hand(h):
    (hand_p, hand_d, outcome) = h
    return {'hand_start': hand_p.cards[:2], 'dealer_card': hand_d.cards[0], 'hand_end': hand_p.cards, 'dealer_hand': hand_d.cards, 'outcome': outcome}

generate_row_from_hand(play_one_hand(strat_nobust))

{'hand_start': [8, 7],
 'dealer_card': 6,
 'hand_end': [8, 7],
 'dealer_hand': [6, 2, 1],
 'outcome': <HandOutcome.LOSE: -1>}

In [26]:
def run_n_sim_trials(strat, n):
    sims = pd.DataFrame([generate_row_from_hand(play_one_hand(strat)) for _ in range(n)])
    return sims

def summarize_totals(sims):
    return sims['outcome'].value_counts(), sims['outcome'].apply(lambda x: x.value).mean()

sims = run_n_sim_trials(strat_dealer, 1000)
sims, summarize_totals(sims)

(    hand_start  dealer_card         hand_end         dealer_hand  \
 0     [13, 12]            9         [13, 12]             [9, 11]   
 1      [12, 9]            1          [12, 9]              [1, 7]   
 2      [13, 6]            6      [13, 6, 11]          [6, 5, 11]   
 3       [6, 4]           13     [6, 4, 2, 9]          [13, 6, 2]   
 4      [6, 11]            9       [6, 11, 2]           [9, 5, 6]   
 ..         ...          ...              ...                 ...   
 995     [2, 2]           11  [2, 2, 1, 3, 9]             [11, 9]   
 996     [1, 1]            2  [1, 1, 6, 7, 6]        [2, 2, 8, 5]   
 997     [1, 3]            2    [1, 3, 8, 10]  [2, 1, 6, 2, 2, 4]   
 998    [6, 10]            2      [6, 10, 10]          [2, 10, 7]   
 999     [2, 9]            7        [2, 9, 9]           [7, 8, 5]   
 
               outcome  
 0     HandOutcome.WIN  
 1     HandOutcome.WIN  
 2    HandOutcome.LOSE  
 3     HandOutcome.WIN  
 4    HandOutcome.LOSE  
 ..                .

In [27]:
def strat_simple(hand, dealer):
    if hand.score == 11:  return Action.DOUBLE
    if hand.score >= 17:  return Action.STAND
    if hand.score <= 11:  return Action.HIT
    if dealer.score in (range(3,7)):  return Action.STAND
    else:  return Action.HIT
        
sims = run_n_sim_trials(strat_simple, 1000)
sims, summarize_totals(sims)

(    hand_start  dealer_card     hand_end dealer_hand                 outcome
 0       [8, 5]            8    [8, 5, 9]  [8, 8, 13]        HandOutcome.LOSE
 1      [12, 6]            8   [12, 6, 8]     [8, 13]        HandOutcome.LOSE
 2       [2, 9]           12   [2, 9, 13]     [12, 8]  HandOutcome.WIN_DOUBLE
 3       [8, 8]            8   [8, 8, 12]   [8, 7, 3]        HandOutcome.LOSE
 4      [2, 11]           13  [2, 11, 11]     [13, 8]        HandOutcome.LOSE
 ..         ...          ...          ...         ...                     ...
 995    [10, 7]            5      [10, 7]   [5, 8, 6]        HandOutcome.LOSE
 996    [2, 10]            5      [2, 10]  [5, 8, 12]         HandOutcome.WIN
 997    [10, 6]           10   [10, 6, 2]  [10, 5, 8]         HandOutcome.WIN
 998    [12, 4]            5      [12, 4]  [5, 2, 12]        HandOutcome.LOSE
 999     [3, 6]           10   [3, 6, 10]  [10, 5, 6]        HandOutcome.LOSE
 
 [1000 rows x 5 columns],
 (HandOutcome.LOSE           477
  H

In [44]:
for strat in [strat_dealer, strat_nobust, strat_simple]:
    print(summarize_totals(run_n_sim_trials(strat, 100000)))


(HandOutcome.LOSE         49252
HandOutcome.WIN          36375
HandOutcome.PUSH          9764
HandOutcome.BLACKJACK     4609
Name: outcome, dtype: int64, -0.059635)
(HandOutcome.LOSE         51666
HandOutcome.WIN          37516
HandOutcome.PUSH          6389
HandOutcome.BLACKJACK     4429
Name: outcome, dtype: int64, -0.075065)
(HandOutcome.LOSE           46050
HandOutcome.WIN            34815
HandOutcome.PUSH            8874
HandOutcome.BLACKJACK       4506
HandOutcome.WIN_DOUBLE      3330
HandOutcome.LOSE_DOUBLE     2425
Name: outcome, dtype: int64, -0.02666)


## Simulate specific situations to determine strategy

In [29]:
# 12 vs deuce

hand_p = add_card(add_card(Hand(), 5), 7)
hand_d = add_card(Hand(), 2)

hand_p, hand_d

(Hand(score=12, soft=False, cards=[5, 7], doubled=False),
 Hand(score=2, soft=False, cards=[2], doubled=False))

In [30]:
# Generate a set of strategies (one strategy per action) that default to strat_base, but
# when condition is true, always performs the specific action
def gen_cond_strategy(strat_base, condition, action):
    def strat_cond(hand, dealer):
        if condition(hand, dealer):
            return action
        else: return strat_base(hand, dealer)
    
    return strat_cond

def gen_cond_strategies(strat_base, condition, actions):
    return [gen_cond_strategy(strat_base, condition, a) for a in actions]

def cond_12_2(hand, dealer):
    return hand.score == 12 and dealer.score == 2 and not hand.soft

In [31]:
strats = gen_cond_strategies(strat_simple, cond_12_2, [Action.HIT, Action.STAND, Action.DOUBLE])
strats

[<function __main__.gen_cond_strategy.<locals>.strat_cond(hand, dealer)>,
 <function __main__.gen_cond_strategy.<locals>.strat_cond(hand, dealer)>,
 <function __main__.gen_cond_strategy.<locals>.strat_cond(hand, dealer)>]

In [32]:
complete_one_hand(strats[0], hand_p, hand_d)

(Hand(score=19, soft=False, cards=[5, 7, 7], doubled=False),
 Hand(score=19, soft=False, cards=[2, 7, 12], doubled=False),
 <HandOutcome.PUSH: 0>)

In [33]:


def run_n_sim_trials_from_state(strat, hand_p, hand_d, n):
    sims = pd.DataFrame([generate_row_from_hand(complete_one_hand(strat, hand_p, hand_d)) for _ in range(n)])
    return sims

run_n_sim_trials_from_state(strats[0], hand_p, hand_d, 10)

Unnamed: 0,hand_start,dealer_card,hand_end,dealer_hand,outcome
0,"[5, 7]",2,"[5, 7, 13]","[2, 7, 9]",HandOutcome.LOSE
1,"[5, 7]",2,"[5, 7, 1, 13]","[2, 11, 5]",HandOutcome.LOSE
2,"[5, 7]",2,"[5, 7, 12]","[2, 2, 12, 9]",HandOutcome.LOSE
3,"[5, 7]",2,"[5, 7, 9]","[2, 13, 9]",HandOutcome.PUSH
4,"[5, 7]",2,"[5, 7, 10]","[2, 4, 5, 6]",HandOutcome.LOSE
5,"[5, 7]",2,"[5, 7, 9]","[2, 4, 13, 5]",HandOutcome.PUSH
6,"[5, 7]",2,"[5, 7, 12]","[2, 2, 4, 2, 8]",HandOutcome.LOSE
7,"[5, 7]",2,"[5, 7, 4, 1]","[2, 13, 13]",HandOutcome.WIN
8,"[5, 7]",2,"[5, 7, 13]","[2, 11, 7]",HandOutcome.LOSE
9,"[5, 7]",2,"[5, 7, 11]","[2, 1, 12, 4]",HandOutcome.LOSE


In [34]:
for strat in [strats[0], strats[1], strats[2], strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 100000)))

(HandOutcome.LOSE    60938
HandOutcome.WIN     32311
HandOutcome.PUSH     6751
Name: outcome, dtype: int64, -0.28627)
(HandOutcome.LOSE    64035
HandOutcome.WIN     35965
Name: outcome, dtype: int64, -0.2807)
(HandOutcome.LOSE_DOUBLE    59646
HandOutcome.WIN_DOUBLE     35459
HandOutcome.PUSH            4895
Name: outcome, dtype: int64, -0.48374)
(HandOutcome.LOSE    60968
HandOutcome.WIN     32672
HandOutcome.PUSH     6360
Name: outcome, dtype: int64, -0.28296)


In [35]:
for strat in [strats[0], strats[1], strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 100)))

(HandOutcome.LOSE    60
HandOutcome.WIN     37
HandOutcome.PUSH     3
Name: outcome, dtype: int64, -0.23)
(HandOutcome.LOSE    67
HandOutcome.WIN     33
Name: outcome, dtype: int64, -0.34)
(HandOutcome.LOSE    59
HandOutcome.WIN     37
HandOutcome.PUSH     4
Name: outcome, dtype: int64, -0.22)


In [36]:
# Soft 12 vs 2
hand_p = add_card(add_card(Hand(), 1), 1)
hand_d = add_card(Hand(), 2)
hand_p, hand_d

(Hand(score=12, soft=True, cards=[1, 1], doubled=False),
 Hand(score=2, soft=False, cards=[2], doubled=False))

In [37]:
for strat in [strats[0], strats[1], strats[2], strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 1000*1000)))

(HandOutcome.LOSE           480345
HandOutcome.WIN            322751
HandOutcome.PUSH            78836
HandOutcome.WIN_DOUBLE      74827
HandOutcome.LOSE_DOUBLE     43241
Name: outcome, dtype: int64, -0.094422)
(HandOutcome.LOSE           491299
HandOutcome.WIN            335824
HandOutcome.WIN_DOUBLE      74979
HandOutcome.PUSH            54634
HandOutcome.LOSE_DOUBLE     43264
Name: outcome, dtype: int64, -0.092045)
(HandOutcome.LOSE_DOUBLE    261701
HandOutcome.LOSE           258409
HandOutcome.WIN_DOUBLE     204718
HandOutcome.WIN            202930
HandOutcome.PUSH            72242
Name: outcome, dtype: int64, -0.169445)
(HandOutcome.LOSE           480573
HandOutcome.WIN            322564
HandOutcome.PUSH            78935
HandOutcome.WIN_DOUBLE      74517
HandOutcome.LOSE_DOUBLE     43411
Name: outcome, dtype: int64, -0.095797)


In [38]:
# Implement a strategy that composes conditions and actions
conditions = [(cond_12_2, Action.STAND)]

def generate_strat_conditional(strat_base, conditions):
    def strat_cond(hand, dealer):
        for (condition, action) in conditions:
            if condition(hand, dealer): return action
        return strat_base(hand, dealer)
    return strat_cond
    
strat_cond = generate_strat_conditional(strat_simple, conditions)

In [39]:
for strat in [strat_simple, strat_cond]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 100*1000)))

(HandOutcome.LOSE           48041
HandOutcome.WIN            32236
HandOutcome.PUSH            7908
HandOutcome.WIN_DOUBLE      7431
HandOutcome.LOSE_DOUBLE     4384
Name: outcome, dtype: int64, -0.09711)
(HandOutcome.LOSE           49325
HandOutcome.WIN            33514
HandOutcome.WIN_DOUBLE      7408
HandOutcome.PUSH            5482
HandOutcome.LOSE_DOUBLE     4271
Name: outcome, dtype: int64, -0.09537)


In [40]:
for strat in [strat_simple, strat_cond]:
    print(summarize_totals(run_n_sim_trials(strat, 1000000)))


(HandOutcome.LOSE           461026
HandOutcome.WIN            349006
HandOutcome.PUSH            87049
HandOutcome.BLACKJACK       45142
HandOutcome.WIN_DOUBLE      32813
HandOutcome.LOSE_DOUBLE     24964
Name: outcome, dtype: int64, -0.028609)
(HandOutcome.LOSE           461292
HandOutcome.WIN            349707
HandOutcome.PUSH            86323
HandOutcome.BLACKJACK       44990
HandOutcome.WIN_DOUBLE      33089
HandOutcome.LOSE_DOUBLE     24599
Name: outcome, dtype: int64, -0.02712)
