In [1]:
from dataclasses import dataclass, field
import copy
from enum import Enum, auto
import random
import pandas as pd

In [2]:
# cards are numbers from 1 to 13
# the score is capped at 10

## Define a Hand class and functions on it

In [3]:
@dataclass
class Hand:
    """Class for representing a blackjack hand."""
    score: int = 0
    soft: bool = False
    cards: [int] = field(default_factory=list)


In [4]:
def add_card(hand, card):
    if card != 1:  
        if (not hand.soft) or (hand.score <= 11): # simple case
            new_score = min(hand.score+min(10, card), 22) # cap busted hands at 22
        else: # make a soft hand hard
            new_score = hand.score+min(10, card) - 10 
            hand.soft = False
    else: # card is an ace
        if hand.score >= 11: # 11s and up count an ace as 1 (hard or soft)
            new_score = min(hand.score+min(10, card), 22) # cap busted hands at 22
        else: # soft ace
            new_score = hand.score+11
            hand.soft = True

    hand.score = new_score
    hand.cards += [card]
    return hand

In [5]:
def is_busted(hand):
    return hand.score > 21

In [6]:
def is_blackjack(hand):
    return hand.score==21 and len(hand.cards)==2

In [7]:
h = Hand()
h

Hand(score=0, soft=False, cards=[])

In [8]:
add_card(h, 6) # start a hand with a 6

Hand(score=6, soft=False, cards=[6])

In [9]:
add_card(h, 11) # show that J (11) counts as 10 points

Hand(score=16, soft=False, cards=[6, 11])

In [10]:
add_card(h, 7), is_busted(h) # bust (show that 23 is counted as 22)

(Hand(score=22, soft=False, cards=[6, 11, 7]), True)

In [11]:
# OK, now let's try another hand with aces
h = Hand()
add_card(h, 1) # should be a soft 11

Hand(score=11, soft=True, cards=[1])

In [12]:
add_card(h, 1) # two aces: should be a soft 12

Hand(score=12, soft=True, cards=[1, 1])

In [13]:
add_card(h, 11) # two aces and a J: should be a hard 12

Hand(score=12, soft=False, cards=[1, 1, 11])

In [14]:
# OK, now let's try another hand with a blackjack
h = Hand()
add_card(h, 1) # should be a soft 11
add_card(h, 10)
h, is_blackjack(h)

(Hand(score=21, soft=True, cards=[1, 10]), True)

## Now define gameplay and strategy

In [15]:

# TODO I might want a Flag class later, to provide a set of possible Actions
class Action(Enum):
    STAND = auto()
    HIT = auto()
    #DOUBLE = auto()
    #SPLIT = auto()
    
    

In [16]:
# Most simple/conservative strategy imaginable:
def strat_nobust(hand, dealer):
    if hand.score > 11:
        return Action.STAND
    else:
        return Action.HIT
        

In [17]:
# Dealer strategy
def strat_dealer(hand, dealer):
    if hand.score < 17:
        return Action.HIT
    # TODO handle soft hands
    else:
        return Action.STAND
        

In [18]:
class HandOutcome(Enum):
    WIN = 1
    LOSE = -1
    PUSH = 0
    BLACKJACK = 1.5

In [19]:
# Deck; completely random (i.e., infinite) for now


def deal_card():
    return random.randrange(13)+1

In [20]:
[deal_card() for _ in range(10)]

[9, 9, 3, 13, 7, 8, 7, 10, 4, 5]

In [21]:
# return the final hand after playing
def player_play_hand(strategy, hand, dealer, deck): 
    while True:
        decision = strategy(hand, dealer)
        if decision == Action.STAND:
            return hand
        if decision == Action.HIT:
            add_card(hand, deck())
            if is_busted(hand):
                return hand



In [22]:
def player_hand_outcome(player_hand, dealer_hand):
    if is_blackjack(player_hand):
        if is_blackjack(dealer_hand):
            return HandOutcome.PUSH
        else:
            return HandOutcome.BLACKJACK
    if is_busted(player_hand) or is_blackjack(dealer_hand):
        return HandOutcome.LOSE
    if is_busted(dealer_hand):
        return HandOutcome.WIN
    if player_hand.score > dealer_hand.score:
        return HandOutcome.WIN
    if player_hand.score == dealer_hand.score:
        return HandOutcome.PUSH
    if player_hand.score < dealer_hand.score:
        return HandOutcome.LOSE


In [23]:
# One player and a dealer
# Player has a strategy
# Each gets dealt cards and plays according to strategy
# Emit all the data, and figure the rest out later

def complete_one_hand(strat, player_hand, dealer_hand):

    hand_p = copy.deepcopy(player_hand)
    hand_d = copy.deepcopy(dealer_hand)
    
    dealer_hole_card = deal_card()
    
    # player
    player_play_hand(strat, hand_p, hand_d, deal_card)
    # dealer
    player_play_hand(strat_dealer, add_card(hand_d, dealer_hole_card), None, deal_card)
    
    return (hand_p, hand_d, player_hand_outcome(hand_p, hand_d))



In [24]:
# One player and a dealer
# Player has a strategy
# Each gets dealt cards and plays according to strategy
# Emit all the data, and figure the rest out later

def play_one_hand(strat):
    hand_p = Hand()
    hand_d = Hand()

    add_card(hand_p, deal_card())
    add_card(hand_d, deal_card())
    add_card(hand_p, deal_card())

    return complete_one_hand(strat, hand_p, hand_d)

play_one_hand(strat_nobust)

(Hand(score=21, soft=False, cards=[3, 8, 13]),
 Hand(score=17, soft=False, cards=[13, 7]),
 <HandOutcome.WIN: 1>)

## Aggregate and summarize the data from the simulations

In [25]:

def generate_row_from_hand(h):
    (hand_p, hand_d, outcome) = h
    return {'hand_start': hand_p.cards[:2], 'dealer_card': hand_d.cards[0], 'hand_end': hand_p.cards, 'dealer_hand': hand_d.cards, 'outcome': outcome}

generate_row_from_hand(play_one_hand(strat_nobust))

{'hand_start': [5, 10],
 'dealer_card': 7,
 'hand_end': [5, 10],
 'dealer_hand': [7, 10],
 'outcome': <HandOutcome.LOSE: -1>}

In [26]:
def run_n_sim_trials(strat, n):
    sims = pd.DataFrame([generate_row_from_hand(play_one_hand(strat)) for _ in range(n)])
    return sims

def summarize_totals(sims):
    return sims['outcome'].value_counts(), sims['outcome'].apply(lambda x: x.value).mean()

sims = run_n_sim_trials(strat_dealer, 1000)
sims, summarize_totals(sims)

(    hand_start  dealer_card          hand_end    dealer_hand           outcome
 0     [13, 12]            7          [13, 12]      [7, 6, 6]   HandOutcome.WIN
 1       [9, 5]           13         [9, 5, 9]        [13, 9]  HandOutcome.LOSE
 2      [11, 4]            1  [11, 4, 1, 1, 3]  [1, 1, 5, 12]   HandOutcome.WIN
 3       [5, 4]            1        [5, 4, 12]      [1, 5, 1]   HandOutcome.WIN
 4      [9, 12]            8           [9, 12]      [8, 8, 5]  HandOutcome.LOSE
 ..         ...          ...               ...            ...               ...
 995     [3, 2]           10     [3, 2, 13, 8]    [10, 6, 13]  HandOutcome.LOSE
 996    [11, 3]            8       [11, 3, 10]     [8, 3, 12]  HandOutcome.LOSE
 997    [12, 6]            5        [12, 6, 2]   [5, 6, 1, 7]  HandOutcome.LOSE
 998     [7, 5]            1      [7, 5, 1, 4]         [1, 9]  HandOutcome.LOSE
 999     [8, 3]            8        [8, 3, 13]        [8, 10]   HandOutcome.WIN
 
 [1000 rows x 5 columns],
 (HandOutcom

In [27]:
def strat_simple(hand, dealer):
    if hand.score >= 17:  return Action.STAND
    if hand.score <= 11:  return Action.HIT
    if dealer.score in (range(3,7)):  return Action.STAND
    else:  return Action.HIT
        
sims = run_n_sim_trials(strat_simple, 1000)
sims, summarize_totals(sims)

(    hand_start  dealer_card            hand_end  dealer_hand           outcome
 0       [5, 6]           12          [5, 6, 11]     [12, 12]   HandOutcome.WIN
 1       [3, 2]            9  [3, 2, 3, 2, 4, 5]      [9, 10]  HandOutcome.PUSH
 2      [11, 5]            6             [11, 5]   [6, 13, 2]  HandOutcome.LOSE
 3      [10, 8]            5             [10, 8]    [5, 8, 5]  HandOutcome.PUSH
 4      [8, 10]           11             [8, 10]      [11, 1]  HandOutcome.LOSE
 ..         ...          ...                 ...          ...               ...
 995    [7, 12]           10             [7, 12]     [10, 13]  HandOutcome.LOSE
 996    [11, 5]           13          [11, 5, 2]  [13, 3, 12]   HandOutcome.WIN
 997    [11, 9]            5             [11, 9]   [5, 11, 3]   HandOutcome.WIN
 998    [6, 10]            8          [6, 10, 8]    [8, 5, 7]  HandOutcome.LOSE
 999     [9, 3]           10        [9, 3, 1, 5]      [10, 8]  HandOutcome.PUSH
 
 [1000 rows x 5 columns],
 (HandOutcom

In [28]:
for strat in [strat_dealer, strat_nobust, strat_simple]:
    print(summarize_totals(run_n_sim_trials(strat, 1000000)))


(HandOutcome.LOSE         492234
HandOutcome.WIN          365203
HandOutcome.PUSH          97445
HandOutcome.BLACKJACK     45118
Name: outcome, dtype: int64, -0.059354)
(HandOutcome.LOSE         515452
HandOutcome.WIN          375607
HandOutcome.PUSH          63849
HandOutcome.BLACKJACK     45092
Name: outcome, dtype: int64, -0.072207)
(HandOutcome.LOSE         485753
HandOutcome.WIN          381329
HandOutcome.PUSH          87757
HandOutcome.BLACKJACK     45161
Name: outcome, dtype: int64, -0.0366825)


In [29]:
for strat in [strat_dealer, strat_nobust, strat_simple]:
    print(summarize_totals(run_n_sim_trials(strat, 1000000)))


(HandOutcome.LOSE         492813
HandOutcome.WIN          364623
HandOutcome.PUSH          97407
HandOutcome.BLACKJACK     45157
Name: outcome, dtype: int64, -0.0604545)
(HandOutcome.LOSE         514865
HandOutcome.WIN          375731
HandOutcome.PUSH          63822
HandOutcome.BLACKJACK     45582
Name: outcome, dtype: int64, -0.070761)
(HandOutcome.LOSE         485887
HandOutcome.WIN          381598
HandOutcome.PUSH          87533
HandOutcome.BLACKJACK     44982
Name: outcome, dtype: int64, -0.036816)


## Simulate specific situations to determine strategy

In [30]:
# 12 vs deuce

hand_p = add_card(add_card(Hand(), 5), 7)
hand_d = add_card(Hand(), 2)

hand_p, hand_d

(Hand(score=12, soft=False, cards=[5, 7]),
 Hand(score=2, soft=False, cards=[2]))

In [31]:
def strat_12_hit(hand, dealer):
    # use the simple strategy for most things, but override the 12 vs 2 behavior
    if hand.score == 12 and dealer.score == 2:
        return Action.HIT
    if hand.score >= 17:  return Action.STAND
    if hand.score <= 11:  return Action.HIT
    if dealer.score in (range(3,7)):  return Action.STAND
    else:  return Action.HIT
    
def strat_12_stand(hand, dealer):
    # use the simple strategy for most things, but override the 12 vs 2 behavior
    if hand.score == 12 and dealer.score == 2:
        return Action.STAND
    if hand.score >= 17:  return Action.STAND
    if hand.score <= 11:  return Action.HIT
    if dealer.score in (range(3,7)):  return Action.STAND
    else:  return Action.HIT
    
# TODO come up with a way to scale this

In [43]:
# Generate a set of strategies (one strategy per action) that default to strat_base, but
# when condition is true, always performs the specific action
def gen_strategies(strat_base, condition, actions):
    def strat_cond_hit(hand, dealer):
        if condition(hand, dealer):
            return Action.HIT
        else: return strat_base(hand, dealer)
    
    def strat_cond_stand(hand, dealer):
        if condition(hand, dealer):
            return Action.STAND
        else: return strat_base(hand, dealer)
    return [strat_cond_hit, strat_cond_stand]

def cond_12_2(hand, dealer):
    return hand.score == 12 and dealer.score == 2 and not hand.soft

In [39]:
strats = gen_strategies(strat_simple, cond_12_2, None)
strats

[<function __main__.gen_strategies.<locals>.strat_cond_hit(hand, dealer)>,
 <function __main__.gen_strategies.<locals>.strat_cond_stand(hand, dealer)>]

In [32]:
complete_one_hand(strat_12_hit, hand_p, hand_d)

(Hand(score=17, soft=False, cards=[5, 7, 5]),
 Hand(score=20, soft=False, cards=[2, 8, 11]),
 <HandOutcome.LOSE: -1>)

In [33]:


def run_n_sim_trials_from_state(strat, hand_p, hand_d, n):
    sims = pd.DataFrame([generate_row_from_hand(complete_one_hand(strat, hand_p, hand_d)) for _ in range(n)])
    return sims

run_n_sim_trials_from_state(strat_12_hit, hand_p, hand_d, 10)

Unnamed: 0,hand_start,dealer_card,hand_end,dealer_hand,outcome
0,"[5, 7]",2,"[5, 7, 10]","[2, 12, 1, 12]",HandOutcome.LOSE
1,"[5, 7]",2,"[5, 7, 7]","[2, 5, 7, 13]",HandOutcome.WIN
2,"[5, 7]",2,"[5, 7, 5]","[2, 11, 5]",HandOutcome.PUSH
3,"[5, 7]",2,"[5, 7, 12]","[2, 8, 5, 7]",HandOutcome.LOSE
4,"[5, 7]",2,"[5, 7, 6]","[2, 10, 3, 8]",HandOutcome.WIN
5,"[5, 7]",2,"[5, 7, 11]","[2, 8, 2, 6]",HandOutcome.LOSE
6,"[5, 7]",2,"[5, 7, 11]","[2, 6, 5, 9]",HandOutcome.LOSE
7,"[5, 7]",2,"[5, 7, 8]","[2, 12, 1, 4]",HandOutcome.WIN
8,"[5, 7]",2,"[5, 7, 7]","[2, 13, 6]",HandOutcome.WIN
9,"[5, 7]",2,"[5, 7, 12]","[2, 11, 1, 12]",HandOutcome.LOSE


In [35]:
for strat in [strat_12_hit, strat_12_stand, strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 1000000)))

(HandOutcome.LOSE    607957
HandOutcome.WIN     326492
HandOutcome.PUSH     65551
Name: outcome, dtype: int64, -0.281465)
(HandOutcome.LOSE    637105
HandOutcome.WIN     362895
Name: outcome, dtype: int64, -0.27421)
(HandOutcome.LOSE    608740
HandOutcome.WIN     325259
HandOutcome.PUSH     66001
Name: outcome, dtype: int64, -0.283481)


In [40]:
for strat in [strats[0], strats[1], strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 1000000)))

(HandOutcome.LOSE    608362
HandOutcome.WIN     325782
HandOutcome.PUSH     65856
Name: outcome, dtype: int64, -0.28258)
(HandOutcome.LOSE    636732
HandOutcome.WIN     363268
Name: outcome, dtype: int64, -0.273464)
(HandOutcome.LOSE    607947
HandOutcome.WIN     326050
HandOutcome.PUSH     66003
Name: outcome, dtype: int64, -0.281897)


In [41]:
# Soft 12 vs 2
hand_p = add_card(add_card(Hand(), 1), 1)
hand_d = add_card(Hand(), 2)
hand_p, hand_d

(Hand(score=12, soft=True, cards=[1, 1]), Hand(score=2, soft=False, cards=[2]))

In [42]:
for strat in [strats[0], strats[1], strat_simple]:
    print(summarize_totals(run_n_sim_trials_from_state(strat, hand_p, hand_d, 1000000)))

(HandOutcome.LOSE    525941
HandOutcome.WIN     392525
HandOutcome.PUSH     81534
Name: outcome, dtype: int64, -0.133416)
(HandOutcome.LOSE    636077
HandOutcome.WIN     363923
Name: outcome, dtype: int64, -0.272154)
(HandOutcome.LOSE    525816
HandOutcome.WIN     392721
HandOutcome.PUSH     81463
Name: outcome, dtype: int64, -0.133095)


In [48]:
# Implement a strategy that composes conditions and actions
conditions = [(cond_12_2, Action.HIT)]

def generate_strat_conditional(strat_base, conditions):
    def strat_cond(hand, dealer):
        for (condition, action) in conditions:
            if condition(hand, dealer): return action
        return strat_base(hand, dealer)
    return strat_cond
    
strat_cond = generate_strat_conditional(strat_simple, conditions)

In [50]:
for strat in [strat_dealer, strat_simple, strat_cond]:
    print(summarize_totals(run_n_sim_trials(strat, 1000000)))


(HandOutcome.LOSE         492350
HandOutcome.WIN          364833
HandOutcome.PUSH          97220
HandOutcome.BLACKJACK     45597
Name: outcome, dtype: int64, -0.0591215)
(HandOutcome.LOSE         484929
HandOutcome.WIN          382169
HandOutcome.PUSH          87545
HandOutcome.BLACKJACK     45357
Name: outcome, dtype: int64, -0.0347245)
(HandOutcome.LOSE         485149
HandOutcome.WIN          381988
HandOutcome.PUSH          88111
HandOutcome.BLACKJACK     44752
Name: outcome, dtype: int64, -0.036033)
