<a href="https://colab.research.google.com/github/mcnica89/Markov-Chains-RL-W24/blob/main/Final_Project_Easy21_Draft_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np

# Final Project Draft Code

Below is a draft of some of the code that will be used in the final project to simulate Easy 21. Your job will be to create the functions playerActionIsHit (which decides whether to hit or stick in any situation), playerBetSize (which chooses your bet size) and playerCardCounter (which maintains a "card counting" signal that you send to yourself to help you choose hit/stick or the bet size depending on which cards are left in the deck). More details will be given closer to the final project due date.

# Easy 21 Simple (1 Game, No Finite Deck)

In [None]:
def playerActionIsHit( player_sum : int, dealer_sum : int) -> bool:
  """Decide whether the player hits (True) or sticks (False).

  Placeholder strategy: both sums are ignored and the player always sticks.
  Returns a real bool so the value matches the declared return type.
  """
  return False

def nextCard() -> int:
  """Draw one card from an infinite deck and return its signed value.

  The magnitude is uniform on 1..10. Black cards count as positive and red
  cards as negative.
  """
  # Two of the three equally likely signs are +1: black 2/3, red 1/3.
  colour = random.choice((-1, 1, 1))
  magnitude = random.randint(1, 10)
  return colour * magnitude

def simulateEasy21 (playerStrategy : "Action Function", Verbose : bool = False) -> int:
  """Play a single hand of Easy 21 with an infinite deck and return the outcome.

  playerStrategy is called as playerStrategy(player_sum, dealer_sum); a truthy
  result means "hit" and a falsy result means "stick". When Verbose is set the
  state is printed after every card.

  Returns 1.0 if the player wins, -1.0 if the dealer wins and 0.0 for a draw.
  """
  # Both starting sums are drawn uniformly from 1..10.
  player_sum = random.randint(1,10)
  dealer_sum = random.randint(1,10)
  if Verbose:
    print("==================")
    print(f"Player Starting Sum: {player_sum}")
    print(f"Dealer Starting Sum: {dealer_sum}")

  # ---- player's turn ----
  player_busted = False
  player_is_active = playerStrategy(player_sum,dealer_sum)
  if Verbose:
    print("--Player's Turn")
  while player_is_active:
    player_sum = player_sum + nextCard()
    player_busted = not (1 <= player_sum <= 21)
    # A busted player stops immediately; otherwise the strategy is asked again.
    player_is_active = False if player_busted else playerStrategy(player_sum,dealer_sum)
    if Verbose:
      print(f"{player_sum = }, {player_busted=}, {player_is_active=}")

  # ---- dealer's turn ----
  # The dealer only plays if the player did not bust, and hits while the sum is <= 17.
  # NOTE(review): simulateEasy21_finite_deck in this file stops hitting above 16 --
  # confirm which threshold is the intended rule.
  dealer_busted = False
  dealer_is_active = dealer_sum <= 17 and not player_busted
  if Verbose:
    print("--Dealer's Turn")
  while dealer_is_active:
    dealer_sum = dealer_sum + nextCard()
    dealer_busted = not (1 <= dealer_sum <= 21)
    dealer_is_active = dealer_sum <= 17 and not dealer_busted
    if Verbose:
      print(f"{dealer_sum = }, {dealer_busted=}, {dealer_is_active=}")

  # ---- decide the winner ----
  # The dealer never draws after a player bust, so at most one side is busted.
  if player_busted:
    player_wins, dealer_wins = False, True
  elif dealer_busted:
    player_wins, dealer_wins = True, False
  else:
    player_wins = player_sum > dealer_sum
    dealer_wins = player_sum < dealer_sum
  if Verbose:
    print(f"{player_wins=}, {dealer_wins=}")

  return float(player_wins) - float(dealer_wins)

In [None]:
# Estimate the average outcome of one hand under playerActionIsHit by
# simulating 10,000,000 hands and keeping a running mean.
m = 0
for i in range(10000000):
  # Incremental mean: after this step m is the average of the first i+1 outcomes.
  m = i*m/(i+1) + simulateEasy21(playerActionIsHit,Verbose=False)/(i+1)

print(m)

0.18902739999998697


# Easy 21 vectorized (n games in parallel, No Finite Deck)

In [None]:
# Vectorized versions: many independent games are simulated in parallel.

batch_size = 10 # number of games to play in parallel
# Every per-game variable of the scalar simulator becomes an array of shape (batch_size,).
# NOTE(review): simulateEasy21_vec takes its own batch_size argument, so this
# module-level value does not appear to be read by the code in this cell -- confirm.

def playerActionIsHit_vec( player_sum, dealer_sum) -> np.ndarray:
  """Vectorized hit/stick decision: one boolean per game.

  Placeholder strategy: both inputs are ignored and every game sticks.
  Returns a boolean array shaped like player_sum (True = hit), so the result
  can be used directly as a mask.
  """
  return np.zeros_like(player_sum, dtype=bool)

def nextCard_vec(which_sims_to_deal) -> int:
  """Deal one card to every simulation flagged in which_sims_to_deal.

  A signed card is sampled for every entry and then multiplied by
  which_sims_to_deal, so entries holding False/0 receive a 0 instead of a card.
  The value returned is an array with the shape of which_sims_to_deal (the
  `-> int` annotation is kept unchanged from the original signature).
  """
  shape = which_sims_to_deal.shape
  # Two of the three equally likely signs are +1: black 2/3, red 1/3.
  signs = np.random.choice(np.array([-1, 1, 1]), size=shape)
  magnitudes = np.random.randint(low=1, high=11, size=shape)
  cards = signs * magnitudes
  return cards * which_sims_to_deal

def simulateEasy21_vec(batch_size:int, playerStrategy_vec : "Action Function", Verbose : bool = False) -> np.ndarray:
  """Simulate batch_size independent hands of Easy 21 in parallel (infinite deck).

  playerStrategy_vec is called as playerStrategy_vec(player_sum, dealer_sum)
  with two arrays of shape (batch_size,); truthy entries of its result mean
  "hit" for that game. When Verbose is set the arrays are printed after every
  round of cards.

  Returns a float array of shape (batch_size,): 1.0 where the player wins,
  -1.0 where the dealer wins and 0.0 for a draw.
  """

  #initialize player and dealer to start at a number between 1 and 10
  player_sum = np.random.randint(low=1,high=10+1,size=batch_size)
  dealer_sum = np.random.randint(low=1,high=10+1,size=batch_size)
  if Verbose:
    print("==================")
    print(f"Player Starting Sum: {player_sum}")
    print(f"Dealer Starting Sum: {dealer_sum}")

  #player's turn
  # Coerce the first answer to a boolean mask so a 0/1 integer array behaves like False/True.
  player_is_active = np.asarray(playerStrategy_vec(player_sum,dealer_sum), dtype=bool) #flag for the Player still playing
  player_busted = np.zeros(batch_size,dtype=bool) #Array of all false to start
  NOT_player_busted = np.logical_not(player_busted)
  if Verbose:
      print("--Player's Turn")
  while np.any(player_is_active):
    # Every active game draws its OWN card; inactive games receive 0.
    # (A single scalar nextCard() here would hand the same card to the whole batch.)
    player_sum += nextCard_vec(player_is_active)
    player_busted = np.logical_or( player_sum < 1, player_sum > 21 )
    NOT_player_busted = np.logical_not(player_busted)
    player_is_active = np.logical_and(NOT_player_busted,playerStrategy_vec(player_sum,dealer_sum))
    if Verbose:
      print(f"{player_sum = }, {player_busted=}, {player_is_active=}")

  #dealer's turn

  #The dealer will always hit if <=17 and player is not busted
  # NOTE(review): simulateEasy21_finite_deck in this file stops hitting above 16 --
  # confirm which threshold is the intended rule.
  dealer_is_active = np.logical_and(NOT_player_busted,dealer_sum <= 17) #flag for the Dealer still playing.

  dealer_busted = np.zeros(batch_size,dtype=bool) #Array of all false to start
  NOT_dealer_busted = np.logical_not(dealer_busted)

  if Verbose:
      print("--Dealer's Turn")

  while np.any(dealer_is_active):
    dealer_sum += nextCard_vec(dealer_is_active)
    dealer_busted = np.logical_or(dealer_sum < 1,dealer_sum > 21)
    NOT_dealer_busted = np.logical_not(dealer_busted)
    dealer_is_active = np.logical_and(NOT_dealer_busted, np.logical_and(NOT_player_busted,dealer_sum <= 17) )
    if Verbose:
      print(f"{dealer_sum = }, {dealer_busted=}, {dealer_is_active=}")


  # The dealer never draws in a game where the player busted, so at most one side is busted per game.
  player_wins = np.logical_or(dealer_busted, np.logical_and(NOT_player_busted,player_sum > dealer_sum))
  dealer_wins = np.logical_or(player_busted, np.logical_and(NOT_dealer_busted,player_sum < dealer_sum))
  if Verbose:
    print(f"{player_wins=}, {dealer_wins=}")

  return 1.0*player_wins - 1.0*dealer_wins

In [None]:
# Simulate 10,000,000 games in a single batch and display the mean outcome per game.
sim = simulateEasy21_vec(10000000,playerActionIsHit_vec,Verbose=False)
np.mean(sim)

0.1890354

In [None]:
# Display the outcomes of the first 100 games (1.0 = player win, -1.0 = dealer win, 0.0 = draw).
sim[:100]

array([-1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1., -1., -1.,  1.,
        1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1.,  1., -1.,  1.,
        1., -1., -1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1., -1., -1.,
        1., -1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1.,  1.,
        1.,  1.,  1., -1.,  1.,  1., -1.,  1., -1.,  1.,  1.,  1., -1.,
        1., -1.,  1.,  1., -1.,  1., -1., -1., -1.,  1., -1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1., -1., -1., -1.,
        1., -1., -1.,  1.,  1., -1.,  1., -1.,  1.])

# Easy21 Finite Deck

In [None]:
# One deck is 30 cards: 1..10 twice as positive cards and -1..-10 once as negative cards.
cards_1_to_10 = np.arange(1, 11)
deck = np.concatenate([cards_1_to_10, cards_1_to_10, -cards_1_to_10])
# The shoe holds five copies of the deck and is shuffled in place.
shoe = np.tile(deck, 5)
np.random.shuffle(shoe)


In [None]:
def playerActionIsHit( player_sum : int, dealer_sum : int, card_counting_signal:int = 0) -> bool:
  """Decide whether the player hits (True) or sticks (False).

  Placeholder strategy: the sums and the card-counting signal are ignored and
  the player always sticks. Returns a real bool so the value matches the
  declared return type.
  """
  return False

def playerCardCounter( card_counting_signal_input :int, observed_card : int) -> int:
  """Return the updated card-counting signal after a card has been observed.

  Placeholder: both arguments are ignored and 0 or 1 is returned with equal
  probability.
  """
  possible_signals = (0, 1)
  return random.choice(possible_signals)

def playerBetSize_choice(card_counting_signal_input:int, current_bankroll:int) -> int:
  """Return how much the player wants to bet on the next hand.

  Placeholder: the signal and the bankroll are ignored and the bet is always 1.
  """
  flat_bet = 1
  return flat_bet

#def nextCard() -> int:
#  #returns the value of the next card
#
#  cardSign = random.choice([-1,+1,+1]) #2/3 chance for a black card (+1) and 1/3 chance for a red card (-1)
#  cardNum = random.randint(1,10)
#  return cardSign*cardNum

def simulateEasy21_finite_deck(playerStrategy, playerBetSize_choice, playerCardCounter, Verbose : bool = False, num_decks_in_shoe = 20, min_decks_to_end = 2, starting_bankroll : int = 100) -> int:
  """Play hands of Easy 21 from one shuffled finite shoe and return the final bankroll.

  The shoe holds num_decks_in_shoe copies of a 30-card deck (1..10 twice as
  positive cards, -1..-10 once as negative cards) and is shuffled once. Hands
  are played while more than min_decks_to_end decks' worth of cards remain
  and the bankroll is positive.

  Args:
    playerStrategy: called as playerStrategy(player_sum, dealer_sum, signal);
      a truthy result means hit, a falsy result means stick.
    playerBetSize_choice: called as playerBetSize_choice(signal, bankroll)
      before each hand; the result is converted with int() and clamped to
      the range [0, bankroll].
    playerCardCounter: called as playerCardCounter(signal, card) for every
      card dealt from the shoe (to the player or to the dealer); the result,
      converted with int(), becomes the new signal.
    Verbose: print the state of the game while it is played.
    num_decks_in_shoe: number of 30-card decks in the shoe.
    min_decks_to_end: no new hand starts once this many decks' worth of
      cards (or fewer) remain in the shoe.
    starting_bankroll: bankroll before the first hand.

  Returns:
    The bankroll after the last hand, as a Python int.
  """
  # The dealer keeps drawing while its sum is at most this value (i.e. sticks on 17 or more).
  dealer_hits_up_to = 16

  cards_1_to_10 = np.arange(1,10+1)
  deck = np.concatenate( (cards_1_to_10, cards_1_to_10, -cards_1_to_10) ) #the deck is 30 cards, 1 to 10 twice and -1 to -10 once
  shoe = np.tile( deck, num_decks_in_shoe )
  np.random.shuffle(shoe) #shuffle the deck!

  top_of_shoe_ix = 0 #index for how far we are in the shoe

  player_bankroll = starting_bankroll #your starting bankroll
  player_cardcount_signal = 0 #starting cardcount signal

  #while we still have more than min_decks_to_end decks left in the shoe
  # (and the player has a non-zero bankroll!)

  while len(shoe) - top_of_shoe_ix > min_decks_to_end*len(deck) and player_bankroll > 0:
    bet_size = int(playerBetSize_choice(player_cardcount_signal, player_bankroll)) #choose the betsize! Note that it is converted to an int.
    bet_size = max(min(bet_size,player_bankroll),0) #ensure the playerbet size is between 0 and player_bankroll

    if Verbose:
      print(f"{player_bankroll=}, {bet_size=}")


    #initialize player and dealer to start at a number between 1 and 10
    #IMPORTANT: These numbers are NOT drawn from the deck. They are just new numbers.
    player_sum = random.randint(1,10)
    dealer_sum = random.randint(1,10)
    if Verbose:
      print("==================")
      print(f"Player Starting Sum: {player_sum}")
      print(f"Dealer Starting Sum: {dealer_sum}")

    #player's turn
    player_is_active = playerStrategy(player_sum,dealer_sum,player_cardcount_signal) #flag for the Player still playing
    player_busted = False
    if Verbose:
        print("--Player's Turn")
    while player_is_active:

      #deal the next card and also let the player card counter see the next card
      card = shoe[top_of_shoe_ix]
      top_of_shoe_ix += 1
      player_cardcount_signal = int(playerCardCounter(player_cardcount_signal,card))
      player_sum += card

      if Verbose:
        print(f"{player_cardcount_signal}")

      player_busted = ( player_sum < 1 or player_sum > 21 )
      player_is_active = (not player_busted) and playerStrategy(player_sum,dealer_sum,player_cardcount_signal)
      player_is_active = player_is_active and top_of_shoe_ix < len(shoe) #if we are out of cards everything is automatically over!
      if Verbose:
        print(f"{player_sum = }, {player_busted=}, {player_is_active=}")

    #dealer's turn

    #The dealer will always hit if <= dealer_hits_up_to and player is not busted
    dealer_is_active = (not player_busted and dealer_sum <= dealer_hits_up_to) #flag for the Dealer still playing.
    dealer_is_active = dealer_is_active and top_of_shoe_ix < len(shoe) #if we are out of cards everything is automatically over

    dealer_busted = False
    if Verbose:
        print("--Dealer's Turn")
    while dealer_is_active:
      #the card counter also sees every card dealt to the dealer
      card = shoe[top_of_shoe_ix]
      top_of_shoe_ix += 1
      player_cardcount_signal = int(playerCardCounter(player_cardcount_signal,card))

      dealer_sum += card
      dealer_busted = (dealer_sum < 1 or dealer_sum > 21)
      dealer_is_active = (not dealer_busted) and dealer_sum <= dealer_hits_up_to
      dealer_is_active = dealer_is_active and top_of_shoe_ix < len(shoe) #if we are out of cards everything is automatically over

      if Verbose:
        print(f"{dealer_sum = }, {dealer_busted=}, {dealer_is_active=}")


    player_wins = dealer_busted or ( (not player_busted) and player_sum > dealer_sum )
    dealer_wins = player_busted or ( (not dealer_busted) and player_sum < dealer_sum )
    if Verbose:
      print(f"{player_wins=}, {dealer_wins=}")

    #win: +bet_size, loss: -bet_size, draw: unchanged
    player_bankroll += bet_size*player_wins - bet_size*dealer_wins

  #cards from the shoe are NumPy scalars, so convert back to a plain int for the caller
  return int(player_bankroll)

In [None]:
# Play 1000 independent 20-deck shoes with the placeholder functions and print the final bankroll of each.
for i in range(1000):
  print(simulateEasy21_finite_deck(playerActionIsHit, playerBetSize_choice, playerCardCounter, Verbose=False, num_decks_in_shoe = 20))

114
110
100
95
105
115
110
105
97
114
108
112
112
112
103
126
110
116
119
114
91
114
98
120
110
112
104
107
94
109
98
102
87
115
112
101
106
103
116
98
103
110
106
80
117
122
90
108
98
117
104
109
104
98
117
108
119
118
94
88
118
95
98
117
107
112
105
107
96
105
87
94
94
103
122
83
118
113
115
118
134
118
97
110
99
105
122
107
97
122
119
111
102
109
103
105
116
102
108
106
109
113
112
94
103
113
101
141
112
114
110
93
114
116
126
91
89
90
122
92
107
95
97
115
128
116
117
90
104
100
111
109
110
106
104
113
95
111
111
101
104
97
114
115
123
108
90
110
107
125
105
100
101
108
96
108
105
105
117
91
116
118
113
104
103
87
102
91
101
106
125
108
119
112
97
96
99
109
97
112
115
111
96
112
96
109
102
107
117
87
93
132
104
113
104
116
142
84
105
114
122
102
120
120
117
99
141
114
98
111
109
101
101
93
110
110
110
109
103
117
102
98
102
130
120
96
91
111
89
93
105
122
99
109
85
111
114
108
92
105
113
87
107
102
98
107
108
104
130
87
113
102
98
109
124
132
100
124
120
102
101
93
111
111
109
108
1