In [85]:
import random
class BlackJackEnv:
  """Single-player blackjack environment with a Hi-Lo style running count.

  Cards are plain integers (suits are ignored): 2-9 at face value, 10 for
  every ten-valued card (it appears four times per suit in the deck list) and
  11 for an ace. Hands are lists of these integers and are never rewritten;
  an ace is demoted from 11 to 1 only arithmetically when a total is computed.

  Actions accepted by ``next``:
    0 -- stand: the dealer draws to 17 or more and the round ends.
    1 -- hit: the player takes one card; the round ends on a bust.
    2 -- double: the stake is doubled, the player takes exactly one card,
         then (unless busted) the dealer plays and the round ends.

  States are the 7-tuples described in ``get_state``.
  """

  # Class-level defaults kept for backward compatibility; every instance gets
  # its own values in __init__.
  rCount = 0
  roundStarted = False

  def __str__(self):
    return f"Deck: {self.deck} \nDiscard: {self.discard} \nRunning Count: {self.rCount} \nPlayer Hand: {self.playerHand} \nDealer Hand: {self.dealerHand} \nRound Started: {self.roundStarted}\n hasAce: {self.has_ace()} \n"

  def __init__(self, numDecks):
    """Build a shoe of ``numDecks`` 52-card decks and shuffle it."""
    # 13 ranks per suit, 4 suits per deck; suits are irrelevant for blackjack.
    self.deck = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11] * 4 * numDecks
    self.rCount = 0  # running count, maintained by deal_card
    self.discard = []  # every dealt card, including those still on the table
    self.playerHand = []
    self.dealerHand = []
    self.doubled = False
    self.roundStarted = False

    self.shuffle()

  def shuffle(self):
    """Return all dealt cards to the deck, reset the count and reshuffle."""
    self.deck.extend(self.discard)
    self.discard = []
    self.rCount = 0
    # One shuffle is already uniform; five are kept so the amount of
    # randomness consumed matches the previous behaviour for seeded runs.
    for _ in range(5):
      random.shuffle(self.deck)

  @staticmethod
  def _soft_total(hand):
    """Return ``(total, soft_aces)`` for ``hand`` without modifying it.

    ``total`` is the best blackjack value: aces (11) are demoted to 1, one at
    a time, while the total exceeds 21. ``soft_aces`` is the number of aces
    still counted as 11 in that total.
    """
    total = sum(hand)
    soft_aces = hand.count(11)
    while total > 21 and soft_aces:
      total -= 10  # count one ace as 1 instead of 11
      soft_aces -= 1
    return total, soft_aces

  def hand_value(self, hand):
    """Return the blackjack value of ``hand``.

    The hand is left untouched, so card order (and therefore the dealer's
    first card) is preserved, and any number of aces is handled in one call.
    """
    return self._soft_total(hand)[0]

  def has_ace(self):
    """Return 1 if the player's hand holds an ace still counted as 11, else 0."""
    return 1 if self._soft_total(self.playerHand)[1] else 0

  def deal_card(self):
    """Pop the top card, update the running count and record it as dealt.

    Cards below 7 add one to the count, cards above 9 (tens and aces)
    subtract one, and 7-9 leave it unchanged.

    Raises:
      IndexError: if the deck is empty.
    """
    card = self.deck.pop()
    if card < 7:
      self.rCount += 1
    elif card > 9:
      self.rCount -= 1
    self.discard.append(card)
    return card

  def get_state(self):
    """Return the current observation as a 7-tuple.

    Returns:
      (player hand value, dealer's first card, has_ace flag (0/1),
       roundStarted (bool), reward, bucketed running count (0/1/2),
       doubled (bool))

    Raises:
      IndexError: if the dealer has no cards (no round has been dealt).
    """
    return (
      self.hand_value(self.playerHand),
      self.dealerHand[0],
      self.has_ace(),
      self.roundStarted,
      self.get_reward(),
      self.get_rCount(),
      self.doubled,
    )

  def get_rCount(self):
    """Bucket the running count: 2 if above +2, 0 if below -2, otherwise 1."""
    if self.rCount > 2:
      return 2
    if self.rCount < -2:
      return 0
    return 1

  def get_reward(self):
    """Return the reward for the current position.

    While a round is in progress the reward is 0.0. Once it is over the
    player wins or loses 100.0 (200.0 when doubled); a tie yields 5.0.
    """
    if self.roundStarted:
      return 0.0

    stake = 200.0 if self.doubled else 100.0
    playerValue = self.hand_value(self.playerHand)
    dealerValue = self.hand_value(self.dealerHand)
    # A player bust loses even when the dealer would also be over 21.
    if playerValue > 21:
      return -stake
    if dealerValue > 21 or playerValue > dealerValue:
      return stake
    if playerValue < dealerValue:
      return -stake
    return 5.0

  def end_round(self):
    """Clear both hands; the dealt cards stay in the discard pile."""
    self.playerHand = []
    self.dealerHand = []

  def _dealer_play(self):
    """Draw cards for the dealer until the dealer's hand is worth 17 or more."""
    while self.hand_value(self.dealerHand) < 17:
      self.dealerHand.append(self.deal_card())

  def _finish_round(self):
    """Mark the round over, capture the terminal state, then clear the hands."""
    self.roundStarted = False
    # The state must be read before end_round() empties the hands.
    state = self.get_state()
    self.end_round()
    return state

  def start_round(self):
    """Deal a new round and return its first state.

    Returns None (after printing a notice) if a round is already running; in
    that case nothing is modified. The shoe is reshuffled first when fewer
    than 15 cards remain. A two-card 21 for the player ends the round
    immediately and the terminal state is returned.
    """
    # Guard first so a stray call can neither reshuffle cards that are still
    # on the table back into the deck nor reset the doubled flag mid-round.
    if self.roundStarted:
      print("Round already started")
      return None

    self.doubled = False
    if len(self.deck) < 15:
      self.shuffle()

    self.roundStarted = True
    self.playerHand = [self.deal_card(), self.deal_card()]
    self.dealerHand = [self.deal_card(), self.deal_card()]
    if self.hand_value(self.playerHand) == 21:
      return self._finish_round()
    return self.get_state()

  def next(self, action):
    """Apply ``action`` (0 stand, 1 hit, 2 double) and return the new state.

    Returns None if no round is running (a notice is printed) or if the
    action is not recognised.
    """
    if not self.roundStarted:
      print("Round not started")
      return None

    if action == 1:
      self.playerHand.append(self.deal_card())
      if self.hand_value(self.playerHand) > 21:
        return self._finish_round()
      return self.get_state()

    if action == 0:
      self._dealer_play()
      return self._finish_round()

    if action == 2:
      self.doubled = True
      self.playerHand.append(self.deal_card())
      # The dealer only draws when the player has not already busted.
      if self.hand_value(self.playerHand) <= 21:
        self._dealer_play()
      return self._finish_round()

    return None



    
  
     



In [86]:
# Build a ten-deck environment (shuffled on construction) and dump its state.
table = BlackJackEnv(numDecks=10)
print(str(table))


Deck: [9, 4, 7, 9, 9, 9, 9, 9, 2, 2, 8, 2, 10, 3, 2, 5, 3, 11, 9, 10, 4, 4, 3, 10, 2, 11, 9, 10, 11, 8, 10, 10, 7, 10, 3, 2, 6, 7, 10, 10, 10, 10, 9, 2, 2, 8, 5, 10, 7, 6, 5, 10, 9, 3, 5, 9, 5, 10, 10, 11, 7, 10, 10, 11, 2, 10, 11, 5, 3, 6, 8, 6, 10, 2, 10, 5, 10, 10, 5, 5, 7, 7, 6, 7, 10, 7, 7, 3, 6, 11, 2, 10, 10, 7, 3, 2, 2, 8, 4, 6, 10, 10, 4, 8, 8, 8, 9, 10, 2, 10, 8, 11, 6, 5, 10, 4, 10, 11, 10, 10, 5, 10, 2, 5, 8, 4, 5, 8, 6, 4, 10, 10, 8, 10, 9, 11, 10, 6, 9, 7, 4, 10, 10, 10, 10, 10, 10, 10, 9, 6, 9, 9, 2, 11, 9, 9, 11, 6, 6, 2, 11, 3, 6, 9, 2, 8, 10, 6, 7, 10, 7, 8, 10, 9, 10, 10, 4, 4, 3, 4, 2, 7, 10, 9, 11, 3, 10, 3, 10, 5, 6, 6, 7, 2, 10, 10, 3, 8, 10, 9, 2, 6, 10, 10, 4, 10, 10, 7, 6, 4, 8, 4, 10, 10, 10, 2, 11, 7, 4, 6, 10, 4, 9, 5, 3, 3, 2, 10, 9, 11, 3, 5, 5, 4, 10, 11, 5, 10, 6, 11, 10, 3, 5, 4, 2, 7, 10, 10, 5, 2, 10, 10, 11, 6, 10, 6, 10, 10, 8, 4, 7, 11, 7, 3, 10, 3, 3, 10, 10, 2, 7, 8, 5, 7, 4, 10, 4, 10, 4, 7, 10, 10, 6, 10, 11, 3, 3, 8, 10, 9, 3, 10, 10, 10, 6, 