In [1]:
import pandas as pd
import numpy as np
import pickle
import random
import itertools

In [35]:
# Start every run with empty output CSVs.
# Opening in "w" mode creates the file when it is missing and truncates it when
# it already exists, so this cell is safe to run whether or not the files exist
# and no explicit truncate() call is required. The context manager guarantees
# the handle is closed even if something goes wrong.
for csv_path in ("my_csv.csv", "Q_values.csv"):
    with open(csv_path, "w"):
        pass


In [38]:
class BlackJackSolution:

    def __init__(self, lr=0.1, exp_rate=0.3, num_decks=6):
        
        # Create two dictionaries (one to hold Q values, one to hold win/total games)
        self.player_Q_Values = {}  # [(player_value, show_card, usable_ace)][action]: Qvalue
        self.stateActionOutcome = {} # [(player_value, show_card, usable_ace)][action]: [win,totalgames]
        
        # initialize Q values | (2-21) x (1-10) x (True, False) x (0,1,2,3) x (10,50,100) creates 4800
        # combinations for Q values
        
        # i loop for player hand total
        for i in range(2, 22):
            # j loop for dealer show card
            for j in range(1, 11):
                # k loop for if player has usable ace
                for k in [True, False]:
                    
                    # initialize our win/total game values in dictionary
                    self.stateActionOutcome[(i,j,k)] = {0:[0,0],1:[0,0],2:[0,0],3:[0,0]}
                    
                    for bet in [10,50,100]:
                        
                        # initialize a dictionary for each combination of states to hold action:Qvalue
                        self.player_Q_Values[(i, j, k, bet)] = {}
                        
                        # Loop through actions (0: Stay, 1: Hit, 2: Double Down, 3: Split)
                        for a in [0, 1, 2, 3]:
               
                            # Below, we initialize the model with some very primitive rules
            
                            # If the player has a 21, then we want a choice of stay (0) to have a 
                            # very high Q value  
                            if (i == 21) and (a == 0):
                                self.player_Q_Values[(i, j, k, bet)][a] = 99999999
                                
                            # If the player has less than 12, then staying is very dumb 
                            # so we set the Q value to a very negative number
                            elif (i < 12) and (a == 0):
                                self.player_Q_Values[(i, j, k, bet)][a] = -99999999
                                
                            # If you have two aces, always split (3)
                            elif (i == 2) and (a == 3):
                                self.player_Q_Values[(i, j, k, bet)][a] = 99999999
                            
                            # Otherwise, set the Q value to 0
                            else:
                                self.player_Q_Values[(i, j, k, bet)][a] = 0

        # Create attributes to keep track of various states, actions, etc.
        self.player_state_action = []
        self.state = (0, 0, False, 10)  # initial state
        self.actions = [1, 0, 2, 3]
        self.end = False # Used to end game
        self.lr = lr
        self.exp_rate = exp_rate
        self.num_decks = num_decks
        self.testStack = []
        
        # Action count is used to keep track of how many actions have been performed in a game
        self.actionCount = 0
        self.bet = 0
        
        # Initial thought: Have it be 3 elements (state on which split occurred, states for hand 1,
        # & states for hand 2)
        
        # splitStates is a special attribute to keep track of important state/action pairs during
        # a split. The first position will hold the state/action pair when a split is chosen.
        # The second and third positions will hold all state/action pairs to be rewarded/punished
        # in the first and second hands, respectively.
        self.splitStates = [0,[],[]] 
        
        # Split potential is used to keep track of if the dealt cards makes it possible to split
        self.split_potential = 0
        
        # Split outcome is used to tally up the outcomes from both hands (to reward/punish original
        # decision to split)
        self.splitOutcome = 0
        
        # Split final values is used to store the final player value of each hand in a split
        self.splitFinalValues = [0,0]
        
        # Added attribute to keep track of how well the model is learning
        self.gameTrack = []
        
        # Dict with faces
        self.f_dict = {'A': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6,
                 '7': 7, '8': 8, '9': 9, '10': 10, 'J': 10, 'Q': 10,
                 'K': 10}
    
    # Create stack
    def makeStack(self):
        
        # Create empty stack
        test_stack = []
        
        # Define new list with faces
        f_list = ['A','2','3','4','5','6','7','8','9','10','J','Q','K']
        
        # Extend empty stack by 4*num_decks*(list of cards)
        for i in range(self.num_decks):
            for j in range(4):
                test_stack.extend(f_list)
        
        # Shuffle the stack
        random.shuffle(test_stack)
        
        # Set the new stack
        self.testStack = test_stack
        
    # Deal single card
    def giveCard(self):
        
        # When stack falls below 52 cards, make a new stack
        if (len(self.testStack) < 52):
        
            self.makeStack()
            
        # Remove the first card from the stack and set it to card to deal
        cardToDeal = self.testStack.pop(0)
        
        return cardToDeal
    

    def dealerPolicy(self, current_value, usable_ace, is_end):
        
        if current_value > 21:
            
            # If dealer has a usable ace, convert it from an 11 to a 1 (subtract 10)
            # Otherwise, game is over, dealer busts
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                return current_value, usable_ace, True
            
        # Dealer stays on 17 or greater
        # Otherwise, deal a new card
        if current_value >= 17:
            return current_value, usable_ace, True
        else:
            card = self.giveCard()
            card_value = self.f_dict[card]
            
            # If card is an ace, check current_value and decide if we can convert
            # it to 11 or have to keep it as 1
            if card_value == 1:
                if current_value <= 10:
                    return current_value + 11, True, False
                return current_value + 1, usable_ace, False
            else:
                return current_value + card_value, usable_ace, False
    
    # Method for action to be chosen by player.
    # curRound and trainRound are used to keep track of what percentage of games
    # we've iterated through. This is used to adjust the exploration rate during sim.
    
    def chooseAction(self,curRound=0,trainRound=1000):
        
        # Player's current hand value is the first element of self.state
        current_value = self.state[0]

        # Whether to 'explore' or follow policy -- current code gradually shifts towards policy following
        # as more rounds are completed
        
        # Note that even the exploration phase has some simple rules built in, such as never staying
        # below 12, always splitting with two aces, etc. This is to avoid exploring obviously bad
        # strategies.
        
        # ********* Don't forget to switch back!!! **********
        #if (np.random.uniform(0, 1) <= self.exp_rate):
            # ********* STANDARD RL: **********
        if (np.random.uniform(0, 1) <= self.exp_rate*(1 - curRound/trainRound)):
            # ********* RANDOM (for comparisons; using basic guidance if 1=1, else statement is ignored): **********
            # ********* (else statement is where RL occurs and the decisions are guided by Q Value) **********
        #if (1 == 1):
            
            # 1) Obviously if current_value = 21, stay (0)
            # 2) If current_value < 12, exclude staying (0)
            # 3) If this is a second or later action, exclude double downs (2) and splits (3)
            if (current_value == 21):
                action = 0
                
            # If current hand value is greater than 11...
            elif (current_value > 11):
                
                # Allow all actions if it's first action and splits available
                if (self.actionCount == 0) and (self.split_potential == 1):
                    action = np.random.choice(self.actions)
                    
                # Exclude splits (3) if self.split_potential = 0
                elif (self.actionCount == 0):
                    action = np.random.choice(self.actions[0:3])
                    
                # Exclude splits and double downs if it's after first action
                else:
                    action = np.random.choice(self.actions[0:2])
                    
            # If the hand value is 2, then it's two aces --> split
            elif (current_value == 2):
                action = 3
                
            # Else, the hand value is less than 12 but not 2
            # Never stay!
            else:
                
                # If split potential is 1 and it's first action,
                # we can hit, double, split. If it's not first
                # action, we can only hit.
                if (self.split_potential == 1):
                    if (self.actionCount == 0):
                        action = np.random.choice([1,2,3])
                    else:
                        action = 1
                        
                # If it's first action with no split potential,
                # we can hit or double. Otherwise, we can only
                # hit.
                else:
                    if (self.actionCount == 0):
                        action = np.random.choice([1,2])
                    else:
                        action = 1
        
        # If we don't 'explore,' then we're following Q value policy
        else:
            
            # Use our basic rules to set an action if we can
            if current_value == 21:
                action = 0
            elif current_value == 2:
                action = 3
                
            # Otherwise we go through checking our Q scores
            else:
                # Initialize a 'v' variable to compare against first Q value and set a default
                # action of staying
                v = -9999999
                action = 0
                
                # Check each action's Q value for that state -- if it's higher than previous Q value,
                # make this the new chosen action.

                # Note that we skip checking some actions, as they cannot be performed with
                # certain states
                for a in self.player_Q_Values[self.state]:

                    # If we've already made a prior action, we can't double down or split
                    # Therefore, skip these actions in the loop
                    if ((self.actionCount > 0) and (a > 1)):        
                        continue

                    # If there's no split potential, skip splitting as a choice
                    if ((self.split_potential == 0) and (a == 3)):
                        continue

                    # if the above two conditions aren't true, all actions are on the table
                    if self.player_Q_Values[self.state][a] > v:
                        action = a
                        v = self.player_Q_Values[self.state][a]
        
        
        # Increment our action count
        self.actionCount += 1
        
        return action

    # Method to create the next state, based on the chosen action, and check if game
    # will continue
    def playerNxtState(self, action):
        current_value = self.state[0]
        show_card = self.state[1]
        usable_ace = self.state[2]

        # Action is a HIT
        if (action == 1):
            
            card = self.giveCard()
            
            # If dealt card is an ace, check if current value is <= 10.
            # If so, we can use ace as 11 (usable) and we add it to current value.
            # Otherwise, ace is 1, and we add 1 to current value
            if self.f_dict[card] == 1:
                if current_value <= 10:
                    current_value += 11
                    usable_ace = True
                else:
                    current_value += 1
            else:
                current_value += self.f_dict[card]
        
        # Action is a DOUBLE DOWN
        elif (action == 2):

            # We need to keep track of original bet (for state/action pairs)
            # but need to double the bet for rewarding/punishing Q value
            originalBet = self.bet
            self.bet *= 2
            
            # Deal card / ace logic same as for hitting
            card = self.giveCard()
            if self.f_dict[card] == 1:
                if current_value <= 10:
                    current_value += 11
                    usable_ace = True
                else:
                    current_value += 1
            else:
                current_value += self.f_dict[card]
            
            # End the game since double down results in one hit and nothing else
            # Return the state
            
            self.end = True
            
            return (current_value, show_card, usable_ace, originalBet)
             
        # Action is to STAY
        else:
            
            # End the game and return the state
            self.end = True
            return (current_value, show_card, usable_ace, self.bet)

        # If totaling up cards is greater than 21, need to adjust or end game
        if current_value > 21:
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                self.end = True
                return (current_value, show_card, usable_ace, self.bet)
        
        # If we hit and haven't busted, return the state but keep game going
        return (current_value, show_card, usable_ace, self.bet)

    # Method to check winner
    def winner(self, player_value, dealer_value):
        # player 1 | draw 0 | dealer -1
        winner = 0
        if player_value > 21:
            winner = -1
        else:
            if dealer_value > 21:
                winner = 1
            else:
                if player_value < dealer_value:
                    winner = -1
                elif player_value > dealer_value:
                    winner = 1
                else:
                    winner = 0
        return winner

    # Method to update Q values
    def _giveCredit(self, player_value, dealer_value, splitCheck=False, ind_splits=False, split=0):
        
        # We are updating the Q value associated with the decision to split
        if splitCheck:
            
            # Use the outcome count in splitoutcome with the bet to find total reward for split
            # E.g., two wins with an original bet of $10 results in an award of 10*2=20
            # A win and a draw with an original bet of $50 results in an award of 50*1=50
            # A loss and a draw with an original bet of $100 results in an award of 100*-1=-100
            award = self.splitOutcome*self.bet
            
            # We don't need a loop for this state/action pair since there's only one action (when we split)
            s = self.splitStates[0]
            
            # Set each part of our pair to the proper variable and update reward
            state, action = s[0], s[1]
            
            reward = self.player_Q_Values[state][action] + self.lr*(award - self.player_Q_Values[state][action])
            self.player_Q_Values[state][action] = round(reward, 3)
            
        # We are updating the Q value for actions associated with playing each hand of a split
        elif ind_splits:
            
            # Check who won this hand and create reward
            whoWon = self.winner(player_value, dealer_value)
            award = whoWon*self.bet
            
            # Backpropagate the reward through our list of state/action pairs
            # if split=0, we take self.splitStates[1] which is the 2nd slot (used for first hand)
            # if split=1, we take self.splitStates[2] which is the 3rd slot (used for second hand)
            for s in reversed(self.splitStates[split+1]):
                state, action = s[0], s[1]
                
                reward = self.player_Q_Values[state][action] + self.lr*(award - self.player_Q_Values[state][action])
                self.player_Q_Values[state][action] = round(reward, 3)
        
        # We are updating the Q value for non-split-associated actions
        else:
            whoWon = self.winner(player_value, dealer_value)
            award = whoWon*self.bet

            # backpropagate the reward through our list of state/action pairs
            # for non-split-associated moves, these are stored in player_state_action
            for s in reversed(self.player_state_action):
                state, action = s[0], s[1]
                reward = self.player_Q_Values[state][action] + self.lr*(award - self.player_Q_Values[state][action])
                self.player_Q_Values[state][action] = round(reward, 3)
            
            
    # Reset for new game/round
    def reset(self):
        self.player_state_action = []
        self.state = (0, 0, False, 10)  # initial state
        self.end = False
        self.actionCount = 0
        self.bet = 0
        self.split_potential = 0
        self.splitStates = [0,[],[]]
        self.splitOutcome = 0
        self.splitFinalValues = [0,0]

    # Show is set to False if cards go to player, True if cards go to house
    def deal2cards(self, show=False):
        
        # return value, usable_ace, and split_potential after two cards dealt
        # so initialize those here
        value, usable_ace = 0, False
        
        cards = [self.giveCard(), self.giveCard()]
        
        # If cards dealt to player are the same, set split potential to 1
        if (cards[0] == cards[1]) and (show == False):
            self.split_potential = 1
        
        # Create a list of card values from our cards
        card_values = [self.f_dict[cards[0]],self.f_dict[cards[1]]]
        
        # If we have two aces, we'll consider our value as 2 if it's the player
        # Otherwise for the dealer, since the dealer can't split, we'll consider it as 12
        if (card_values[0] == 1) and (card_values[1] == 1):
            
            if show:
                value = 12
                usable_ace = True
            else:
                value = 2
                usable_ace = True
            
        # If we make it to this condition, but it's True, we have one ace
        elif 1 in card_values:
            
            # Sum(card_values) = card + Ace
            # Since Ace is stored as a value of 1, we need to add 10 more to make the Ace 11
            value = sum(card_values) + 10
            usable_ace = True
            
        # Else no aces
        else:
            value = sum(card_values)
            usable_ace = False

        # If dealer, also return the show card
        if show:
            return value, usable_ace, card_values[0]
        else:
            return value, usable_ace

    # Time to define the play method
    def play(self, rounds=1000):
        
        # Iterate through the number of rounds specified
        for i in range(rounds):
            
            # **************************************************
            # this is the schema for more variable granularity in the data collection
            # **************************************************
            if i < 10:
                x=0
            elif i < 100:
                x=1
            elif i < 1000:
                x=2
            elif i < 10000:
                x=3
            else:
                x=4
            
            if not i%10**x:
                sa_df = pd.DataFrame.from_dict(sol.stateActionOutcome, orient='index')
                sa_df.columns = ['Stay', 'Hit', 'Double', 'Split']
                calcs = []
                # to split the data from the cells to a usable info for comparison.
                for cell in sa_df['Stay']:
                    if cell[1] !=0:
                        calcs.append(cell[0]/cell[1])
                    else:
                        calcs.append(0)
                sa_df['stay_pct'] = calcs
                calcs = []
                for cell in sa_df['Hit']:
                    if cell[1] !=0:
                        calcs.append(cell[0]/cell[1])
                    else:
                        calcs.append(0)
                sa_df['hit_pct'] = calcs
                calcs = []
                for cell in sa_df['Double']:
                    if cell[1] !=0:
                        calcs.append(cell[0]/cell[1])
                    else:
                        calcs.append(0)
                sa_df['dbl_pct'] = calcs
                calcs = []
                for cell in sa_df['Split']:
                    if cell[1] !=0:
                        calcs.append(cell[0]/cell[1])
                    else:
                        calcs.append(0)
                sa_df['splt_pct'] = calcs
                # this is for cleaning to identify max and dropping data not needed after
                sa_df.drop(['Stay', 'Hit', 'Double', 'Split'], axis=1, inplace=True)
                sa_df.insert(0, "None", 0)
                sa_df.columns = ['None', 'Stay', 'Hit', 'Double', 'Split']
                sa_df['action'] = sa_df.idxmax(axis=1)
                sa_df['pct'] = round((sa_df.max(axis=1))*100,2)
                sa_df.drop(['None', 'Stay', 'Hit', 'Double', 'Split'], axis=1, inplace=True)
                # Add the Game ID 
                sa_df.insert(0, "Game_No", i)
                # Flatten the DF
                sa_df2 = sa_df.reset_index()
                sa_df2.columns = ['Player', 'Dealer', 'Ace', 'Game_No','Action', 'w_Pct']
                #Output to CSV appending for each round captured
                sa_df2.to_csv('my_csv.csv', mode='a', header=False) 
                #continue
            # **************************************************
            # **************************************************
            # **************************************************
                
            # Create bet (just random choice of three values)
            self.bet = random.choice([10,50,100])
            
            # Deal cards to player and dealer
            dealer_value, d_usable_ace, show_card = self.deal2cards(show=True)
            player_value, p_usable_ace = self.deal2cards(show=False)
            
            # Set the current state of the player
            self.state = (player_value, show_card, p_usable_ace, self.bet)
            
            # Check to see if there was blackjack after two cards dealt 
            if player_value == 21 or dealer_value == 21:
                
                whoWon = self.winner(player_value, dealer_value)
            
                if whoWon == 1:
                    self.gameTrack.append(1)
                else:
                    self.gameTrack.append(0)
            
            # Otherwise, let's make some moves!
            else:
                
                # Make a decision for our first action
                firstaction = self.chooseAction(i,rounds)
                
                # If the first action is a split, we need to follow this path
                if (firstaction == 3):
                    
                    # Place initial state/action pair in first slot of splitStates
                    self.splitStates[0] = [self.state, 3]
                    
                    # Since we split, we need half the value of the player_value
                    single_card = int(player_value/2)
                    
                    # Loop through both hands in the split
                    for split in range(0,2):
                        
                        # Set the current value equal to the single card value
                        current_value = single_card
                        
                        # Need to deal a card to the hand
                        card = self.giveCard()
                        
                        # If we draw an ace, it is usable so we add 11 to current value.
                        # Current value is just the single card value so we add the
                        # entirety of the ace value (11) rather than just 10
                        if self.f_dict[card] == 1:
                            current_value += 11
                            usable_ace = True
                            
                        # Otherwise we just add the card value to current value
                        else:
                            current_value += self.f_dict[card]
                        
                        # Need to check for 21 with two cards of the hand
                        # If there is a 21, there are no action decisions
                        # to be made within the individual hand.
                        # Continue to next hand
                        
                        if current_value == 21:
                            
                            self.splitFinalValues[split] = 21
                            continue
                        
                        # Set the state with two cards for the hand
                        self.state = (current_value, show_card, p_usable_ace, self.bet)
                        
                        # Run through hand
                        while True:          
                            
                            # Choose an action for this hand
                            action = self.chooseAction(i,rounds)
                            
                            # Append the state/action pair so we can associate the outcome with it
                            state_action_pair = [self.state, action]
                            self.splitStates[split+1].append(state_action_pair)

                            # update next state
                            self.state = self.playerNxtState(action)

                            # End game if self.end = True
                            if self.end:
                                
                                # Set splitFinalValue as the player value of this final state
                                self.splitFinalValues[split] = self.state[0]
                                
                                break
                                
                    # Dealer draws
                    is_end = False
                    while not is_end:
                        dealer_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)
                    
                    # Get the state (without the bet since it has no impact on win prob) of the decision to split
                    # Need it for tracking wins/total games in stateActionOutcome
                    initialSplit = self.splitStates[0][0][0:3]
                    
                    # Loop through hands to judge winner and reward/punish
                    for split in range(0,2):
                        
                        # Judge the winner of the hand
                        whoWon = self.winner(self.splitFinalValues[split], dealer_value)
                        
                        # Increment split outcome to keep track of total from split
                        self.splitOutcome += whoWon
                        
                        # If player won the hand
                        if whoWon == 1:
                            
                            # Add a win (1) to gameTrack
                            self.gameTrack.append(1)
                            
                            # Add 1 to the wins [0] and total games [1] for 
                            # the state when the decision to split was made
                            self.stateActionOutcome[initialSplit][3][0] += 1
                            self.stateActionOutcome[initialSplit][3][1] += 1
                            
                            # Now update stateActionOutcome for state/actions within the hand
                            for situation in self.splitStates[split+1]:
                                
                                # Add 1 to win/total columns for these state/action combos
                                # Exclude the bet portion of the state ([0:3]) since the 
                                # bet amount has no impact on win probability
                                
                                # situation[0] is state, situation[1] is action
                                self.stateActionOutcome[situation[0][0:3]][situation[1]][0] += 1
                                self.stateActionOutcome[situation[0][0:3]][situation[1]][1] += 1
                        
                        # Else if player loses or draws
                        else:
                            
                            # Add a 0 to gameTrack
                            self.gameTrack.append(0)
                            
                            # Add 1 only to total games [1] for the state when the
                            # decision to split was made
                            self.stateActionOutcome[initialSplit][3][1] += 1
                            
                            # Now update stateActionOutcome for state/actions within the hand
                            for situation in self.splitStates[split+1]:
                                
                                # Add 1 to the total column for these state/action combos
                                # Exclude the bet portion of the state ([0:3]) since the 
                                # bet amount has no impact on win probability
                                
                                # situation[0] is state, situation[1] is action
                                self.stateActionOutcome[situation[0][0:3]][situation[1]][1] += 1
                                 
                        # reward and update Q value for individual actions within hand
                        self._giveCredit(player_value, dealer_value, splitCheck=False, ind_splits=True, split=split)
                       
                    # reward and update Q value for decision to split
                    # Since we already checked the outcomes of the two hands, we don't
                    # have player/dealer values to pass along so we just put in placeholder 0's
                    self._giveCredit(0,0,splitCheck=True,ind_splits=False)
                    
                # If not a split, we follow this path
                else: 
                    
                    # Due to the way in which our first action is used to check *split or 
                    # another action*, we must use this first action below if we didn't decide on
                    # a split. However, we need a counter to ensure that we make a new action
                    # if it's a second or later run through the while loop below.
                    
                    counter = 0
                    
                    while True:
                        
                        # If it's the first action, set action = firstaction
                        # first action is where we decide split or something else
                        if (counter == 0):
                            action = firstaction
                        # Otherwise, we choose a new action
                        else:
                            action = self.chooseAction(i,rounds)
                        
                        counter += 1
                        
                        # Append the state/action pair so we can associate the outcome with it
                        state_action_pair = [self.state, action]
                        self.player_state_action.append(state_action_pair)

                        # update next state
                        self.state = self.playerNxtState(action)

                        # End game if self.end = True
                        if self.end:
                            break

                    # dealer's turn
                    is_end = False
                    while not is_end:
                        dealer_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)

                    # Get player value from final state
                    player_value = self.state[0]
                    
                    # Judge winner
                    whoWon = self.winner(player_value, dealer_value)

                    # If player won
                    if whoWon == 1:
                        
                        # Append 1 to gameTrack
                        self.gameTrack.append(1)
                        
                        # Loop through the state/action pairs of consequence to track win prob
                        # and rewarding/punishing
                        for situation in self.player_state_action:
                            
                            # Add 1 to win/total columns for these state/action combos
                            # Exclude the bet portion of the state ([0:3]) since the 
                            # bet amount has no impact on win probability
                            
                            # situation[0] is state, situation[1] is action
                            self.stateActionOutcome[situation[0][0:3]][situation[1]][0] += 1
                            self.stateActionOutcome[situation[0][0:3]][situation[1]][1] += 1
                    else:
                        
                        # Append 0 to gameTrack
                        self.gameTrack.append(0)
                        
                        # Loop through the state/action pairs of consequence to track win prob
                        # and rewarding/punishing
                        for situation in self.player_state_action:

                            # Add 1 to the total column for these state/action combos
                            # Exclude the bet portion of the state ([0:3]) since the 
                            # bet amount has no impact on win probability
                            
                            # situation[0] is state, situation[1] is action
                            self.stateActionOutcome[situation[0][0:3]][situation[1]][1] += 1
                            

                    # Reward and update Q values for decisions within the hand
                    self._giveCredit(player_value, dealer_value, splitCheck=False, ind_splits=False)
  
            self.reset()
    
    # Use this method to save the developed model policy
    def savePolicy(self, file="policy"):
        """Pickle the Q-value table (``self.player_Q_Values``) to disk.

        Args:
            file: Path of the output file; it is created or overwritten.
        """
        # The context manager closes the handle even when pickle.dump raises,
        # which a bare open()/close() pair does not guarantee.
        with open(file, 'wb') as fw:
            pickle.dump(self.player_Q_Values, fw)
#     def savePolicy(self, file="win_pct"):
#         fw = open(file, 'wb')
#         pickle.dump(self.stateActionOutcome, fw)
#         fw.close()

    # Use this method to load the developed model policy
    def loadPolicy(self, file="policy"):
        """Replace ``self.player_Q_Values`` with the object unpickled from ``file``.

        Args:
            file: Path of a pickle file holding a Q-value table.
        """
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file; only load policy files that come from a trusted source.
        # The context manager closes the handle even when unpickling fails.
        with open(file, 'rb') as fr:
            self.player_Q_Values = pickle.load(fr)

    # ******************************************************
    # *** This method is to play dealer with the policy  ***
    # *** However, it needs to be updated if it is to be *** 
    # *** used.                                          ***
    # ******************************************************
    def playWithDealer(self, rounds=1000):
        """Play ``rounds`` hands against the dealer using the saved policy.

        The game is reset, the policy is loaded with ``loadPolicy()`` (default
        file name) and ``self.exp_rate`` is set to 0 before any hand is played.
        The exploration rate is left at 0 when the method returns.

        Args:
            rounds: Number of hands to play.

        Returns:
            A length-3 numpy array of counts ``[win, draw, lose]`` from the
            player's point of view.
        """
        self.reset()
        self.loadPolicy()
        self.exp_rate = 0

        result = np.zeros(3)  # player [win, draw, lose]
        for round_idx in range(rounds):
            # Deal two cards each; the dealer's deal also yields the show card
            dealer_value, d_usable_ace, show_card = self.deal2cards(show=True)
            player_value, p_usable_ace = self.deal2cards(show=False)

            # The bet component of the state is drawn at random per hand
            self.state = (player_value, show_card, p_usable_ace, random.choice([10,50,100]))

            # Judge the winner straight away if either side has 21 after two cards
            if player_value == 21 or dealer_value == 21:
                if player_value == dealer_value:
                    result[1] += 1
                elif player_value > dealer_value:
                    result[0] += 1
                else:
                    result[2] += 1
            else:
                # Player's turn: keep acting until the hand is flagged as ended.
                # NOTE(review): how playerNxtState treats split / double-down
                # actions in this loop is not visible here - verify before
                # relying on the tallies.
                while True:
                    # chooseAction is invoked with (current round, total rounds),
                    # the same two-argument form the training loop uses; the
                    # previous zero-argument call did not match that usage.
                    action = self.chooseAction(round_idx, rounds)
                    # update next state
                    self.state = self.playerNxtState(action)
                    if self.end:
                        break

                # Dealer's turn: apply the dealer policy until it reports the end
                is_end = False
                while not is_end:
                    dealer_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)

                # Judge: 1 counts as a player win, 0 as a draw, anything else as a loss
                player_value = self.state[0]
                w = self.winner(player_value, dealer_value)
                if w == 1:
                    result[0] += 1
                elif w == 0:
                    result[1] += 1
                else:
                    result[2] += 1
            self.reset()
        return result

In [29]:
%%time
# Build a fresh agent with learning rate 0.05 and exploration rate 0.05
sol = BlackJackSolution(lr=0.05,exp_rate=0.05)

CPU times: user 2.32 ms, sys: 1 µs, total: 2.32 ms
Wall time: 2.32 ms


In [30]:
%%time
# Train the agent: play() is asked for 1,000,001 rounds
# (about 24 s of wall time in the recorded run below)
sol.play(1000001)

CPU times: user 23.5 s, sys: 71.8 ms, total: 23.6 s
Wall time: 23.6 s


In [31]:
%%time
# Inspect the win tracker: (player_value, show_card, usable_ace) -> action -> [wins, total games]
sol.stateActionOutcome

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.96 µs


{(2, 1, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [184, 496]},
 (2, 1, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 2, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [463, 906]},
 (2, 2, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 3, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [481, 948]},
 (2, 3, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 4, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [403, 848]},
 (2, 4, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 5, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [480, 848]},
 (2, 5, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 6, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [515, 898]},
 (2, 6, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 7, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [420, 888]},
 (2, 7, False): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [0, 0]},
 (2, 8, True): {0: [0, 0], 1: [0, 0], 2: [0, 0], 3: [431, 886]},
 (2, 8, False): {0: [0, 0], 1: [0, 0], 2: [0, 

In [32]:
%%time
# Inspect the Q table: (player_value, show_card, usable_ace, bet) -> action -> Q value
sol.player_Q_Values

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.96 µs


{(2, 1, True, 10): {0: -99999999, 1: 0, 2: 0, 3: 1926269.755},
 (2, 1, True, 50): {0: -99999999, 1: 0, 2: 0, 3: 1568934.246},
 (2, 1, True, 100): {0: -99999999, 1: 0, 2: 0, 3: 988847.188},
 (2, 1, False, 10): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 1, False, 50): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 1, False, 100): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 2, True, 10): {0: -99999999, 1: 0, 2: 0, 3: 50478.827},
 (2, 2, True, 50): {0: -99999999, 1: 0, 2: 0, 3: 55943.446},
 (2, 2, True, 100): {0: -99999999, 1: 0, 2: 0, 3: 28720.311},
 (2, 2, False, 10): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 2, False, 50): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 2, False, 100): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 3, True, 10): {0: -99999999, 1: 0, 2: 0, 3: 18099.049},
 (2, 3, True, 50): {0: -99999999, 1: 0, 2: 0, 3: 39064.775},
 (2, 3, True, 100): {0: -99999999, 1: 0, 2: 0, 3: 39085.148},
 (2, 3, False, 10): {0: -99999999, 1: 0, 2: 0, 3: 99999999},
 (2, 3, False,

In [33]:
%%time
# Create dataframe
qv_df = pd.DataFrame.from_dict(sol.player_Q_Values, orient='index')
qv_df.columns = ['Stay', 'Hit', 'Double', 'Split']
cols = ['Stay', 'Hit', 'Double', 'Split']
# Replace default values
qv_df[cols] = qv_df[cols].replace({0:-9999999.0, 999999.000:-9999999.0})
qv_df.replace(0.0, -9999999.0)
qv_df.insert(0, "None", -9999999.0)
# Identify Maximums
qv_df['action'] = qv_df.idxmax(axis=1)
qv_df['winnings'] = qv_df.max(axis=1)
qv_df.drop(['None', 'Stay', 'Hit', 'Double', 'Split'], axis=1, inplace=True)
qv_df.insert(0, "Game_No", 1000000)
cols = ['winnings']
qv_df[cols] = qv_df[cols].replace({-9999999.000:0})
# Flatten file
qv_df2 = qv_df.reset_index()
qv_df2.columns = ['Player', 'Dealer', 'Ace', 'Bet', 'Game_No', 'Action', 'w_Pct']
# Output to CSV
qv_df2.to_csv('Q_values.csv', mode='a', header=False) 

CPU times: user 34.1 ms, sys: 2.95 ms, total: 37.1 ms
Wall time: 35.6 ms


In [None]:
# # Pickle export sol.stateActionOutcome as "win_pct" and sol.player_Q_Values as "Q_policy"
# # IMPORTANT: must be done manually and separately by updating
# #             the BlackJackSolution class: savePolicy method
# # for sol.player_Q_Values
# sol.savePolicy("Q_policy")
# # for sol.stateActionOutcome
# sol.savePolicy("win_pct")

Plotting to make sure things look normal

In [None]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# import itertools
# import math

In [None]:
# #sol.gameTrack
# countWindow = 1000
# sumGames = np.add.reduceat(sol.gameTrack, np.arange(0, len(sol.gameTrack), countWindow))
# # We have to subset the sum games because it will extend a little past where we need it (due to split hands)
# sumGames_sub = sumGames[0:501]

In [None]:
# # Set up coefficients for regression line
# coef = np.polyfit(np.arange(len(sumGames_sub)),sumGames_sub,1)
# poly1d_fn = np.poly1d(coef)

In [None]:
# f, axes = plt.subplots(1, 1, figsize=(9, 9))

# sns.set_style("darkgrid")
# sns.lineplot(data=sumGames_sub,label=f'Wins per {countWindow}')
# sns.lineplot(data=poly1d_fn(np.arange(len(sumGames_sub))), label=f"Wins per {countWindow} (regression)")

# #plt.plot(sumGames)
# #plt.plot(np.arange(len(sumGames)),poly1d_fn(np.arange(len(sumGames))),'--k')

In [None]:
# lr_l = [0.1, 0.3, 0.5]
# exp_rate_l = [0.3,0.5,1]
# combos = list(itertools.product(lr_l,exp_rate_l))
# f, axes = plt.subplots(3, 3, figsize=(12, 12))

# for index, combo in enumerate(combos):
#     sol = BlackJackSolution(lr=combo[0],exp_rate=combo[1])
#     print(f'Playing Set {index}...')
#     sol.play(10000)
    
#     countWindow = 100
#     sumGames = np.add.reduceat(sol.gameTrack, np.arange(0, len(sol.gameTrack), countWindow))
    
#     # Create regression 
#     coef = np.polyfit(np.arange(len(sumGames)),sumGames,1)
#     poly1d_fn = np.poly1d(coef)
    
#     sns.lineplot(data=poly1d_fn(np.arange(len(sumGames))), ax=axes[math.floor(index/3),index % 3])
#     sns.lineplot(data=sumGames,ax=axes[math.floor(index/3),index % 3])

In [None]:
# sol.player_Q_Values

#### EDA: Exploratory Data Analysis
---
###### Cells beyond this point are for exploratory data analysis

In [None]:
# **************************************************
# **************************************************
# **************************************************
# **************************************************
# IMPORTANT: Code beyond this point is used to help determine
#            which combo of Learning Rate and Exploration rate yields the best results
#            after 1,000,000+ game plays for each combo
# **************************************************
#    Rerun BlackJackSolution Class cell with updates to the following:
#         1) chooseAction() method:
#             i) ensure line 173 is NOT commented out: "if (np.random.uniform(0, 1) <= self.exp_rate*(1 - curRound/trainRound)):"
#             ii) ensure line 176 IS commented out: "if (1 == 1):"
#         2) play() method: Chris' code for snapshots at the intervals (1-10, 10, 20, ... 100, ... 1000, ...)
#             i) ensure lines 469 through 532 are commented out for running through combos
#             ii) note: this (should only be used for final data output for policy)
#                 (otherwise unnecessary outputs and resources will be wasted on generating these)
# **************************************************
# **************************************************
# **************************************************
# **************************************************

In [4]:
# Hyper-parameter grid: the learning/exploration rates the RL simulation is run through
# ***** USE FOR RL (Learning & Exploration Rate) combos *****
learningRateList = [0.05, 0.1, 0.3, 0.5, 1]
explorationRateList = [0.05, 0.1, 0.3, 0.5, 1]

# ***** USE FOR RANDOM (no LR & ER combos) *****
random_learningRateList = [0]
random_explorationRateList = [0]

# Every (learning rate, exploration rate) pairing - the Cartesian product of the two
# lists, with the learning rate varying slowest
comboLearningAndExplorationRateList = [
    (learning_rate, exploration_rate)
    for learning_rate in learningRateList
    for exploration_rate in explorationRateList
]
random_comboLearningAndExplorationRateList = [
    (learning_rate, exploration_rate)
    for learning_rate in random_learningRateList
    for exploration_rate in random_explorationRateList
]

# # Visualize the comboLearningAndExplorationRateList
# print(comboLearningAndExplorationRateList)
# print(random_comboLearningAndExplorationRateList)

In [5]:
# Define number of games to have RL Simulation run through for each Learning and Exploration Rate Combo
numberOfGamesToPlay = 1000000

# Create list to hold one summary dict per combo (stateActionOutcome, player_Q_Values and gameTrack)
comboLearningAndExplorationRateData = []

# Create DataFrame to hold values for stateActionOutcome data
stateActionOutcome_df = pd.DataFrame(columns=["lr_er_combo",
                                              "learningRate",
                                              "explorationRate",
                                              "playerHandValue",
                                              "dealerUpCardValue",
                                              "usableAce",
                                              "standGamesWon",
                                              "standTotalGames",
                                              "standAvgWinRate",
                                              "hitGamesWon",
                                              "hitTotalGames",
                                              "hitAvgWinRate",
                                              "doubleDownGamesWon",
                                              "doubleDownTotalGames",
                                              "doubleDownAvgWinRate",
                                              "splitGamesWon",
                                              "splitTotalGames",
                                              "splitAvgWinRate",
                                              "joinKey"])

# Create DataFrame to hold values for player_Q_Values data
player_Q_Values_df = pd.DataFrame(columns=["learningRate_Q",
                                           "explorationRate_Q",
                                           "playerHandValue_Q",
                                           "dealerUpCardValue_Q",
                                           "usableAce_Q",
                                           "bet_Q",
                                           "standQvalue",
                                           "hitQvalue",
                                           "doubleDownQvalue",
                                           "splitQvalue",
                                           "joinKey_Q"])

# Create lists to hold values for gameTrack data
learningRate_gt = []
explorationRate_gt = []
joinKey_gt = []
gameTrackData = []

# (staging column holding [wins, total] pairs, prefix of the three derived columns)
actionColumnPrefixes = [("Stand", "stand"),
                        ("Hit", "hit"),
                        ("DoubleDown", "doubleDown"),
                        ("Split", "split")]

# Per-combo staging frames are collected here and concatenated once after the loop;
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
stagedStateActionOutcomeFrames = []
stagedPlayerQValueFrames = []

# **********************
# ***** Use for RL *****
# **********************
# Loop through Combinations of learningRate & explorationRate
for index, combo in enumerate(comboLearningAndExplorationRateList):
    print(f'Playing set {index + 1} (lr: {combo[0]}; exp_rate: {combo[1]})...')
    sol = BlackJackSolution(lr = combo[0], exp_rate = combo[1])
    sol.play(numberOfGamesToPlay)
    stateActionOutcome = sol.stateActionOutcome
    playerQvalues = sol.player_Q_Values
    gameTrack = sol.gameTrack
    gameSet = {"comboDesc": f"lr: {combo[0]}, er: {combo[1]}",
               "combo": f"{combo[0]}_{combo[1]}",
               "gameTrack": gameTrack,
               "gamesPlayed": len(gameTrack),
               "gamesWon": sum(gameTrack),
               "percentWon": sum(gameTrack) / len(gameTrack),
               "stateActionOutcome": stateActionOutcome,
               "playerQvalues": playerQvalues
              }
    comboLearningAndExplorationRateData.append(gameSet)
    print(f"    {len(gameTrack)} games played")

    # Create staging DataFrame to hold values from the stateActionOutcome dictionary
    stg_sao_df = pd.DataFrame.from_dict(sol.stateActionOutcome, orient="index")

    # Rename first four columns & reset index
    stg_sao_df.columns = ["Stand", "Hit", "DoubleDown", "Split"]
    stg_sao_df = stg_sao_df.reset_index()

    # Rename columns that were removed from being indices
    stg_sao_df = stg_sao_df.rename(columns={"level_0": "playerHandValue",
                                            "level_1": "dealerUpCardValue",
                                            "level_2": "usableAce"})

    # Unpack each action's [wins, total] pair and derive its win rate.
    # A state/action never played gives 0/0 = NaN, which is reported as 0.
    for stagedColumn, prefix in actionColumnPrefixes:
        stg_sao_df[f"{prefix}GamesWon"] = [cell[0] for cell in stg_sao_df[stagedColumn]]
        stg_sao_df[f"{prefix}TotalGames"] = [cell[1] for cell in stg_sao_df[stagedColumn]]
        stg_sao_df[f"{prefix}AvgWinRate"] = (
            stg_sao_df[f"{prefix}GamesWon"] / stg_sao_df[f"{prefix}TotalGames"]
        ).round(3).fillna(0)

    # Add learningRate and explorationRate columns with values for current lr & er combo pair
    stg_sao_df.insert(0, "lr_er_combo", f"{combo[0]}_{combo[1]}")
    stg_sao_df.insert(1, "learningRate", combo[0])
    stg_sao_df.insert(2, "explorationRate", combo[1])

    # Create joinKey for later (used to join stateActionOutcome_df to the flattened player_Q_Values_df)
    stg_sao_df["joinKey"] = (stg_sao_df["learningRate"].map(str) + "_" +
                             stg_sao_df["explorationRate"].map(str) + "_" +
                             stg_sao_df["playerHandValue"].map(str) + "_" +
                             stg_sao_df["dealerUpCardValue"].map(str) + "_" +
                             stg_sao_df["usableAce"].map(str))

    # Drop columns "Stand", "Hit", "DoubleDown", "Split" as they are not needed any longer
    stg_sao_df = stg_sao_df.drop(columns=["Stand", "Hit", "DoubleDown", "Split"])

    # Stage the frame for the final stateActionOutcome_df
    stagedStateActionOutcomeFrames.append(stg_sao_df)

    # Create staging DataFrame to hold values from the player_Q_Values dictionary
    stg_pqv_df = pd.DataFrame.from_dict(sol.player_Q_Values, orient="index")

    # Rename first four columns & reset index
    stg_pqv_df.columns = ["standQvalue", "hitQvalue", "doubleDownQvalue", "splitQvalue"]
    stg_pqv_df = stg_pqv_df.reset_index()

    # Rename columns that were removed from being indices
    stg_pqv_df = stg_pqv_df.rename(columns={"level_0": "playerHandValue_Q",
                                            "level_1": "dealerUpCardValue_Q",
                                            "level_2": "usableAce_Q",
                                            "level_3": "bet_Q"})

    # Add learningRate and explorationRate columns with values for current lr & er combo pair
    stg_pqv_df.insert(0, "learningRate_Q", combo[0])
    stg_pqv_df.insert(1, "explorationRate_Q", combo[1])

    # Create joinKey for later; the bet is left out on purpose so that the three bet rows
    # of one state share the key of the matching stateActionOutcome row
    stg_pqv_df["joinKey_Q"] = (stg_pqv_df["learningRate_Q"].map(str) + "_" +
                               stg_pqv_df["explorationRate_Q"].map(str) + "_" +
                               stg_pqv_df["playerHandValue_Q"].map(str) + "_" +
                               stg_pqv_df["dealerUpCardValue_Q"].map(str) + "_" +
                               stg_pqv_df["usableAce_Q"].map(str))

    # Stage the frame for the final player_Q_Values_df
    stagedPlayerQValueFrames.append(stg_pqv_df)

# Build the final frames in one pass, keeping the column order declared above
stateActionOutcome_df = pd.concat([stateActionOutcome_df] + stagedStateActionOutcomeFrames,
                                  ignore_index=True)
player_Q_Values_df = pd.concat([player_Q_Values_df] + stagedPlayerQValueFrames,
                               ignore_index=True)

Playing set 1 (lr: 0.05; exp_rate: 0.05)...
    1016530 games played
Playing set 2 (lr: 0.05; exp_rate: 0.1)...
    1015664 games played
Playing set 3 (lr: 0.05; exp_rate: 0.3)...
    1015963 games played
Playing set 4 (lr: 0.05; exp_rate: 0.5)...
    1017338 games played
Playing set 5 (lr: 0.05; exp_rate: 1)...
    1018404 games played
Playing set 6 (lr: 0.1; exp_rate: 0.05)...
    1012532 games played
Playing set 7 (lr: 0.1; exp_rate: 0.1)...
    1013521 games played
Playing set 8 (lr: 0.1; exp_rate: 0.3)...
    1014212 games played
Playing set 9 (lr: 0.1; exp_rate: 0.5)...
    1015305 games played
Playing set 10 (lr: 0.1; exp_rate: 1)...
    1018175 games played
Playing set 11 (lr: 0.3; exp_rate: 0.05)...
    1009853 games played
Playing set 12 (lr: 0.3; exp_rate: 0.1)...
    1010562 games played
Playing set 13 (lr: 0.3; exp_rate: 0.3)...
    1012870 games played
Playing set 14 (lr: 0.3; exp_rate: 0.5)...
    1015153 games played
Playing set 15 (lr: 0.3; exp_rate: 1)...
    1018159 

In [7]:
# **************************************************
# **************************************************
# **************************************************
# **************************************************
# IMPORTANT: Next cell is for RANDOM game play for comparison purposes
# **************************************************
#    Rerun BlackJackSolution Class cell with updates to the following:
#         1) chooseAction() method:
#             i) ensure line 173 IS commented out: "if (np.random.uniform(0, 1) <= self.exp_rate*(1 - curRound/trainRound)):"
#             ii) ensure line 176 is NOT commented out: "if (1 == 1):"
#         2) play() method: Chris' code for snapshots at the intervals (1-10, 10, 20, ... 100, ... 1000, ...)
#             i) ensure lines 469 through 532 are STILL commented out for running through combos
#             ii) note: this (should only be used for final data output for policy)
#                 (otherwise unnecessary outputs and resources will be wasted on generating these)
# **************************************************
# **************************************************
# **************************************************
# **************************************************

In [8]:
# **************************
# ***** Use for RANDOM ***** # ***** AFTER ADJUSTING BlackJackSolution class: chooseAction method *****
# **************************
# Runs the random-play baseline and adds its rows (tagged "random") to the frames
# built by the RL sweep cell, so stateActionOutcome_df and player_Q_Values_df must
# already exist when this cell runs.

# (staging column holding [wins, total] pairs, prefix of the three derived columns)
randomActionColumnPrefixes = [("Stand", "stand"),
                              ("Hit", "hit"),
                              ("DoubleDown", "doubleDown"),
                              ("Split", "split")]

# Loop through RANDOM Combinations of learningRate & explorationRate (only one basic combo; not passed)
for index, combo in enumerate(random_comboLearningAndExplorationRateList):
    print(f'Playing set {index + 1} (random)...')
    sol = BlackJackSolution(lr = combo[0], exp_rate = combo[1])
    sol.play(numberOfGamesToPlay)
    stateActionOutcome = sol.stateActionOutcome
    playerQvalues = sol.player_Q_Values
    gameTrack = sol.gameTrack
    gameSet = {"comboDesc": "random",
               "combo": "random",
               "gameTrack": gameTrack,
               "gamesPlayed": len(gameTrack),
               "gamesWon": sum(gameTrack),
               "percentWon": sum(gameTrack) / len(gameTrack),
               "stateActionOutcome": stateActionOutcome,
               "playerQvalues": playerQvalues
              }
    comboLearningAndExplorationRateData.append(gameSet)
    print(f"    {len(gameTrack)} games played")

    # Create staging DataFrame to hold values from the stateActionOutcome dictionary
    stg_sao_df = pd.DataFrame.from_dict(sol.stateActionOutcome, orient="index")

    # Rename first four columns & reset index
    stg_sao_df.columns = ["Stand", "Hit", "DoubleDown", "Split"]
    stg_sao_df = stg_sao_df.reset_index()

    # Rename columns that were removed from being indices
    stg_sao_df = stg_sao_df.rename(columns={"level_0": "playerHandValue",
                                            "level_1": "dealerUpCardValue",
                                            "level_2": "usableAce"})

    # Unpack each action's [wins, total] pair and derive its win rate.
    # A state/action never played gives 0/0 = NaN, which is reported as 0.
    for stagedColumn, prefix in randomActionColumnPrefixes:
        stg_sao_df[f"{prefix}GamesWon"] = [cell[0] for cell in stg_sao_df[stagedColumn]]
        stg_sao_df[f"{prefix}TotalGames"] = [cell[1] for cell in stg_sao_df[stagedColumn]]
        stg_sao_df[f"{prefix}AvgWinRate"] = (
            stg_sao_df[f"{prefix}GamesWon"] / stg_sao_df[f"{prefix}TotalGames"]
        ).round(3).fillna(0)

    # Add learningRate and explorationRate columns with values for current lr & er combo pair
    stg_sao_df.insert(0, "lr_er_combo", "random")
    stg_sao_df.insert(1, "learningRate", combo[0])
    stg_sao_df.insert(2, "explorationRate", combo[1])

    # Create joinKey for later (used to join stateActionOutcome_df to the flattened player_Q_Values_df)
    stg_sao_df["joinKey"] = ("random_" +
                             stg_sao_df["playerHandValue"].map(str) + "_" +
                             stg_sao_df["dealerUpCardValue"].map(str) + "_" +
                             stg_sao_df["usableAce"].map(str))

    # Drop columns "Stand", "Hit", "DoubleDown", "Split" as they are not needed any longer
    stg_sao_df = stg_sao_df.drop(columns=["Stand", "Hit", "DoubleDown", "Split"])

    # Add the staging rows to the final stateActionOutcome_df
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    stateActionOutcome_df = pd.concat([stateActionOutcome_df, stg_sao_df], ignore_index=True)

    # Create staging DataFrame to hold values from the player_Q_Values dictionary
    stg_pqv_df = pd.DataFrame.from_dict(sol.player_Q_Values, orient="index")

    # Rename first four columns & reset index
    stg_pqv_df.columns = ["standQvalue", "hitQvalue", "doubleDownQvalue", "splitQvalue"]
    stg_pqv_df = stg_pqv_df.reset_index()

    # Rename columns that were removed from being indices
    stg_pqv_df = stg_pqv_df.rename(columns={"level_0": "playerHandValue_Q",
                                            "level_1": "dealerUpCardValue_Q",
                                            "level_2": "usableAce_Q",
                                            "level_3": "bet_Q"})

    # Add learningRate and explorationRate columns with values for current lr & er combo pair
    stg_pqv_df.insert(0, "learningRate_Q", combo[0])
    stg_pqv_df.insert(1, "explorationRate_Q", combo[1])

    # Create joinKey for later; the bet is left out on purpose so that the three bet rows
    # of one state share the key of the matching stateActionOutcome row
    stg_pqv_df["joinKey_Q"] = ("random_" +
                               stg_pqv_df["playerHandValue_Q"].map(str) + "_" +
                               stg_pqv_df["dealerUpCardValue_Q"].map(str) + "_" +
                               stg_pqv_df["usableAce_Q"].map(str))

    # Add the staging rows to the final player_Q_Values_df
    player_Q_Values_df = pd.concat([player_Q_Values_df, stg_pqv_df], ignore_index=True)

Playing set 1 (random)...
    1023548 games played


In [9]:
# Display stateActionOutcome_df: one row per (lr/er combo, player hand value, dealer up card, usable ace)
# with won / total / win-rate columns for each action
stateActionOutcome_df

Unnamed: 0,lr_er_combo,learningRate,explorationRate,playerHandValue,dealerUpCardValue,usableAce,standGamesWon,standTotalGames,standAvgWinRate,hitGamesWon,hitTotalGames,hitAvgWinRate,doubleDownGamesWon,doubleDownTotalGames,doubleDownAvgWinRate,splitGamesWon,splitTotalGames,splitAvgWinRate,joinKey
0,0.05_0.05,0.05,0.05,2,1,True,0,0,0.000,0,0,0.0,0,0,0.0,226,538,0.420,0.05_0.05_2_1_True
1,0.05_0.05,0.05,0.05,2,1,False,0,0,0.000,0,0,0.0,0,0,0.0,0,0,0.000,0.05_0.05_2_1_False
2,0.05_0.05,0.05,0.05,2,2,True,0,0,0.000,0,0,0.0,0,0,0.0,454,948,0.479,0.05_0.05_2_2_True
3,0.05_0.05,0.05,0.05,2,2,False,0,0,0.000,0,0,0.0,0,0,0.0,0,0,0.000,0.05_0.05_2_2_False
4,0.05_0.05,0.05,0.05,2,3,True,0,0,0.000,0,0,0.0,0,0,0.0,426,886,0.481,0.05_0.05_2_3_True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10395,random,0.00,0.00,21,8,False,2375,2555,0.930,0,0,0.0,0,0,0.0,0,0,0.000,random_21_8_False
10396,random,0.00,0.00,21,9,True,454,484,0.938,0,0,0.0,0,0,0.0,0,0,0.000,random_21_9_True
10397,random,0.00,0.00,21,9,False,2483,2659,0.934,0,0,0.0,0,0,0.0,0,0,0.000,random_21_9_False
10398,random,0.00,0.00,21,10,True,1568,1628,0.963,0,0,0.0,0,0,0.0,0,0,0.000,random_21_10_True


In [10]:
# Display player_Q_Values_df: one row per (lr/er combo, player hand value, dealer up card, usable ace, bet)
# with the Q value of each action
player_Q_Values_df

Unnamed: 0,learningRate_Q,explorationRate_Q,playerHandValue_Q,dealerUpCardValue_Q,usableAce_Q,bet_Q,standQvalue,hitQvalue,doubleDownQvalue,splitQvalue,joinKey_Q
0,0.05,0.05,2,1,True,10,-99999999.0,0.0,0.0,1.095660e+06,0.05_0.05_2_1_True
1,0.05,0.05,2,1,True,50,-99999999.0,0.0,0.0,1.095657e+06,0.05_0.05_2_1_True
2,0.05,0.05,2,1,True,100,-99999999.0,0.0,0.0,8.478065e+05,0.05_0.05_2_1_True
3,0.05,0.05,2,1,False,10,-99999999.0,0.0,0.0,1.000000e+08,0.05_0.05_2_1_False
4,0.05,0.05,2,1,False,50,-99999999.0,0.0,0.0,1.000000e+08,0.05_0.05_2_1_False
...,...,...,...,...,...,...,...,...,...,...,...
31195,0.00,0.00,21,10,True,50,99999999.0,0.0,0.0,0.000000e+00,random_21_10_True
31196,0.00,0.00,21,10,True,100,99999999.0,0.0,0.0,0.000000e+00,random_21_10_True
31197,0.00,0.00,21,10,False,10,99999999.0,0.0,0.0,0.000000e+00,random_21_10_False
31198,0.00,0.00,21,10,False,50,99999999.0,0.0,0.0,0.000000e+00,random_21_10_False


In [11]:
# Slice player_Q_Values_df into one frame per bet size (min = 10; mid = 50; max = 100).
# The three slices are later laid side by side so each bet gets its own set of columns.
isMinBet = player_Q_Values_df["bet_Q"] == 10
isMidBet = player_Q_Values_df["bet_Q"] == 50
isMaxBet = player_Q_Values_df["bet_Q"] == 100

minBetQvalues_df = player_Q_Values_df.loc[isMinBet]
midBetQvalues_df = player_Q_Values_df.loc[isMidBet]
maxBetQvalues_df = player_Q_Values_df.loc[isMaxBet]

In [12]:
# (original column, name it keeps after the bet prefix) pairs.
# The learning / exploration rate columns keep their "_Q" suffix, the state
# columns (hand value, dealer card, usable ace, bet) lose it, and the four
# Q value columns are only prefixed. Columns not listed here (e.g. joinKey_Q)
# are left untouched by the rename.
_betRenamePlan = (
    ("learningRate_Q", "learningRate_Q"),
    ("explorationRate_Q", "explorationRate_Q"),
    ("playerHandValue_Q", "playerHandValue"),
    ("dealerUpCardValue_Q", "dealerUpCardValue"),
    ("usableAce_Q", "usableAce"),
    ("bet_Q", "bet"),
    ("standQvalue", "standQvalue"),
    ("hitQvalue", "hitQvalue"),
    ("doubleDownQvalue", "doubleDownQvalue"),
    ("splitQvalue", "splitQvalue"),
)


def _betPrefixedColumnNames(betLabel):
    """Return the {old column: new column} mapping for one bet size.

    betLabel is the prefix to apply ("minBet", "midBet" or "maxBet"); every
    new name has the form "<betLabel>_<kept name>".
    """
    return {oldName: betLabel + "_" + keptName for oldName, keptName in _betRenamePlan}


# Prefix the columns of each per-bet DataFrame with its bet label
minBetQvalues_df = minBetQvalues_df.rename(columns=_betPrefixedColumnNames("minBet"))
midBetQvalues_df = midBetQvalues_df.rename(columns=_betPrefixedColumnNames("midBet"))
maxBetQvalues_df = maxBetQvalues_df.rename(columns=_betPrefixedColumnNames("maxBet"))

In [13]:
# Visualize minBetQvalues_df
# minBetQvalues_df

In [14]:
# Visualize midBetQvalues_df
# midBetQvalues_df

In [15]:
# Visualize maxBetQvalues_df
# maxBetQvalues_df

In [16]:
# Columns carried through to qValues_df: for every bet size the bet itself plus
# the four action Q values, followed by the shared join key.
_qValueColumnStems = ["bet", "standQvalue", "hitQvalue", "doubleDownQvalue", "splitQvalue"]
_qValueColumnsToKeep = [
    betLabel + "_" + stem
    for betLabel in ("minBet", "midBet", "maxBet")
    for stem in _qValueColumnStems
] + ["joinKey_Q"]

# Outer-join the three per-bet DataFrames on joinKey_Q (min <- mid <- max),
# then keep only the columns listed above.
qValues_df = (
    minBetQvalues_df
    .merge(midBetQvalues_df, how="outer", on="joinKey_Q", suffixes=("_minBet", "_midBet"))
    .merge(maxBetQvalues_df, how="outer", on="joinKey_Q", suffixes=("_midBet", "_maxBet"))
    .loc[:, _qValueColumnsToKeep]
)

# Visualize qValues_df
qValues_df

Unnamed: 0,minBet_bet,minBet_standQvalue,minBet_hitQvalue,minBet_doubleDownQvalue,minBet_splitQvalue,midBet_bet,midBet_standQvalue,midBet_hitQvalue,midBet_doubleDownQvalue,midBet_splitQvalue,maxBet_bet,maxBet_standQvalue,maxBet_hitQvalue,maxBet_doubleDownQvalue,maxBet_splitQvalue,joinKey_Q
0,10,-99999999.0,0.0,0.0,1.095660e+06,50,-99999999.0,0.0,0.0,1.095657e+06,100,-99999999.0,0.0,0.0,8.478065e+05,0.05_0.05_2_1_True
1,10,-99999999.0,0.0,0.0,1.000000e+08,50,-99999999.0,0.0,0.0,1.000000e+08,100,-99999999.0,0.0,0.0,1.000000e+08,0.05_0.05_2_1_False
2,10,-99999999.0,0.0,0.0,2.110179e+04,50,-99999999.0,0.0,0.0,2.339261e+04,100,-99999999.0,0.0,0.0,5.597054e+04,0.05_0.05_2_2_True
3,10,-99999999.0,0.0,0.0,1.000000e+08,50,-99999999.0,0.0,0.0,1.000000e+08,100,-99999999.0,0.0,0.0,1.000000e+08,0.05_0.05_2_2_False
4,10,-99999999.0,0.0,0.0,4.795893e+04,50,-99999999.0,0.0,0.0,9.833367e+04,100,-99999999.0,0.0,0.0,2.871160e+04,0.05_0.05_2_3_True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10395,10,99999999.0,0.0,0.0,0.000000e+00,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00,random_21_8_False
10396,10,99999999.0,0.0,0.0,0.000000e+00,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00,random_21_9_True
10397,10,99999999.0,0.0,0.0,0.000000e+00,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00,random_21_9_False
10398,10,99999999.0,0.0,0.0,0.000000e+00,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00,random_21_10_True


In [17]:
# Build blackJackGameData_df by outer-joining the win/total-games data
# (stateActionOutcome_df, key: joinKey) with the wide Q value data
# (qValues_df, key: joinKey_Q).
# joinKey_Q is dropped after the merge; stateActionOutcome_df's joinKey stays.
blackJackGameData_df = (
    stateActionOutcome_df
    .merge(qValues_df, how="outer", left_on="joinKey", right_on="joinKey_Q")
    .drop(columns=["joinKey_Q"])
)

# Visualize blackJackGameData_df
blackJackGameData_df

Unnamed: 0,lr_er_combo,learningRate,explorationRate,playerHandValue,dealerUpCardValue,usableAce,standGamesWon,standTotalGames,standAvgWinRate,hitGamesWon,...,midBet_bet,midBet_standQvalue,midBet_hitQvalue,midBet_doubleDownQvalue,midBet_splitQvalue,maxBet_bet,maxBet_standQvalue,maxBet_hitQvalue,maxBet_doubleDownQvalue,maxBet_splitQvalue
0,0.05_0.05,0.05,0.05,2,1,True,0,0,0.000,0,...,50,-99999999.0,0.0,0.0,1.095657e+06,100,-99999999.0,0.0,0.0,8.478065e+05
1,0.05_0.05,0.05,0.05,2,1,False,0,0,0.000,0,...,50,-99999999.0,0.0,0.0,1.000000e+08,100,-99999999.0,0.0,0.0,1.000000e+08
2,0.05_0.05,0.05,0.05,2,2,True,0,0,0.000,0,...,50,-99999999.0,0.0,0.0,2.339261e+04,100,-99999999.0,0.0,0.0,5.597054e+04
3,0.05_0.05,0.05,0.05,2,2,False,0,0,0.000,0,...,50,-99999999.0,0.0,0.0,1.000000e+08,100,-99999999.0,0.0,0.0,1.000000e+08
4,0.05_0.05,0.05,0.05,2,3,True,0,0,0.000,0,...,50,-99999999.0,0.0,0.0,9.833367e+04,100,-99999999.0,0.0,0.0,2.871160e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10395,random,0.00,0.00,21,8,False,2375,2555,0.930,0,...,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00
10396,random,0.00,0.00,21,9,True,454,484,0.938,0,...,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00
10397,random,0.00,0.00,21,9,False,2483,2659,0.934,0,...,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00
10398,random,0.00,0.00,21,10,True,1568,1628,0.963,0,...,50,99999999.0,0.0,0.0,0.000000e+00,100,99999999.0,0.0,0.0,0.000000e+00


In [18]:
# Write the combined outcome + Q value data to disk (without the index) so it
# can be picked up elsewhere (other notebooks, Visual Studio, Tableau, etc.)
blackJackGameData_df.to_csv(path_or_buf="blackJackGameData_EDA.csv", index=False)

In [19]:
# Summarize every combo's gameTrack (one 0/1 result per hand) into one row per
# checkpoint: every 10,000th hand plus the very last hand. Each row carries
#   * running totals over all hands so far,
#   * totals for the interval since the previous checkpoint,
#   * totals for the first / final 100,000 hands (only on the row that closes
#     that window, NaN everywhere else).
# The lists below are zipped into blackJackGameTracker_df in the next cell, so
# every checkpoint must append exactly one value to each of them.

# Create lists to hold values for gameTrack data
handNumberList = []

# Create lists to hold values for all hands (Running Totals)
handResultList = []
handsWonRunningTotalList = []
avgHandsWonRunningTotalList = []

# Create lists to hold values for hands within current interval (of 10,000)
handsInIntervalList = []
handsWonInIntervalList = []
avgHandsWonInIntervalList = []

# Create lists to hold values for hands within first and final large interval (of 100,000)
handsInFirstFinalLargeIntervalList = []
handsWonInFirstFinalLargeIntervalTotalList = []
avgHandsWonInFirstFinalLargeIntervalTotalList = []
handsInFirstFinalIntervalFlagList = []

# Create lists to hold flags for first and final large interval values
firstLargeIntervalFlagList = []
finalLargeIntervalFlagList = []

# Create lists to hold learning and exploration rate combo list / desc
lr_er_list = []
lr_er_descList = []

# Set interval for what games to aggregate on (and pass through to final dataset).
# These are the same for every combo, so they are set once, outside the loop.
interval = 10000
firstFinalLargeInterval = 100000

# Loop through each combo iteration within comboLearningAndExplorationRateData
for game in comboLearningAndExplorationRateData:

    gameTrack = game["gameTrack"]

    # Determine the number of games played for current iteration
    numberOfHands = len(gameTrack)

    # Determine the hand number where the final large interval begins
    # (<= 0 when fewer than firstFinalLargeInterval hands were played, in which
    #  case the final window simply covers every hand)
    lastLargeIntervalFirst = numberOfHands - firstFinalLargeInterval + 1

    # Hand numbers to report on: every interval-th hand plus the final hand
    intervalsToAppend = list(range(interval, numberOfHands + 1, interval))
    if not intervalsToAppend or intervalsToAppend[-1] != numberOfHands:
        intervalsToAppend.append(numberOfHands)

    # Set copy for the per-hand membership test; scanning the list for each of
    # the (potentially ~1,000,000) hands is needlessly slow.
    checkpointHandNumbers = set(intervalsToAppend)

    # Reset win / hand counters for this combo
    winCounter = 0
    intervalHandCounter = 0
    intervalWinCounter = 0

    # The first and the final large window get their own counters. Sharing one
    # pair (reset only on a hand that belongs to neither window) fails when the
    # two windows touch or overlap (numberOfHands <= 2 * firstFinalLargeInterval):
    # no reset ever happens and the "final" row would report every hand played.
    firstLargeIntervalHandCounter = 0
    firstLargeIntervalWinCounter = 0
    finalLargeIntervalHandCounter = 0
    finalLargeIntervalWinCounter = 0

    # Loop through each hand's result in the current gameTrack list
    # (handNumber is 1-based; hand is 0 for push/loss and 1 for win)
    for handNumber, hand in enumerate(gameTrack, start=1):

        # Running totals and current-interval totals
        winCounter += hand
        intervalHandCounter += 1
        intervalWinCounter += hand

        # First large interval: hands 1 .. firstFinalLargeInterval
        if handNumber <= firstFinalLargeInterval:
            firstLargeIntervalHandCounter += 1
            firstLargeIntervalWinCounter += hand

        # Final large interval: hands lastLargeIntervalFirst .. numberOfHands
        if handNumber >= lastLargeIntervalFirst:
            finalLargeIntervalHandCounter += 1
            finalLargeIntervalWinCounter += hand

        # Only checkpoint hands produce a row in the output lists
        if handNumber not in checkpointHandNumbers:
            continue

        # Running totals over all hands so far
        handNumberList.append(handNumber)
        handResultList.append(hand)
        handsWonRunningTotalList.append(winCounter)
        avgHandsWonRunningTotalList.append(round(winCounter / handNumber, 6))

        # Totals since the previous checkpoint
        handsInIntervalList.append(intervalHandCounter)
        handsWonInIntervalList.append(intervalWinCounter)
        avgHandsWonInIntervalList.append(round(intervalWinCounter / intervalHandCounter, 6))

        # Row that closes the first large interval
        # (takes precedence when it is also the final hand, as before)
        if handNumber == firstFinalLargeInterval:
            handsInFirstFinalLargeIntervalList.append(firstLargeIntervalHandCounter)
            handsWonInFirstFinalLargeIntervalTotalList.append(firstLargeIntervalWinCounter)
            avgHandsWonInFirstFinalLargeIntervalTotalList.append(
                round(firstLargeIntervalWinCounter / firstLargeIntervalHandCounter, 6))
            handsInFirstFinalIntervalFlagList.append(1)
            firstLargeIntervalFlagList.append(1)
            finalLargeIntervalFlagList.append(0)

        # Row that closes the final large interval (the last hand played)
        elif handNumber == numberOfHands:
            handsInFirstFinalLargeIntervalList.append(finalLargeIntervalHandCounter)
            handsWonInFirstFinalLargeIntervalTotalList.append(finalLargeIntervalWinCounter)
            avgHandsWonInFirstFinalLargeIntervalTotalList.append(
                round(finalLargeIntervalWinCounter / finalLargeIntervalHandCounter, 6))
            handsInFirstFinalIntervalFlagList.append(1)
            firstLargeIntervalFlagList.append(0)
            finalLargeIntervalFlagList.append(1)

        # For all other intervals
        else:
            handsInFirstFinalLargeIntervalList.append(np.nan)
            handsWonInFirstFinalLargeIntervalTotalList.append(np.nan)
            avgHandsWonInFirstFinalLargeIntervalTotalList.append(np.nan)
            handsInFirstFinalIntervalFlagList.append(0)
            firstLargeIntervalFlagList.append(0)
            finalLargeIntervalFlagList.append(0)

        lr_er_list.append(game["combo"])
        lr_er_descList.append(game["comboDesc"])

        # Now reset intervalHandCounter & intervalWinCounter back to 0
        intervalHandCounter = 0
        intervalWinCounter = 0


In [20]:
# Column names for blackJackGameTracker_df, in the same order as the lists that
# are zipped together below (position in this list == position in the zip).
# NOTE: the last column really is called "finalLargeIntervalFlagList"; later
# cells filter on that exact name, so it is kept as-is.
_trackerColumnNames = [
    "lr_er",
    "lr_er_desc",
    "handNumber",
    "handResult",
    "handsWonRunningTotal",
    "avgHandsWonRunningTotal",
    "handsInInterval",
    "handsWonInInterval",
    "avgHandsWonInInterval",
    "handsInFirstFinalLargeInterval",
    "handsWonInFirstFinalLargeInterval",
    "avgHandsWonInFirstFinalLargeInterval",
    "handsInFirstFinalIntervalFlag",
    "firstLargeIntervalFlag",
    "finalLargeIntervalFlagList",
]

# One tuple per checkpoint row, built by zipping the per-column lists together
_trackerRows = list(zip(
    lr_er_list,
    lr_er_descList,
    handNumberList,
    handResultList,
    handsWonRunningTotalList,
    avgHandsWonRunningTotalList,
    handsInIntervalList,
    handsWonInIntervalList,
    avgHandsWonInIntervalList,
    handsInFirstFinalLargeIntervalList,
    handsWonInFirstFinalLargeIntervalTotalList,
    avgHandsWonInFirstFinalLargeIntervalTotalList,
    handsInFirstFinalIntervalFlagList,
    firstLargeIntervalFlagList,
    finalLargeIntervalFlagList,
))

# Create blackJackGameTracker_df and rename its positional columns (0, 1, 2, ...)
# to the names above
blackJackGameTracker_df = pd.DataFrame(_trackerRows).rename(
    columns=dict(enumerate(_trackerColumnNames))
)

# Visualize blackJackGameTracker_df
blackJackGameTracker_df


Unnamed: 0,lr_er,lr_er_desc,handNumber,handResult,handsWonRunningTotal,avgHandsWonRunningTotal,handsInInterval,handsWonInInterval,avgHandsWonInInterval,handsInFirstFinalLargeInterval,handsWonInFirstFinalLargeInterval,avgHandsWonInFirstFinalLargeInterval,handsInFirstFinalIntervalFlag,firstLargeIntervalFlag,finalLargeIntervalFlagList
0,0.05_0.05,"lr: 0.05, er: 0.05",10000,0,3966,0.396600,10000,3966,0.396600,,,,0,0,0
1,0.05_0.05,"lr: 0.05, er: 0.05",20000,1,8099,0.404950,10000,4133,0.413300,,,,0,0,0
2,0.05_0.05,"lr: 0.05, er: 0.05",30000,0,12231,0.407700,10000,4132,0.413200,,,,0,0,0
3,0.05_0.05,"lr: 0.05, er: 0.05",40000,1,16247,0.406175,10000,4016,0.401600,,,,0,0,0
4,0.05_0.05,"lr: 0.05, er: 0.05",50000,0,20416,0.408320,10000,4169,0.416900,,,,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2643,random,random,990000,1,313688,0.316857,10000,3207,0.320700,,,,0,0,0
2644,random,random,1000000,0,316854,0.316854,10000,3166,0.316600,,,,0,0,0
2645,random,random,1010000,0,319971,0.316803,10000,3117,0.311700,,,,0,0,0
2646,random,random,1020000,0,323081,0.316746,10000,3110,0.311000,,,,0,0,0


In [21]:
# Write the per-checkpoint game tracker data to disk (without the index) so it
# can be picked up elsewhere (other notebooks, Visual Studio, Tableau, etc.)
blackJackGameTracker_df.to_csv(path_or_buf="blackJackGameTrackerData_EDA.csv", index=False)

In [25]:
# Top 10 LR_ER combos ranked by overall win rate (avgHandsWonRunningTotal).
# Only the row for each combo's final hand (finalLargeIntervalFlagList == 1) is
# considered, i.e. the running total over every hand that combo played.
_isFinalHandRow = blackJackGameTracker_df["finalLargeIntervalFlagList"] == 1
_finalRowsByRunningAvg = blackJackGameTracker_df[_isFinalHandRow].sort_values(
    by=["avgHandsWonRunningTotal"], ascending=False
)
bestBy_avgHandsWonRunningTotal = _finalRowsByRunningAvg.iloc[:10]

_runningTotalColumns = ["lr_er_desc", "handNumber", "avgHandsWonRunningTotal", "handsWonRunningTotal"]
bestBy_avgHandsWonRunningTotal[_runningTotalColumns]

Unnamed: 0,lr_er_desc,handNumber,avgHandsWonRunningTotal,handsWonRunningTotal
101,"lr: 0.05, er: 0.05",1016530,0.424387,431402
611,"lr: 0.1, er: 0.05",1012532,0.423526,428834
203,"lr: 0.05, er: 0.1",1015664,0.421302,427901
713,"lr: 0.1, er: 0.1",1013521,0.419433,425104
1120,"lr: 0.3, er: 0.05",1009853,0.419324,423456
1629,"lr: 0.5, er: 0.05",1008225,0.419071,422518
1222,"lr: 0.3, er: 0.1",1010562,0.415419,419807
2137,"lr: 1, er: 0.05",1006818,0.414875,417704
1730,"lr: 0.5, er: 0.1",1009223,0.414147,417967
2238,"lr: 1, er: 0.1",1007730,0.411392,414572


In [26]:
# Top 10 LR_ER combos ranked by win rate over the final large interval
# (avgHandsWonInFirstFinalLargeInterval on the rows where
# finalLargeIntervalFlagList == 1, i.e. each combo's final hand).
_isFinalHandRow = blackJackGameTracker_df["finalLargeIntervalFlagList"] == 1
_finalRowsByLargeIntervalAvg = blackJackGameTracker_df[_isFinalHandRow].sort_values(
    by=["avgHandsWonInFirstFinalLargeInterval"], ascending=False
)
bestBy_avgHandsWonInFirstFinalLargeInterval = _finalRowsByLargeIntervalAvg.iloc[:10]

_largeIntervalColumns = [
    "lr_er_desc",
    "handNumber",
    "avgHandsWonInFirstFinalLargeInterval",
    "handsWonInFirstFinalLargeInterval",
    "handsInFirstFinalLargeInterval",
]
bestBy_avgHandsWonInFirstFinalLargeInterval[_largeIntervalColumns]

Unnamed: 0,lr_er_desc,handNumber,avgHandsWonInFirstFinalLargeInterval,handsWonInFirstFinalLargeInterval,handsInFirstFinalLargeInterval
101,"lr: 0.05, er: 0.05",1016530,0.43245,43245.0,100000.0
203,"lr: 0.05, er: 0.1",1015664,0.43229,43229.0,100000.0
611,"lr: 0.1, er: 0.05",1012532,0.4276,42760.0,100000.0
305,"lr: 0.05, er: 0.3",1015963,0.42536,42536.0,100000.0
407,"lr: 0.05, er: 0.5",1017338,0.42506,42506.0,100000.0
1629,"lr: 0.5, er: 0.05",1008225,0.42493,42493.0,100000.0
713,"lr: 0.1, er: 0.1",1013521,0.42394,42394.0,100000.0
815,"lr: 0.1, er: 0.3",1014212,0.42369,42369.0,100000.0
1019,"lr: 0.1, er: 1",1018175,0.42313,42313.0,100000.0
1324,"lr: 0.3, er: 0.3",1012870,0.42273,42273.0,100000.0


In [27]:
# Top 10 LR_ER combos ranked by win rate within the last reported interval
# (avgHandsWonInInterval on the rows where finalLargeIntervalFlagList == 1).
# NOTE: on these final-hand rows the interval is whatever was played since the
# previous checkpoint, so handsInInterval can be smaller than a full interval.
_isFinalHandRow = blackJackGameTracker_df["finalLargeIntervalFlagList"] == 1
_finalRowsByIntervalAvg = blackJackGameTracker_df[_isFinalHandRow].sort_values(
    by=["avgHandsWonInInterval"], ascending=False
)
bestBy_avgHandsWonInInterval = _finalRowsByIntervalAvg.iloc[:10]

_intervalColumns = ["lr_er_desc", "handNumber", "avgHandsWonInInterval", "handsWonInInterval", "handsInInterval"]
bestBy_avgHandsWonInInterval[_intervalColumns]

Unnamed: 0,lr_er_desc,handNumber,avgHandsWonInInterval,handsWonInInterval,handsInInterval
917,"lr: 0.1, er: 0.5",1015305,0.438266,2325,5305
1019,"lr: 0.1, er: 1",1018175,0.437309,3575,8175
203,"lr: 0.05, er: 0.1",1015664,0.435381,2466,5664
611,"lr: 0.1, er: 0.05",1012532,0.433254,1097,2532
713,"lr: 0.1, er: 0.1",1013521,0.431412,1519,3521
407,"lr: 0.05, er: 0.5",1017338,0.431044,3163,7338
1120,"lr: 0.3, er: 0.05",1009853,0.429412,4231,9853
101,"lr: 0.05, er: 0.05",1016530,0.428484,2798,6530
305,"lr: 0.05, er: 0.3",1015963,0.426295,2542,5963
1629,"lr: 0.5, er: 0.05",1008225,0.424681,3493,8225
