In [92]:
import copy
import sys
import collections

# Set the interpreter's maximum recursion depth explicitly.
# NOTE(review): 1000 is normally CPython's default already -- confirm this call is still needed.
sys.setrecursionlimit(1000)

class MDP:
    """Abstract Markov decision process.

    Subclasses provide the start state, the legal actions, the transition
    model and the discount factor; computeStates() then enumerates every
    state reachable from the start state.
    """

    def startState(self):
        """Return the start state."""
        raise NotImplementedError("Override me")

    def actions(self, state):
        """Return the set of actions possible from |state|."""
        raise NotImplementedError("Override me")

    def succAndProbReward(self, state, action):
        """Return the edges leaving |state| under |action|.

        The result is a list of (newState, prob, reward) tuples where, in the
        notation from class, state = s, action = a, newState = s',
        prob = T(s, a, s') and reward = Reward(s, a, s').
        If IsEnd(state), return the empty list.
        """
        raise NotImplementedError("Override me")

    def discount(self):
        """Return the discount factor."""
        raise NotImplementedError("Override me")

    def computeStates(self):
        """Populate |self.states| with every state reachable from the start.

        Helper for MDP algorithms so they know which states need values and
        policies.  The search follows every action of every discovered state.
        """
        self.states = set()
        frontier = []
        self.states.add(self.startState())
        frontier.append(self.startState())
        while frontier:
            current = frontier.pop()
            for action in self.actions(current):
                for successor, _prob, _reward in self.succAndProbReward(current, action):
                    if successor in self.states:
                        continue
                    # First visit: record it and schedule it for expansion.
                    self.states.add(successor)
                    frontier.append(successor)


class BlackjackMDP(MDP):
    def __init__(self, cardValues=('2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'), multiplicity=2,
                 threshold=21, bet=1, blackjack=1.5):
        """
        cardValues: list of integers (face values for each card included in the deck)
        multiplicity: single integer representing the number of cards with each face value
        threshold: maximum number of points (i.e. sum of card values in hand) before going bust
        peekCost: how much it costs to peek at the next card
        """
        self.cardValues = cardValues
        self.multiplicity = multiplicity
        self.threshold = threshold
        self.bet = bet
        self.blackjack = blackjack

    # State representation for our Blackjack game.
    # Each state is a tuple with 3 elements:
    #   -- The first element is the player's hand encoded as a string: the hand
    #      total, followed by one 'A' per ace still counted as 11, then 'D' and 'S'
    #      when the freshly dealt two-card hand could be doubled / split.
    #      It is '' before the initial deal.
    #   -- The second element is the dealer's hand ('' before the initial deal),
    #      using the same total-plus-'A' encoding.
    #   -- The third element is a tuple giving counts for each of the cards remaining
    #      in the deck, or None if the game is over (e.g. when the player goes bust).
    def initialCardCount(self):
        return (self.multiplicity,) * len(self.cardValues)

    #####################################################################################################
    def startState(self):
        return ('', '', self.initialCardCount())

    def sortCards(self, cardValue, deepAnalysis = True):
        if cardValue == '':
            return 0
        i = 0
        split = False
        double = False

        if cardValue[-1].isdigit():
            value = int(cardValue)
        else:
            for j in range(len(cardValue)):
                i += 1
                if not cardValue[j].isdigit():
                    value = int(cardValue[:i])
                else: assert ValueError

        if deepAnalysis:
            if 'S' in cardValue:
                split = True
            if 'D' in cardValue:
                double = True

            acecounter = cardValue.count('A')
            return value, split, acecounter, double
        else:
            return value




    #####################################################################################################
    # Return set of actions possible from |state|.
    # You do not need to modify this function.
    # All logic for dealing with end states should be placed into the succAndProbReward function below.
    def actions(self, state):
        cardValue, DealerCards, CardsRemaining = state

        if cardValue == '':
            return ['Begin']

        value, split, _, double = self.sortCards(cardValue)

        if double and split:
            return ['Draw', 'Stay']

        elif split:
            return ['Draw', 'Stay']  # Will implement split, double, later

        elif double:
            return ['Draw', 'Stay']

        elif value is int:
            return ['Draw', 'Stay']

        else:
            raise NotImplemented('error')

    # Compute value of cards given the face value of each card from the state
    def cards_value(self, card_state):
        acecounter = card_state.count('A')

        if card_state[-1].isdigit():
            value = int(card_state)

        else:
            i = 0
            for j in range(len(card_state)):
                i += 1
                if not card_state[j].isdigit():
                    value = int(card_state[:i])
                else:
                    assert ValueError('Shouldnt enter here in count, value may be equal to zero')

        while value > self.threshold and acecounter > 0:
            value -= 10
            acecounter -= 1

        return value
    
    def old_card_value(self, card_state):
        aces = card_state.count('A')
        
        if card_state[-1].isdigit():
            value = int(card_state)

        else:
            for j in range(len(card_state)):
                if not card_state[j].isdigit():
                    value = int(card_state[:j])
                else:
                    assert ValueError('Shouldnt enter here in count, value may be equal to zero')
        return value, aces

    ###############################################################################################################
    def player_draw(self, card_state, CardsRemaining, prob_card):
        # Return hand of one or two cards, probabilities associated with each hand
        card_states = collections.defaultdict(float)
        card_value, aces = self.old_card_value(card_state)

        for i in range(len(self.cardValues)):
            if prob_card[i] != 0:
                CardsRemaining_lst = list(CardsRemaining)
                CardsRemaining_lst[i] += -1
                new_card_state = self.createDrawState(self.cardValues[i], card_value, acecounter=aces)
                card_states[(new_card_state, tuple(CardsRemaining_lst))] += prob_card[i]

        return card_states

    ##############################################################################################################
    def dealer_continuous_draw(self, dealerShow, CardsRemaining, prob_card, old_prob):
        """Extend dealer hands by one card and weight them by probability.

        For every index j of |dealerShow| and every card index i whose
        prob_card[i] is non-zero, one entry is appended to the hand list and
        one to the probability list (prob_card[i] * old_prob[j]).

        Returns (cardvalues, tuple(CardsRemaining), new_probability).

        NOTE(review): nothing in the visible part of this file calls this
        method, and several details look unintended -- confirm before use:
          * the appended card is self.cardValues[j], not self.cardValues[i];
          * the whole of |dealerShow| is concatenated, not dealerShow[j];
          * the single CardsRemaining list is decremented cumulatively across
            all iterations instead of per outcome;
          * |remaining| collects references to that same list and is never
            returned.
        """
        cardvalues = []
        new_probability = []
        remaining = []
        # Work on a mutable copy; every decrement below hits this one list.
        CardsRemaining = list(CardsRemaining)

        for j in range(len(dealerShow)):
            for i in range(len(self.cardValues)):
                if prob_card[i] != 0:
                    CardsRemaining[i] += -1
                    cardvalues.append(dealerShow + self.cardValues[j])
                    # Appends the shared list object, not a snapshot.
                    remaining.append(CardsRemaining)
                    new_probability.append(prob_card[i] * old_prob[j])

        return cardvalues, tuple(CardsRemaining), new_probability

    def dealer_single_draw(self, dealerCards, CardsRemaining, prob_card):
        CardsRemaining_tup = tuple(CardsRemaining)
        cardValues = []
        remaining = []
        probability = []
        dealerCards = list(dealerCards)

        for i in range(len(self.cardValues)):
            if prob_card[i] != 0:
                CardsRemaining = list(CardsRemaining_tup)
                CardsRemaining[i] += -1
                tempvalues = copy.copy(dealerCards)
                tempvalues[i] = dealerCards[i] + 1
                cardValues.append(tuple(tempvalues))
                remaining.append(CardsRemaining)
                probability.append(prob_card[i])
        return tuple(cardValues), tuple(remaining), probability

    def createDrawState(self, cards, prev_val=0, specialplayer=False, acecounter=0):
        value = prev_val

        for i in range(len(cards)):
            if cards[i] in ['T', 'J', 'Q', 'K']:
                value += 10
            elif cards[i] in ['A']:
                acecounter += 1
                value += 11
            else:
                value += int(cards[i])

        while value > self.threshold and acecounter > 0:
            value -= 10
            acecounter -= 1
        state = str(value)
        state += acecounter*'A'

        # Double and split options available if player has two cards
        if specialplayer:
            state += 'D'
            if cards[0] == cards[1]:
                state += 'S'
        
        return state

    def initial_draw(self, CardsRemaining):
        draws = []
        probabilities = []
        remaining = []
        
        for i in range(len(self.cardValues)):
            
            
            if CardsRemaining[i] > 0:
                counts = list(CardsRemaining)
                number = sum(CardsRemaining)
                prob_i = counts[i]/number
                counts[i] += -1
                number += -1
                
                for j in range(len(self.cardValues)):
                    if counts[j] > 0:
                        count1 = copy.copy(counts)
                        prob_j = count1[j] / number
                        count1[j] += -1
                        number += -1
                        for k in range(len(self.cardValues)):
                            if count1[k] > 0:
                                count2 = copy.copy(count1)
                                prob_k = count2[k] / number
                                number += -1
                                count2[k] += -1
                                for q in range(len(self.cardValues)):  
                                    if count2[q] > 0:
                                        count3 = copy.copy(count2)
                                        prob_q = count3[q] / number
                                        count3[q] += -1
                                        remaining.append(count3)
                                        draws.append(self.cardValues[i] + self.cardValues[j] + self.cardValues[k] +
                                                     self.cardValues[q])
                                        probabilities.append(prob_i*prob_j*prob_k*prob_q)
                                number += 1
                        number+=1
                number += 1
                                
                                        
                                        

        state_prob = collections.defaultdict(float)
        for i, draw in enumerate(draws):
            player = self.createDrawState(draw[0] + draw[2], specialplayer=True)
            dealer = self.createDrawState(draw[1] + draw[3], specialplayer=False)
            state_prob[(player, dealer, tuple(remaining[i]))] += probabilities[i]

        return state_prob, remaining


    def succAndProbReward(self, state, action):
        result = []
        cardValue, dealerCards, CardsRemaining = state
        player_value = self.cards_value(cardValue)

        # If number cards tuple is set to none, end state is reached and we return and empty result
        if CardsRemaining is None:
            return result

        # If the action is take, we enter here
        if action == 'Draw':
            prob_card = [float(i) / sum(CardsRemaining) for i in CardsRemaining]
            playerStates = self.player_draw(cardValue, CardsRemaining, prob_card)
            for i, key in range(enumerate(playerStates.keys())):
                # Put cardpairs in result with cardsRemaining, probabilities, no reward, or negative bet
                if self.cards_value(key[0]) > 21:
                    result.append(((key[0], dealerCards, None), playerStates[key], -self.bet))
                else:
                    result.append(((key[0], dealerCards, tuple(key[1])), playerStates[key], 0))
            return result

        # If action is stay, enter here
        elif action == 'Stay':
            # Draw all dealer cards, until over 17
            dealer_value = self.cards_value(dealerCards)
            queue = collections.defaultdict(float)
            queue[(dealerCards, CardsRemaining, 1)]
            finalStates = collections.defaultdict(float)
            
            # If less than 17, dealer draws a card, we take action
            if self.cards_value(currentState) < 17:
                while queue is True:
                    currentState = queue.pop(0)
                    probability = [float(i) / sum(currentState[1]) for i in sum(currentState[1])]
                    newDealerStates = self.player_draw(currentState[0], currentState[1], probability)
                    for state, prob in newDealerStates.items():
                        new_value = self.cards_value(state[0])
                        if new_value < 17:
                            queue.append((state[0], state[1], currentState[2]*prob))
                        else:
                            finalStates[new_value] += currentState[2]*prob
            else:
                finalStates[self.cards_value(currentState)] += 1
                

            for dealervalue, prob in finalStates.items():
                if 17 <= dealervalue <= 21:
                    if dealervalue > playervalue:
                        result.append(((cardValue, dealervalue, None), prob, -self.bet))
                    elif dealervalue < playervalue:
                        result.append(((cardValue, dealervalue, None), prob, self.bet))
                    else:
                        result.append(((cardValue, dealervalue, None), prob, 0))
                
                elif dealervalue > 21:
                    result.append(((cardValue, dealervalue, None), prob, self.bet))

                else:
                    raise ValueError('Shouldnt have dealer less than this value')

            return result



        # elif action == 'DealerDraw':
        #     dealercards, CardsRemaining, probabilities = self.dealer_single_draw(dealerCards, CardsRemaining, prob_card)
        #
        #     if sum(dealercards[0]) == 1:
        #         for i in range(len(dealercards)):
        #             # Put cardpairs in result with cardsRemaining, probabilities, no reward, or negative bet
        #             result.append(((cardValue, dealercards[i], tuple(CardsRemaining[i])), probabilities[i], 0))
        #         return result
        #
        #     elif sum(dealercards[0]) == 2:
        #         for i in range(len(dealercards)):
        #             # Put cardpairs in result with cardsRemaining, probabilities, no reward, or negative bet
        #             playervalue = self.cards_value(cardValue)
        #             dealervalue = self.cards_value(dealercards[i])
        #
        #             if playervalue > 21:
        #                 raise ValueError("Yo, u screwed up, can't get over 21 with 2 cards")
        #
        #             elif dealervalue == 21 and playervalue == 21:
        #                 result.append(((cardValue, dealercards[i], None), probabilities[i], 0))
        #
        #             elif playervalue == 21 and dealervalue != 21:
        #                 result.append(((cardValue, dealercards[i], None), probabilities[i], self.blackjack * self.bet))
        #
        #             elif playervalue != 21 and dealervalue == 21:
        #                 result.append(((cardValue, dealercards[i], None), probabilities[i], -self.bet))
        #
        #             elif playervalue < 21 and dealervalue < 21:
        #                 result.append(((cardValue, dealercards[i], tuple(CardsRemaining[i])), probabilities[i], 0))
        #
        #             else:
        #                 raise ValueError("Shouldn't be anything over 21 here")
        #             return result

        elif action == 'Begin':
            state_prob = self.initial_draw(CardsRemaining)

            for i, key in enumerate(state_prob.keys()):
                playervalue = self.cards_value(key[0])
                dealervalue = self.cards_value(key[0])

                if dealervalue == 21 and playervalue == 21:
                    result.append(((key[0], key[1], None), state_prob[key], 0))
                elif playervalue == 21 and dealervalue != 21:
                    result.append(((key[0], key[1], None), state_prob[key], self.blackjack * self.bet))

                elif playervalue != 21 and dealervalue == 21:
                    result.append(((key[0], key[1], None), state_prob[key], -self.bet))

                elif playervalue < 21 and dealervalue < 21:
                    result.append(((key[0], key[1], tuple(key[2])), state_prob[key], 0))
                elif playervalue > 21 or dealervalue > 21:
                    raise ValueError("Yo, u screwed up, can't get over 21 with 2 cards")

                else:
                    raise ValueError('Why you in here line 426')

            return result
        else:
            raise ValueError("Shouldn't be calling Dealer Draw Here")


#####################################################################################################################

    def discount(self):
        return 1


In [93]:
# Build the game with the constructor defaults (13 faces, two cards of each).
a = BlackjackMDP()

In [94]:
# Per-face draw probabilities for the untouched deck (not used by the calls below).
probability = [float(i) / sum(a.initialCardCount()) for i in a.initialCardCount()]
# Enumerate every opening deal from the full deck.
result, remaining = a.initial_draw(a.initialCardCount())
print(result)
# Sanity check: the deal probabilities are expected to sum to 1 (up to float error).
print(sum(result.values()))


defaultdict(<class 'float'>, {('5D', '5', (0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 4.459308807134894e-05, ('5D', '6', (0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '7', (0, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '8', (0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '9', (0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '10', (0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '11', (0, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '12', (0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2)): 8.918617614269788e-05, ('5D', '12', (0, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2)): 8.918617614269788e-05, ('5D', '12', (0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2)): 8.918617614269788e-05, ('5D', '12', (0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2)): 8.918617614269788e-05, ('5D', '13A', (0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1)): 8.918617614269788e-05, ('6D', '5', (0, 1, 1, 2, 2, 2, 2, 2, 2

In [97]:
# Per-face draw probabilities for the untouched deck.
probability = [float(i) / sum(a.initialCardCount()) for i in a.initialCardCount()]
# Outcomes of drawing one card onto the hand '15D' from the full deck.
result = a.player_draw('15D', a.initialCardCount(), probability)

print(result)

defaultdict(<class 'float'>, {('17', (1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('18', (2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('19', (2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('20', (2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('21', (2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('22', (2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('23', (2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2)): 0.07692307692307693, ('24', (2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2)): 0.07692307692307693, ('25', (2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2)): 0.07692307692307693, ('25', (2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2)): 0.07692307692307693, ('25', (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2)): 0.07692307692307693, ('25', (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2)): 0.07692307692307693, ('16', (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1)): 0.07692307692307693})
