In [40]:
import numpy as np
# Seed NumPy's global random generator so the np.random.choice draws used
# for dealing cards are reproducible from run to run.
np.random.seed(1)

'black'

In [310]:
from functools import reduce
class Card(object):
    """A single card with a numeric value and a color.

    A card takes part in addition through ``__radd__``: adding a card to a
    running total increases the total by ``number``, unless the card is
    ``'red'``, in which case ``number`` is subtracted. This is what lets
    ``sum(cards)`` produce the signed total of a hand.

    Args:
        number: Face value of the card. Defaults to 0.
        color: Color of the card (e.g. ``'red'`` or ``'black'``). Defaults to
            ``None`` until assigned.
    """

    def __init__(self, number=0, color=None):
        self.number = number
        self.color = color

    def __radd__(self, other):
        """Return ``other`` adjusted by this card (minus if red, else plus)."""
        if self.color == 'red':
            return other - self.number
        return other + self.number

    def __repr__(self):
        """Return ``'<number>:<color>'``.

        The color goes through ``format`` rather than string concatenation so
        a card whose color is still ``None`` can be printed without raising
        ``TypeError``.
        """
        return '{}:{}'.format(self.number, self.color)


class Player(object):
    """A participant holding a hand of cards and an accumulated reward."""

    def __init__(self):
        self.total_rewards = 0
        self.cards = []

    @property
    def aggregate(self):
        """Running total of the hand, starting from 0 and adding each card in order."""
        total = 0
        for held in self.cards:
            # ``total + held`` dispatches to the card's reflected add when the
            # item is not a plain number, exactly like ``sum`` does.
            total = total + held
        return total

    def drawcard(self, start=False):
        """Draw a random card and append it to the hand.

        The number is drawn first via ``np.random.choice(10) + 1``. When
        ``start`` is truthy the card is always black and no second random
        draw happens; otherwise the color is drawn with probability 1/3 for
        red and 2/3 for black.
        """
        drawn = Card()
        drawn.number = np.random.choice(10) + 1
        if start:
            drawn.color = 'black'
        else:
            drawn.color = np.random.choice(['red', 'black'], p=[1/3, 2/3])
        self.cards.append(drawn)

    def reward(self, r):
        """Add ``r`` to the accumulated reward."""
        self.total_rewards += r

    def bust(self):
        """Return True when the hand total lies outside the range 1..21."""
        total = self.aggregate
        within_limits = 1 <= total <= 21
        return not within_limits

    def __repr__(self):
        """Return ``'<aggregate>:<list of cards>'``."""
        return ':'.join((str(self.aggregate), str(self.cards)))
    
class Dealer(Player):
    """A Player that follows a fixed policy: draw while the total is below a threshold."""

    def __init__(self):
        # Delegate to Player instead of duplicating its attribute setup, so
        # the dealer always starts with the same state as any other player.
        super().__init__()

    def play(self, threshold=17):
        """Take one dealer turn.

        Args:
            threshold: The dealer draws while its aggregate is strictly below
                this value. Defaults to 17.

        Returns:
            True if a card was drawn (the caller may call ``play`` again),
            False if the dealer stands.
        """
        if self.aggregate < threshold:
            self.drawcard()
            return True
        return False


In [311]:
def step(state, action):
    """Apply one player action to the game and return ``(state, reward)``.

    Args:
        state: Dict with a ``'player'`` and a ``'dealer'`` entry, plus an
            optional ``'terminated'`` flag. It is mutated in place.
        action: ``'hit'`` makes the player draw a card. Any other value is
            treated as sticking: the dealer plays out its hand and the game
            is settled.

    Returns:
        A ``(state, reward)`` tuple. ``reward`` is -1 when the player busts
        or ends with a lower total than the dealer, 1 when the dealer busts
        or the player ends with a higher total, and 0 otherwise (non-busting
        hit, draw, or a call on an already terminated game).
    """
    if state.get('terminated'):
        print('TERMINATED')
        # Return the usual (state, reward) pair rather than None so callers
        # that unpack the result do not crash on a finished game.
        return state, 0

    if action == 'hit':
        player = state['player']
        player.drawcard()

        if not player.bust():
            # Game continues; nothing is credited to the player yet.
            return state, 0

        print('player bust')
        state['terminated'] = True
        player.reward(-1)
        return state, -1

    # Any non-'hit' action: the dealer keeps playing until it stands.
    dealer = state['dealer']
    while dealer.play():
        pass

    state['terminated'] = True
    player = state['player']
    if dealer.bust():
        print('dealer bust')
        reward = 1
    else:
        p_agg = player.aggregate
        d_agg = dealer.aggregate
        if p_agg > d_agg:
            reward = 1
        elif p_agg < d_agg:
            reward = -1
        else:
            reward = 0

    player.reward(reward)
    return state, reward

In [325]:
# Create the two participants and the shared game state.
player, dealer = Player(), Dealer()
state = {'player': player, 'dealer': dealer}

# Each participant draws an opening card (start=True), player first.
for participant in (player, dealer):
    participant.drawcard(True)

In [326]:
# Show both hands; each participant prints as '<aggregate>:<cards>'.
print(state)

{'player': 4:[4:black], 'dealer': 4:[4:black]}


In [327]:
# Apply a single 'hit' action, then show the resulting state and reward.
state, reward = step(state, 'hit')
print(state, reward)

{'player': 11:[4:black, 7:black], 'dealer': 4:[4:black]} 0
