## Uncertainty 

1. uncertain about the current state
2. uncertain what the action will do

### Porcine Probability 

#### Game Theory 

In [11]:
#------------------------------------
# Utility(state) -> number 1 | 0 => win/lose
# Quality(state, action) -> number

million = 10 ** 6


def Q(state, action, U):
    """Return the expected utility of choosing `action` with wealth `state`.

    'hold' banks a guaranteed one million; 'gamble' is a fair coin flip
    between gaining three million and gaining nothing. U maps a wealth
    figure to its utility. Any other action yields None.
    """
    if action == 'gamble':
        win = U(state + 3 * million)
        lose = U(state)
        return win * 0.5 + lose * 0.5
    if action == 'hold':
        return U(state + 1 * million)
    
def action(state):
    """List the choices open in any state: take the sure million or gamble."""
    return ['hold', 'gamble']

def identity(x):
    """Return x unchanged; as a utility it values money linearly."""
    return x


# Default utility function: the identity.
U = identity

def best_action(state, action, Q, U):
    """Return the legal action whose expected utility is highest.

    `action(state)` lists the candidate moves, `Q(state, a, U)` scores one
    candidate, and `U` is the utility function handed through to Q. Ties
    go to the candidate listed first.
    """
    candidates = action(state)
    return max(candidates, key=lambda choice: Q(state, choice, U))

best_action(100, action, Q, identity)

'gamble'

In [12]:
import math
best_action(100, action, Q, math.log)

'hold'

In [9]:
from functools import update_wrapper

def decorator(d):
    """Make function d a decorator: d wraps a function fn.

    The decorator returned here applies d to fn and then copies fn's
    metadata (name, docstring, ...) onto the result via update_wrapper.
    d's own metadata is likewise copied onto the returned decorator.
    """
    def _d(fn):
        return update_wrapper(d(fn), fn)
    update_wrapper(_d, d)
    return _d


@decorator
def memo(f):
    """Decorator that caches the return value for each call to f(args).
    Then when called again with same args, we can just look it up.

    Only positional arguments are supported. A call whose arguments are
    not hashable cannot be cached and is passed straight through to f.
    """
    cache = {}
    def _f(*args):
        try:
            return cache[args]
        except KeyError:
            cache[args] = result = f(*args)
            return result
        except TypeError:
            # some element of args can't be a dict key: skip the cache, but
            # still unpack args so f receives what the caller passed
            return f(*args)
    return _f

In [16]:
from collections import namedtuple
import random

# --------------------------Pig out----------------------------
# high level concept
# play-pig: fun(A, B) -> A
#   - keep scores
#   - take turns
#   - call strategy -> action
#   - do action -> {state}
#   - roll die, + action -> action
# strategy: fun(state) -> action
#
# middle level concept: 
# (1) current state: scores, pending, players,
# (2) actions we can take: Roll/Hold
#         action: using string 'roll'/'hold'
#         roll(state) -> {states}/ hold(state) -> state
#         roll(state, D) -> state //D -> one of {1,2,3,4,5,6} 
#
# low level concept
# (1) the roll of a die: integer
# (2) the implementation of scores: integer
# (3) the implementation of the players and the players to move
#       move: 0/1   players: strategy     
# (4) the goal: integer
#
# States are represented as a tuple of (p, me, you, pending) where
# p:       an int, 0 or 1, indicating which player's turn it is.
# me:      an int, the player-to-move's current score
# you:     an int, the other player's current score.
# pending: an int, the number of points accumulated on current 
# turn, not yet scored
# ---------------------------------------------------------------

# -----------------------------tools-----------------------------
# namedtuple:
#     from collections import namedtuple
#     State = namedtuple('state', 'p me you pending')
#     s = State(1,2,3,4)
#     s.p => 1, s.me => 2, s.you => 3, s.pending => 4
#
# ---------------------------------------------------------------

# State = namedtuple('State', 'p me you pending')
other = {1:0, 0:1}                          # mapping from player to other player
possible_moves = ['roll', 'hold']           # the moves clueless() picks from
goal = 50                                   # score at which play_pig declares a winner

def hold(state):
    """Return the state reached when the player to move holds.

    The pending points are banked into the mover's score and the turn
    passes, so the tuple is re-expressed from the opponent's viewpoint.
    """
    mover, my_score, their_score, unbanked = state
    banked = my_score + unbanked
    # namedtuple form: State(other[state.p], state.you, state.me + state.pending, 0)
    return (other[mover], their_score, banked, 0)

def roll(state, d):
    """Return the state reached when the player to move rolls a d.

    A 1 is a pig out: pending points are forfeited, the mover scores a
    single point and the turn passes. Any other roll is added to pending
    and the same player stays on move.
    """
    mover, my_score, their_score, unbanked = state
    if d != 1:
        # keep the turn; nothing is banked yet
        return (mover, my_score, their_score, unbanked + d)
    # pig out: swap perspectives for the opponent's turn
    return (other[mover], their_score, my_score + 1, 0)


def hold_at(x):
    """Build a strategy that rolls until the turn is worth stopping.

    The strategy holds once pending reaches x, or as soon as banking the
    pending points would reach the goal; otherwise it rolls. The returned
    function is named 'hold_at(x)' so it prints usefully.
    """
    def strategy(state):
        _, my_score, _, unbanked = state
        if unbanked >= x or my_score + unbanked >= goal:
            return 'hold'
        return 'roll'
    strategy.__name__ = 'hold_at(%d)' % x
    return strategy

def clueless(state):
    """Pick one of possible_moves at random, whatever the state is."""
    move = random.choice(possible_moves)
    return move

def dierolls():
    """Yield an endless stream of random die rolls, each an int from 1 to 6."""
    lowest, highest = 1, 6
    while True:
        yield random.randint(lowest, highest)

def play_pig(A, B, dierolls = dierolls()):
    """Play one game of pig between strategies A and B and return the winner.

    A moves first. On every pass through the loop the player to move is
    asked for a decision: 'hold' banks the pending points, and any other
    answer is treated as a roll using the next value from dierolls. The
    game ends as soon as either score has reached the goal.
    """
    players = [A, B]
    state = (0, 0, 0, 0)
    while True:
        mover, my_score, their_score, _ = state
        if my_score >= goal:
            return players[mover]
        if their_score >= goal:
            return players[other[mover]]
        if players[mover](state) == 'hold':
            state = hold(state)
        else:
            state = roll(state, next(dierolls))

## Optimal Pig

def Q_pig(state, action, Pwin):
    """Return the expected value, to the player on move, of `action` in `state`.

    Pwin scores a state from the viewpoint of whoever is to move in it.
    After a hold or a rolled 1 the opponent is on move, so that outcome is
    worth 1 - Pwin(...) to us; after any other roll we are still on move.
    Actions other than 'hold' and 'roll' yield None.
    """
    if action == 'hold':
        return 1 - Pwin(hold(state))
    if action == 'roll':
        pig_out = 1 - Pwin(roll(state, 1))
        keep_going = sum(Pwin(roll(state, d)) for d in (2, 3, 4, 5, 6))
        return (pig_out + keep_going) / 6
    
def pig_actions(state):
    """List the moves allowed in `state`; holding requires pending points."""
    (_p, _me, _you, pending) = state
    if not pending:
        return ['roll']
    return ['roll', 'hold']

# Shorter game from here on. goal is a module-level name that is read at
# call time, so this rebinding also changes hold_at and play_pig above.
goal = 40

@memo
def Pwin(state):
    """Probability that the player to move wins from `state`.

    Both sides are assumed to play optimally from here on. The result is
    1 when banking the pending points would reach the goal, 0 when the
    opponent is already there, and otherwise the value of the best action.
    """
    (_p, me, you, pending) = state
    if me + pending >= goal:
        return 1
    if you >= goal:
        return 0
    action_values = (Q_pig(state, move, Pwin) for move in pig_actions(state))
    return max(action_values)

def max_wins(state):
    """Optimal pig strategy: choose the action with the highest win probability."""
    legal_moves, move_value, utility = pig_actions, Q_pig, Pwin
    return best_action(state, legal_moves, move_value, utility)
    
def always_roll(state):
    """Strategy that answers 'roll' in every state."""
    decision = 'roll'
    return decision

def always_hold(state):
    """Strategy that answers 'hold' in every state."""
    decision = 'hold'
    return decision


## Pig: maximizing differential

@memo
def win_diff(state):
    """Utility of `state` as a score margin (positive or negative).

    Once either side has reached the goal, the value is the mover's score
    including pending points minus the opponent's score. Otherwise it is
    the value of the best available action as computed by Q_pig.
    """
    (_p, me, you, pending) = state
    game_over = me + pending >= goal or you >= goal
    if game_over:
        return me + pending - you
    return max(Q_pig(state, move, win_diff) for move in pig_actions(state))

def max_diffs(state):
    """Strategy that picks the action with the best expected score margin.

    Works like max_wins, but uses win_diff rather than Pwin as the utility.
    """
    legal_moves, move_value, utility = pig_actions, Q_pig, win_diff
    return best_action(state, legal_moves, move_value, utility)

def play_pig(A, B, dierolls=dierolls()):
    """Play one game of pig between strategies A and B and return the winner.

    A moves first. Each pass through the loop asks the player to move for
    a decision, which must be 'hold' or 'roll'; any other answer forfeits
    the game to the opponent. The game ends as soon as either score has
    reached the goal.
    """
    players = [A, B]
    state = (0, 0, 0, 0)
    while True:
        mover, my_score, their_score, _ = state
        if my_score >= goal:
            return players[mover]
        if their_score >= goal:
            return players[other[mover]]
        choice = players[mover](state)
        if choice == 'roll':
            state = roll(state, next(dierolls))
        elif choice == 'hold':
            state = hold(state)
        else:
            # an illegal answer loses the game on the spot
            return players[other[mover]]
def test():
    """Regression checks for the pig transitions, hold_at, play_pig and max_diffs."""
    for before, after in [((1, 10, 20, 7), (0, 20, 17, 0)),
                          ((0, 5, 15, 10), (1, 15, 15, 0))]:
        assert hold(before) == after

    for before, d, after in [((1, 10, 20, 7), 1, (0, 20, 11, 0)),
                             ((0, 5, 15, 10), 5, (0, 5, 15, 15))]:
        assert roll(before, d) == after

    for x, state, move in [(30, (1, 29, 15, 20), 'hold'),
                           (30, (1, 29, 15, 21), 'hold'),
                           (15, (0, 2, 30, 10), 'roll'),
                           (15, (0, 2, 30, 15), 'hold')]:
        assert hold_at(x)(state) == move

    A, B = hold_at(50), clueless
    rolls = iter([6, 6, 6, 6, 6, 6, 6, 6, 2])
    assert play_pig(A, B, rolls) == A

    # States where max_wins and max_diffs return the same action.
    agree = [((1, 26, 21, 15), "hold"),
             ((1, 23, 36, 7), "roll"),
             ((0, 29, 4, 3), "roll")]
    # States where max_wins and max_diffs return different actions.
    differ = [((0, 36, 32, 5), "roll"),
              ((1, 37, 16, 3), "roll"),
              ((1, 33, 39, 7), "roll"),
              ((0, 7, 9, 18), "hold"),
              ((1, 0, 35, 35), "hold"),
              ((0, 36, 7, 4), "roll"),
              ((1, 5, 12, 21), "hold"),
              ((0, 3, 13, 27), "hold"),
              ((0, 0, 39, 37), "hold")]
    for state, move in agree + differ:
        assert max_diffs(state) == move

    return 'tests pass'

print (test())

tests pass


In [17]:
from collections import namedtuple
State = namedtuple('State', 'p me you pending')
s = State(1,2,3,4)
s.p

1

####  Using tools

In [18]:
states = [(0, me, you, pending) 
          for me in range(41) for you in range(41) for pending in range(41)
         if me + pending <= goal]
len(states)

35301

In [22]:
from collections import defaultdict

r = defaultdict(int)
for s in states: r[max_wins(s), max_diffs(s)] += 1

dict(r)

{('roll', 'roll'): 29741,
 ('hold', 'hold'): 1204,
 ('roll', 'hold'): 3975,
 ('hold', 'roll'): 381}

In [25]:
def story():
    """Print, per pending value, how often the two optimal strategies disagree.

    Every state where max_wins and max_diffs choose different actions is
    tallied under its pending count: in the first column when max_wins is
    the one that rolls, in the second column otherwise.
    """
    tally = defaultdict(lambda: [0, 0])
    for s in states:
        w, d = max_wins(s), max_diffs(s)
        if w == d:
            continue
        _, _, _, pending = s
        column = 0 if w == 'roll' else 1
        tally[pending][column] += 1
    for pending in sorted(tally):
        wrolls, drolls = tally[pending]
        print('{:4d}:{:3d} {:3d}'.format(pending, wrolls, drolls))

#### Conditional Probability

In [34]:
import itertools
from fractions import Fraction

sex = 'BG'

def product(*variables):
    """Return the cartesian product of the variables as a list of strings.

    Each variable is an iterable of string pieces; every combination is
    joined into one str, e.g. product('BG', 'BG') gives
    ['BB', 'BG', 'GB', 'GG'].

    A list is returned rather than a lazy map object, so the sample space
    can be iterated more than once and measured with len() on Python 3.
    """
    return [''.join(combo) for combo in itertools.product(*variables)]

two_kids = product(sex, sex)

one_boy = [s for s in two_kids if 'B' in s]

def two_boys(s):
    """True when exactly two 'B' characters appear in the family string s."""
    boys = s.count('B')
    return boys == 2

def conP(predicate, event):
    """Conditional probability: P(predicate(s) | s in event).

    Returns, as a Fraction, the proportion of states in event for which
    predicate is true. event may be any iterable, including a one-shot
    iterator such as a map or generator; it is materialized once so it can
    be both filtered and counted.

    Raises ZeroDivisionError if event is empty.
    """
    outcomes = list(event)
    favorable = sum(1 for s in outcomes if predicate(s))
    return Fraction(favorable, len(outcomes))

### Problem Set 5 

#### Improving Optimal 

In [40]:
# -----------------
# User Instructions
# 
# In this problem, you will use a faster version of Pwin, which we will call
# Pwin2, that takes a state as input but ignores whether it is player 1 or 
# player 2 who starts. This will reduce the number of computations to about 
# half. You will define a function, Pwin3, which will be called by Pwin2.
#
# Pwin3 will only take me, you, and pending as input and will return the 
# probability of winning. 
#
# Keep in mind that the probability that I win from a position is always
# (1 - probability that my opponent wins).


from functools import update_wrapper

def decorator(d):
    """Turn d into a decorator that preserves function metadata.

    Whatever d returns for a function fn is given fn's name, docstring and
    other metadata via update_wrapper, and the decorator handed back here
    likewise takes on d's own metadata.
    """
    def _d(fn):
        wrapped = d(fn)
        return update_wrapper(wrapped, fn)
    return update_wrapper(_d, d)

@decorator
def memo(f):
    """Decorator that caches the return value for each call to f(args).
    Then when called again with same args, we can just look it up.

    Only positional arguments are supported. A call whose arguments are
    not hashable cannot be cached and is passed straight through to f.
    The cache dict is exposed as the `cache` attribute of the wrapper.
    """
    cache = {}
    def _f(*args):
        try:
            return cache[args]
        except KeyError:
            cache[args] = result = f(*args)
            return result
        except TypeError:
            # some element of args refuses to be a dict key: skip the cache,
            # but still unpack args so f receives what the caller passed
            return f(*args)
    _f.cache = cache
    return _f

goal = 40

def Pwin2(state):
    """Probability that the player to move wins `state`, computed by Pwin3.

    The player index in the state is ignored; only the two scores and the
    pending points are passed on.
    """
    (_p, me, you, pending) = state
    return Pwin3(me, you, pending)

@memo
def Pwin3(me, you, pending):
    """Win probability for the player to move, given both scores and pending.

    Returns 1 when banking the pending points would reach the goal and 0
    when the opponent is already there. Otherwise it is the better of
    rolling and, when there is something pending, holding.
    """
    if me + pending >= goal:
        return 1
    if you >= goal:
        return 0
    # after a rolled 1 or a hold the opponent is on move, hence 1 - Pwin3(...)
    pig_out = 1 - Pwin3(you, me + 1, 0)
    keep_going = sum(Pwin3(me, you, pending + d) for d in (2, 3, 4, 5, 6))
    Proll = (pig_out + keep_going) / 6
    if not pending:
        return Proll
    Phold = 1 - Pwin3(you, me + pending, 0)
    return max(Proll, Phold)
   
def test():
    """Check Pwin2/Pwin3 on decided states and two known probabilities."""
    epsilon = 0.0001  # tolerance so floating point error cannot fail the test
    assert goal == 40
    assert len(Pwin3.cache) <= 50000

    for state, expected in [((0, 42, 25, 0), 1),
                            ((1, 12, 43, 0), 0),
                            ((0, 34, 42, 1), 0)]:
        assert Pwin2(state) == expected

    for state, approx in [((0, 25, 32, 8), 0.736357188272),
                          ((0, 19, 35, 4), 0.493173612834)]:
        assert abs(Pwin2(state) - approx) <= epsilon

    return 'tests pass'

print (test())



tests pass


#### Doubling Pigs 

In [45]:
# -----------------
# User Instructions
# 
# In this problem, we introduce doubling to the game of pig. 
# At any point in the game, a player (let's say player A) can
# offer to 'double' the game. Player B then has to decide to 
# 'accept', in which case the game is played through as normal,
# but it is now worth two points, or 'decline,' in which case
# player B immediately loses and player A wins one point. 
#
# Your job is to write two functions. The first, pig_actions_d,
# takes a state (p, me, you, pending, double), as input and 
# returns all of the legal actions.
# 
# The second, strategy_d, is a strategy function which takes a
# state as input and returns one of the possible actions. This
# strategy needs to beat hold_20_d in order for you to be
# marked correct. Happy pigging!

import random

def pig_actions_d(state):
    """List the legal actions in a doubling-pig state.

    The state is (p, me, you, pending, double), where double is 1 or 2 for
    the current value of the game, or 'double' while an offer to double is
    waiting for an answer.

    - With an offer waiting, only 'accept' and 'decline' are legal.
    - Otherwise 'roll' is always legal and 'hold' needs pending points.
    - 'double' may be added only while the game is still worth 1.
    """
    (_p, _me, _you, pending, double) = state
    if double == 'double':
        return ['accept', 'decline']
    actions = ['roll', 'hold'] if pending else ['roll']
    if double == 1:
        actions.append('double')
    return actions

# def strategy_d(state):
    # your code here


## You can use the code below, but don't need to modify it.

def hold_20_d(state):
    """Baseline strategy: hold at 20 pending, always accept, never double.

    It also holds as soon as banking the pending points would reach goal.
    """
    (_p, me, _you, pending, double) = state
    if double == 'double':
        return 'accept'
    if pending >= 20 or me + pending >= goal:
        return 'hold'
    return 'roll'
    
def clueless_d(state):
    """Pick at random among the actions that are legal in `state`."""
    legal = pig_actions_d(state)
    return random.choice(legal)
 
def dierolls():
    """Generate an endless sequence of random die rolls (ints from 1 to 6)."""
    while True:
        face = random.randint(1, 6)
        yield face

def play_pig_d(A, B, dierolls=dierolls()):
    """Play one game of doubling pig between strategies A and B.

    A moves first. Each pass through the loop asks the player to move for
    an action and applies it with do(). When either score has reached the
    goal, returns a pair: the winning strategy and the final value of the
    state's double component.
    """
    players = [A, B]
    state = (0, 0, 0, 0, 1)
    while True:
        (mover, my_score, their_score, _, stake) = state
        if my_score >= goal:
            return players[mover], stake
        if their_score >= goal:
            return players[other[mover]], stake
        state = do(players[mover](state), state, dierolls)

## No more roll() and hold(); instead, do:

def do(action, state, dierolls):
    """Return the state that results from doing `action` in `state`.

    An action that pig_actions_d does not allow produces a state in which
    the opponent already has the goal score. A die roll is drawn from
    dierolls only for 'roll'.
    """
    (p, me, you, pending, double) = state
    if action not in pig_actions_d(state):
        return (other[p], goal, 0, 0, double)
    if action == 'hold':
        return (other[p], you, me+pending, 0, double)
    if action == 'double':
        # hand the decision to the opponent, keeping scores and pending
        return (other[p], you, me, pending, 'double')
    if action == 'accept':
        # play continues with the doubler on move, now worth two points
        return (other[p], you, me, pending, 2)
    if action == 'decline':
        # the doubler wins at once, for one point
        return (other[p], goal, 0, 0, 1)
    if action == 'roll':
        d = next(dierolls)
        if d != 1:
            return (p, me, you, pending+d, double)  # accumulate die in pending
        return (other[p], you, me+1, 0, double)     # pig out; other player's turn

goal = 40
other = {1:0, 0:1}

def strategy_compare(A, B, N=1000):
    """Takes two strategies, A and B, as input and returns the percentage
    of points won by strategy A.

    N games of doubling pig are played, alternating who moves first, and
    each game's point value is credited to its winner. A one-line summary
    is printed before the percentage is returned.

    Raises ZeroDivisionError if no points were awarded at all (e.g. N == 0).
    """
    A_points, B_points = 0, 0
    for i in range(N):
        if i % 2 == 0:  # take turns with who goes first
            winner, points = play_pig_d(A, B)
        else:
            winner, points = play_pig_d(B, A)
        # Credit the win by identity rather than by __name__: two different
        # strategies can share a name (e.g. closures from one factory), and
        # a name match would hand all of B's wins to A.
        if winner is A:
            A_points += points
        else:
            B_points += points
    A_percent = 100*A_points / float(A_points + B_points)
    print ('In %s games of pig, strategy %s took %s percent of the points against %s.' % (N, A.__name__, A_percent, B.__name__))
    return A_percent
    
def test():
    """Check pig_actions_d against the legal-action sets for four states."""
    expected = [((0, 2, 3, 0, 1), ['roll', 'double']),
                ((1, 20, 30, 5, 2), ['hold', 'roll']),
                ((0, 5, 5, 5, 1), ['roll', 'hold', 'double']),
                ((1, 10, 15, 6, 'double'), ['accept', 'decline'])]
    for state, legal in expected:
        assert set(pig_actions_d(state)) == set(legal)
#     assert strategy_compare(strategy_d, hold_20_d) > 60 # must win 60% of the points
    return 'test passes'

print (test())

test passes


####  Foxes and Hens

In [49]:
# -----------------
# User Instructions
# 
# This problem deals with the one-player game foxes_and_hens. This 
# game is played with a deck of cards in which each card is labelled
# as a hen 'H', or a fox 'F'. 
# 
# A player will flip over a random card. If that card is a hen, it is
# added to the yard. If it is a fox, all of the hens currently in the
# yard are removed.
#
# Before drawing a card, the player has the choice of two actions, 
# 'gather' or 'wait'. If the player gathers, she collects all the hens
# in the yard and adds them to her score. The drawn card is discarded.
# If the player waits, she sees the next card. 
#
# Your job is to define two functions. The first is do(action, state), 
# where action is either 'gather' or 'wait' and state is a tuple of 
# (score, yard, cards). This function should return a new state with 
# one less card and the yard and score properly updated.
#
# The second function you define, strategy(state), should return an 
# action based on the state. This strategy should average at least 
# 1.5 more points than the take5 strategy.

import random

def foxes_and_hens(strategy, foxes=7, hens=45):
    """Play one game of foxes and hens with `strategy` and return the result.

    A state is a tuple of (score so far, hens in the yard, remaining deck).
    The strategy is asked for an action until the deck is empty; the
    result is the score plus the hens still in the yard at that point.
    """
    state = (0, 0, 'F'*foxes + 'H'*hens)
    while True:
        score, yard, cards = state
        if not cards:
            return score + yard
        state = do(strategy(state), state)

def do(action, state):
    """Apply `action` to `state` and return the new (score, yard, cards).

    One card is drawn at random and removed from the deck. 'gather' moves
    the yard into the score and discards the drawn card. 'wait' adds a
    hen to the yard on 'H' and empties the yard on 'F'. Anything else
    leaves the state unchanged.
    """
    (score, yard, cards) = state
    card = random.choice(cards)
    # drop only the first occurrence of the drawn card
    before, _, after = cards.partition(card)
    cards_left = before + after
    if action == 'gather':
        return (score + yard, 0, cards_left)
    if action != 'wait':
        return state
    if card == 'H':
        return (score, yard + 1, cards_left)
    if card == 'F':
        return (score, 0, cards_left)
    return state
    
def take5(state):
    """Baseline strategy: wait until the yard holds 5 hens, then gather."""
    (_score, yard, _cards) = state
    return 'gather' if yard >= 5 else 'wait'

def average_score(strategy, N=1000):
    """Return the mean foxes_and_hens result of `strategy` over N games."""
    total = sum(foxes_and_hens(strategy) for _ in range(N))
    return total / float(N)

def superior(A, B=take5):
    """Does strategy A beat B's average score by more than 1.5 points?

    Each strategy's average is estimated separately with average_score.
    """
    margin = average_score(A) - average_score(B)
    return margin > 1.5

def strategy(state):
    """Gather at 3 hens while foxes remain in the deck; otherwise wait."""
    (_score, yard, cards) = state
    foxes_remain = 'F' in cards
    if foxes_remain and yard >= 3:
        return 'gather'
    return 'wait'

def test():
    """Check do() for both actions and that strategy is superior to take5."""
    gather = do('gather', (4, 5, 'F'*4 + 'H'*10))
    assert gather in [(9, 0, 'F'*3 + 'H'*10),
                      (9, 0, 'F'*4 + 'H'*9)]

    wait = do('wait', (10, 3, 'FFHH'))
    assert wait in [(10, 4, 'FFH'),
                    (10, 0, 'FHH')]

    assert superior(strategy)
    return 'tests pass'

print (test())   

tests pass
