Copyright 2024 Patrick Riley <patriley@gmail.com>

In [57]:
# Reload imported modules automatically before each cell runs, so edits to
# pennydropsolver are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
import numpy as np

import pennydropsolver

In [62]:
def run_from_state(world, state, seed=None):
    """Play one random game from ``state``, displaying every step.

    At each non-terminal state an allowed action is drawn uniformly at random,
    then one of that action's outcomes is sampled according to its ``prob``.
    The state, the chosen action and the sampled outcome are shown with
    ``display`` as the game advances. The terminal state itself is not shown.

    Args:
        world: Model exposing ``is_state_terminal(state)``,
            ``allowed_actions(state)`` and ``do_action(state, action)``; the
            outcomes of ``do_action`` must carry ``prob`` and ``next_state``.
        state: State to start the playthrough from.
        seed: Optional seed (or an existing ``numpy.random.Generator``) handed
            to ``np.random.default_rng``. ``None`` (the default) keeps the
            previous behaviour of a fresh, unpredictable generator; pass a
            value to make the playthrough reproducible.

    Returns:
        None. The playthrough is reported through ``display`` only.
    """
    rng = np.random.default_rng(seed)
    while not world.is_state_terminal(state):
        display(state)
        # Pick by index so the action object itself comes back unchanged
        # instead of being coerced through a NumPy array.
        actions = list(world.allowed_actions(state))
        act = actions[rng.integers(len(actions))]
        all_outcomes = list(world.do_action(state, act))
        outcome_probs = [o.prob for o in all_outcomes]
        outcome = all_outcomes[rng.choice(len(all_outcomes), p=outcome_probs)]
        display(act, outcome)
        state = outcome.next_state
    

In [63]:
# Model with num_players=2 and num_spots=6, used by the random playthroughs below.
world = pennydropsolver.GameModel(num_players=2, num_spots=6)

In [64]:
# Random playthrough starting at num_out=0 with player 0 on their first roll.
run_from_state(world, pennydropsolver.GameState(num_out=0, player=0, is_first=True))

GameState(num_out=0, player=0, is_first=True)

<Action.ROLL: 0>

ActionResult(prob=0.16666666666666666, reward=-1, next_state=GameState(num_out=0, player=0, is_first=False))

GameState(num_out=0, player=0, is_first=False)

<Action.ROLL: 0>

ActionResult(prob=0.8333333333333334, reward=-1, next_state=GameState(num_out=1, player=0, is_first=False))

GameState(num_out=1, player=0, is_first=False)

<Action.PASS: 1>

ActionResult(prob=1.0, reward=0, next_state=GameState(num_out=1, player=1, is_first=True))

GameState(num_out=1, player=1, is_first=True)

<Action.ROLL: 0>

ActionResult(prob=0.16666666666666666, reward=0, next_state=GameState(num_out=-1, player=-1, is_first=False))

In [65]:
# Same playthrough, but starting with player 1 on their first roll.
run_from_state(world, pennydropsolver.GameState(num_out=0, player=1, is_first=True))

GameState(num_out=0, player=1, is_first=True)

<Action.ROLL: 0>

ActionResult(prob=0.8333333333333334, reward=0, next_state=GameState(num_out=1, player=1, is_first=False))

GameState(num_out=1, player=1, is_first=False)

<Action.ROLL: 0>

ActionResult(prob=0.6666666666666667, reward=0, next_state=GameState(num_out=2, player=1, is_first=False))

GameState(num_out=2, player=1, is_first=False)

<Action.PASS: 1>

ActionResult(prob=1.0, reward=0, next_state=GameState(num_out=2, player=0, is_first=True))

GameState(num_out=2, player=0, is_first=True)

<Action.ROLL: 0>

ActionResult(prob=0.5000000000000001, reward=-1, next_state=GameState(num_out=3, player=0, is_first=False))

GameState(num_out=3, player=0, is_first=False)

<Action.ROLL: 0>

ActionResult(prob=0.5, reward=3, next_state=GameState(num_out=-1, player=-1, is_first=False))

In [116]:
def solve_world(world):
    """Solve ``world`` and display three views of the resulting value table.

    Builds a ``pennydropsolver.ValueTable`` for ``world``, runs its iterative
    value updates, converts the table to a DataFrame and then prints a heading
    followed by a filtered slice of that frame for each of:

    1. player 0 rows where ``is_first`` is False,
    2. player 0 rows where ``is_first`` is True,
    3. player 1 rows where ``is_first`` is True.

    Args:
        world: Game model accepted by ``pennydropsolver.ValueTable``.

    Returns:
        None. Results are reported through ``print`` and ``display`` only.
    """
    value_table = pennydropsolver.ValueTable(world)
    value_table.full_iterative_value_updates()
    df = value_table.to_dataframe()

    # Name the row filters once; they are combined differently per view.
    is_first = df["is_first"]
    is_player_0 = df["player"] == 0
    is_player_1 = df["player"] == 1

    print("\nThe value of the actions for the places you actually have a choice")
    display(df[is_player_0 & ~is_first])
    print("\nThe value when you are first passed the box")
    display(df[is_player_0 & is_first])
    print("\nThe value to you when you pass the box to the next player")
    display(df[is_player_1 & is_first])

# Players 2, 6 spots

In [118]:
# Solve the model with num_players=2, num_spots=6 and show the three value views.
world = pennydropsolver.GameModel(num_players=2, num_spots=6)

solve_world(world)

total_abs_change=18.0 max_abs_change=4.0
total_abs_change=5.333333333333335 max_abs_change=1.0555555555555558
total_abs_change=3.962962962962964 max_abs_change=1.0555555555555558
total_abs_change=3.6728395061728407 max_abs_change=0.5092592592592594
total_abs_change=1.9207818930041154 max_abs_change=0.31430041152263377
total_abs_change=1.5516117969821677 max_abs_change=0.2600737311385459
total_abs_change=1.0763603109282116 max_abs_change=0.15432098765432123
total_abs_change=0.49821387745770485 max_abs_change=0.07394547325102885
total_abs_change=0.2880071000355632 max_abs_change=0.06228348225626168
total_abs_change=0.14558473153651974 max_abs_change=0.028110492715795354
total_abs_change=0.057086196421616386 max_abs_change=0.0090669809624917
total_abs_change=0.026129256499074827 max_abs_change=0.006206341248075953
total_abs_change=0.011181091588326364 max_abs_change=0.0024777300471441333
total_abs_change=0.003998103924342189 max_abs_change=0.000763646683280994
total_abs_change=0.001624970

Unnamed: 0,state_idx,num_out,player,is_first,action,value
2,2,0,0,False,Action.ROLL,-1.453197
3,2,0,0,False,Action.PASS,0.751117
8,6,1,0,False,Action.ROLL,-0.253197
9,6,1,0,False,Action.PASS,0.751117
14,10,2,0,False,Action.ROLL,0.683503
15,10,2,0,False,Action.PASS,0.938897
20,14,3,0,False,Action.ROLL,1.564828
21,14,3,0,False,Action.PASS,1.139172
26,18,4,0,False,Action.ROLL,2.635102
27,18,4,0,False,Action.PASS,1.124898



The value when you are first passed the box


Unnamed: 0,state_idx,num_out,player,is_first,action,value
1,1,0,0,True,Action.ROLL,-1.453197
7,5,1,0,True,Action.ROLL,-0.253197
13,9,2,0,True,Action.ROLL,0.683503
19,13,3,0,True,Action.ROLL,1.564828
25,17,4,0,True,Action.ROLL,2.635102
31,21,5,0,True,Action.ROLL,4.114286



The value to you when you pass the box to teh next player


Unnamed: 0,state_idx,num_out,player,is_first,action,value
4,3,0,1,True,Action.ROLL,0.751117
10,7,1,1,True,Action.ROLL,0.751117
16,11,2,1,True,Action.ROLL,0.938897
22,15,3,1,True,Action.ROLL,1.139172
28,19,4,1,True,Action.ROLL,1.124898
34,23,5,1,True,Action.ROLL,0.685714


# Players 2, 6 spots, original action choice

The first version of the action choice had the other players maximize the number of pennies for the player, rather than minimize their own.

In [83]:
# Build the model and its value table by hand (the same first step solve_world takes).
world = pennydropsolver.GameModel(num_players=2, num_spots=6)
value_table = pennydropsolver.ValueTable(world)

In [84]:
# Run the iterative updates; the output reports total_abs_change / max_abs_change per pass.
value_table.full_iterative_value_updates()

total_abs_change=18.0 max_abs_change=4.0
total_abs_change=5.333333333333335 max_abs_change=1.0555555555555558
total_abs_change=3.962962962962964 max_abs_change=1.0555555555555558
total_abs_change=3.6728395061728407 max_abs_change=0.5092592592592594
total_abs_change=1.9207818930041154 max_abs_change=0.31430041152263377
total_abs_change=1.5516117969821677 max_abs_change=0.2600737311385459
total_abs_change=1.0763603109282116 max_abs_change=0.15432098765432123
total_abs_change=0.49821387745770485 max_abs_change=0.07394547325102885
total_abs_change=0.2880071000355632 max_abs_change=0.06228348225626168
total_abs_change=0.14558473153651974 max_abs_change=0.028110492715795354
total_abs_change=0.057086196421616386 max_abs_change=0.0090669809624917
total_abs_change=0.026129256499074827 max_abs_change=0.006206341248075953
total_abs_change=0.011181091588326364 max_abs_change=0.0024777300471441333
total_abs_change=0.003998103924342189 max_abs_change=0.000763646683280994
total_abs_change=0.001624970

In [85]:
# Convert the value table to a DataFrame and spot-check three random rows.
df = value_table.to_dataframe()
df.sample(3)

Unnamed: 0,state_idx,num_out,player,is_first,action,value
25,17,4,0,True,Action.ROLL,2.635102
32,22,5,0,False,Action.ROLL,4.114286
28,19,4,1,True,Action.ROLL,1.124898


In [86]:
# Player 0 rows where is_first is False (the first of the views solve_world displays).
df[(df["player"] == 0 ) & (~df["is_first"])]

Unnamed: 0,state_idx,num_out,player,is_first,action,value
2,2,0,0,False,Action.ROLL,-1.453197
3,2,0,0,False,Action.PASS,0.751117
8,6,1,0,False,Action.ROLL,-0.253197
9,6,1,0,False,Action.PASS,0.751117
14,10,2,0,False,Action.ROLL,0.683503
15,10,2,0,False,Action.PASS,0.938897
20,14,3,0,False,Action.ROLL,1.564828
21,14,3,0,False,Action.PASS,1.139172
26,18,4,0,False,Action.ROLL,2.635102
27,18,4,0,False,Action.PASS,1.124898


# Players 3, 6 spots

In [119]:
# Solve the model with num_players=3, num_spots=6 and show the three value views.
world = pennydropsolver.GameModel(num_players=3, num_spots=6)
solve_world(world)

total_abs_change=18.0 max_abs_change=4.0
total_abs_change=5.333333333333335 max_abs_change=1.0555555555555558
total_abs_change=3.962962962962964 max_abs_change=1.0555555555555558
total_abs_change=2.765432098765433 max_abs_change=0.5092592592592594
total_abs_change=2.055555555555556 max_abs_change=0.5092592592592594
total_abs_change=1.911351165980797 max_abs_change=0.2623456790123458
total_abs_change=1.0228623685413814 max_abs_change=0.16973879743941478
total_abs_change=0.7367684042066758 max_abs_change=0.14137398071940255
total_abs_change=0.4149861238124277 max_abs_change=0.07071398065589629
total_abs_change=0.18209426704939854 max_abs_change=0.025243865264441334
total_abs_change=0.09831327848481686 max_abs_change=0.015211810318548902
total_abs_change=0.04189122563699399 max_abs_change=0.007409100916188183
total_abs_change=0.021980721671321368 max_abs_change=0.004109055302440545
total_abs_change=0.009975213249800774 max_abs_change=0.0019484137393841472
total_abs_change=0.00374581066128

Unnamed: 0,state_idx,num_out,player,is_first,action,value
2,2,0,0,False,Action.ROLL,-1.74904
3,2,0,0,False,Action.PASS,0.478744
11,10,1,0,False,Action.ROLL,-0.54904
12,10,1,0,False,Action.PASS,0.478744
20,18,2,0,False,Action.ROLL,0.3137
21,18,2,0,False,Action.PASS,0.59843
29,26,3,0,False,Action.ROLL,1.183782
30,26,3,0,False,Action.PASS,0.522834
38,34,4,0,False,Action.ROLL,2.40026
39,34,4,0,False,Action.PASS,0.28993



The value when you are first passed the box


Unnamed: 0,state_idx,num_out,player,is_first,action,value
1,1,0,0,True,Action.ROLL,-1.74904
10,9,1,0,True,Action.ROLL,-0.54904
19,17,2,0,True,Action.ROLL,0.3137
28,25,3,0,True,Action.ROLL,1.183782
37,33,4,0,True,Action.ROLL,2.40026
46,41,5,0,True,Action.ROLL,4.018605



The value to you when you pass the box to teh next player


Unnamed: 0,state_idx,num_out,player,is_first,action,value
4,3,0,1,True,Action.ROLL,0.478744
13,11,1,1,True,Action.ROLL,0.478744
22,19,2,1,True,Action.ROLL,0.59843
31,27,3,1,True,Action.ROLL,0.522834
40,35,4,1,True,Action.ROLL,0.28993
49,43,5,1,True,Action.ROLL,0.111628


# Players 4, 6 spots

In [120]:
# Solve the model with num_players=4, num_spots=6 and show the three value views.
world = pennydropsolver.GameModel(num_players=4, num_spots=6)
solve_world(world)

total_abs_change=18.0 max_abs_change=4.0
total_abs_change=5.333333333333335 max_abs_change=1.0555555555555558
total_abs_change=3.962962962962964 max_abs_change=1.0555555555555558
total_abs_change=2.765432098765433 max_abs_change=0.5092592592592594
total_abs_change=2.055555555555556 max_abs_change=0.5092592592592594
total_abs_change=1.6459190672153643 max_abs_change=0.2623456790123458
total_abs_change=1.038351623228167 max_abs_change=0.18055555555555558
total_abs_change=0.8366626657521734 max_abs_change=0.14146090534979433
total_abs_change=0.4200674439871984 max_abs_change=0.07073045267489717
total_abs_change=0.26652070737861905 max_abs_change=0.0501512448877206
total_abs_change=0.13640412782180905 max_abs_change=0.025076592683265897
total_abs_change=0.05565329982538059 max_abs_change=0.008358949674975635
total_abs_change=0.026386788296074853 max_abs_change=0.0037607802418330727
total_abs_change=0.010975689946061767 max_abs_change=0.0018705499087621758
total_abs_change=0.005764038174130

Unnamed: 0,state_idx,num_out,player,is_first,action,value
2,2,0,0,False,Action.ROLL,-1.912987
3,2,0,0,False,Action.PASS,0.245981
14,10,1,0,False,Action.ROLL,-0.712987
15,10,1,0,False,Action.PASS,0.245981
26,18,2,0,False,Action.ROLL,0.108766
27,18,2,0,False,Action.PASS,0.307476
38,26,3,0,False,Action.ROLL,1.052368
39,26,3,0,False,Action.PASS,0.181277
50,34,4,0,False,Action.ROLL,2.3475
51,34,4,0,False,Action.PASS,0.066467



The value when you are first passed the box


Unnamed: 0,state_idx,num_out,player,is_first,action,value
1,1,0,0,True,Action.ROLL,-1.912987
13,9,1,0,True,Action.ROLL,-0.712987
25,17,2,0,True,Action.ROLL,0.108766
37,25,3,0,True,Action.ROLL,1.052368
49,33,4,0,True,Action.ROLL,2.3475
61,41,5,0,True,Action.ROLL,4.003089



The value to you when you pass the box to teh next player


Unnamed: 0,state_idx,num_out,player,is_first,action,value
4,3,0,1,True,Action.ROLL,0.245981
16,11,1,1,True,Action.ROLL,0.245981
28,19,2,1,True,Action.ROLL,0.307476
40,27,3,1,True,Action.ROLL,0.181277
52,35,4,1,True,Action.ROLL,0.066467
64,43,5,1,True,Action.ROLL,0.018533


# Players 2, 10 spots

In [121]:
# Solve the model with num_players=2, num_spots=10 and show the three value views.
world = pennydropsolver.GameModel(num_players=2, num_spots=10)
solve_world(world)

total_abs_change=54.800000000000004 max_abs_change=8.0
total_abs_change=12.14 max_abs_change=1.7000000000000004
total_abs_change=10.399999999999999 max_abs_change=1.7000000000000004
total_abs_change=8.9044 max_abs_change=0.8400000000000001
total_abs_change=4.02224 max_abs_change=0.4870000000000001
total_abs_change=3.306595999999999 max_abs_change=0.4870000000000001
total_abs_change=3.1789084000000014 max_abs_change=0.3124220000000001
total_abs_change=1.8711786400000006 max_abs_change=0.21679416000000012
total_abs_change=1.242553296 max_abs_change=0.1858626160000001
total_abs_change=0.9190338039999948 max_abs_change=0.1793916494000003
total_abs_change=0.4802863085599972 max_abs_change=0.06667300079999994
total_abs_change=0.2492266895039995 max_abs_change=0.036332584599999995
total_abs_change=0.1384709162744 max_abs_change=0.036229669083799765
total_abs_change=0.05876724734336025 max_abs_change=0.012568983101900022
total_abs_change=0.020155407122040003 max_abs_change=0.003050718228431948

Unnamed: 0,state_idx,num_out,player,is_first,action,value
2,2,0,0,False,Action.ROLL,-1.464209
3,2,0,0,False,Action.PASS,0.96884
8,6,1,0,False,Action.ROLL,-0.353098
9,6,1,0,False,Action.PASS,0.96884
14,10,2,0,False,Action.ROLL,0.602765
15,10,2,0,False,Action.PASS,1.089945
20,14,3,0,False,Action.ROLL,1.346412
21,14,3,0,False,Action.PASS,1.401358
26,18,4,0,False,Action.ROLL,2.102037
27,18,4,0,False,Action.PASS,1.686285



The value when you are first passed the box


Unnamed: 0,state_idx,num_out,player,is_first,action,value
1,1,0,0,True,Action.ROLL,-1.464209
7,5,1,0,True,Action.ROLL,-0.353098
13,9,2,0,True,Action.ROLL,0.602765
19,13,3,0,True,Action.ROLL,1.346412
25,17,4,0,True,Action.ROLL,2.102037
31,21,5,0,True,Action.ROLL,2.952163
37,25,6,0,True,Action.ROLL,3.929003
43,29,7,0,True,Action.ROLL,5.069345
49,33,8,0,True,Action.ROLL,6.425875
55,37,9,0,True,Action.ROLL,8.080808



The value to you when you pass the box to teh next player


Unnamed: 0,state_idx,num_out,player,is_first,action,value
4,3,0,1,True,Action.ROLL,0.96884
10,7,1,1,True,Action.ROLL,0.96884
16,11,2,1,True,Action.ROLL,1.089945
22,15,3,1,True,Action.ROLL,1.401358
28,19,4,1,True,Action.ROLL,1.686285
34,23,5,1,True,Action.ROLL,1.866818
40,27,6,1,True,Action.ROLL,1.913704
46,31,7,1,True,Action.ROLL,1.792109
52,35,8,1,True,Action.ROLL,1.450668
58,39,9,1,True,Action.ROLL,0.808081


# Attic

In [105]:
# NOTE(review): this cell uses whichever `value_table` is currently bound in the
# kernel. The recorded output matches the 10-spot results rather than the 6-spot
# table built earlier in this notebook, so it likely depends on a cell that is no
# longer present -- confirm before reusing.
# Index player 0's non-first rows by (num_out, action), keeping value and state_idx.
df = value_table.to_dataframe()
(df
 [(df["player"] == 0 ) & (~df["is_first"])]
 .set_index(["num_out", "action"])
 [["value", "state_idx"]]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,value,state_idx
num_out,action,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Action.ROLL,-1.464209,2
0,Action.PASS,0.96884,2
1,Action.ROLL,-0.353098,6
1,Action.PASS,0.96884,6
2,Action.ROLL,0.602765,10
2,Action.PASS,1.089945,10
3,Action.ROLL,1.346412,14
3,Action.PASS,1.401358,14
4,Action.ROLL,2.102037,18
4,Action.PASS,1.686285,18
