In [None]:
# default_exp player

# player
> Mathematical players for games of truth and consequences

In [None]:
# hide
from nbdev.showdoc import *

%load_ext nb_black
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.greedy=True

<IPython.core.display.Javascript object>

In [None]:
# export
import csv
import numpy as np

from truthiness import gym
from truthiness.game import available_moves
from truthiness.game import random_move
from truthiness.game import create_maze

<IPython.core.display.Javascript object>

# a run function
First we make a run function to play games with our players.
- An 'episode' is a full game of play. Change `num_episodes` to play more or fewer.
- Each episode is played on a newly generated random maze (all drawn from one random stream set by `seed`).
- Data for each game can be logged to a csv file. Change `name` from `None` to a file name.

In [None]:
# export
def run(
    n,
    player,
    num_episodes=10,
    env_name="ShameGame1",
    env_kwargs=None,
    maze_kwargs=None,
    name=None,
    seed=None,
):
    """Play `num_episodes` games with `player` and return a per-step log.

    Params
    ------
    n : passed to both `create_maze` and the environment constructor
    player : callable invoked as `player(E, Q, moves)`; must return `(x, y)`
    num_episodes : number of games to play (cast to int, must be >= 1)
    env_name : name of the environment class looked up on `gym`
    env_kwargs : extra keyword arguments for the environment constructor
    maze_kwargs : extra keyword arguments for `create_maze`
    name : if not None, the log is also written with `save_results(name, ...)`
    seed : seeds the `RandomState` used for mazes; also given to the env

    Returns
    -------
    A list with one tuple per step: (episode, t, x, y, e, q), where
    (x, y) is the move the player chose and (e, q) is the reward
    returned by `env.step`.

    Raises
    ------
    ValueError : if `num_episodes` is less than 1.
    """

    # Validate the episode count; default the optional kwargs to empty dicts
    num_episodes = int(num_episodes)
    if not num_episodes >= 1:
        raise ValueError("num_episode must be > 0")
    maze_kwargs = {} if maze_kwargs is None else maze_kwargs
    env_kwargs = {} if env_kwargs is None else env_kwargs

    # Build the environment around an initial maze
    prng = np.random.RandomState(seed)
    Env = getattr(gym, env_name)
    maze, prng = create_maze(n, prng=prng, **maze_kwargs)
    env = Env(n, maze=maze, seed=seed, **env_kwargs)

    # One (episode, t, x, y, e, q) tuple is appended per step
    log = []

    for episode in range(num_episodes):
        # Every episode gets its own maze, drawn from the shared prng
        maze, prng = create_maze(n, prng=prng, **maze_kwargs)
        env.set_maze(maze)

        # Only the boards are needed from the reset state; the player
        # picks the first position itself.
        _, _, Q, E = env.reset()

        step = 0
        finished = False
        while not finished:
            # Ask the player for a move, then apply it
            x, y = player(E, Q, env.moves())
            state, reward, finished, _ = env.step((x, y))

            # Record the move together with its reward
            e, q = reward
            log.append((episode, step, x, y, e, q))

            # Carry the updated boards into the next step
            _, _, Q, E = state
            step += 1

    # Optionally persist the log
    if name is not None:
        save_results(name, log)

    return log

<IPython.core.display.Javascript object>

# utilities
Define functions that will be helpful when defining players.

In [None]:
# export
def save_results(name, results):
    """Write a results log to a csv file.

    Params
    ------
    name : path of the file to write (overwritten if it exists)
    results : iterable of rows; each row is written as one csv line

    The first line of the file is always the header
    `n,t,x,y,E,Q`, even when `results` is empty.
    """
    # newline="" hands line-ending control to the csv module, as its
    # documentation requires; without it some platforms write an extra
    # carriage return on every row.
    with open(name, mode="w", newline="") as handle:
        writer = csv.writer(handle)

        # Header
        writer.writerow(("n", "t", "x", "y", "E", "Q"))

        # Results
        writer.writerows(results)

<IPython.core.display.Javascript object>

In [None]:
# export
def move_filter(board, moves):
    """Look up the board value (or consequence) of every move.

    `board` is indexed as `board[x, y]` for each `(x, y)` in `moves`.
    The values are returned as a list, in the same order as `moves`.
    """
    picked = []
    for x, y in moves:
        picked.append(board[x, y])
    return picked

<IPython.core.display.Javascript object>

# all players 

Here we define different simple players and dispositions to truth and consequences. 
- Honesty: $max\ E_{i,j}$
- Planned honesty: $argmax_{\pi} \sum_P E_{i,j}$
- Sensitive: $min\ Q_{i,j}$
- Planned sensitive: $argmin_{\pi} \sum_P Q_{i,j}$
- Evil: $max\ Q_{i,j}$
- Planned evil: $argmax_{\pi} \sum_P Q_{i,j}$
- Foraging: $max\ \frac{E_{i,j}}{Q_{i,j}}$
- Planned foraging: $argmax_{\pi} \sum_P \frac{E_{i,j}}{Q_{i,j}}$
- Random

We also consider a couple of more complex approaches which switch among the simple policies.

**TODO**

# a random player
They play by making random moves. Simple and bad.

In [None]:
# export
class Random:
    """Player that picks one of the available moves uniformly at random."""

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return a randomly chosen entry of `moves`; E and Q are ignored."""
        return moves[self.prng.randint(0, len(moves))]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the Random player (n=8).
# No seed is passed, so the log differs from run to run.
run(8, Random(), num_episodes=2)

[(0, 0, 4, 1, 0.26145314991685825, 0.13072657495842913),
 (0, 1, 4, 3, 0.23681253916421993, 0.11840626958210997),
 (0, 2, 4, 5, 0.2248485553450748, 0.1124242776725374),
 (0, 3, 6, 5, 0.15849977462500306, 0.07924988731250153),
 (0, 4, 6, 4, 0.3814956165947594, 0.1907478082973797),
 (0, 5, 3, 4, 0.28071846230513836, 0.14035923115256918),
 (0, 6, 4, 4, 0.4951572494159928, 0.2475786247079964),
 (0, 7, 4, 2, 0.3681275970132217, 0.18406379850661084),
 (0, 8, 4, 7, 0.18350568407101564, 0.09175284203550782),
 (0, 9, 3, 7, 0.3828570097403968, 0.1914285048701984),
 (1, 0, 7, 4, 0.22132430018056884, 0.11066215009028442),
 (1, 1, 7, 1, 0.3347449093496032, 0.1673724546748016),
 (1, 2, 4, 1, 0.5498546516592182, 0.2749273258296091),
 (1, 3, 0, 1, 0.24896456315644794, 0.12448228157822397),
 (1, 4, 0, 4, 0.5052117915603355, 0.25260589578016773),
 (1, 5, 0, 2, 0.38828576479790855, 0.19414288239895428),
 (1, 6, 0, 5, 0.8555901987012694, 0.4277950993506347),
 (1, 7, 0, 6, 0.2322470824200608, 0.11612354121

<IPython.core.display.Javascript object>

# curious players

Choose the move with the most information value

In [None]:
# export
class Curious:
    """Greedy player that picks the available move with the largest E value."""

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        # (The generator is stored but not used by `forward`.)
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose E value is highest.

        Ties go to the earliest move in `moves` (np.argmax returns the
        first maximum). Q is ignored.
        """
        information = move_filter(E, moves)
        return moves[np.argmax(information)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the Curious player (n=8).
# No seed is passed, so the mazes (and the log) differ from run to run.
run(8, Curious(), num_episodes=2)

[(0, 0, 4, 1, 1.0, 0.5),
 (0, 1, 4, 4, 0.7356048445571544, 0.3678024222785772),
 (0, 2, 1, 4, 0.8500387594230846, 0.4250193797115423),
 (0, 3, 1, 5, 0.5373087070335105, 0.26865435351675526),
 (0, 4, 2, 5, 0.5152762047551313, 0.2576381023775656),
 (0, 5, 2, 3, 0.5488140428702197, 0.27440702143510987),
 (0, 6, 5, 3, 0.545919559908258, 0.272959779954129),
 (0, 7, 5, 1, 0.5271673177906249, 0.26358365889531243),
 (0, 8, 3, 1, 0.5192380057306231, 0.25961900286531153),
 (0, 9, 3, 4, 0.5013255876939752, 0.2506627938469876),
 (1, 0, 5, 5, 0.4979880283986563, 0.24899401419932815),
 (1, 1, 4, 5, 0.43259747268957316, 0.21629873634478658),
 (1, 2, 4, 3, 0.4884750233372565, 0.24423751166862825),
 (1, 3, 3, 3, 0.4886603437462452, 0.2443301718731226),
 (1, 4, 3, 1, 0.6216802794358689, 0.31084013971793445),
 (1, 5, 3, 4, 0.4739963969988566, 0.2369981984994283),
 (1, 6, 0, 4, 1.0, 0.5),
 (1, 7, 7, 4, 0.6659611696584621, 0.33298058482923104),
 (1, 8, 5, 4, 0.28699598915985625, 0.14349799457992812),
 (1, 

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the most information over the whole best possible path

In [None]:
# export
class CuriousMCTS:
    """Placeholder for a planning player that seeks the most information.

    The intent is to choose the move that can lead to the most
    information over the whole best possible path. The search itself
    has not been written yet, so `forward` raises NotImplementedError.
    """

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Not implemented; always raises NotImplementedError."""
        # TODO - do MCTS to find the best path overall
        # moves.
        # Fail explicitly instead of indexing `moves` with an undefined name.
        raise NotImplementedError("CuriousMCTS.forward is not implemented yet")

# sensitive players

Choose the move with the least consequences

In [None]:
# export
class Sensitive:
    """Greedy player that picks the available move with the smallest Q value."""

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        # (The generator is stored but not used by `forward`.)
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose Q value is lowest.

        Ties go to the earliest move in `moves` (np.argmin returns the
        first minimum). E is ignored.
        """
        consequences = move_filter(Q, moves)
        return moves[np.argmin(consequences)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the Sensitive player (n=8).
# No seed is passed, so the mazes (and the log) differ from run to run.
run(8, Sensitive(), num_episodes=2)

[(0, 0, 5, 2, 0.10342749506222454, 0.05171374753111227),
 (0, 1, 5, 6, 0.08960030721810454, 0.04480015360905227),
 (0, 2, 5, 0, 0.11677055334387008, 0.05838527667193504),
 (0, 3, 7, 0, 0.11904319152987924, 0.05952159576493962),
 (0, 4, 7, 5, 0.08398912321735029, 0.041994561608675145),
 (0, 5, 6, 5, 0.09265284268455676, 0.04632642134227838),
 (0, 6, 6, 3, 0.07548848635726588, 0.03774424317863294),
 (0, 7, 1, 3, 0.1207836015350642, 0.0603918007675321),
 (0, 8, 5, 3, 0.12661832175814852, 0.06330916087907426),
 (0, 9, 5, 7, 0.12548798483567947, 0.06274399241783973),
 (1, 0, 6, 0, 0.19193513642884466, 0.09596756821442233),
 (1, 1, 4, 0, 0.21576244740323727, 0.10788122370161864),
 (1, 2, 5, 0, 0.3262440757651604, 0.1631220378825802),
 (1, 3, 3, 0, 0.3947310936016519, 0.19736554680082594),
 (1, 4, 3, 1, 0.4569757080885015, 0.22848785404425076),
 (1, 5, 6, 1, 0.299089310983659, 0.1495446554918295),
 (1, 6, 6, 3, 0.32738130414950545, 0.16369065207475272),
 (1, 7, 6, 2, 0.3626323293768869, 0.181

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the least consequences over the whole best possible path

In [None]:
# export
class SensitiveMCTS:
    """Placeholder for a planning player that seeks the least consequences.

    The intent is to choose the move that can lead to the least
    consequences over the whole best possible path. The search itself
    has not been written yet, so `forward` raises NotImplementedError.
    """

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Not implemented; always raises NotImplementedError."""
        # TODO - do MCTS to find the best path overall
        # moves.
        # Fail explicitly instead of indexing `moves` with an undefined name.
        raise NotImplementedError("SensitiveMCTS.forward is not implemented yet")

# evil players

Choose the move with the worst consequences

In [None]:
# export
class Evil:
    """Greedy player that picks the available move with the largest Q value."""

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        # (The generator is stored but not used by `forward`.)
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose Q value is highest.

        Ties go to the earliest move in `moves` (np.argmax returns the
        first maximum). E is ignored.
        """
        consequences = move_filter(Q, moves)
        return moves[np.argmax(consequences)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the Evil player (n=8).
# No seed is passed, so the mazes (and the log) differ from run to run.
run(8, Evil(), num_episodes=2)

[(0, 0, 3, 4, 1.0, 0.5),
 (0, 1, 3, 6, 0.40953880474243687, 0.20476940237121843),
 (0, 2, 2, 6, 0.49513441585374246, 0.24756720792687123),
 (0, 3, 5, 6, 0.33644957044805573, 0.16822478522402787),
 (0, 4, 1, 6, 0.22379322225620996, 0.11189661112810498),
 (0, 5, 1, 4, 0.36863014523360355, 0.18431507261680177),
 (0, 6, 4, 4, 0.21585509646808265, 0.10792754823404133),
 (0, 7, 4, 7, 0.5205756683692001, 0.26028783418460005),
 (0, 8, 7, 7, 0.28712079971733884, 0.14356039985866942),
 (0, 9, 7, 3, 0.33441565313444466, 0.16720782656722233),
 (1, 0, 1, 4, 0.5310333645756825, 0.26551668228784125),
 (1, 1, 0, 4, 0.6341260067151542, 0.3170630033575771),
 (1, 2, 2, 4, 0.45448021763726143, 0.22724010881863071),
 (1, 3, 2, 0, 0.726763860505809, 0.3633819302529045),
 (1, 4, 2, 6, 0.6857127599235895, 0.34285637996179474),
 (1, 5, 3, 6, 0.6273038068896928, 0.3136519034448464),
 (1, 6, 3, 3, 0.868668992235461, 0.4343344961177305),
 (1, 7, 4, 3, 0.5220308689630373, 0.26101543448151865),
 (1, 8, 6, 3, 0.5187

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the worst consequences over the whole best possible path

In [None]:
# export
class EvilMCTS:
    """Placeholder for a planning player that seeks the worst consequences.

    The intent is to choose the move that can lead to the worst
    consequences over the whole best possible path. The search itself
    has not been written yet, so `forward` raises NotImplementedError.
    """

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Not implemented; always raises NotImplementedError."""
        # TODO
        # Fail explicitly instead of indexing `moves` with an undefined name.
        raise NotImplementedError("EvilMCTS.forward is not implemented yet")

# foraging players

Choose the move with the best ratio of $\frac{E}{Q}$

In [None]:
# export
class OptimalForage:
    """Greedy player that picks the available move with the largest E / Q ratio."""

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        # (The generator is stored but not used by `forward`.)
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` with the highest E / Q value.

        Ties go to the earliest move in `moves` (np.argmax returns the
        first maximum).
        """
        # The ratio is taken over the whole board before the moves are
        # looked up.
        # NOTE(review): zeros in Q would give inf/nan here - confirm the
        # environment never produces Q == 0.
        ratio = E / Q
        scores = move_filter(ratio, moves)
        return moves[np.argmax(scores)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the OptimalForage player (n=8).
# No seed is passed, so the mazes (and the log) differ from run to run.
run(8, OptimalForage(), num_episodes=2)

[(0, 0, 6, 1, 0.37206564193160313, 0.18603282096580157),
 (0, 1, 7, 1, 0.7500771345573176, 0.3750385672786588),
 (0, 2, 5, 1, 0.2800679962351955, 0.14003399811759776),
 (0, 3, 4, 1, 1.0, 0.5),
 (0, 4, 3, 1, 0.7765173648279912, 0.3882586824139956),
 (0, 5, 2, 1, 0.1795551953949275, 0.08977759769746375),
 (0, 6, 1, 1, 0.2436309721374121, 0.12181548606870606),
 (0, 7, 0, 1, 0.5674365085059092, 0.2837182542529546),
 (0, 8, 0, 2, 0.41038783962670394, 0.20519391981335197),
 (0, 9, 1, 2, 0.2709669933192827, 0.13548349665964135),
 (1, 0, 6, 3, 0.19383150715021746, 0.09691575357510873),
 (1, 1, 7, 3, 0.1795349953716581, 0.08976749768582905),
 (1, 2, 5, 3, 0.20859455312449404, 0.10429727656224702),
 (1, 3, 4, 3, 0.2492938192125769, 0.12464690960628845),
 (1, 4, 3, 3, 0.21539255926656786, 0.10769627963328393),
 (1, 5, 2, 3, 0.26607517028394345, 0.13303758514197173),
 (1, 6, 1, 3, 0.22705895630176573, 0.11352947815088286),
 (1, 7, 0, 3, 0.2033109663893939, 0.10165548319469694),
 (1, 8, 0, 4, 0.205

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the best ratio of $\frac{E}{Q}$ over the whole best possible path

In [None]:
# export
class OptimalForageMCTS:
    """Placeholder for a planning player that seeks the best E / Q ratio.

    The intent is to choose the move that can lead to the best ratio of
    E to Q over the whole best possible path. The search itself has not
    been written yet, so `forward` raises NotImplementedError.
    """

    def __init__(self, prng=None):
        # Keep the caller's generator, or make a fresh unseeded one.
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        """Calling the player is the same as calling `forward`."""
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Not implemented; always raises NotImplementedError."""
        # TODO - do MCTS to find the best path overall moves.
        # Fail explicitly instead of indexing `moves` with an undefined name.
        raise NotImplementedError("OptimalForageMCTS.forward is not implemented yet")