In [None]:
# default_exp player

# player
> Mathematical players for games of truth and consequences

In [None]:
# hide
from nbdev.showdoc import *

%load_ext nb_black
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.greedy=True

<IPython.core.display.Javascript object>

In [None]:
# export
import csv
import numpy as np

from truthiness import gym
from truthiness.game import available_moves
from truthiness.game import random_move
from truthiness.game import create_maze

<IPython.core.display.Javascript object>

# a run function
First we make a run function for our players.
- An 'episode' is a full game of play. Change `num_episodes` to play more or less.
- Each episode uses a different random seed.
- Data for each game can be logged to a csv file. Change `name` from `None`.

In [None]:
# export
def run(
    n,
    player,
    num_episodes=10,
    env_name="ShameGame1",
    env_kwargs=None,
    maze_kwargs=None,
    name=None,
    seed=None,
):
    """Play some games and log every move.

    Parameters
    ----------
    n : int
        Board size; passed to `create_maze` and to the environment
        constructor.
    player : callable
        Called as `player(E, Q, moves)`; must return the `(x, y)` move
        to play.
    num_episodes : int
        Number of full games to play. Must be at least 1.
    env_name : str
        Name of the environment class looked up on `gym`.
    env_kwargs : dict, optional
        Extra keyword arguments for the environment constructor.
    maze_kwargs : dict, optional
        Extra keyword arguments for `create_maze`.
    name : str, optional
        If not None, the move log is written to this path as csv.
    seed : int, optional
        Seeds the PRNG handed to `create_maze` and is also passed to the
        environment constructor.

    Returns
    -------
    results : list of tuple
        One `(episode, t, x, y, e, q)` tuple per move played.
    mazes : list
        The maze used in each episode.
    Es, Qs : list
        The E and Q boards returned by `env.reset()` for each episode.

    Raises
    ------
    ValueError
        If `num_episodes` is less than 1.
    """

    # Sanity
    num_episodes = int(num_episodes)
    if num_episodes < 1:
        raise ValueError("num_episodes must be > 0")
    if maze_kwargs is None:
        maze_kwargs = {}
    if env_kwargs is None:
        env_kwargs = {}

    # Get the env. This first maze is only used to construct the env;
    # every episode below draws its own maze from the same prng stream.
    prng = np.random.RandomState(seed)
    Env = getattr(gym, env_name)
    maze, prng = create_maze(n, prng=prng, **maze_kwargs)
    env = Env(n, maze=maze, seed=seed, **env_kwargs)

    # Init logging. Save data as tuples:
    # (episode, t, x, y, e, q)
    results = []
    mazes = []
    Es = []
    Qs = []

    # !
    for i in range(num_episodes):
        # Reconfig the env
        maze, prng = create_maze(n, prng=prng, **maze_kwargs)
        mazes.append(maze)

        # Reset. The starting position is not needed here: the player
        # only sees the boards and the list of legal moves.
        env.set_maze(maze)
        _, _, Q, E = env.reset()
        Es.append(E)
        Qs.append(Q)
        done = False

        # -
        t = 0
        while not done:
            # Choose and act
            moves = env.moves()
            x, y = player(E, Q, moves)

            state, reward, done, _ = env.step((x, y))

            # Log data
            e, q = reward
            results.append((i, t, x, y, e, q))

            # Shift: carry the updated boards into the next decision
            _, _, Q, E = state
            t += 1

    # Save to disk?
    if name is not None:
        save_results(name, results)

    return results, mazes, Es, Qs

<IPython.core.display.Javascript object>

# utilities
Define functions that will be helpful when defining players.

In [None]:
# export
def save_results(name, results):
    """Write the move log produced by `run` to a csv file.

    Parameters
    ----------
    name : str
        Path of the file to write. An existing file is overwritten.
    results : iterable of sequence
        Rows to write, one per move, under the header
        `n, t, x, y, E, Q`.
    """
    # newline="" lets the csv module control line endings itself; without
    # it, platforms that translate "\n" end up with blank rows between
    # records.
    with open(name, mode="w", newline="") as handle:
        # Init
        writer = csv.writer(handle)

        # Header
        head = ("n", "t", "x", "y", "E", "Q")
        writer.writerow(head)

        # Results
        writer.writerows(results)

<IPython.core.display.Javascript object>

In [None]:
# export
def extract_moves(episode, results):
    """Collect the (x, y) moves logged for one episode.

    Each row of `results` is expected to hold the episode index at
    position 0 and the move coordinates at positions 2 and 3. Rows are
    returned in the order they appear in `results`.
    """
    return [
        (record[2], record[3])
        for record in results
        if np.isclose(record[0], episode)
    ]

<IPython.core.display.Javascript object>

In [None]:
# export
def move_filter(board, moves):
    """Look up the board value for every candidate move.

    `board` is an E or Q board indexed as `board[x, y]`; `moves` is an
    iterable of `(x, y)` pairs. Returns a list of values/consequences,
    one per move, in the same order as `moves`.
    """
    values = []
    for x, y in moves:
        values.append(board[x, y])
    return values

<IPython.core.display.Javascript object>

# all players 

Here we define different simple players and dispositions to truth and consequences. 
- Honesty: $max\ E_{i,j}$
- Planned honesty: $argmax_{\pi} \sum_P E_{i,j}$
- Sensitive: $min\ Q_{i,j}$
- Planned sensitive: $argmin_{\pi} \sum_P Q_{i,j}$
- Evil: $max\ Q_{i,j}$
- Planned evil: $argmax_{\pi} \sum_P Q_{i,j}$
- Foraging: $max\ \frac{E_{i,j}}{Q_{i,j}}$
- Planned foraging: $argmax_{\pi} \sum_P \frac{E_{i,j}}{Q_{i,j}}$
- Random

We also consider a couple of more complex approaches which switch among the simple policies.

**TODO**

# a random player
They play by making random moves. Simple and bad.

In [None]:
# export
class Random:
    """Player that picks one of the available moves at random."""

    def __init__(self, prng=None):
        # Fall back to a fresh, unseeded generator when none is supplied.
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return one entry of `moves`, drawn with `self.prng`.

        The E and Q boards are accepted for interface compatibility
        with the other players but are not consulted.
        """
        pick = self.prng.randint(0, len(moves))
        return moves[pick]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play a single episode with the random player (n=8).
run(8, Random(), num_episodes=1)

([(0, 0, 4, 1, 0.39342929857697595, 0.19671464928848797),
  (0, 1, 4, 4, 0.2135497820537608, 0.1067748910268804),
  (0, 2, 5, 4, 0.2343645353550754, 0.1171822676775377),
  (0, 3, 6, 4, 0.18024797231940465, 0.09012398615970232),
  (0, 4, 2, 4, 0.221531748708649, 0.1107658743543245),
  (0, 5, 2, 6, 0.28202180296217483, 0.14101090148108741),
  (0, 6, 0, 6, 0.3255967071065113, 0.16279835355325564),
  (0, 7, 0, 4, 0.3328673968499635, 0.16643369842498176),
  (0, 8, 1, 4, 0.4091757020191406, 0.2045878510095703),
  (0, 9, 1, 7, 0.48977989641771946, 0.24488994820885973)],
 [array([[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 1.],
         [0., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 0.],
         [1., 1., 0., 0., 0., 0., 1., 0.]])],
 [array([[0.19564368, 0.10369509, 0.19495505, 0.2359949 , 0.        ,
          0.0

<IPython.core.display.Javascript object>

# curious players

Choose the move with the most information value

In [None]:
# export
class Curious:
    """Greedy player that takes the move with the largest E value."""

    def __init__(self, prng=None):
        # The generator is stored for interface parity with the other
        # players; the greedy choice below never draws from it.
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose E value is highest.

        Ties resolve to the earliest such move, as `np.argmax` does.
        """
        info = move_filter(E, moves)
        return moves[np.argmax(info)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play a single episode with the greedy curious player (n=8).
run(8, Curious(), num_episodes=1)

([(0, 0, 6, 6, 0.5947476086535576, 0.2973738043267788),
  (0, 1, 6, 5, 0.7277302548740752, 0.3638651274370376),
  (0, 2, 6, 7, 0.5365820545567427, 0.26829102727837134),
  (0, 3, 0, 7, 1.0, 0.5),
  (0, 4, 0, 4, 0.646215858957665, 0.3231079294788325),
  (0, 5, 2, 4, 0.4493840555809426, 0.2246920277904713),
  (0, 6, 2, 3, 0.5654941274126757, 0.28274706370633784),
  (0, 7, 1, 3, 0.5424939715760336, 0.2712469857880168),
  (0, 8, 3, 3, 0.4022066143066504, 0.2011033071533252),
  (0, 9, 0, 3, 0.3738462712999921, 0.18692313564999605)],
 [array([[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 1., 1., 0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 1., 1., 0., 0.],
         [1., 1., 0., 0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]])])

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the most information over the whole best possible path

In [None]:
# export
class CuriousMCTS:
    """Placeholder for a planning player that maximises E over a whole path.

    The search itself is not written yet, so calling the player raises
    `NotImplementedError`.
    """

    def __init__(self, prng=None):
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Choose a move by tree search (not implemented).

        Raises
        ------
        NotImplementedError
            Always, until the search is implemented.
        """
        # TODO - do MCTS to find the best path overall
        # moves.
        # Fail explicitly rather than with a NameError on an undefined index.
        raise NotImplementedError("CuriousMCTS.forward is not implemented yet")

<IPython.core.display.Javascript object>

# sensitive players

Choose the move with the least consequences

In [None]:
# export
class Sensitive:
    """Greedy player that takes the move with the smallest Q value."""

    def __init__(self, prng=None):
        # Kept for a uniform constructor across players; the choice
        # below never draws from it.
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose Q value is lowest.

        Ties resolve to the earliest such move, as `np.argmin` does.
        """
        costs = move_filter(Q, moves)
        return moves[np.argmin(costs)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play a single episode with the greedy sensitive player (n=8).
run(8, Sensitive(), num_episodes=1)

([(0, 0, 1, 2, 0.10718640047331875, 0.05359320023665937),
  (0, 1, 1, 0, 0.17811522513892583, 0.08905761256946292),
  (0, 2, 0, 0, 0.17713938518342645, 0.08856969259171323),
  (0, 3, 0, 4, 0.088693676312465, 0.0443468381562325),
  (0, 4, 6, 4, 0.07452944432936048, 0.03726472216468024),
  (0, 5, 6, 5, 0.09231686903107517, 0.04615843451553758),
  (0, 6, 6, 3, 0.1050172190535292, 0.0525086095267646),
  (0, 7, 4, 3, 0.04456640658633405, 0.022283203293167023),
  (0, 8, 3, 3, 0.06587609923838801, 0.032938049619194006),
  (0, 9, 7, 3, 0.12478644114776796, 0.06239322057388398)],
 [array([[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 1., 0.],
         [0., 0., 0., 0., 0., 1., 1., 0.],
         [1., 1., 0., 0., 0., 1., 1., 0.],
         [0., 1., 0., 0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]])])

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the least consequences over the whole best possible path

In [None]:
# export
class SensitiveMCTS:
    """Placeholder for a planning player that minimises Q over a whole path.

    The search itself is not written yet, so calling the player raises
    `NotImplementedError`.
    """

    def __init__(self, prng=None):
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Choose a move by tree search (not implemented).

        Raises
        ------
        NotImplementedError
            Always, until the search is implemented.
        """
        # TODO - do MCTS to find the best path overall
        # moves.
        # Fail explicitly rather than with a NameError on an undefined index.
        raise NotImplementedError("SensitiveMCTS.forward is not implemented yet")

<IPython.core.display.Javascript object>

# evil players

Choose the move with the worst consequences

In [None]:
# export
class Evil:
    """Greedy player that takes the move with the largest Q value."""

    def __init__(self, prng=None):
        # Kept for a uniform constructor across players; the choice
        # below never draws from it.
        self.prng = np.random.RandomState() if prng is None else prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` whose Q value is highest.

        Ties resolve to the earliest such move, as `np.argmax` does.
        """
        costs = move_filter(Q, moves)
        return moves[np.argmax(costs)]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play a single episode with the greedy evil player (n=8).
run(8, Evil(), num_episodes=1)

([(0, 0, 4, 6, 0.44905156122482054, 0.22452578061241027),
  (0, 1, 0, 6, 0.9933647854344662, 0.4966823927172331),
  (0, 2, 7, 6, 0.3259852680566106, 0.1629926340283053),
  (0, 3, 7, 1, 1.0, 0.5),
  (0, 4, 7, 2, 0.5178716479222039, 0.25893582396110193),
  (0, 5, 7, 3, 0.3176550000118889, 0.15882750000594445),
  (0, 6, 6, 3, 0.25426476008001725, 0.12713238004000862),
  (0, 7, 6, 0, 0.2309392644872198, 0.1154696322436099),
  (0, 8, 4, 0, 0.3014834078060164, 0.1507417039030082),
  (0, 9, 5, 0, 0.2591076576090879, 0.12955382880454394)],
 [array([[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 1.]])])

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the worst consequences over the whole best possible path

In [None]:
# export
class EvilMCTS:
    """Placeholder for a planning player that maximises Q over a whole path.

    The search itself is not written yet, so calling the player raises
    `NotImplementedError`.
    """

    def __init__(self, prng=None):
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Choose a move by tree search (not implemented).

        Raises
        ------
        NotImplementedError
            Always, until the search is implemented.
        """
        # TODO
        # Fail explicitly rather than with a NameError on an undefined index.
        raise NotImplementedError("EvilMCTS.forward is not implemented yet")

<IPython.core.display.Javascript object>

# foraging players

Choose the move with the best ratio of $\frac{E}{Q}$

In [None]:
# export
class OptimalForage:
    """Greedy player that takes the move with the best E / Q ratio."""

    def __init__(self, prng=None):
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Return the entry of `moves` with the largest E / Q ratio.

        Parameters
        ----------
        E, Q : array-like indexed as `board[x, y]`
            The information and consequence boards.
        moves : sequence of (x, y)
            Candidate moves.

        Notes
        -----
        Only the candidate cells are divided, not the whole board. A move
        whose ratio is undefined (0 / 0) is ranked last rather than
        first: `np.argmax` would otherwise treat the resulting NaN as the
        maximum and pick it over every real ratio. A positive E with
        zero Q still evaluates to +inf and is preferred.
        """
        info = np.array([E[x, y] for (x, y) in moves], dtype=float)
        cost = np.array([Q[x, y] for (x, y) in moves], dtype=float)

        # Division by zero is expected and handled below, so keep numpy quiet.
        with np.errstate(divide="ignore", invalid="ignore"):
            ratios = info / cost
        ratios[np.isnan(ratios)] = -np.inf

        best = np.argmax(ratios)

        return moves[best]

<IPython.core.display.Javascript object>

In [None]:
# Demo: play two episodes with the greedy foraging player (n=8).
run(8, OptimalForage(), num_episodes=2)

([(0, 0, 1, 7, 0.3629320770366112, 0.1814660385183056),
  (0, 1, 2, 7, 0.4416805632779256, 0.2208402816389628),
  (0, 2, 3, 7, 0.26052086616452863, 0.13026043308226432),
  (0, 3, 4, 7, 0.707177531781293, 0.3535887658906465),
  (0, 4, 5, 7, 0.22237879074425165, 0.11118939537212583),
  (0, 5, 0, 7, 0.11036764660128862, 0.05518382330064431),
  (0, 6, 0, 6, 0.09214896656086231, 0.04607448328043116),
  (0, 7, 1, 6, 0.1021861757643963, 0.05109308788219815),
  (0, 8, 2, 6, 0.13715060104760785, 0.06857530052380392),
  (0, 9, 3, 6, 0.42248159046973677, 0.21124079523486838),
  (1, 0, 2, 5, 0.5495984570398893, 0.27479922851994465),
  (1, 1, 1, 5, 0.3075164394182902, 0.1537582197091451),
  (1, 2, 0, 5, 0.3626040351205334, 0.1813020175602667),
  (1, 3, 0, 6, 0.31703387749392364, 0.15851693874696182),
  (1, 4, 1, 6, 0.3363225409182722, 0.1681612704591361),
  (1, 5, 1, 7, 0.23021340967365844, 0.11510670483682922),
  (1, 6, 2, 7, 0.44691594284625824, 0.22345797142312912),
  (1, 7, 3, 7, 0.202494135232

<IPython.core.display.Javascript object>

Try and choose the move that can lead to the best ratio of $\frac{E}{Q}$ over the whole best possible path

In [None]:
# export
class OptimalForageMCTS:
    """Placeholder for a planning player that maximises E / Q over a whole path.

    The search itself is not written yet, so calling the player raises
    `NotImplementedError`.
    """

    def __init__(self, prng=None):
        if prng is None:
            self.prng = np.random.RandomState()
        else:
            self.prng = prng

    def __call__(self, E, Q, moves):
        return self.forward(E, Q, moves)

    def forward(self, E, Q, moves):
        """Choose a move by tree search (not implemented).

        Raises
        ------
        NotImplementedError
            Always, until the search is implemented.
        """
        # TODO - do MCTS to find the best path overall.
        # Fail explicitly rather than with a NameError on an undefined index.
        raise NotImplementedError("OptimalForageMCTS.forward is not implemented yet")

<IPython.core.display.Javascript object>