In [None]:
# default_exp gym

<IPython.core.display.Javascript object>

# gym
> A `Gym` environment for teaching truth and consequences

In [None]:
# hide
from nbdev.showdoc import *

%load_ext nb_black
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.greedy=True

<IPython.core.display.Javascript object>

In [None]:
# export
import numpy as np
import gym

from copy import deepcopy
from gym import spaces
from gym.utils import seeding
from itertools import cycle

from truthiness.game import create_maze
from truthiness.game import shame_game
from truthiness.game import tough_game
from truthiness.game import available_moves
from truthiness.game import random_move

# Gym is annoying these days...
import warnings

warnings.filterwarnings("ignore")

<IPython.core.display.Javascript object>

# the base
First we define a `Base` game class. It has methods that are common between all the games we will want to play. 

In [None]:
# export
class Base(gym.Env):
    """Behaviour shared by the game environments

    The methods here read `self.x`, `self.y`, `self.maze` and
    `self.move_history`; this class does not create them, so a
    subclass has to set them before `moves()` is called.
    """

    def moves(self):
        """Return the available moves that have not been played yet"""
        # Ask the game for every move open from the current position,
        # then drop the ones already present in the move history.
        return [
            move
            for move in available_moves(self.x, self.y, self.maze)
            if move not in self.move_history
        ]

    def set_maze(self, maze):
        """Replace the maze used by this env"""
        self.maze = maze

    def render(self, mode="human", close=False):
        """Rendering is not implemented; calling this does nothing"""
        return None

<IPython.core.display.Javascript object>

# the shame game env
The details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ShameGame1(Base):
    """A one-sided game of learning and shame

    Params
    ------
    n : size argument handed to `shame_game`
    maze : maze handed to `random_move` and `shame_game` (default None)
    sigma : forwarded to `shame_game`
    shame : forwarded to `shame_game`
    max_steps : the episode is marked done once this many steps are taken
    seed : seed for the env's own `np.random.RandomState`
    """

    def __init__(self, n, maze=None, sigma=0.5, shame=0.5, max_steps=10, seed=None):
        self.n = n
        self.maze = maze
        self.max_steps = max_steps
        self.sigma = sigma
        self.shame = shame

        # A private generator, threaded through every call into `game`
        self.prng = np.random.RandomState(seed)
        self.reset()

    def step(self, move):
        """Play `move`, an (x, y) pair, and collect what is found there

        Returns `(state, reward, done, info)`: `state` is `(x, y, E, Q)`,
        `reward` is the `(E, Q)` pair of values at the move, and `info`
        is an empty dict.

        Raises a ValueError when `max_steps` steps have already been
        taken; call `reset()` to start a new episode.
        """
        # The episode is done once count reaches max_steps, so refuse any
        # further step (a `>` test here would let one extra step through).
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns
        x, y = move
        self.move_history.append(move)

        # Values are only found once: read them, then zero the cell
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.x, self.y = x, y
        # NOTE: state holds the live E and Q, not copies; later steps
        # zero their entries in place.
        state = (self.x, self.y, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return the initial `(x, y, E, Q)`"""
        # Generate a new starting position and new game boards
        x, y, self.prng = random_move(self.maze, prng=self.prng)
        self.E, self.Q, self.prng = shame_game(
            self.n, sigma=self.sigma, shame=self.shame, maze=self.maze, prng=self.prng
        )
        # Can't move where we start
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.move_history = [(x, y)]
        self.x, self.y = x, y

        # reinit
        self.count = 0
        self.done = False

        return (self.x, self.y, self.E, self.Q)

<IPython.core.display.Javascript object>

- Example of random play

In [None]:
# The same size is given to the maze and to the env, so name it once
n = 8
maze, _ = create_maze(n, k=5, t=10)
env = ShameGame1(n, maze=maze)

<IPython.core.display.Javascript object>

In [None]:
done = False
# reset() returns (x, y, E, Q), in that order
x, y, E, Q = env.reset()

moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # Nowhere left to go; np.random.randint(0, 0) would raise
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

3 4 (0.31119912539286565, 0.15559956269643282) False
5 4 (0.21218876586842556, 0.10609438293421278) False
1 4 (0.20955978804793843, 0.10477989402396921) False
4 4 (0.1695969952070397, 0.08479849760351985) False
7 4 (0.28256739265183584, 0.14128369632591792) False
7 1 (0.22863473837421477, 0.11431736918710739) False
7 3 (0.33786287433903184, 0.16893143716951592) False
4 3 (0.18721411326824847, 0.09360705663412423) False
5 3 (0.19871887095337323, 0.09935943547668662) False
5 5 (0.3352682848964429, 0.16763414244822145) True


<IPython.core.display.Javascript object>

In [None]:
moves

[(3, 0),
 (3, 4),
 (5, 4),
 (1, 4),
 (4, 4),
 (7, 4),
 (7, 1),
 (7, 3),
 (4, 3),
 (5, 3),
 (5, 5)]

<IPython.core.display.Javascript object>

# the tough game env
As above, the details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ToughGame1(Base):
    """A one-sided game of learning and consequences

    Params
    ------
    n : size argument handed to `tough_game`
    maze : maze handed to `random_move` and `tough_game` (default None)
    sigma_E : forwarded to `tough_game`
    sigma_Q : forwarded to `tough_game`
    max_steps : the episode is marked done once this many steps are taken
    seed : seed for the env's own `np.random.RandomState`
    """

    def __init__(self, n, maze=None, sigma_E=0.5, sigma_Q=1.0, max_steps=10, seed=None):
        self.n = n
        self.maze = maze
        self.max_steps = max_steps
        self.sigma_E = sigma_E
        self.sigma_Q = sigma_Q

        # A private generator, threaded through every call into `game`
        self.prng = np.random.RandomState(seed)
        self.reset()

    def step(self, move):
        """Play `move`, an (x, y) pair, and collect what is found there

        Returns `(state, reward, done, info)`: `state` is `(x, y, E, Q)`,
        `reward` is the `(E, Q)` pair of values at the move, and `info`
        is an empty dict.

        Raises a ValueError when `max_steps` steps have already been
        taken; call `reset()` to start a new episode.
        """
        # The episode is done once count reaches max_steps, so refuse any
        # further step (a `>` test here would let one extra step through).
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns
        x, y = move
        self.move_history.append(move)

        # Values are only found once: read them, then zero the cell
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.x, self.y = x, y
        # NOTE: state holds the live E and Q, not copies; later steps
        # zero their entries in place.
        state = (self.x, self.y, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return the initial `(x, y, E, Q)`"""
        # Generate a new starting position and new game boards
        x, y, self.prng = random_move(self.maze, prng=self.prng)
        self.E, self.Q, self.prng = tough_game(
            self.n,
            sigma_E=self.sigma_E,
            sigma_Q=self.sigma_Q,
            maze=self.maze,
            prng=self.prng,
        )

        # Can't move where we start
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.move_history = [(x, y)]
        self.x, self.y = x, y

        # reinit
        self.count = 0
        self.done = False

        return (self.x, self.y, self.E, self.Q)

<IPython.core.display.Javascript object>

- Example of random play

In [None]:
# The same size is given to the maze and to the env, so name it once
n = 8
maze, _ = create_maze(n, k=5, t=10)
env = ToughGame1(n, maze=maze)

done = False
# reset() returns (x, y, E, Q), in that order
x, y, E, Q = env.reset()

moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # Nowhere left to go; np.random.randint(0, 0) would raise
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

5 1 (0.14948488080861458, 0.16926813205271804) False
7 1 (0.10167141435254168, 0.5334809369266825) False
2 1 (0.1664335677612682, 0.23872090966575146) False
3 1 (0.14600440882194315, 0.19388022481082204) False
3 2 (0.15816096157063284, 0.5485921840164499) False
4 2 (0.08501560393600738, 0.28886848632829565) False
4 3 (0.11363619494279026, 0.06066172448969996) False
3 3 (1.0, 0.05167036599727297) False
2 3 (0.18842055217800968, 0.36048906902660627) False
2 6 (0.3366594164006848, 0.056335023429288596) True


<IPython.core.display.Javascript object>