In [None]:
# default_exp gym

<IPython.core.display.Javascript object>

# gym
> A `Gym` environment for teaching truth and consequences

In [None]:
# hide
from nbdev.showdoc import *

%load_ext nb_black
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.greedy=True

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [None]:
# export
import numpy as np
import gym

from copy import deepcopy
from gym import spaces
from gym.utils import seeding
from itertools import cycle

from truthiness.game import create_maze
from truthiness.game import shame_game
from truthiness.game import plain_game
from truthiness.game import available_moves
from truthiness.game import random_move

# Gym is annoying these days...
import warnings

warnings.filterwarnings("ignore")

<IPython.core.display.Javascript object>

# the base
First we define a `Base` game class. It has methods that are common to all the games we will want to play.

In [None]:
# export
class Base(gym.Env):
    """Methods shared by every game environment in this module.

    `moves` reads `self.x`, `self.y`, `self.maze` and `self.move_history`;
    this class does not create them, so a subclass has to set them
    before `moves` is called.
    """

    def moves(self):
        """Return all available moves

        The candidates come from `available_moves` for the current
        position and maze; any candidate already present in
        `self.move_history` is dropped. Candidate order is preserved.
        """
        return [
            move
            for move in available_moves(self.x, self.y, self.maze)
            if move not in self.move_history
        ]

    def set_maze(self, maze):
        """Replace the maze stored on this env."""
        self.maze = maze

    def render(self, mode="human", close=False):
        """Do nothing; rendering is not implemented for these games."""
        return None

<IPython.core.display.Javascript object>

# the shame game env
The details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ShameGame1(Base):
    """A one-sided game of learning and shame

    Parameters
    ----------
    n
        Game size, forwarded as the first argument to `shame_game`.
    maze
        Maze forwarded to `random_move`, `shame_game` and
        `available_moves`.
    sigma, shame
        Forwarded unchanged to `shame_game`.
    max_steps : int
        Number of steps allowed per episode.
    seed
        Seed for the `np.random.RandomState` used to generate games.
    """

    def __init__(self, n, maze=None, sigma=0.5, shame=0.5, max_steps=10, seed=None):
        self.n = n
        self.maze = maze
        self.max_steps = max_steps
        self.sigma = sigma
        self.shame = shame

        self.prng = np.random.RandomState(seed)
        # Start with a playable episode straight after construction
        self.reset()

    def step(self, move):
        """Play `move` and collect the values found there.

        Parameters
        ----------
        move : tuple
            The `(x, y)` position to move to. It is used directly to
            index `E` and `Q`; it is not checked against `moves()`.

        Returns
        -------
        state : tuple
            `(y, x, E, Q)` -- note that y comes before x. `E` and `Q`
            are the env's own grids, not copies, so later steps modify
            them in place.
        reward : tuple
            `(E[x, y], Q[x, y])` as they were before this step zeroed them.
        done : bool
            True once `max_steps` steps have been played.
        info : dict
            Always empty.

        Raises
        ------
        ValueError
            If the episode has already used all `max_steps` steps.
        """
        # `>=` (not `>`): once count reaches max_steps the episode is done,
        # so any further step is one too many.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns
        x, y = move
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.x, self.y = x, y
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return its initial state.

        Clears the step count, the done flag and the move history, draws
        a start position with `random_move` and new `E`/`Q` grids with
        `shame_game`. Both calls hand back the prng, which is stored
        again on the env.

        Returns
        -------
        tuple
            `(y, x, E, Q)` -- the same layout as the state from `step`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y, self.prng = random_move(self.maze, prng=self.prng)
        self.E, self.Q, self.prng = shame_game(
            self.n, sigma=self.sigma, shame=self.shame, maze=self.maze, prng=self.prng
        )

        return (self.y, self.x, self.E, self.Q)

<IPython.core.display.Javascript object>

- Example of random play

In [None]:
# Build a maze, then wrap it in a ShameGame1 env created with the same size
maze, _ = create_maze(8, k=5, t=10)
env = ShameGame1(n=8, maze=maze)

<IPython.core.display.Javascript object>

In [None]:
done = False
# reset() returns (y, x, E, Q): y comes before x, and E before Q
y, x, E, Q = env.reset()

# Positions visited, as (x, y), starting with the reset position
moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # No unplayed move is left from here; randint(0, 0) would raise
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

3 0 (0.543063947498749, 0.2715319737493745) False
6 0 (0.24185180440084803, 0.12092590220042401) False
5 0 (0.3549139185644378, 0.1774569592822189) False
2 0 (0.532138627124911, 0.2660693135624555) False
2 1 (0.22372211180899168, 0.11186105590449584) False
2 2 (0.19610973811756927, 0.09805486905878463) False
2 3 (0.22161960991908772, 0.11080980495954386) False
2 4 (0.49087005515802534, 0.24543502757901267) False
2 6 (0.6582432802958313, 0.3291216401479157) False
3 6 (0.3277963244778513, 0.16389816223892564) True


<IPython.core.display.Javascript object>

In [None]:
# Show the positions recorded during the random play above
moves

# the plain game env
As above, the details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class PlainGame1(Base):
    """A one-sided game of learning and consequences

    Parameters
    ----------
    n
        Game size, forwarded as the first argument to `plain_game`.
    maze
        Maze forwarded to `random_move`, `plain_game` and
        `available_moves`.
    sigma
        Forwarded unchanged to `plain_game`.
    max_steps : int
        Number of steps allowed per episode.
    seed
        Seed for the `np.random.RandomState` used to generate games.
    """

    def __init__(self, n, maze=None, sigma=0.5, max_steps=10, seed=None):
        self.n = n
        self.maze = maze
        self.max_steps = max_steps
        self.sigma = sigma

        self.prng = np.random.RandomState(seed)
        # Start with a playable episode straight after construction
        self.reset()

    def step(self, move):
        """Play `move` and collect the values found there.

        Parameters
        ----------
        move : tuple
            The `(x, y)` position to move to. It is used directly to
            index `E` and `Q`; it is not checked against `moves()`.

        Returns
        -------
        state : tuple
            `(y, x, E, Q)` -- note that y comes before x. `E` and `Q`
            are the env's own grids, not copies, so later steps modify
            them in place.
        reward : tuple
            `(E[x, y], Q[x, y])` as they were before this step zeroed them.
        done : bool
            True once `max_steps` steps have been played.
        info : dict
            Always empty.

        Raises
        ------
        ValueError
            If the episode has already used all `max_steps` steps.
        """
        # `>=` (not `>`): once count reaches max_steps the episode is done,
        # so any further step is one too many.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns
        x, y = move
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.x, self.y = x, y
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return its initial state.

        Clears the step count, the done flag and the move history, draws
        a start position with `random_move` and new `E`/`Q` grids with
        `plain_game`. Both calls hand back the prng, which is stored
        again on the env.

        Returns
        -------
        tuple
            `(y, x, E, Q)` -- the same layout as the state from `step`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new (prng passed by keyword, matching ShameGame1.reset)
        self.x, self.y, self.prng = random_move(self.maze, prng=self.prng)
        self.E, self.Q, self.prng = plain_game(
            self.n, sigma=self.sigma, maze=self.maze, prng=self.prng
        )

        return (self.y, self.x, self.E, self.Q)

- Example of random play

In [None]:
# Build a maze, then wrap it in a PlainGame1 env created with the same size
maze, _ = create_maze(8, k=5, t=10)
env = PlainGame1(8, maze=maze)

done = False
# reset() returns (y, x, E, Q): y comes before x, and E before Q
y, x, E, Q = env.reset()

# Positions visited, as (x, y), starting with the reset position
moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # No unplayed move is left from here; randint(0, 0) would raise
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

0 1 (0.4904820260169564, 0.1790301673150056) False
2 1 (0.6813804830319288, 0.23451320249712546) False
1 1 (0.34753180591072114, 0.2123929456479355) False
1 7 (0.2870727542562359, 0.22440067960401763) False
1 0 (0.3171674951801019, 0.12874958515768187) False
1 5 (0.3055031694766594, 0.12858685198940464) False
1 6 (0.27830288777421114, 0.23401335053224365) False
3 6 (0.2330983915832105, 0.6420775691246181) False
6 6 (0.5911046435563989, 0.14937250624670195) False
5 6 (0.3198211542975239, 0.13073026920053332) True


<IPython.core.display.Javascript object>