In [None]:
# default_exp gym

# gym
> A `Gym` environment for teaching truth and consequences

In [None]:
# hide
from nbdev.showdoc import *

%load_ext nb_black
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.greedy=True

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [None]:
# export
import numpy as np
import gym

from copy import deepcopy
from gym import spaces
from gym.utils import seeding
from itertools import cycle

from truthiness.game import create_maze
from truthiness.game import shame_game
from truthiness.game import plain_game
from truthiness.game import available_moves
from truthiness.game import random_move

# Gym is annoying these days...
# NOTE: called with only the "ignore" action and no category/module filter,
# so this suppresses every Python warning, not just the ones gym emits.
import warnings

warnings.filterwarnings("ignore")

<IPython.core.display.Javascript object>

# the base
First we define a `Base` game class. It has methods that are common between all the games we will want to play. 

In [None]:
# export
class Base(gym.Env):
    """Behaviour shared by all the game environments in this module.

    `moves` reads `self.x`, `self.y`, `self.maze` and `self.move_history`,
    so subclasses must set those attributes before it is called.
    """

    def moves(self):
        """Return the available moves that have not been played yet"""
        # Everything reachable from the current position...
        candidates = available_moves(self.x, self.y, self.maze)

        # ...minus whatever is already recorded in the move history.
        return [move for move in candidates if move not in self.move_history]

    def render(self, mode="human", close=False):
        """Rendering is not implemented; calling this does nothing."""
        return None

<IPython.core.display.Javascript object>

# the shame game env
The details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ShameGame1(Base):
    """A one-sided game of learning and shame

    Puts the `shame_game` value grids (see the `game` module) behind a
    gym-style `reset`/`step` interface.

    Params
    ------
    maze : object with a `.shape` attribute
        Required despite the `None` default: `maze.shape[0]` is read in
        `__init__` to set the game size `n`.
    sigma, shame : float
        Forwarded unchanged to `shame_game` on every `reset`.
    max_steps : int
        Number of steps allowed per episode.
    """

    def __init__(self, maze=None, sigma=0.5, shame=0.5, max_steps=10):
        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma
        self.shame = shame

        self.reset()

    def step(self, move):
        """Play `move` and collect whatever values sit at that position.

        Params
        ------
        move : pair
            The `(x, y)` position to move to.

        Returns
        -------
        state : tuple
            `(y, x, E, Q)` -- note that `y` comes before `x`. `E` and `Q`
            are the environment's own grids, not copies.
        reward : tuple
            `(E[x, y], Q[x, y])` as they were before being zeroed.
        done : bool
            True once `max_steps` steps have been played.
        info : dict
            Always empty.

        Raises
        ------
        ValueError
            If the episode has already used up its `max_steps` steps.
        """
        # `>=` (not `>`): once `count` reaches `max_steps` the episode is
        # done and no further step may be taken until `reset`.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # Bind x, y locally; the grids below must be indexed by *this* move,
        # not by whatever `x`/`y` happen to exist at module level.
        x, y = move
        self.x, self.y = x, y
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return its initial state `(y, x, E, Q)`.

        Clears the step counter, the done flag and the move history, draws
        a starting position with `random_move`, and regenerates `E` and `Q`
        with `shame_game`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = shame_game(
            self.n, sigma=self.sigma, shame=self.shame, maze=self.maze
        )

        return (self.y, self.x, self.E, self.Q)

<IPython.core.display.Javascript object>

- Example of random play

In [None]:
# Build a maze and wrap it in the shame-game environment, leaving
# `sigma`, `shame` and `max_steps` at their defaults.
maze = create_maze(8, k=5, t=10)
env = ShameGame1(maze=maze)

<IPython.core.display.Javascript object>

In [None]:
# Play one episode, choosing at random among the moves not yet played.
done = False
# `reset` returns (y, x, E, Q) -- y comes first, and E before Q.
y, x, E, Q = env.reset()

moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # No unplayed move is left; `np.random.randint(0, 0)` would raise.
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

0 7 (0.47479773454981344, 0.23739886727490672) False
0 3 (0.3604267742772292, 0.1802133871386146) False
0 4 (0.2141592271611635, 0.10707961358058175) False
3 4 (0.6048715810850134, 0.3024357905425067) False
3 5 (0.8339116064271493, 0.41695580321357467) False
2 5 (0.23625672133265216, 0.11812836066632608) False
0 5 (0.3393362392769705, 0.16966811963848524) False
0 2 (0.18533491730044027, 0.09266745865022014) False
0 0 (0.8269993080846266, 0.4134996540423133) False
0 1 (0.531249209506189, 0.2656246047530945) True


<IPython.core.display.Javascript object>

In [None]:
# Positions collected during the episode above; the first entry is built
# from the values returned by `env.reset()`.
moves

[(5, 0),
 (0, 7),
 (0, 3),
 (0, 4),
 (3, 4),
 (3, 5),
 (2, 5),
 (0, 5),
 (0, 2),
 (0, 0),
 (0, 1)]

<IPython.core.display.Javascript object>

# the plain game env
As above, the details of this game are described in the `game` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class PlainGame1(Base):
    """A one-sided game of learning and consequences

    Puts the `plain_game` value grids (see the `game` module) behind a
    gym-style `reset`/`step` interface.

    Params
    ------
    maze : object with a `.shape` attribute
        Required despite the `None` default: `maze.shape[0]` is read in
        `__init__` to set the game size `n`.
    sigma : float
        Forwarded unchanged to `plain_game` on every `reset`.
    max_steps : int
        Number of steps allowed per episode.
    """

    def __init__(self, maze=None, sigma=0.5, max_steps=10):
        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma

        self.reset()

    def step(self, move):
        """Play `move` and collect whatever values sit at that position.

        Params
        ------
        move : pair
            The `(x, y)` position to move to.

        Returns
        -------
        state : tuple
            `(y, x, E, Q)` -- note that `y` comes before `x`. `E` and `Q`
            are the environment's own grids, not copies.
        reward : tuple
            `(E[x, y], Q[x, y])` as they were before being zeroed.
        done : bool
            True once `max_steps` steps have been played.
        info : dict
            Always empty.

        Raises
        ------
        ValueError
            If the episode has already used up its `max_steps` steps.
        """
        # `>=` (not `>`): once `count` reaches `max_steps` the episode is
        # done and no further step may be taken until `reset`.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # Bind x, y locally; the grids below must be indexed by *this* move,
        # not by whatever `x`/`y` happen to exist at module level.
        x, y = move
        self.x, self.y = x, y
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return its initial state `(y, x, E, Q)`.

        Clears the step counter, the done flag and the move history, draws
        a starting position with `random_move`, and regenerates `E` and `Q`
        with `plain_game`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = plain_game(self.n, sigma=self.sigma, maze=self.maze)

        return (self.y, self.x, self.E, self.Q)

<IPython.core.display.Javascript object>

- Example of random play

In [None]:
# Build a maze and wrap it in the plain-game environment, leaving
# `sigma` and `max_steps` at their defaults.
maze = create_maze(8, k=5, t=10)
env = PlainGame1(maze=maze)

# Play one episode, choosing at random among the moves not yet played.
done = False
# `reset` returns (y, x, E, Q) -- y comes first, and E before Q.
y, x, E, Q = env.reset()

moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # No unplayed move is left; `np.random.randint(0, 0)` would raise.
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

7 6 (0.2044922378386215, 0.7514436832784114) False
3 6 (0.3434613007533754, 0.3611194173299864) False
2 6 (0.41404845644717536, 0.12028641351746322) False
5 6 (0.2686705493572349, 0.48999401540627197) False
5 5 (0.3057108842499654, 0.5409764954888263) False
4 5 (0.32277690474170456, 0.3113023415552438) False
2 5 (0.4300136744324396, 0.4583455524898935) False
0 5 (0.26709108677483373, 0.4174776749517855) False
0 6 (0.3553945296909845, 0.9510132814425147) False
0 7 (0.2137223431582466, 0.2962399509947084) True


<IPython.core.display.Javascript object>