In [None]:
# default_exp gym

# gym
> A `Gym` environment for teaching truth and consequences

In [None]:
# export
import numpy as np
import gym

from copy import deepcopy
from gym import spaces
from gym.utils import seeding
from itertools import cycle

from truthiness.game import create_maze
from truthiness.game import shame_game
from truthiness.game import plain_game
from truthiness.game import available_moves
from truthiness.game import random_move

# Gym is annoying these days...
import warnings
warnings.filterwarnings("ignore")

# the base
First we define a `Base` game class. It has methods that are common to all the games we will want to play.

In [None]:
# export
class Base(gym.Env):
    """Behaviour shared by the game environments in this module.

    `moves` reads `self.x`, `self.y`, `self.maze` and `self.move_history`;
    subclasses must set these (the games here do so in `__init__`/`reset`)
    before calling it.
    """

    def moves(self):
        """Return all available moves that have not been played yet.

        Candidates come from `available_moves(self.x, self.y, self.maze)`;
        any candidate already present in `self.move_history` is dropped.
        """
        candidates = available_moves(self.x, self.y, self.maze)
        return [a for a in candidates if a not in self.move_history]

    def render(self, mode='human', close=False):
        """Rendering is not implemented; this is a no-op."""
        pass

# the shame game env
The details of this game are described in the `boards` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ShameGame1(Base):
    """A one-sided game of learning and shame

    Parameters
    ----------
    maze
        The maze to play on. Required in practice despite the `None`
        default: `maze.shape[0]` is read to set the board size `n`.
    sigma, shame
        Forwarded unchanged to `shame_game` on every `reset`.
    max_steps
        Number of steps after which the episode is marked done.
    """
    def __init__(self, maze=None, sigma=0.5, shame=0.5, max_steps=10):
        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma
        self.shame = shame

        self.reset()

    def step(self, move):
        """Play `move` (an `(x, y)` pair) and return `(state, reward, done, info)`.

        `reward` is the pair `(E[x, y], Q[x, y])` read before both entries
        are zeroed. `state` is `(y, x, E, Q)`, where `E` and `Q` are the
        env's own boards (not copies). `info` is always an empty dict.

        Raises
        ------
        ValueError
            If called after `max_steps` steps have already been played.
        """
        # `>=` (not `>`): otherwise one extra step slips through after the
        # episode has been marked done.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # Bind x, y locally; the boards must be indexed by the move just
        # played, not by whatever `x`/`y` happen to exist as globals.
        x, y = move
        self.x, self.y = x, y
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return the initial state `(y, x, E, Q)`.

        Clears the step counter, done flag and move history, draws a start
        position with `random_move`, and generates fresh `E`/`Q` boards
        with `shame_game`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = shame_game(
            self.n, sigma=self.sigma, shame=self.shame, maze=self.maze)

        return (self.y, self.x, self.E, self.Q)

- Example of random play

In [None]:
# Build a maze and wrap it in the shame game environment
maze = create_maze(8, k=5, t=10)
env = ShameGame1(maze=maze)

In [None]:
done = False
# reset() returns the state as (y, x, E, Q): y comes before x
y, x, E, Q = env.reset()

# Record the start position, then every move played
moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # Nowhere left to go; np.random.randint(0, 0) would raise
        break

    # Pick one of the remaining moves uniformly at random
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

1 3 (0.16023463499991197, 0.08011731749995599) False
0 3 (0.4676921743912892, 0.2338460871956446) False
0 5 (0.23516501732895362, 0.11758250866447681) False
4 5 (0.4613412681151665, 0.23067063405758326) False
2 5 (0.33482306926727334, 0.16741153463363667) False
3 5 (0.22466932311115803, 0.11233466155557902) False
1 5 (0.604971504844737, 0.3024857524223685) False
1 4 (0.4868102183235079, 0.24340510916175395) False
1 7 (0.8244807104702709, 0.41224035523513547) False
0 7 (0.3923635388931987, 0.19618176944659935) True


In [None]:
# Show the recorded positions: the start followed by each move played
moves

[(0, 0), (0, 1), (0, 0)]

# the plain game env
As above, the details of this game are described in the `boards` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class PlainGame1(Base):
    """A one-sided game of learning and consequences

    Parameters
    ----------
    maze
        The maze to play on. Required in practice despite the `None`
        default: `maze.shape[0]` is read to set the board size `n`.
    sigma
        Forwarded unchanged to `plain_game` on every `reset`.
    max_steps
        Number of steps after which the episode is marked done.
    """
    def __init__(self, maze=None, sigma=0.5, max_steps=10):
        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma

        self.reset()

    def step(self, move):
        """Play `move` (an `(x, y)` pair) and return `(state, reward, done, info)`.

        `reward` is the pair `(E[x, y], Q[x, y])` read before both entries
        are zeroed. `state` is `(y, x, E, Q)`, where `E` and `Q` are the
        env's own boards (not copies). `info` is always an empty dict.

        Raises
        ------
        ValueError
            If called after `max_steps` steps have already been played.
        """
        # `>=` (not `>`): otherwise one extra step slips through after the
        # episode has been marked done.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # Bind x, y locally; the boards must be indexed by the move just
        # played, not by whatever `x`/`y` happen to exist as globals.
        x, y = move
        self.x, self.y = x, y
        self.move_history.append(move)

        # Values are only found once
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        self.E[x, y] = 0
        self.Q[x, y] = 0
        state = (self.y, self.x, self.E, self.Q)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}

    def reset(self):
        """Start a new episode and return the initial state `(y, x, E, Q)`.

        Clears the step counter, done flag and move history, draws a start
        position with `random_move`, and generates fresh `E`/`Q` boards
        with `plain_game`.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = plain_game(self.n, sigma=self.sigma, maze=self.maze)

        return (self.y, self.x, self.E, self.Q)

- Example of random play

In [None]:
# Build a maze and wrap it in the plain game environment
maze = create_maze(8, k=5, t=10)
env = PlainGame1(maze=maze)

done = False
# reset() returns the state as (y, x, E, Q): y comes before x
y, x, E, Q = env.reset()

# Record the start position, then every move played
moves = [(x, y)]
while not done:
    available = env.moves()
    if not available:
        # Nowhere left to go; np.random.randint(0, 0) would raise
        break

    # Pick one of the remaining moves uniformly at random
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(x, y, reward, done)

4 6 (0.8173955708928679, 0.3678441608119558) False
3 6 (0.2120080399911605, 0.3317761013275871) False
3 4 (0.16630064549786405, 0.3271338472625862) False
1 4 (0.4227259498212686, 0.2947552064684267) False
1 6 (0.14824614442997494, 0.1950148362587161) False
1 3 (0.1421543836769066, 0.8314389725646877) False
1 7 (0.17567547348478108, 0.40232919491499175) False
2 7 (0.22003040387953465, 0.7762403705385325) False
3 7 (0.6779770834936715, 0.1637790976810524) False
6 7 (0.18727384631048136, 0.16384510426435447) True
