In [None]:
# default_exp gym

# gym
> A `Gym` environment for teaching truth and consequences

In [None]:
# export
import numpy as np
import gym

from copy import deepcopy
from gym import spaces
from gym.utils import seeding
from itertools import cycle

from truthiness.game import create_maze
from truthiness.game import shame_game
from truthiness.game import plain_game
from truthiness.game import available_moves
from truthiness.game import random_move

# Gym is annoying these days...
import warnings
warnings.filterwarnings("ignore")

# the shame game env
The details of this game are described in the `boards` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class ShameGame1(gym.Env):
    """A one-sided game of learning and shame.

    Wraps the pair of value grids produced by `shame_game` in a `gym.Env`.
    Each step moves to a cell, pays out the `(E, Q)` values stored there,
    and zeroes them so that a cell only pays once per episode.

    Parameters
    ----------
    maze : array with a `.shape`
        The maze to play on (e.g. from `create_maze`). Required, despite
        the `None` default kept for signature compatibility.
    sigma : float
        Forwarded to `shame_game`.
    shame : float
        Forwarded to `shame_game`.
    max_steps : int
        Number of steps after which the episode is done.

    Raises
    ------
    ValueError
        If `maze` is None.
    """
    def __init__(self, maze=None, sigma=0.5, shame=0.5, max_steps=10):
        # Fail with a clear message instead of an AttributeError on
        # `None.shape` below.
        if maze is None:
            raise ValueError("maze is required; build one with create_maze()")

        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma
        self.shame = shame

        # Draw the first board so the env is usable right away
        self.reset()


    def step(self, move):
        """Play `move`, an `(x, y)` pair, and collect the values found there.

        Returns
        -------
        (state, reward, done, info)
            `state` is `move` itself, `reward` is the pair
            `(E[x, y], Q[x, y])` as it was before this step, `done` turns
            True once `max_steps` steps have been taken, and `info` is an
            empty dict.

        Raises
        ------
        ValueError
            If the episode has already used up `max_steps`; call `reset()`.
        """
        # `>=`, not `>`: once count == max_steps the episode is done, and
        # a strict comparison would let one extra move through.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # The reward is snapshotted before the cells are zeroed below.
        x, y = move
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        state = move
        # NOTE(review): self.x / self.y keep the reset() position and are
        # not updated here -- confirm whether they should track `move`.

        # Values are only found once
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.move_history.append(move)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}


    def reset(self):
        """Start a new episode and return the starting `(x, y)`.

        Clears the step counter, the done flag and the move history, picks
        a start with `random_move`, and draws fresh `E` and `Q` grids from
        `shame_game`. The start is not added to `move_history`, so
        `moves()` may still offer it later.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = shame_game(
            self.n, sigma=self.sigma, shame=self.shame, maze=self.maze)

        return self.x, self.y


    def moves(self, x, y):
        """Return the moves available from `(x, y)` that are still unplayed.

        Takes the candidates from `available_moves` and drops every one
        already present in `move_history`. The result may be empty.
        """
        candidates = available_moves(x, y, self.maze)
        return [a for a in candidates if a not in self.move_history]


    def render(self, mode='human', close=False):
        """No-op; this environment has no visual rendering."""
        pass

- Example of random play

In [None]:
# Build a maze and wrap it in the shame-game env, keeping the env's
# default sigma, shame and max_steps.
maze = create_maze(8, k=5, t=10)
env = ShameGame1(maze=maze)

In [None]:
# Random rollout: from the current cell, pick uniformly among the moves
# the env still offers, until the env reports the episode is done.
done = False
x, y = env.reset()

# Path taken, starting cell first
moves = [(x, y)]
while not done:
    available = env.moves(x, y)
    # Every candidate move may already have been played; randint(0, 0)
    # would raise ValueError, so end the rollout early instead.
    if not available:
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(state, reward, done)

(0, 0) (0.20821054494816685, 0.10410527247408342) False
(2, 0) (0.12515856302413955, 0.06257928151206978) False
(2, 4) (0.10364199029348727, 0.05182099514674363) False
(4, 4) (0.4097303554202366, 0.2048651777101183) False
(0, 4) (0.18101699420430697, 0.09050849710215349) False
(0, 6) (0.11923470792047508, 0.05961735396023754) False
(0, 1) (0.5547232065516375, 0.27736160327581877) False
(0, 5) (0.39338525493690435, 0.19669262746845217) False
(1, 5) (0.24315680274740076, 0.12157840137370038) False
(2, 5) (0.2797129824166282, 0.1398564912083141) True


In [None]:
# The full path: the reset() start followed by every move played
moves

[(0, 1),
 (0, 0),
 (2, 0),
 (2, 4),
 (4, 4),
 (0, 4),
 (0, 6),
 (0, 1),
 (0, 5),
 (1, 5),
 (2, 5)]

# the plain game env
As above, the details of this game are described in the `boards` file. This module exists only to put the games described there into a [gym](https://github.com/openai/gym) environment, and then do some simple testing.

In [None]:
# export
class PlainGame1(gym.Env):
    """A one-sided game of learning and consequences.

    Wraps the pair of value grids produced by `plain_game` in a `gym.Env`.
    Each step moves to a cell, pays out the `(E, Q)` values stored there,
    and zeroes them so that a cell only pays once per episode.

    Parameters
    ----------
    maze : array with a `.shape`
        The maze to play on (e.g. from `create_maze`). Required, despite
        the `None` default kept for signature compatibility.
    sigma : float
        Forwarded to `plain_game`.
    max_steps : int
        Number of steps after which the episode is done.

    Raises
    ------
    ValueError
        If `maze` is None.
    """
    def __init__(self, maze=None, sigma=0.5, max_steps=10):
        # Fail with a clear message instead of an AttributeError on
        # `None.shape` below.
        if maze is None:
            raise ValueError("maze is required; build one with create_maze()")

        self.maze = maze
        self.n = self.maze.shape[0]
        self.max_steps = max_steps

        self.sigma = sigma

        # Draw the first board so the env is usable right away
        self.reset()


    def step(self, move):
        """Play `move`, an `(x, y)` pair, and collect the values found there.

        Returns
        -------
        (state, reward, done, info)
            `state` is `move` itself, `reward` is the pair
            `(E[x, y], Q[x, y])` as it was before this step, `done` turns
            True once `max_steps` steps have been taken, and `info` is an
            empty dict.

        Raises
        ------
        ValueError
            If the episode has already used up `max_steps`; call `reset()`.
        """
        # `>=`, not `>`: once count == max_steps the episode is done, and
        # a strict comparison would let one extra move through.
        if self.count >= self.max_steps:
            raise ValueError(f"env exceeded max_steps ({self.count})")

        # Shuffle state, and generate returns.
        # The reward is snapshotted before the cells are zeroed below.
        x, y = move
        reward = deepcopy((self.E[x, y], self.Q[x, y]))
        state = move
        # NOTE(review): self.x / self.y keep the reset() position and are
        # not updated here -- confirm whether they should track `move`.

        # Values are only found once
        self.E[x, y] = 0
        self.Q[x, y] = 0
        self.move_history.append(move)

        # Limit game length
        self.count += 1
        if self.count >= self.max_steps:
            self.done = True

        return state, reward, self.done, {}


    def reset(self):
        """Start a new episode and return the starting `(x, y)`.

        Clears the step counter, the done flag and the move history, picks
        a start with `random_move`, and draws fresh `E` and `Q` grids from
        `plain_game`. The start is not added to `move_history`, so
        `moves()` may still offer it later.
        """
        # reinit
        self.count = 0
        self.done = False
        self.move_history = []

        # Generate new
        self.x, self.y = random_move(self.maze)
        self.E, self.Q = plain_game(
            self.n, sigma=self.sigma, maze=self.maze)

        return self.x, self.y


    def moves(self, x, y):
        """Return the moves available from `(x, y)` that are still unplayed.

        Takes the candidates from `available_moves` and drops every one
        already present in `move_history`. The result may be empty.
        """
        candidates = available_moves(x, y, self.maze)
        return [a for a in candidates if a not in self.move_history]


    def render(self, mode='human', close=False):
        """No-op; this environment has no visual rendering."""
        pass

- Example of random play

In [None]:
# Build a maze and wrap it in the plain-game env, keeping the env's
# default sigma and max_steps.
maze = create_maze(8, k=5, t=10)
env = PlainGame1(maze=maze)

# Random rollout: from the current cell, pick uniformly among the moves
# the env still offers, until the env reports the episode is done.
done = False
x, y = env.reset()

# Path taken, starting cell first
moves = [(x, y)]
while not done:
    available = env.moves(x, y)
    # Every candidate move may already have been played; randint(0, 0)
    # would raise ValueError, so end the rollout early instead.
    if not available:
        break
    i = np.random.randint(0, len(available))
    x, y = available[i]
    state, reward, done, _ = env.step((x, y))
    moves.append((x, y))

    print(state, reward, done)

moves

(4, 0) (0.3182308878266204, 0.27814523634900623) False
(4, 1) (0.26030073020229155, 0.35724329256316883) False
(4, 2) (0.13738590496482622, 0.24692396138345404) False
(7, 2) (0.2874907183638171, 0.3915788684968101) False
(7, 1) (0.07802960391072564, 0.40689833661167113) False
(7, 3) (0.6091057523024535, 0.4552104051934325) False
(7, 7) (0.2464934929451858, 0.9046883158305468) False
(7, 5) (0.3447077572053019, 1.0) False
(0, 5) (0.5985683375897632, 0.6906901950730593) False
(0, 6) (0.44273400772512506, 0.3553407969506977) True


[(4, 2),
 (4, 0),
 (4, 1),
 (4, 2),
 (7, 2),
 (7, 1),
 (7, 3),
 (7, 7),
 (7, 5),
 (0, 5),
 (0, 6)]