In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf
import numpy as np

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts

from collections import deque

In [2]:
# Side length of the square board, in cells. SnakeEnv reads this for the grid
# dimensions, the observation spec shape, and the wall checks.
BOARD_SIZE = 15

Test code for Snake Environment

In [253]:
class SnakeEnv(py_environment.PyEnvironment):
    """Single-snake grid environment exposed through the TF-Agents PyEnvironment API.

    Board:
        A BOARD_SIZE x BOARD_SIZE grid kept as a list of rows and indexed as
        ``_state[y][x]``. Cell values are 0 (empty), 1 (body segment) and
        -1 (head). The snake is always three segments long; nothing in this
        class makes it grow.

    Actions (scalar int32 in [0, 3]):
        0: x + 1, 1: y + 1, 2: x - 1, 3: y - 1.
        Because ``y`` is the row index, ``y + 1`` moves towards the bottom of
        the printed observation array.

    Episode end:
        The episode terminates when the requested move would take the head off
        the board or into a cell holding a body segment (value 1). There is no
        step cap, so an agent that never collides produces an endless episode.

    Rewards:
        1.0 for every move that succeeds, 0 on the terminating step.
    """

    # Maps each valid action id to the (dx, dy) displacement of the head.
    _MOVES = {0: (1, 0), 1: (0, 1), 2: (-1, 0), 3: (0, -1)}

    def reset_board(self):
        """Create an empty board and place a fresh three-segment snake on it.

        The head lands on a random cell with both coordinates drawn from
        ``np.random.randint(2, BOARD_SIZE - 2)``; the two body segments sit
        directly behind it at ``y + 1`` and ``y + 2``. Also clears the
        episode-ended flag and resets ``master_health`` to 100.
        """
        self._state = [[0] * BOARD_SIZE for _ in range(BOARD_SIZE)]

        # Draw x before y so a seeded RNG produces the same boards as before.
        head_x = np.random.randint(low=2, high=BOARD_SIZE - 2)
        head_y = np.random.randint(low=2, high=BOARD_SIZE - 2)

        # Index 0 of each deque is the head, the last index is the tail.
        self.path_X = deque([head_x, head_x, head_x])
        self.path_Y = deque([head_y, head_y + 1, head_y + 2])

        for index, (x, y) in enumerate(zip(self.path_X, self.path_Y)):
            self._state[y][x] = -1 if index == 0 else 1
        self._episode_ended = False

        # Set Health
        self.master_health = 100
        # Maybe store other snakes healths in an array?

    def __init__(self):
        """Declare the action/observation specs and build the first board."""
        # Let PyEnvironment set up its own bookkeeping before adding ours;
        # the original skipped this call.
        super(SnakeEnv, self).__init__()
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
        # Only the lower bound is declared here; no explicit maximum is given.
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(BOARD_SIZE, BOARD_SIZE), dtype=np.int32, minimum=-1,
            name='observation')
        self.reset_board()

    def action_spec(self):
        """Return the spec describing valid actions."""
        return self._action_spec

    def observation_spec(self):
        """Return the spec describing the board observation."""
        return self._observation_spec

    def _board_observation(self):
        """Return a fresh int32 array copy of the current board."""
        return np.array(self._state, dtype=np.int32)

    def _delta(self, action):
        """Return the (dx, dy) displacement for `action`, or None if it is not 0..3."""
        # `.item()` unwraps numpy scalars / 0-d arrays so they can be used as
        # dictionary keys; plain Python ints pass through unchanged.
        return self._MOVES.get(np.asarray(action).item())

    def _reset(self):
        """Rebuild the board and return the restart TimeStep for it."""
        self.reset_board()
        return ts.restart(self._board_observation())

    def move_assist(self, action):
        """Advance the snake one cell in the direction given by `action`.

        The new head cell is marked -1, the previous head becomes a body cell
        (1) and the tail cell is cleared (0). No wall or collision checks are
        done here; `_step` performs them before calling this method. An
        unrecognised action applies no displacement, as before.
        """
        dx, dy = self._delta(action) or (0, 0)
        self.path_X.appendleft(self.path_X[0] + dx)
        self.path_Y.appendleft(self.path_Y[0] + dy)
        # Set Head Value to -1
        self._state[self.path_Y[0]][self.path_X[0]] = -1
        # Set Neck to 1
        self._state[self.path_Y[1]][self.path_X[1]] = 1
        # Drop the tail segment and clear its cell.
        self._state[self.path_Y.pop()][self.path_X.pop()] = 0

    def _step(self, action):
        """Apply `action` and return the resulting TimeStep.

        Args:
            action: one of 0, 1, 2, 3 (see the class docstring).

        Returns:
            A restart TimeStep if the previous step ended the episode, a
            termination TimeStep (reward 0) if this move hits a wall or a
            body segment, otherwise a transition TimeStep (reward 1.0,
            discount 1.0).

        Raises:
            ValueError: if `action` is not 0, 1, 2 or 3.
        """
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action and
            # start a new episode.
            return self.reset()

        delta = self._delta(action)
        if delta is None:
            raise ValueError('`action` should be 0 or 1 or 2 or 3.')

        target_x = self.path_X[0] + delta[0]
        target_y = self.path_Y[0] + delta[1]

        # Check the wall first so the board lookup below never sees an
        # out-of-range (or negative, i.e. wrapping) index.
        on_board = 0 <= target_x < BOARD_SIZE and 0 <= target_y < BOARD_SIZE
        if not on_board or self._state[target_y][target_x] == 1:
            self._episode_ended = True
            return ts.termination(self._board_observation(), 0)

        self.move_assist(action)
        return ts.transition(
            self._board_observation(), reward=1.0, discount=1.0)
        
        

In [254]:
# Sanity check: build an environment and run it through the TF-Agents validator
# for 5 episodes. Presumably this raises if the produced time steps do not
# match the declared specs — TODO confirm against the TF-Agents docs.
environment = SnakeEnv()
utils.validate_py_environment(environment, episodes=5)

In [248]:
# Separate environment instance used for the manual reset/step experiments below.
env = SnakeEnv()

In [235]:
# Start an episode and keep the initial TimeStep for inspection.
time_step = env.reset()

In [236]:
# Display the initial TimeStep: step type, reward, discount and the board observation.
time_step

TimeStep(step_type=array(0, dtype=int32), reward=array(0., dtype=float32), discount=array(1., dtype=float32), observation=array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0

In [245]:
# Apply action 0 (x + 1 in SnakeEnv.move_assist) and display the returned TimeStep.
# NOTE(review): the saved output shows step_type 0, the same value as the reset
# output above, so this call appears to have returned a restart rather than a
# transition — presumably the previous episode had already ended. The execution
# counts are also out of order; re-run from a fresh kernel to confirm.
env.step(0)

TimeStep(step_type=array(0, dtype=int32), reward=array(0., dtype=float32), discount=array(1., dtype=float32), observation=array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0

In [88]:
# Scratch: remove the last element of `a` and keep it in `b`.
# NOTE(review): this fails with NameError on a fresh kernel because `a` is only
# assigned in a later cell — presumably it came from an earlier session.
# Define `a` before this cell or delete the cell.
b = a.pop(-1)

NameError: name 'a' is not defined

In [None]:
# Scratch: display the element popped from `a`.
b

In [None]:
# Scratch: display what is left in `a` after the pop.
a

In [None]:
# Scratch: reproduce how SnakeEnv.reset_board seeds path_X — a random value
# from np.random.randint(2, BOARD_SIZE-2), then the same value appended again.
path_X = deque()

path_X.append(np.random.randint(low=2, high=BOARD_SIZE-2))
path_X.append(path_X[0])

In [None]:
# Scratch: display the resulting deque.
path_X

In [None]:
# Scratch: BOARD_SIZE x BOARD_SIZE grid of zeros as nested lists, built the same
# way as SnakeEnv._state. Each row is a separate list object.
a = [([0]*BOARD_SIZE) for i in range(BOARD_SIZE)]

In [None]:
# Scratch: display the nested-list grid.
a

In [None]:
# Scratch: convert the nested lists to an int32 array, the same conversion
# SnakeEnv applies when it builds an observation.
np.array(a, dtype=np.int32)