In [33]:
import gym
from gym.spaces import Box, Discrete

import numpy as np
from collections import deque

In [118]:
class LightCycleEnv(gym.Env):
    """
    Single-agent light-cycle (snake-like) environment on a rectangular grid.

    The agent is a trail of ``trail_length`` cells that advances exactly one
    cell per step in its current direction; it can never stand still.
    Every cell is stored as a ``[row, col]`` pair. ``[0, 0]`` is the top-left
    corner, so "up" decreases the row and "down" increases it. The head is the
    last element of ``self.body`` and the tail is the first.

    action_space : 0=none, 1=left, 2=right, 3=up, 4=down
    observation_space (i.e. state space): the flattened ``[row, col]`` pairs of
    the body from tail to head, followed by the current direction
    (1=left, 2=right, 3=up, 4=down)

    Rewards
        -1   for a useless action: asking for the current direction or its
             exact opposite (the agent just keeps going straight)
        -100 for running into a wall or into its own trail (ends the episode)

    Note that the head is still advanced on the crashing step, so the terminal
    observation may contain a coordinate that lies outside ``observation_space``.
    """

    # Action codes; codes 1-4 double as the direction codes stored in the state.
    NONE, LEFT, RIGHT, UP, DOWN = range(5)

    USELESS_ACTION_PENALTY = 1
    CRASH_PENALTY = 100

    _VALID_ACTIONS = (NONE, LEFT, RIGHT, UP, DOWN)
    _HORIZONTAL = (LEFT, RIGHT)

    # direction -> (row delta, col delta)
    _DELTAS = {LEFT: (0, -1), RIGHT: (0, 1), UP: (-1, 0), DOWN: (1, 0)}

    # direction -> message reported in info['halt_state'] when that wall is hit
    _WALL_MESSAGES = {
        LEFT: 'ran into left wall',
        RIGHT: 'ran into right wall',
        UP: 'ran into top wall',
        DOWN: 'ran into bottom wall',
    }

    def __init__(self, grid_width=50, grid_height=50, trail_length=20) -> None:
        """
        Build the spaces and place the agent at its starting position.

        Parameters
        ----------
        grid_width : int
            Number of columns of the board.
        grid_height : int
            Number of rows of the board.
        trail_length : int
            Number of cells occupied by the agent. The agent starts laid out
            along one row from column 0, so it must fit in ``grid_width``.

        Raises
        ------
        ValueError
            If the grid is empty or the trail cannot fit on the starting row.
        """
        if grid_width < 1 or grid_height < 1:
            raise ValueError("grid_width and grid_height must both be at least 1")
        if not 1 <= trail_length <= grid_width:
            raise ValueError("trail_length must be between 1 and grid_width")

        self.grid_width = grid_width
        self.grid_height = grid_height
        self.trail_length = trail_length

        self.action_space = Discrete(5)

        # Box bounds are inclusive: rows live in [0, grid_height - 1], columns in
        # [0, grid_width - 1] and the trailing direction code in [1, 4].
        # (non-box region not supported at the moment)
        low = np.array([0] * (2 * trail_length) + [self.LEFT], dtype=np.int32)
        high = np.array(
            [grid_height - 1, grid_width - 1] * trail_length + [self.DOWN],
            dtype=np.int32,
        )
        # essentially a multi-dimensional discrete space
        self.observation_space = Box(low=low, high=high, dtype=np.int32)

        self._place_at_start()

    def _place_at_start(self):
        """Lay the trail along the middle row from column 0, heading right."""
        start_row = self.grid_height // 2
        self.body = deque([start_row, col] for col in range(self.trail_length))
        self.state = self._encode_state(self.RIGHT)

    def _encode_state(self, heading):
        """Return the int32 observation: flattened body cells plus the heading."""
        flat = [coord for cell in self.body for coord in cell]
        flat.append(heading)
        return np.array(flat, dtype=np.int32)

    def _hits_wall(self, heading, cell):
        """Tell whether `cell`, reached by moving along `heading`, is past that wall."""
        row, col = cell
        if heading == self.LEFT:
            return col < 0
        if heading == self.RIGHT:
            return col >= self.grid_width
        if heading == self.UP:
            return row < 0
        return row >= self.grid_height

    def reset(self):
        """
        Resets the playing board
        Should be called before starting the game

        Returns the initial observation.
        """
        self._place_at_start()
        return self.state

    def step(self, action):
        """
        Move the agent in the direction specified by action
        Punish the agent for hitting an invalid area (severe) or making an
        useless action (light, i.e. trying to go left when it's already going left)

        Parameters
        ----------
        action : int
            One of 0=none, 1=left, 2=right, 3=up, 4=down.

        Returns
        -------
        (state, reward, done, info)
            ``info['halt_state']`` explains why the episode ended when
            ``done`` is True.

        Raises
        ------
        ValueError
            If `action` is not one of the five valid codes.
        """
        if action not in self._VALID_ACTIONS:
            raise ValueError(f"invalid action {action!r}; expected 0, 1, 2, 3 or 4")
        action = int(action)

        reward = 0
        done = False
        info = {}

        heading = int(self.state[-1])  # current direction

        if action != self.NONE:
            same_axis = (action in self._HORIZONTAL) == (heading in self._HORIZONTAL)
            if same_axis:
                # Asking for the current direction or a 180-degree reversal
                # changes nothing: keep going straight, with a light penalty.
                reward -= self.USELESS_ACTION_PENALTY
            else:
                heading = action

        d_row, d_col = self._DELTAS[heading]
        head_row, head_col = self.body[-1]
        new_head = [head_row + d_row, head_col + d_col]

        if self._hits_wall(heading, new_head):
            reward -= self.CRASH_PENALTY
            done = True
            info['halt_state'] = self._WALL_MESSAGES[heading]
        elif new_head in self.body:
            # Checked before the tail is removed, so stepping onto the cell the
            # tail is about to vacate also counts as a collision.
            reward -= self.CRASH_PENALTY
            done = True
            info['halt_state'] = 'ran into self'

        # The trail always advances, even on the crashing step.
        self.body.popleft()
        self.body.append(new_head)

        self.state = self._encode_state(heading)
        return self.state, reward, done, info

    def render(self):
        """
        Print a visual representation of the board to standard output

        Snake cells are drawn as 'X' and open cells as 'O', one text line per
        grid row with a space after every cell.
        """
        rows = (''.join(f'{cell} ' for cell in row) for row in self.create_grid())
        print('\n'.join(rows))

    def create_grid(self):
        """
        Return a list of grid_height rows, each a list of grid_width symbols,
        with O's for open spaces and X's for snake spaces
        """
        occupied = {tuple(cell) for cell in self.body}

        return [
            ['X' if (r, c) in occupied else 'O' for c in range(self.grid_width)]
            for r in range(self.grid_height)
        ]


In [135]:
from IPython.display import clear_output
import time

# Animate a single episode driven by uniformly random actions on a small board.
delay = 0.33  # seconds to pause between rendered frames
env = LightCycleEnv(grid_width=10, grid_height=10, trail_length=5)
env.reset()  # reset() is documented as required before starting a game

env.render()
time.sleep(delay)

done = False

while not done:
    clear_output(wait=True)

    action = env.action_space.sample()
    # step() returns (state, reward, done, info): the second item is the
    # reward, so it must not be unpacked back into `action`.
    state, reward, done, info = env.step(action)

    if done:
        # Show the final trail and the reason the episode ended.
        print(env.body)
        print(info['halt_state'])
        break

    env.render()

    time.sleep(delay)

deque([[2, 3], [2, 4], [1, 4], [0, 4], [-1, 4]])
ran into top wall


In [165]:
# test speed on a large game

# perf_counter is the monotonic, high-resolution clock meant for timing intervals
start = time.perf_counter()

env = LightCycleEnv(grid_width=100, grid_height=100, trail_length=25)
env.reset()

for i in range(10000):
    action = env.action_space.sample()
    # step() returns (state, reward, done, info)
    state, reward, done, info = env.step(action)

    # Start a fresh episode after a crash so every timed step is a valid
    # in-bounds move rather than the agent wandering off the board.
    if done:
        env.reset()

print(time.perf_counter() - start, 'seconds')
print(f"Completed {i+1} steps")

0.24900412559509277 seconds
Completed 10000 steps
