In this tutorial, we have a <span style="color:lightblue;">player blob (blue)</span> which aims to navigate its way as quickly as possible to the <span style="color:lightgreen;">food blob (green)</span>, while avoiding the <span style="color:red;">enemy blob (red)</span>.

In [5]:
# Import the libraries
import pickle
import random
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np   # alias actually used by the code below (np.random...)
import numpy as npt  # original alias, kept in case other cells rely on it
from matplotlib import style
from PIL import Image

In [4]:
# Select matplotlib's "ggplot" style sheet for the plots drawn by this notebook
style.use("ggplot")

In [7]:
# Hyperparameters

# -- Environment / training schedule --
SIZE = 10             # side length of the square grid (10x10 environment)
EPISODES = 25_000
SHOW_EVERY = 3_000    # presumably a display/report interval in episodes; usage not shown here

# -- Rewards and penalties --
FOOD_REWARD = 25
ENEMY_PENALTY = 300
MOVE_PENALTY = 1

# -- Exploration --
epsilon = 0.9
EPS_DECAY = 0.9998    # epsilon decay

# -- Q-learning update --
LEARNING_RATE = 0.1
DISCOUNT = 0.95       # gamma

In [8]:
# Starting point for the Q table. None is the value used here; the code that
# consumes this variable is not part of this section.
start_q_table = None    # May instead be set to a filename

In [9]:
# Integer tag identifying each kind of blob
PLAYER_N, FOOD_N, ENEMY_N = 1, 2, 3

# Colour lookup keyed by blob tag.
# NOTE(review): the tuples look like BGR order (OpenCV convention), which would
# give blue / green / red as described in the intro -- confirm against the
# rendering code.
d = {
    PLAYER_N: (255, 175, 0),
    FOOD_N: (0, 255, 0),
    ENEMY_N: (0, 0, 255),
}

In [None]:
class Blob:
    """A point-like entity living on the SIZE x SIZE grid.

    The position is stored in ``self.x`` / ``self.y`` and is always kept
    inside ``[0, SIZE - 1]`` on both axes by :meth:`move`.
    """

    # The constructor function
    def __init__(self):
        """Place the blob on a random cell of the grid."""
        # X axis position
        self.x = np.random.randint(0, SIZE)
        # Y axis position
        self.y = np.random.randint(0, SIZE)

    # Method for printing the blob's location
    def __str__(self):
        """Return the position formatted as ``"x, y"``."""
        return f"{self.x}, {self.y}"

    # Subtraction method
    def __sub__(self, other):
        """Return the offset ``(dx, dy)`` from ``other`` to this blob."""
        return (self.x - other.x, self.y - other.y)

    # Action method
    def action(self, choice):
        """Apply one of the four diagonal moves.

        ``choice`` 0..3 maps to (+1, +1), (-1, -1), (-1, +1), (+1, -1).
        Any other value is ignored and the blob stays where it is.
        """
        if choice == 0:
            self.move(x=1, y=1)
        elif choice == 1:
            self.move(x=-1, y=-1)
        elif choice == 2:
            self.move(x=-1, y=1)
        elif choice == 3:
            self.move(x=1, y=-1)

    def move(self, x=False, y=False):
        """Shift the blob by ``(x, y)`` and clamp it to the grid.

        An axis left at its default (``False``, or ``None``) takes a random
        step drawn from {-1, 0, 1}. An explicit ``0`` means "do not move on
        this axis"; it is no longer mistaken for a missing value.
        """
        # Draw for x before y so the random sequence matches the original order.
        self.x += self._step(x)
        self.y += self._step(y)

        # If we are out of bounds, fix!
        self.x = min(max(self.x, 0), SIZE - 1)
        self.y = min(max(self.y, 0), SIZE - 1)

    @staticmethod
    def _step(delta):
        """Return ``delta``, or a random step in {-1, 0, 1} if none was given."""
        # Identity test instead of truthiness: 0 is a legitimate step size.
        if delta is False or delta is None:
            return np.random.randint(-1, 2)
        return delta

In [None]:
# The environment
class Environment:
    """Toy environment with a fixed step budget and random rewards.

    The environment keeps no state other than the number of steps the agent
    may still take.
    """

    # Initialization function
    def __init__(self, steps_left=10):
        """
        Function for initializing the internal state of the environment. In here, we assign
        a counter that limits the number of time steps the agent is allowed to take to
        interact with the environment.

        ``steps_left`` is the step budget for the episode (10 by default).
        """
        self.steps_left = steps_left

    # Get the observation
    def get_observation(self):
        """
        Function for getting the current environment's observation. In here, we return the
        observation vector of zero since the environment has no internal state.
        """
        return [0.0, 0.0, 0.0]

    # Get the action
    def get_actions(self):
        """
        Function for querying the set of actions the agent can execute.
        """
        return [0, 1]

    # Signal the end of episode
    def is_done(self):
        """
        Function that signals the end of the episode to the agent.
        """
        # `<=` rather than `==` so a zero or negative budget also terminates.
        return self.steps_left <= 0

    def action(self, action):
        """
        Function for action which does the following two things:
            - Handles the agent's action
            - Returns the reward for this action.

        In here, the reward is random and the action is discarded. Additionally, we update the
        count of steps and refuse to continue episodes which are over.

        Raises RuntimeError("Game is over") when called after the episode has ended.
        """
        # If in terminal state, then the game is finished
        if self.is_done():
            raise RuntimeError("Game is over")

        # Decreasing the time step by 1
        self.steps_left -= 1

        # Return a random reward in [0.0, 1.0)
        return random.random()