In [1]:
import numpy as np

In [2]:
import gym
from gym import spaces, logger

# Description

A multi-agent setup in which a closed environment contains a fish pond and a number of agents. The agents are allowed to consume the fish in the pond. The reproduction rate of the fish is directly proportional to the remaining population ($R \propto N$).

Agent Actions: <br>

0. Move Left
1. Move Right
2. Move Up
3. Move Down
4. Eat Fish

Environment State:

0. Agent Position (X)
1. Agent Position (Y)
2. Fish Population
3. Agent Health

Reward:

The reward is +1 for every time-step the agent survives.

Episode Termination: 

1. All the Agents Die
2. Fish Population becomes Zero

# Implementation

In [3]:
class FishPondEnv(gym.Env):
    """Fish-pond survival environment (classic 4-tuple Gym API).

    The state is the vector ``[x, y, fish_population, health]``. Note that
    this implementation tracks a single agent's position and health.

    Actions (``spaces.Discrete(5)``):
        0. Move Left   (x - 1)
        1. Move Right  (x + 1)
        2. Move Up     (y + 1)
        3. Move Down   (y - 1)
        4. Eat Fish    (health + 5, fish_population - 1)

    Each step the fish population grows by 10% of its current value and the
    agent loses 1 health. The reward is +1 per step until the episode
    terminates, which happens when health drops to 0 or the fish population
    falls below 1.
    """
    metadata = {'render.modes': ['human']}

    # Action codes; these are exactly the values Discrete(5) can produce.
    MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN, EAT_FISH = range(5)

    def __init__(self):
        """Define the action/observation spaces; call reset() before step()."""
        super(FishPondEnv, self).__init__()
        self.size = 20
        self.action_space = spaces.Discrete(5)
        # Build the bounds directly as float32 so they match the Box dtype.
        low = np.array([0, 0, 0, 0], dtype=np.float32)
        high = np.array(
            [self.size, int(self.size * 0.75), np.iinfo(int).max, 100],
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(low, high, dtype=np.float32)
        self.state = None
        self.steps_beyond_done = None

    @staticmethod
    def check_termination(health, fish_population):
        """Return True when the agent is dead or the pond is (effectively) empty."""
        agent_dead = health <= 0
        pond_empty = fish_population < 1
        return bool(agent_dead or pond_empty)

    def step(self, action):
        """Advance the environment by one time-step.

        Args:
            action: integer in ``[0, 4]``; see the class docstring.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is the new float32 state vector, ``reward`` is a float, ``done``
            is a bool and ``info`` is an (empty) dict.

        Raises:
            RuntimeError: if called before ``reset()``.
            ValueError: if ``action`` is not contained in ``action_space``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")
        if not self.action_space.contains(action):
            raise ValueError("%r (%s) invalid" % (action, type(action)))

        dist = 1  # Step size of the agent
        hunger = 1  # Depletion of health per time-step
        nutrition = 5  # Improvement in health by consumption of one fish
        growth_rate = 0.1  # Fish regeneration per time-step, proportional to population

        # Work on plain Python floats to avoid mixed-dtype arithmetic.
        x, y, fish_population, health = (float(v) for v in self.state)

        if action == self.MOVE_LEFT:
            x -= dist
        elif action == self.MOVE_RIGHT:
            x += dist
        elif action == self.MOVE_UP:
            y += dist
        elif action == self.MOVE_DOWN:
            y -= dist

        fish_population += growth_rate * fish_population
        health -= hunger
        # Eating is only possible while at least one whole fish is left.
        if action == self.EAT_FISH and fish_population >= 1:
            health += nutrition
            fish_population -= 1

        # Keep the observation inside the declared Box: the agent cannot walk
        # out of the grid and health cannot exceed its maximum.
        self.state = np.clip(
            np.array([x, y, fish_population, health], dtype=np.float32),
            self.observation_space.low,
            self.observation_space.high,
        )
        done = self.check_termination(self.state[3], self.state[2])

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # First terminal step: still rewarded.
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("step() called while environment has already returned, call reset() first")
            self.steps_beyond_done += 1
            reward = 0.0

        info = {}
        return self.state, reward, done, info

    def reset(self):
        """Restore the initial conditions and return the initial observation."""
        # Initial conditions: agent at the origin, 10 fish, full health.
        self.state = np.array([0, 0, 10, 100], dtype=np.float32)
        self.steps_beyond_done = None
        return self.state

    def render(self, mode='human', close=False):
        """Rendering is not implemented; always returns 0."""
        return 0