In [8]:
import numpy as np

In [9]:
import gym
from gym import spaces, logger

In [10]:
# Only report errors from gym; warnings and below are silenced.
logger.set_level(logger.ERROR)

Agent Actions: <br>

0. Move Left
1. Move Right
2. Move Up
3. Move Down
4. Eat Fish

Environment State (one flat vector, concatenated in this order):

0. Grid (0-Ground, 1-Water, 2-Fishing Area, 3-Agents)
1. Fish Population
2. Agent Health

In [11]:
class FishPondEnv(gym.Env):
    """Grid world in which several agents live next to a fish pond.

    The state is a single flat vector made of, in order:

    * ``grid_size * grid_size`` grid cells in row-major order, coded as
      0 = ground, 1 = water, 2 = fishing area, 3 = agent;
    * one entry holding the fish population;
    * ``num_agents`` entries holding each agent's health.

    Parameters
    ----------
    params : dict
        Configuration. Keys read by this class: ``num_agents``,
        ``grid_size``, ``pond_size_ratio``, ``fishing_zone_size``,
        ``fish_count_initial``, ``fish_count_max`` and ``health_max``.
    """

    def __init__(self, params):
        super(FishPondEnv, self).__init__()
        self.params = params
        self.num_agents = params['num_agents']
        self.grid_size = params['grid_size']
        # np.int was removed in NumPy 1.24; the builtin int truncates the same way.
        self.pond_size = int(self.grid_size * self.params['pond_size_ratio'])
        self.grid_area = self.grid_size * self.grid_size
        # The pond occupies the first `pond_size` full rows of the grid.
        self.pond_area = self.grid_size * self.pond_size
        self.action_space = spaces.Discrete(5)

        # Bounds are built directly as float32 so Box does not have to lower
        # their precision. Grid cells go up to 3 because agents are drawn on
        # the grid with the value 3.
        state_length = self.grid_area + 1 + self.num_agents
        state_space_low = np.zeros(state_length, dtype=np.float32)
        state_space_high = np.array(
            self.grid_area * [3]
            + [params['fish_count_max']]
            + self.num_agents * [params['health_max']],
            dtype=np.float32,
        )
        # NOTE(review): get_initial_state() returns an integer array while the
        # space is declared float32 -- confirm which dtype is intended.
        self.state_space = spaces.Box(state_space_low, state_space_high, dtype=np.float32)
        # Gym looks for `observation_space`; `state_space` is kept for existing users.
        self.observation_space = self.state_space
        self.state = None
        self.steps_beyond_done = None

    def get_initial_state(self):
        """Build a fresh state vector with randomly placed agents.

        Returns
        -------
        numpy.ndarray
            Flat integer array: grid cells, then the initial fish count, then
            ``health_max`` once per agent.

        Raises
        ------
        ValueError
            Raised by ``np.random.choice`` when there are fewer non-pond
            cells than agents.
        """
        grid = np.zeros(self.grid_area, dtype=int)
        grid[:self.pond_area] = 1

        # Widen the fishing zone by one cell when its parity differs from the
        # grid's, so that it can be centred exactly in the row.
        zone_correction = int((self.params['fishing_zone_size'] % 2) != (self.grid_size % 2))
        zone_size = self.params['fishing_zone_size'] + zone_correction
        # The zone sits in the last pond row. After the parity correction the
        # margin is always even, so integer division is exact.
        zone_index = (self.pond_size - 1) * self.grid_size + (self.grid_size - zone_size) // 2
        grid[zone_index:zone_index + zone_size] = 2

        # Sample without replacement: the default (with replacement) can put
        # two agents on the same cell, leaving fewer than num_agents markers.
        land_cells = np.arange(self.pond_area, self.grid_area)
        agent_location = np.random.choice(land_cells, size=self.num_agents, replace=False)
        grid[agent_location] = 3

        fish_count = [self.params['fish_count_initial']]
        agent_health = self.num_agents * [self.params['health_max']]
        state = np.append(grid, fish_count + agent_health)
        return state

    def reset(self):
        """Start a new episode and return its initial state vector."""
        self.state = self.get_initial_state()
        self.steps_beyond_done = None
        return self.state

    def step_agent(self, action, agent_index):
        """Apply one agent's action (not implemented yet).

        Parameters
        ----------
        action : int
            One of the five discrete actions in ``action_space``.
        agent_index : int
            Index of the acting agent.

        Raises
        ------
        NotImplementedError
            Always. The transition and reward rules are still to be written;
            previously this method failed with a NameError on the undefined
            names ``state`` and ``reward``.
        """
        # TODO: implement the transition and return [state, reward].
        raise NotImplementedError(
            'FishPondEnv.step_agent is not implemented yet '
            '(action={!r}, agent_index={!r})'.format(action, agent_index)
        )

    def step(self, actions): #Discuss about synchronization in real time before each takes action
        """Apply one action per agent, in agent-index order.

        Parameters
        ----------
        actions : sequence
            Assumed to hold one action per agent, indexed by agent index --
            TODO confirm once ``step_agent`` is implemented.

        Returns
        -------
        int
            Currently always 0. TODO: return Gym's usual
            ``(observation, reward, done, info)`` once transitions exist.
        """
        for agent_index in range(self.num_agents):
            self.step_agent(actions[agent_index], agent_index)
        return 0

In [16]:
# Configuration dictionary passed to FishPondEnv.
params = dict(
    num_agents=3,
    health_max=100,
    grid_size=10,             # the grid is grid_size x grid_size cells
    pond_size_ratio=0.25,     # pond rows = int(grid_size * pond_size_ratio)
    fishing_zone_size=4,      # Auto-symmetry: widened by 1 if its parity differs from grid_size
    fish_count_initial=10,
    fish_count_max=50,
    hunger_per_step=-5,       # presumably health change per step; not read by the env yet
    nutrition_per_fish=10,    # presumably health gained per fish eaten; not read by the env yet
)

In [17]:
# Build the environment from the configuration dictionary.
env = FishPondEnv(params)

In [18]:
# Preview a fresh episode's map: the first grid_area entries of the state
# vector are the grid cells, reshaped here into a square for display.
grid_size = params['grid_size']
grid_area = grid_size ** 2
initial_state = env.reset()
grid = initial_state[:grid_area].reshape((grid_size, grid_size))
grid

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 2, 2, 2, 2, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 3, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 3, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [19]:
# Full flat state vector: grid cells first, then the fish count, then one health value per agent.
env.reset()

array([  1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         2,   2,   2,   2,   1,   1,   1,   0,   0,   3,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   3,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,  10, 100, 100, 100])