# Adversarial Environment Generation for Multi-agent Foraging
CS 289 Final Project

Kavya Kopparapu, Eric Lin, Lucy Liu

# Environment setup

Inspired by the following sources:

*   https://github.com/semitable/lb-foraging and OpenAI Gym
*   https://github.com/mgoadric/ants-mesa 



In [41]:
import numpy as np
import random
import copy

In [77]:
# Default configuration consumed by Environment.
#   coding_dict        -- integer codes stored in the grid for each cell type
#   N, M               -- grid extent along the first / second axis
#   pheromone          -- evaporation / diffusion / step settings
#   grid               -- food / blockade settings for generated grids
ENV_PARAMS = {
    'coding_dict': {
        'empty': 0,
        'agent': 1,
        'hive': 2,
        'blockade': 3,
        'food_start': 4,
    },
    'N': 20,
    'M': 20,
    'max_food': 6,
    'observation_radius': 5,
    'steps': 5000,
    'spawn_rate': 2,
    'pheromone': {
        'evaporation': 0.1,
        'diffusion': 0.1,
        'step': 0.2,
    },
    'grid': {
        'food': 40,
        'blockade': 20,
    },
}

class Environment:
    """Grid world in which agents leave a hive, pick up food and bring it back.

    The world is stored in four arrays of shape ``(N, M)``:

    * ``grid``       -- terrain codes: 0 empty, 2 hive, 3 blockade; any value
                        above 3 is food, where 4 means one unit is left and
                        each additional unit adds 1.
    * ``static``     -- 1 where at least one active agent stands, else 0.
    * ``agent_nums`` -- number of active agents standing on each cell.
    * ``dynamic``    -- pheromone level of each cell.

    Agents passed to ``step`` / ``run_episode`` must expose ``get_state()``
    returning ``(location, food)``, ``get_action()`` returning
    ``(movement, pheromone)``, and the attributes ``active``, ``location``,
    ``prev_location``, ``food``, ``id``, ``observation``, ``bfs_active`` and
    ``valid_movements``.
    """

    # Terrain codes stored in ``grid``.
    HIVE = 2
    BLOCKADE = 3
    FOOD_START = 4
    # Cell where agents are spawned and where carried food is delivered.
    HIVE_LOCATION = (0, 0)
    # Candidate moves: the four axis-aligned neighbours plus "stay".
    MOVES = ((0, -1), (0, 1), (1, 0), (-1, 0), (0, 0))

    def __init__(self, env_params, grid=None):
        """Build the environment.

        Args:
            env_params: dict with keys ``'N'``, ``'M'``,
                ``'observation_radius'``, ``'steps'``, ``'spawn_rate'`` and
                ``'pheromone'`` (itself a dict with key ``'evaporation'``).
            grid: optional terrain array. It is copied, so food picked up
                during an episode never alters the caller's array. When
                omitted, a default grid is built with the hive code at
                (0, 0) and a food pile of value 9 in the opposite corner.
        """
        self.cols = env_params['N']
        self.rows = env_params['M']
        self._clear_agent_layers()

        if grid is not None:
            self.grid = np.array(grid)      # np.array copies by default
        else:
            # NOTE(review): only (0, 0) is marked as hive here, whereas
            # reset() marks a 2x2 patch -- confirm which layout is intended.
            self.grid = self._default_grid(hive_size=1)

        # Parameters
        self.evaporation_rate = env_params['pheromone']['evaporation']
        self.observation_radius = env_params['observation_radius']
        self.total_food = 0
        self.time_step = 0
        self.total_steps = env_params['steps']
        self.spawn_rate = env_params['spawn_rate']

        # Ids of agents waiting to be (re)spawned, oldest first
        self.spawn_queue = []

    def _clear_agent_layers(self):
        """Zero the occupancy, agent-count and pheromone arrays."""
        shape = (self.cols, self.rows)
        self.static = np.zeros(shape, dtype=float)
        self.agent_nums = np.zeros(shape, dtype=float)
        self.dynamic = np.zeros(shape, dtype=float)

    def _default_grid(self, hive_size):
        """Return an empty grid with a hive patch at the origin and corner food."""
        grid = np.zeros((self.cols, self.rows), dtype=float)
        grid[:hive_size, :hive_size] = self.HIVE
        grid[self.cols - 1][self.rows - 1] = 9      # food pile in the far corner
        return grid

    def _in_bounds(self, x, y):
        """Return True when (x, y) lies inside the grid."""
        return 0 <= x < self.cols and 0 <= y < self.rows

    def _remove_agent_at(self, location):
        """Take one agent off ``location`` and clear occupancy if it is now empty."""
        self.agent_nums[location] -= 1
        if self.agent_nums[location] == 0:
            self.static[location] = 0

    def update_valid_movements(self, agent):
        """Set ``agent.valid_movements`` to the moves that stay on the grid
        and do not end on a blockade cell."""
        (pos_x, pos_y), _ = agent.get_state()
        agent.valid_movements = [
            (dx, dy)
            for dx, dy in self.MOVES
            if self._in_bounds(pos_x + dx, pos_y + dy)
            and self.grid[pos_x + dx][pos_y + dy] != self.BLOCKADE
        ]

    def step(self, agents):
        """Advance the world by one time step.

        Pheromone evaporates first; then every active agent, in list order,
        drops pheromone on its current cell, moves, picks up one unit of food
        if it lands on food while empty-handed, and, on reaching the hive
        cell, delivers its food and is queued for respawn.
        """
        self.time_step += 1

        # Pheromone evaporation over the whole grid
        self.dynamic *= (1 - self.evaporation_rate)

        for agent in agents:
            if agent.active != 1:
                continue

            self.update_valid_movements(agent)
            movement, pheromone = agent.get_action()
            location, _ = agent.get_state()
            new_location = (location[0] + movement[0], location[1] + movement[1])

            # Pheromone is deposited on the cell the agent is leaving
            self.dynamic[location] += pheromone

            # Move the agent on the occupancy layers
            self._remove_agent_at(location)
            agent.prev_location = location
            self.agent_nums[new_location] += 1
            self.static[new_location] = 1
            agent.location = new_location

            # Pick up food only when not already carrying any
            if self.grid[new_location] > self.BLOCKADE and agent.food == 0:
                agent.food = 1
                if self.grid[new_location] == self.FOOD_START:
                    self.grid[new_location] = 0     # last unit taken: cell becomes empty
                else:
                    self.grid[new_location] -= 1
                agent.bfs_active = 1

            # Arrival at the hive: deliver and leave the board until respawned
            if new_location == self.HIVE_LOCATION:
                agent.active = 0
                self.spawn_queue.append(agent.id)
                self.total_food += agent.food
                # Clear the load so the same unit is not credited again on
                # the agent's next return to the hive.
                agent.food = 0
                # run_episode re-adds the agent to these layers when it
                # respawns, so it must not stay counted here in the meantime.
                self._remove_agent_at(new_location)

    def get_observation(self, location):
        """Return the partially observable view centred on ``location``.

        The window spans ``observation_radius`` cells in every direction and
        is clipped at the grid border, so it is smaller near an edge.

        Returns:
            A pair ``(terrain_plus_agents, pheromone)``: the element-wise sum
            of ``grid`` and ``static`` over the window, and the matching
            slice of ``dynamic``.
        """
        radius = self.observation_radius
        x_lo = max(location[0] - radius, 0)
        x_hi = min(location[0] + radius + 1, self.cols)
        y_lo = max(location[1] - radius, 0)
        y_hi = min(location[1] + radius + 1, self.rows)
        window = (slice(x_lo, x_hi), slice(y_lo, y_hi))

        observation_grid_static = np.add(self.grid[window], self.static[window])
        observation_dynamic = self.dynamic[window]
        return observation_grid_static, observation_dynamic

    def reset(self, grid=None):
        """Clear agents, pheromone, counters and the spawn queue.

        Args:
            grid: optional terrain array to install (copied). When omitted, a
                default grid with a 2x2 hive patch at the origin and a food
                pile of value 9 in the opposite corner is created.
        """
        self._clear_agent_layers()

        if grid is not None:
            self.grid = np.array(grid)      # np.array copies by default
        else:
            self.grid = self._default_grid(hive_size=2)

        # Parameters
        self.total_food = 0
        self.time_step = 0

        # Spawn queue
        self.spawn_queue = []

    def run_episode(self, agents, grid=None):
        """Run ``total_steps`` steps and return the amount of food delivered.

        Every agent starts in the spawn queue; queue entries are used as
        indices into ``agents``, so each agent's ``id`` is expected to equal
        its position in the list. At most ``spawn_rate`` agents enter at the
        hive cell per step. The final terrain and occupancy maps are printed,
        then the environment is reset.

        Args:
            agents: list of agent objects.
            grid: terrain passed to ``reset`` once the episode is over; it is
                not used for the episode itself.
        """
        self.spawn_queue = list(range(len(agents)))

        for _ in range(self.total_steps):
            # Spawn as many queued agents as the rate allows
            for _ in range(min(self.spawn_rate, len(self.spawn_queue))):
                new_agent = agents[self.spawn_queue.pop(0)]
                new_agent.active = 1
                new_agent.location = self.HIVE_LOCATION
                self.static[self.HIVE_LOCATION] = 1
                self.agent_nums[self.HIVE_LOCATION] += 1
            # Refresh what every active agent can see before it acts
            for agent in agents:
                if agent.active == 1:
                    agent.observation = self.get_observation(agent.location)
            self.step(agents)

        food_collected = self.total_food
        print('grid')
        self.visualize_map(self.grid)
        print('static')
        self.visualize_map(self.static)
        # Reset environment
        self.reset(grid)
        return food_collected

    def visualize_map(self, np_array):
        """Print a 2-D array, one row per line, each value 4 characters wide."""
        print('\n'.join([''.join(['{:4}'.format(item) for item in row]) for row in np_array]))



# Agent class

In [74]:
class Agent(object):
    """A single forager whose default policy is a uniformly random move."""

    def __init__(self, id=None):
        # Identity and lifecycle
        self.id = id
        self.active = 0
        self.bfs_active = 0
        # Position bookkeeping
        self.location = (0,0)
        self.prev_location = (0,0)
        # Carried food and latest observation
        self.food = 0
        self.observation = None
        # Moves currently allowed; starts with "stay" only
        self.valid_movements = [(0,0)]

    def get_state(self):
        """Return the pair (location, food)."""
        state = (self.location, self.food)
        return state

    def get_action(self):
        """Return (movement, pheromone); this base class delegates to random_action."""
        return self.random_action()

    def random_action(self):
        """Pick one of valid_movements at random and a pheromone amount in [0, 1]."""
        movement = random.choice(self.valid_movements)
        pheromone = random.uniform(0,1.)
        return movement, pheromone

    def __str__(self):
        """Render as 'location active food', separated by single spaces."""
        fields = (self.location, self.active, self.food)
        return ' '.join(str(field) for field in fields)
        

# Testing

In [79]:
# Smoke test: ten agents whose ids match their list positions (run_episode
# indexes the agent list by the ids held in the spawn queue).
testAgents = [Agent(i) for i in range(10)]
# No grid is passed, so the environment builds its default grid.
env = Environment(ENV_PARAMS)
# run_episode prints the final 'grid' and 'static' maps and returns the
# total food delivered to the hive.
output = env.run_episode(testAgents)
print(output)

grid
 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.