# How does the environment work

I'm trying to see the values and types of the variables the agent works with. To that end, I'm going to use a simple agent provided by Kaggle (it always moves toward the first food item) and print the observation and configuration values. Nice and easy!

One requirement: the agent must always return a direction.

In [None]:
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col

def agent(obs_dict, config_dict):
    """Print what the agent receives, then step toward ``observation.food[0]``.

    Rows are aligned first (SOUTH/NORTH), then columns (EAST/WEST). Board
    wrapping is not exploited, and WEST is the fallback when the head is
    already on the food's row and the food is not further east.
    """
    observation = Observation(obs_dict)
    configuration = Configuration(config_dict)

    # Dump the raw inputs so their structure can be inspected in the output.
    print("Observation :", observation)
    print("Configuration : ", configuration)

    columns = configuration.columns
    head = observation.geese[observation.index][0]
    head_row, head_column = row_col(head, columns)
    target_row, target_column = row_col(observation.food[0], columns)

    # Vertical alignment takes priority over horizontal alignment.
    if target_row != head_row:
        vertical = Action.SOUTH if target_row > head_row else Action.NORTH
        return vertical.name

    horizontal = Action.EAST if target_column > head_column else Action.WEST
    return horizontal.name

# Running the agent (Playing a game)
Below is the syntax to use one (or multiple) agent(s) and simulate one (or multiple) game(s).

**Never submit an agent before witnessing how it performs! Does it even finish a game?**

In [None]:
from kaggle_environments import make

# Build the Hungry Geese environment with debugging enabled
env = make("hungry_geese", debug=True)

# Play one game: our agent against the predefined "random" agent
env.run([agent, "random"])

# Render the finished game in IPython mode
env.render(mode="ipython")

# Agent 1 : Making an agent that follows the same approach but with some constraints.
1. Avoids other geese, any positions adjacent to their heads (anticipating that they may move there and collide), and its own body
2. Avoids consecutive opposite actions
3. Makes use of board wrapping
4. Moves to the closest food instead of food[0]

In [None]:
%%writefile custom_greedy_agent.py

import random
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col, adjacent_positions

# Candidate moves, in the fixed order used when a move is picked at random.
actions = [Action.NORTH, Action.SOUTH, Action.EAST, Action.WEST]
# One CustomGreedyAgent per player index, created on first use by agent() so
# that each player's prev_action survives between calls.
cached_agents = {}

def nearest_food(position, food, columns):
    """Return the food cell with the smallest Manhattan distance to ``position``.

    Distances are measured on the flat board; wrap-around is not considered.
    When several items are equally close, the one appearing first in ``food``
    is returned.

    Args:
        position: Flat board index of the reference cell.
        food: Iterable of flat board indices of the food items.
        columns: Number of columns of the board.

    Returns:
        The flat index of the closest food item, or None if ``food`` is empty.
    """
    row, column = row_col(position, columns)

    def distance(food_object):
        food_row, food_column = row_col(food_object, columns)
        return abs(row - food_row) + abs(column - food_column)

    # min() needs no upper-bound sentinel, so food is found however far away
    # it is; it also keeps the first of several equally distant items.
    return min(food, key=distance, default=None)

def is_colliding(action, observation, configuration):
    """Tell whether taking ``action`` moves the player's head onto an obstacle.

    Obstacles are the player's own body (every segment except the head), every
    cell of every other goose, and the cells ``adjacent_positions`` reports
    around each opposing head, since an opponent may step there this turn.

    The target cell is computed with wrap-around: stepping off one edge of the
    board re-enters from the opposite edge, so obstacles lying across the
    border are detected as well.

    Args:
        action: The ``Action`` being evaluated.
        observation: ``Observation`` for the current step.
        configuration: ``Configuration`` providing ``columns`` and ``rows``.

    Returns:
        True if the target cell is an obstacle, otherwise False. An action
        that is not one of the four directions is reported as not colliding.
    """
    columns, rows = configuration.columns, configuration.rows

    player_index = observation.index
    player_goose = observation.geese[player_index]
    player_head = player_goose[0]
    other_geese = observation.geese[:player_index] + observation.geese[player_index + 1:]

    # A set of flat cell indices gives O(1) membership tests.
    obstacles = set(player_goose[1:])
    for other_goose in other_geese:
        if len(other_goose) > 0:
            obstacles.update(other_goose)
            obstacles.update(adjacent_positions(other_goose[0], columns, rows))

    # (row offset, column offset) of each move.
    offsets = {
        Action.NORTH: (-1, 0),
        Action.SOUTH: (1, 0),
        Action.EAST: (0, 1),
        Action.WEST: (0, -1),
    }
    offset = offsets.get(action)
    if offset is None:
        return False

    head_row, head_column = row_col(player_head, columns)
    # The modulo wraps the move around the board edges.
    target_row = (head_row + offset[0]) % rows
    target_column = (head_column + offset[1]) % columns
    return target_row * columns + target_column in obstacles
        
class CustomGreedyAgent:
    """Greedy food-seeking agent that remembers its previous move.

    A class is used because ``prev_action`` has to survive between calls for
    the same player, so that the agent never reverses onto itself.
    """

    def __init__(self, config_dict):
        """Store the game configuration and start with no move history."""
        self.curr_action = None
        self.prev_action = None

        self.configuration = Configuration(config_dict)

    def __call__(self, obs_dict):
        """Choose the move for the current step and return its name.

        The candidate moves are ordered by how well they approach the nearest
        food (rows are aligned before columns). The reverse of the previous
        move is removed from the candidates *before* the collision checks, so
        a safe move is never replaced by a random, possibly colliding one.
        The first candidate that does not collide is played; if every
        candidate collides, one of them is picked at random.

        Args:
            obs_dict: Raw observation passed in by the environment.

        Returns:
            The name of the chosen ``Action``.
        """
        observation = Observation(obs_dict)
        columns = self.configuration.columns

        player_index = observation.index
        player_head = observation.geese[player_index][0]
        player_head_row, player_head_column = row_col(player_head, columns)

        food = nearest_food(player_head, observation.food, columns)

        self.prev_action = self.curr_action

        if food is None:
            # Nothing to chase: any direction is as good as another.
            preference = list(actions)
        else:
            food_row, food_column = row_col(food, columns)
            if food_row > player_head_row:
                preference = [Action.SOUTH, Action.WEST, Action.EAST, Action.NORTH]
            elif food_row < player_head_row:
                preference = [Action.NORTH, Action.EAST, Action.WEST, Action.SOUTH]
            elif food_column > player_head_column:
                preference = [Action.EAST, Action.NORTH, Action.SOUTH, Action.WEST]
            else:
                preference = [Action.WEST, Action.SOUTH, Action.NORTH, Action.EAST]

        # Never consider the direct reverse of the previous move.
        if self.prev_action is not None:
            reverse = self.prev_action.opposite()
            preference = [action for action in preference if action != reverse]

        self.curr_action = next(
            (
                action
                for action in preference
                if not is_colliding(action, observation, self.configuration)
            ),
            None,
        )

        # Every remaining move collides: pick one of them at random.
        if self.curr_action is None:
            self.curr_action = random.choice(preference)

        print("Player index : ", player_index)
        print("Prev action : ", self.prev_action)
        print("Curr action : ", self.curr_action)
        return self.curr_action.name

def agent(obs_dict, config_dict):
    """Entry point called by the environment for every step of every player.

    Keeps one ``CustomGreedyAgent`` per player index in ``cached_agents`` so
    that the move history persists between calls. The cached instance is
    replaced at step 0, so state from a previous game played in the same
    process does not leak into a new one.

    Args:
        obs_dict: Raw observation passed in by the environment.
        config_dict: Raw configuration passed in by the environment.

    Returns:
        The name of the action chosen by the player's agent instance.
    """
    observation = Observation(obs_dict)
    player_index = observation.index

    if observation.step == 0 or player_index not in cached_agents:
        cached_agents[player_index] = CustomGreedyAgent(config_dict)
    return cached_agents[player_index](obs_dict)

## Playing the agent in the environment

In [None]:
from kaggle_environments import make
import custom_greedy_agent
env = make("hungry_geese", debug=False)

# Four copies of the greedy agent play against each other
env.run([custom_greedy_agent.agent] * 4)

# Render the finished game in IPython mode
env.render(mode="ipython")

# Agent 2 - Game Tree Agent (One-Step Look Ahead)
1. Check the reward for taking one step in all valid directions
2. Take the direction which has maximum reward

In [None]:
%%writefile heuristic_agent.py

import random
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col, adjacent_positions, translate

# Candidate moves, in the fixed order in which they are scored.
actions = [Action.NORTH, Action.SOUTH, Action.EAST, Action.WEST]
# One HeuristicAgent per player index, created on first use by agent() so
# that each player's prev_action survives between calls.
cached_agents = {}

def nearest_food(position, food, columns):
    """Return the food cell with the smallest Manhattan distance to ``position``.

    Distances are measured on the flat board; wrap-around is not considered.
    When several items are equally close, the one appearing first in ``food``
    is returned.

    Args:
        position: Flat board index of the reference cell.
        food: Iterable of flat board indices of the food items.
        columns: Number of columns of the board.

    Returns:
        The flat index of the closest food item, or None if ``food`` is empty.
    """
    row, column = row_col(position, columns)

    def distance(food_object):
        food_row, food_column = row_col(food_object, columns)
        return abs(row - food_row) + abs(column - food_column)

    # min() needs no upper-bound sentinel, so food is found however far away
    # it is; it also keeps the first of several equally distant items.
    return min(food, key=distance, default=None)

def one_step_heuristic(action, observation, configuration):
    """Score a single candidate move.

    Returns -100 when ``is_colliding`` rejects the move, 10 when the move
    lands on food, 1 when it reduces the flat-board Manhattan distance to the
    nearest food, and 0 otherwise.
    """
    if is_colliding(action, observation, configuration):
        return -100

    columns = configuration.columns
    head = observation.geese[observation.index][0]
    destination = translate(head, action, columns, configuration.rows)

    # Eating right now beats merely getting closer.
    if destination in observation.food:
        return 10

    target = nearest_food(head, observation.food, columns)
    target_row, target_column = row_col(target, columns)

    def gap(cell):
        cell_row, cell_column = row_col(cell, columns)
        return abs(cell_row - target_row) + abs(cell_column - target_column)

    return 1 if gap(destination) < gap(head) else 0

def is_colliding(action, observation, configuration):
    """Tell whether taking ``action`` moves the player's head onto an obstacle.

    Obstacles are the player's own body (every segment except the head), every
    cell of every other goose, and the cells ``adjacent_positions`` reports
    around each opposing head, since an opponent may step there this turn.

    The target cell is computed with wrap-around: stepping off one edge of the
    board re-enters from the opposite edge, so obstacles lying across the
    border are detected as well.

    Args:
        action: The ``Action`` being evaluated.
        observation: ``Observation`` for the current step.
        configuration: ``Configuration`` providing ``columns`` and ``rows``.

    Returns:
        True if the target cell is an obstacle, otherwise False. An action
        that is not one of the four directions is reported as not colliding.
    """
    columns, rows = configuration.columns, configuration.rows

    player_index = observation.index
    player_goose = observation.geese[player_index]
    player_head = player_goose[0]
    other_geese = observation.geese[:player_index] + observation.geese[player_index + 1:]

    # A set of flat cell indices gives O(1) membership tests.
    obstacles = set(player_goose[1:])
    for other_goose in other_geese:
        if len(other_goose) > 0:
            obstacles.update(other_goose)
            obstacles.update(adjacent_positions(other_goose[0], columns, rows))

    # (row offset, column offset) of each move.
    offsets = {
        Action.NORTH: (-1, 0),
        Action.SOUTH: (1, 0),
        Action.EAST: (0, 1),
        Action.WEST: (0, -1),
    }
    offset = offsets.get(action)
    if offset is None:
        return False

    head_row, head_column = row_col(player_head, columns)
    # The modulo wraps the move around the board edges.
    target_row = (head_row + offset[0]) % rows
    target_column = (head_column + offset[1]) % columns
    return target_row * columns + target_column in obstacles
        
class HeuristicAgent:
    """One-step lookahead agent.

    Implemented as a class so that the previous move is remembered between
    calls made for the same player.
    """

    def __init__(self, config_dict):
        """Store the game configuration and start with no move history."""
        self.configuration = Configuration(config_dict)
        self.prev_action = None
        self.curr_action = None

    def __call__(self, obs_dict):
        """Score every allowed move with ``one_step_heuristic`` and play a best one."""
        observation = Observation(obs_dict)

        self.prev_action = self.curr_action

        # Reversing the previous move is ruled out before any scoring.
        if self.prev_action:
            banned = self.prev_action.opposite()
            valid_actions = [candidate for candidate in actions if candidate != banned]
        else:
            valid_actions = actions.copy()

        scores = {
            candidate: one_step_heuristic(candidate, observation, self.configuration)
            for candidate in valid_actions
        }

        # Keep every move that reaches the top score, then break ties randomly.
        top_score = max(scores.values())
        best_actions = [candidate for candidate, value in scores.items() if value == top_score]
        self.curr_action = random.choice(best_actions)

        print("Player index : ", observation.index)
        print("Prev action : ", self.prev_action)
        print("Curr action : ", self.curr_action)
        return self.curr_action.name

def agent(obs_dict, config_dict):
    """Entry point called by the environment for every step of every player.

    Keeps one ``HeuristicAgent`` per player index in ``cached_agents`` so that
    the move history persists between calls. The cached instance is replaced
    at step 0, so state from a previous game played in the same process does
    not leak into a new one.

    Args:
        obs_dict: Raw observation passed in by the environment.
        config_dict: Raw configuration passed in by the environment.

    Returns:
        The name of the action chosen by the player's agent instance.
    """
    observation = Observation(obs_dict)
    player_index = observation.index

    if observation.step == 0 or player_index not in cached_agents:
        cached_agents[player_index] = HeuristicAgent(config_dict)
    return cached_agents[player_index](obs_dict)

## Playing the agent in the environment

In [None]:
from kaggle_environments import make
import heuristic_agent
env = make("hungry_geese", debug=False)

# Four copies of the one-step lookahead agent play against each other
env.run([heuristic_agent.agent] * 4)

# Render the finished game in IPython mode
env.render(mode="ipython")

# Well, well, well.. what do we have here ? 
## Intermediate Result : 
1. So, the one-step lookahead agent performs barely as well as, if not worse than, the greedy agent
2. Both greedy and one step lookahead agents are failing due to being forced into a corner.

## Solution : 
1. Try n_steps lookahead for the corner issue.
2. Finally, deep reinforcement neural networks for complex behavior.

## Other ideas : 
1. Maybe there's a fault with the heuristic values.

# Agent 3 : N-step Lookahead Agent
1. This agent looks forward 3 steps and chooses best action
2. Also tuned the heuristic rewards according to the contest's reward policy

In [None]:
%%writefile n_step_agent.py

import random
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col, adjacent_positions, translate

# Candidate moves, in the fixed order in which they are scored.
actions = [Action.NORTH, Action.SOUTH, Action.EAST, Action.WEST]
# One NStepAgent per player index, created on first use by agent() so that
# each player's prev_action survives between calls.
cached_agents = {}

# Lookahead depth: reward() treats depth n_steps - 1 as the last step.
n_steps = 3

def _blocked_cells(observation, configuration):
    """Collect the cells the lookahead treats as occupied.

    These are every cell of the player's own goose, every cell of every other
    goose, and the cells one move away from each opposing head (an opponent
    may step there). The board is treated as static during the lookahead.
    """
    player_index = observation.index
    blocked = set(observation.geese[player_index])
    for index, goose in enumerate(observation.geese):
        if index == player_index or not goose:
            continue
        blocked.update(goose)
        blocked.update(
            translate(goose[0], move, configuration.columns, configuration.rows)
            for move in actions
        )
    return blocked


def reward(action, player_head, depth, observation, configuration, _blocked=None):
    """Score taking ``action`` from ``player_head``, followed by best play.

    The move is applied to ``player_head`` (with board wrapping, through
    ``translate``) and the resulting cell is checked against the obstacles.
    A collision is worth 0. Otherwise the move is worth 1 for surviving, plus
    1 if the new cell holds food, plus the best score reachable by the
    follow-up moves, until ``n_steps`` moves have been examined. The reverse
    of ``action`` is never considered as a follow-up move.

    Args:
        action: The ``Action`` to evaluate.
        player_head: Flat board index the move starts from.
        depth: Number of moves already taken in this lookahead (0 for the
            first move).
        observation: ``Observation`` for the current step.
        configuration: ``Configuration`` providing ``columns`` and ``rows``.
        _blocked: Internal cache of occupied cells shared by the recursion;
            callers should leave it unset.

    Returns:
        The non-negative integer score of the best line starting with
        ``action``.
    """
    if _blocked is None:
        _blocked = _blocked_cells(observation, configuration)

    new_head = translate(player_head, action, configuration.columns, configuration.rows)
    if new_head in _blocked:
        return 0

    score = 1
    if new_head in observation.food:
        score += 1

    # Last step of the lookahead: nothing further to explore.
    if depth >= n_steps - 1:
        return score

    reverse = action.opposite()
    return score + max(
        reward(next_action, new_head, depth + 1, observation, configuration, _blocked)
        for next_action in actions
        if next_action != reverse
    )

def is_colliding(action, observation, configuration):
    """Tell whether taking ``action`` moves the player's head onto an obstacle.

    Obstacles are the player's own body (every segment except the head), every
    cell of every other goose, and the cells ``adjacent_positions`` reports
    around each opposing head, since an opponent may step there this turn.

    The target cell is computed with wrap-around: stepping off one edge of the
    board re-enters from the opposite edge, so obstacles lying across the
    border are detected as well.

    Args:
        action: The ``Action`` being evaluated.
        observation: ``Observation`` for the current step.
        configuration: ``Configuration`` providing ``columns`` and ``rows``.

    Returns:
        True if the target cell is an obstacle, otherwise False. An action
        that is not one of the four directions is reported as not colliding.
    """
    columns, rows = configuration.columns, configuration.rows

    player_index = observation.index
    player_goose = observation.geese[player_index]
    player_head = player_goose[0]
    other_geese = observation.geese[:player_index] + observation.geese[player_index + 1:]

    # A set of flat cell indices gives O(1) membership tests.
    obstacles = set(player_goose[1:])
    for other_goose in other_geese:
        if len(other_goose) > 0:
            obstacles.update(other_goose)
            obstacles.update(adjacent_positions(other_goose[0], columns, rows))

    # (row offset, column offset) of each move.
    offsets = {
        Action.NORTH: (-1, 0),
        Action.SOUTH: (1, 0),
        Action.EAST: (0, 1),
        Action.WEST: (0, -1),
    }
    offset = offsets.get(action)
    if offset is None:
        return False

    head_row, head_column = row_col(player_head, columns)
    # The modulo wraps the move around the board edges.
    target_row = (head_row + offset[0]) % rows
    target_column = (head_column + offset[1]) % columns
    return target_row * columns + target_column in obstacles
        
class NStepAgent:
    """Multi-step lookahead agent.

    Implemented as a class so that the previous move is remembered between
    calls made for the same player.
    """

    def __init__(self, config_dict):
        """Store the game configuration and start with no move history."""
        self.configuration = Configuration(config_dict)
        self.prev_action = None
        self.curr_action = None

    def __call__(self, obs_dict):
        """Score every allowed move with ``reward`` and play a best one."""
        observation = Observation(obs_dict)

        self.prev_action = self.curr_action

        # Reversing the previous move is ruled out before any scoring.
        if self.prev_action:
            banned = self.prev_action.opposite()
            valid_actions = [candidate for candidate in actions if candidate != banned]
        else:
            valid_actions = actions.copy()

        # Every candidate is scored by the lookahead, starting at depth 0
        # from the current head position.
        player_head = observation.geese[observation.index][0]
        scores = {
            candidate: reward(candidate, player_head, 0, observation, self.configuration)
            for candidate in valid_actions
        }

        # Keep every move that reaches the top score, then break ties randomly.
        top_score = max(scores.values())
        best_actions = [candidate for candidate, value in scores.items() if value == top_score]
        self.curr_action = random.choice(best_actions)

        print("Player index : ", observation.index)
        print("Prev action : ", self.prev_action)
        print("Curr action : ", self.curr_action)
        return self.curr_action.name

def agent(obs_dict, config_dict):
    """Entry point called by the environment for every step of every player.

    Keeps one ``NStepAgent`` per player index in ``cached_agents`` so that the
    move history persists between calls. The cached instance is replaced at
    step 0, so state from a previous game played in the same process does not
    leak into a new one.

    Args:
        obs_dict: Raw observation passed in by the environment.
        config_dict: Raw configuration passed in by the environment.

    Returns:
        The name of the action chosen by the player's agent instance.
    """
    observation = Observation(obs_dict)
    player_index = observation.index

    if observation.step == 0 or player_index not in cached_agents:
        cached_agents[player_index] = NStepAgent(config_dict)
    return cached_agents[player_index](obs_dict)

## Playing the agent in the environment

In [None]:
from kaggle_environments import make
import n_step_agent
env = make("hungry_geese", debug=True)

# Four copies of the n-step lookahead agent play against each other
env.run([n_step_agent.agent] * 4)

# Render the finished game in IPython mode
env.render(mode="ipython")