# The IDEA:

**The idea**: Create a state matrix from the current board and assign scores to the cells we deem useful (food, heads, bodies, tails).
Then, to fill in the remaining empty cells of the matrix, we apply a simple convolution that diffuses the "scores" / rewards across the board. This should let the agent "plan ahead" even though it simply chooses the neighbouring cell with the largest value around its head.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch ##for tensors
import torch.nn as nn ##for convolution
import torch.nn.functional as F #Might be not needed
import matplotlib.pyplot as plt #plotting for future
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col, GreedyAgent, random_agent

In [None]:
def pos(index):
    """Convert a flat board index into a (row, column) pair.

    The board is treated as 11 columns wide; the conversion itself is
    delegated to ``row_col``.
    """
    board_columns = 11
    return row_col(index, board_columns)

def state_from_observation(observation):
    """Paint the current observation onto a 7x11 score board.

    Reads ``observation["geese"]``, ``observation["food"]`` and
    ``observation["index"]`` and returns a float tensor of size (7, 11) that
    is zero everywhere except on occupied cells. Cells are written in this
    order, so later writes win on overlap: our goose, the other geese, food.
    """
    # Score assigned to each kind of occupied cell.
    food_value = 3
    own_head, own_body, own_tail = 1, -2, 0
    rival_head, rival_body = -4, -2
    rival_tail = rival_body / 2  # a rival's tail gets half the body penalty

    geese = observation["geese"]
    food = observation["food"]
    my_index = observation["index"]

    board = torch.zeros(7, 11, dtype=torch.float)

    def paint(goose, head_value, body_value, tail_value):
        # The head check comes first so a one-segment goose is scored as a head.
        last = len(goose) - 1
        for segment, cell in enumerate(goose):
            if segment == 0:
                board[pos(cell)] = head_value
            elif segment == last:
                board[pos(cell)] = tail_value
            else:
                board[pos(cell)] = body_value

    paint(geese[my_index], own_head, own_body, own_tail)
    for goose_index, goose in enumerate(geese):
        if goose_index != my_index:
            paint(goose, rival_head, rival_body, rival_tail)

    for food_cell in food:
        board[pos(food_cell)] = food_value

    return board

## Diffusion Code

In [None]:
class TorusConv2d(nn.Module):
    """2D convolution on a board that wraps around on both spatial axes.

    The input is expected in PyTorch's (N, C, H, W) layout. Before the
    convolution, ``kernel // 2`` rows/columns from the opposite edge are
    attached to every side, so a 3x3 kernel keeps the spatial size unchanged.

    Args:
        input_shape: number of input channels.
        output_shape: number of output channels.
        kernel_size: ``(height, width)`` of the kernel; a single int is
            treated as a square kernel.
        bias: whether the wrapped convolution has a learnable bias.
    """

    def __init__(self, input_shape, output_shape, kernel_size, bias=False):
        super().__init__()
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        # How much we have to pad per side, e.g. 1 extra layer for a 3x3 kernel.
        self.edge_size = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.conv = nn.Conv2d(input_shape, output_shape, kernel_size, bias=bias)

    def forward(self, x):
        """Apply the wrapped convolution to a (N, C, H, W) tensor."""
        pad_rows, pad_cols = self.edge_size
        h = x
        # A zero pad width must be skipped: ``t[..., -0:]`` is the whole tensor.
        if pad_cols:
            # Columns live on dim 3.
            h = torch.cat([h[:, :, :, -pad_cols:], h, h[:, :, :, :pad_cols]], dim=3)
        if pad_rows:
            # Rows live on dim 2.
            h = torch.cat([h[:, :, -pad_rows:], h, h[:, :, :pad_rows]], dim=2)
        return self.conv(h)

In [None]:
def diffuse_state(state, n = 1):
    """Diffuse the scores of a board over its neighbouring cells.

    Each pass replaces every cell with a weighted average of itself
    (weight 9) and its four direct neighbours (weight 1 each), normalised so
    the weights sum to one. The board wraps around on both axes.

    Args:
        state: 2D tensor holding the board scores, e.g. of size (7, 11).
        n: number of diffusion passes. With ``n <= 0`` an unchanged copy of
            the board is returned.

    Returns:
        A detached tensor with the diffused scores; singleton dimensions are
        squeezed, so a (7, 11) board comes back as (7, 11).
    """
    # conv2d needs the format (N, C, Dim0, Dim1): one sample, one channel.
    x = state.detach().clone().reshape(1, 1, state.size(0), state.size(1))
    kernel = torch.tensor([[0, 1, 0], [1, 9, 1], [0, 1, 0]], dtype=torch.float).view(1, 1, 3, 3)
    kernel /= kernel.sum()  # normalise the kernel
    with torch.no_grad():
        for _ in range(n):
            # Wrap one cell around every edge, then convolve without padding.
            wrapped = nn.functional.pad(x, (1, 1, 1, 1), mode="circular")
            x = nn.functional.conv2d(wrapped, kernel)

    return x.squeeze()

# Agents

1. We can create a simple score value for certain points on the board that are constants (independent of the board)
2. We can create a bit more complex state dependent scoring (eg.: if we're the longest decrease score of food)




To find a slightly better agent, we could try to tune the hyperparameters (the value of each score), as well as the choice of what we want to score.

The score could also include different ways of changing it based on the state

### State independent diffusive agent

In [None]:
%%writefile diffuse_agent_static.py
import torch
import torch.nn as nn
import numpy as np
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col
# Names of the four moves.
Actions = ["NORTH", "SOUTH", "EAST", "WEST"]
# Flat (row-major) index inside the 3x3 window around the head -> move name.
Action_Map = {
    1: "NORTH",
    3: "WEST",
    5: "EAST",
    7: "SOUTH",
}
# For each window index, the window index of the opposite move.
Opposite_Action = {
    1: 7,
    7: 1,
    3: 5,
    5: 3,
}
# Board size as [rows, columns].
environment_size = [7, 11]

def pos(index):
    """Convert a flat board index into a (row, column) pair.

    The board is treated as 11 columns wide; the conversion itself is
    delegated to ``row_col``.
    """
    board_columns = 11
    return row_col(index, board_columns)

def state_from_observation(observation):
    """Paint the current observation onto a 7x11 score board.

    Reads ``observation["geese"]``, ``observation["food"]`` and
    ``observation["index"]`` and returns a float tensor of size (7, 11) that
    is zero everywhere except on occupied cells. Cells are written in this
    order, so later writes win on overlap: our goose, the other geese, food.
    """
    # Score assigned to each kind of occupied cell.
    food_value = 3
    own_head, own_body, own_tail = 1, -2, 0
    rival_head, rival_body = -4, -2
    rival_tail = rival_body / 2  # a rival's tail gets half the body penalty

    geese = observation["geese"]
    food = observation["food"]
    my_index = observation["index"]

    board = torch.zeros(7, 11, dtype=torch.float)

    def paint(goose, head_value, body_value, tail_value):
        # The head check comes first so a one-segment goose is scored as a head.
        last = len(goose) - 1
        for segment, cell in enumerate(goose):
            if segment == 0:
                board[pos(cell)] = head_value
            elif segment == last:
                board[pos(cell)] = tail_value
            else:
                board[pos(cell)] = body_value

    paint(geese[my_index], own_head, own_body, own_tail)
    for goose_index, goose in enumerate(geese):
        if goose_index != my_index:
            paint(goose, rival_head, rival_body, rival_tail)

    for food_cell in food:
        board[pos(food_cell)] = food_value

    return board

"""
class TorusConv2d(nn.Module):
    #Input has size (N, channels_in, dim0, dim1)
    def __init__(self, input_shape, output_shape, kernel_size, bias=False):
        super().__init__()
        self.edge_size = (kernel_size[0]//2, kernel_size[1]//2) ##how much we have to pad, eg.: for 3x3 -> we need 1 extra layer
        self.conv = nn.Conv2d(input_shape, output_shape, kernel_size, padding=0, bias=False)
    
    def forward(self, x):
        h = torch.cat([x[:, :, :, -self.edge_size[1]:], x, x[:, :, :, :self.edge_size[1]]], dim = 3) #adding the extra layer on the columns      
        h = torch.cat([h[:, :, -self.edge_size[0]:], h, h[:, :, :self.edge_size[0]]], dim=2) #rows
        h = self.conv(h)
        return h
"""

def diffuse_state(state, n = 1):
    """Diffuse the scores of a board over its neighbouring cells.

    Each pass replaces every cell with a weighted average of itself
    (weight 9) and its four direct neighbours (weight 1 each), normalised so
    the weights sum to one. The board wraps around on both axes.

    Args:
        state: 2D tensor holding the board scores, e.g. of size (7, 11).
        n: number of diffusion passes. With ``n <= 0`` an unchanged copy of
            the board is returned.

    Returns:
        A detached tensor with the diffused scores; singleton dimensions are
        squeezed, so a (7, 11) board comes back as (7, 11).
    """
    # conv2d needs the format (N, C, Dim0, Dim1): one sample, one channel.
    x = state.detach().clone().reshape(1, 1, state.size(0), state.size(1))
    kernel = torch.tensor([[0, 1, 0], [1, 9, 1], [0, 1, 0]], dtype=torch.float).view(1, 1, 3, 3)
    kernel /= kernel.sum()  # normalise the kernel
    with torch.no_grad():
        for _ in range(n):
            # Wrap one cell around every edge, then convolve without padding.
            wrapped = nn.functional.pad(x, (1, 1, 1, 1), mode="circular")
            x = nn.functional.conv2d(wrapped, kernel)

    return x.squeeze()

def values_around_point(point, state):
    """Return the 3x3 window of ``state`` centred on ``point``.

    ``point`` is a (row, column) pair on the 7x11 board. Indices running past
    the last row/column are wrapped to the start; an index of -1 already
    addresses the last row/column. Only the four directly adjacent cells keep
    their value: the centre and the corners are replaced by -1000 so they are
    never selected.
    """
    row, col = point[0], point[1]
    row_ids = [r - 7 if r > 6 else r for r in (row - 1, row, row + 1)]
    col_ids = [c - 11 if c > 10 else c for c in (col - 1, col, col + 1)]

    # Pick the three rows first, then the three columns => size (3, 3).
    window = state[row_ids][:, col_ids]

    penalty = 1000
    reachable = torch.tensor([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=torch.float)
    # 0 on reachable cells, -penalty on the centre and the corners.
    blocked = (reachable - torch.ones(3, 3)) * penalty

    return (window * reachable) + blocked


prev_action = -1  # flat 3x3 window index of the last move; -1 before the first move
def agent(obs_dict, config_dict):
    """Choose the next move from the diffused score board.

    Builds the score board for ``obs_dict``, diffuses it 5 times and moves to
    the adjacent cell with the highest diffused value. The reverse of the
    previous move (kept in the module-level ``prev_action``) is heavily
    penalised. ``config_dict`` is not used.

    Returns:
        One of "NORTH", "WEST", "EAST" or "SOUTH".
    """
    global prev_action

    # The number of diffusions is a tuning parameter, as are the kernel values.
    diffused = diffuse_state(state_from_observation(obs_dict), 5)

    head_cell = obs_dict["geese"][obs_dict["index"]][0]
    neighbourhood = values_around_point(pos(head_cell), diffused)

    # Penalise the reverse of the previous move so we never pick it.
    reverse_penalty = torch.zeros(9)
    if prev_action >= 0:
        reverse_penalty[Opposite_Action[prev_action]] = -1*1000000
    neighbourhood = neighbourhood + reverse_penalty.view(3, 3)

    # argmax works on the flattened (row-major) window, so it yields 1, 3, 5 or 7.
    choice = torch.argmax(neighbourhood).item()
    prev_action = choice

    return Action_Map[choice]

## State dependent diffusive agent

Similar to the previous agent, except that the food score receives a penalty and the tail score a bonus, both depending on the relative size of our goose.

In [None]:
%%writefile diffuse_agent_variable.py
import torch
import torch.nn as nn
import numpy as np
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, row_col
# Names of the four moves.
Actions = ["NORTH", "SOUTH", "EAST", "WEST"]
# Flat (row-major) index inside the 3x3 window around the head -> move name.
Action_Map = {
    1: "NORTH",
    3: "WEST",
    5: "EAST",
    7: "SOUTH",
}
# For each window index, the window index of the opposite move.
Opposite_Action = {
    1: 7,
    7: 1,
    3: 5,
    5: 3,
}
# Board size as [rows, columns].
environment_size = [7, 11]

def pos(index):
    """Convert a flat board index into a (row, column) pair.

    The board is treated as 11 columns wide; the conversion itself is
    delegated to ``row_col``.
    """
    board_columns = 11
    return row_col(index, board_columns)

def state_from_observation(observation):
    """Paint the current observation onto a 7x11 score board.

    Same layout as the static agent, but two scores depend on the state: for
    every segment by which our goose is longer than the longest other goose,
    the food score drops by 0.25 and our tail score rises by 0.25.

    Args:
        observation: mapping with the keys ``"geese"`` (list of goose bodies
            as flat cell indices, head first), ``"food"`` (flat cell indices)
            and ``"index"`` (position of our goose in ``"geese"``).

    Returns:
        Float tensor of size (7, 11); zero on every unoccupied cell.
    """
    food_score = 3
    this_head_score = 1
    this_body_score = -2
    this_tail_score = 0
    other_body_score = -2
    other_head_score = -5

    geese = observation["geese"]
    food = observation["food"]
    index = observation["index"]
    this_goose = geese[index]
    other_goose = [goose for i, goose in enumerate(geese) if i != index]

    state = torch.zeros(7, 11, dtype=torch.float)

    # State based score change. ``default=0`` keeps this working when there
    # is no other goose at all (np.max of an empty list raises ValueError).
    other_max_length = max((len(goose) for goose in other_goose), default=0)
    length_advantage = max(0, len(this_goose) - other_max_length)
    food_score -= length_advantage * 0.25  # food matters less when we lead
    this_tail_score += length_advantage * 0.25  # following our tail => increases survival

    # Our goose: body first, then tail and head overwrite the end cells
    # (the head wins for a one-segment goose).
    for i, body in enumerate(this_goose):
        state[pos(body)] = this_body_score
        if i == len(this_goose) - 1:
            state[pos(body)] = this_tail_score
        if i == 0:
            state[pos(body)] = this_head_score

    # Other geese: their tail gets half the body penalty.
    for goose in other_goose:
        for i, body in enumerate(goose):
            state[pos(body)] = other_body_score
            if i == len(goose) - 1:
                state[pos(body)] = other_body_score / 2
            if i == 0:
                state[pos(body)] = other_head_score

    # Food is written last, so it overrides anything already on its cell.
    for f in food:
        state[pos(f)] = food_score

    return state

"""
class TorusConv2d(nn.Module):
    #Input has size (N, channels_in, dim0, dim1)
    def __init__(self, input_shape, output_shape, kernel_size, bias=False):
        super().__init__()
        self.edge_size = (kernel_size[0]//2, kernel_size[1]//2) ##how much we have to pad, eg.: for 3x3 -> we need 1 extra layer
        self.conv = nn.Conv2d(input_shape, output_shape, kernel_size, padding=0, bias=False)
    
    def forward(self, x):
        h = torch.cat([x[:, :, :, -self.edge_size[1]:], x, x[:, :, :, :self.edge_size[1]]], dim = 3) #adding the extra layer on the columns      
        h = torch.cat([h[:, :, -self.edge_size[0]:], h, h[:, :, :self.edge_size[0]]], dim=2) #rows
        h = self.conv(h)
        return h
"""

def diffuse_state(state, n = 1):
    """Diffuse the scores of a board over its neighbouring cells.

    Each pass replaces every cell with a weighted average of itself
    (weight 9) and its four direct neighbours (weight 1 each), normalised so
    the weights sum to one. The board wraps around on both axes.

    Args:
        state: 2D tensor holding the board scores, e.g. of size (7, 11).
        n: number of diffusion passes. With ``n <= 0`` an unchanged copy of
            the board is returned.

    Returns:
        A detached tensor with the diffused scores; singleton dimensions are
        squeezed, so a (7, 11) board comes back as (7, 11).
    """
    # conv2d needs the format (N, C, Dim0, Dim1): one sample, one channel.
    x = state.detach().clone().reshape(1, 1, state.size(0), state.size(1))
    kernel = torch.tensor([[0, 1, 0], [1, 9, 1], [0, 1, 0]], dtype=torch.float).view(1, 1, 3, 3)
    kernel /= kernel.sum()  # normalise the kernel
    with torch.no_grad():
        for _ in range(n):
            # Wrap one cell around every edge, then convolve without padding.
            wrapped = nn.functional.pad(x, (1, 1, 1, 1), mode="circular")
            x = nn.functional.conv2d(wrapped, kernel)

    return x.squeeze()

def values_around_point(point, state):
    """Return the 3x3 window of ``state`` centred on ``point``.

    ``point`` is a (row, column) pair on the 7x11 board. Indices running past
    the last row/column are wrapped to the start; an index of -1 already
    addresses the last row/column. Only the four directly adjacent cells keep
    their value: the centre and the corners are replaced by -1000 so they are
    never selected.
    """
    row, col = point[0], point[1]
    row_ids = [r - 7 if r > 6 else r for r in (row - 1, row, row + 1)]
    col_ids = [c - 11 if c > 10 else c for c in (col - 1, col, col + 1)]

    # Pick the three rows first, then the three columns => size (3, 3).
    window = state[row_ids][:, col_ids]

    penalty = 1000
    reachable = torch.tensor([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=torch.float)
    # 0 on reachable cells, -penalty on the centre and the corners.
    blocked = (reachable - torch.ones(3, 3)) * penalty

    return (window * reachable) + blocked


prev_action = -1  # flat 3x3 window index of the last move; -1 before the first move
def agent(obs_dict, config_dict):
    """Choose the next move from the diffused score board.

    Builds the score board for ``obs_dict``, diffuses it 5 times and moves to
    the adjacent cell with the highest diffused value. The reverse of the
    previous move (kept in the module-level ``prev_action``) is heavily
    penalised. ``config_dict`` is not used.

    Returns:
        One of "NORTH", "WEST", "EAST" or "SOUTH".
    """
    global prev_action

    # The number of diffusions is a tuning parameter, as are the kernel values.
    diffused = diffuse_state(state_from_observation(obs_dict), 5)

    head_cell = obs_dict["geese"][obs_dict["index"]][0]
    neighbourhood = values_around_point(pos(head_cell), diffused)

    # Penalise the reverse of the previous move so we never pick it.
    reverse_penalty = torch.zeros(9)
    if prev_action >= 0:
        reverse_penalty[Opposite_Action[prev_action]] = -1*1000000
    neighbourhood = neighbourhood + reverse_penalty.view(3, 3)

    # argmax works on the flattened (row-major) window, so it yields 1, 3, 5 or 7.
    choice = torch.argmax(neighbourhood).item()
    prev_action = choice

    return Action_Map[choice]

# Examples

In [None]:
from kaggle_environments import make, evaluate
# Play one episode: the two diffusion agents against two "greedy" agents.
env = make("hungry_geese", debug=True)

observations = env.run(["diffuse_agent_variable.py","diffuse_agent_static.py", "greedy", "greedy"])
env.render(mode="ipython", width=700, height=600)

# Plot the raw score board (left) next to its 5x diffused version (right).
fig, ax = plt.subplots(1, 2)
# Entry [1][0] of the run output -- presumably step 1 as seen by agent 0; TODO confirm.
print(observations[1][0]["observation"])
s = state_from_observation(observations[1][0]["observation"])
show = ax[0].imshow(s, cmap = "inferno")
diff_s = diffuse_state(s, 5)
ax[1].imshow(diff_s, cmap = "inferno")
# The colour bar is taken from the left (raw) image and attached to both axes.
cbar = fig.colorbar(show, ax=ax)

We can see that the empty cells are red, the bright white spots are food, the yellow spot is our head, and the black spots are the enemy heads.

# Running Environment

In [None]:
from kaggle_environments import make, evaluate
# Create a fresh environment and play one episode with the same four agents.
env = make("hungry_geese", debug=True)

env.run(["diffuse_agent_variable.py","diffuse_agent_static.py", "greedy", "greedy"])

# Show the finished episode inline in the notebook.
env.render(mode="ipython", width=700, height=600)

# League

In [None]:
def setup_env(debug=False):
    """Create a hungry_geese environment for league play.

    The environment is configured with ``episodeSteps`` = 200 and
    ``actTimeout`` = 1; ``debug`` is forwarded to ``make``.
    """
    league_configuration = {"episodeSteps": 200, "actTimeout": 1}
    return make("hungry_geese", configuration=league_configuration, debug=debug)

In [None]:
def run_league(env, teams, nb_iter, debug=True):
    """Play ``nb_iter`` episodes between ``teams`` and count the wins.

    Args:
        env: passed through and returned unchanged; the episodes themselves
            are run by ``evaluate``.
        teams: agents to play against each other; each entry is either a
            file path / built-in agent name (str) or a callable agent.
        nb_iter: number of episodes to play.
        debug: when true, print one summary line per team.

    Returns:
        ``(env, episode_winner_counts)`` where the counter maps the index of
        a team in ``teams`` to the number of episodes it won.
    """
    # Imported here so the function does not depend on a later notebook cell.
    import collections

    teams = list(teams)

    # Run simulations
    current_score = evaluate("hungry_geese", teams, num_episodes=nb_iter)

    # Retrieve results: the winner of an episode is the team with the top score.
    episode_winners = np.argmax(current_score, axis=1)
    print(episode_winners)
    episode_winner_counts = collections.Counter(episode_winners)
    print(episode_winner_counts)

    if debug:
        for i, team in enumerate(teams):
            if isinstance(team, str):
                team_name = team.split("/")[-1]
            else:
                # Callable agents have no path; fall back to their name.
                team_name = getattr(team, "__name__", repr(team))
            print("TEAM", i, team_name, "won", episode_winner_counts.get(i, 0), "times")

    return env, episode_winner_counts

In [None]:
# Agents taking part in the league: the two agent files written above plus
# two "greedy" agents.
LEAGUE_TEAMS = [
    "diffuse_agent_static.py",
    "diffuse_agent_variable.py",
    "greedy",
    "greedy"
]

In [None]:
from time import time
import collections
# League
NB_ITER = 100 # number of times teams play each other

# Set up the Environment.
env = setup_env(debug=True)

# Run league and report how long it took.
print("Start league...")
start = time()
# run_league hands the environment back together with the per-team win counts.
env, counts = run_league(env, LEAGUE_TEAMS, NB_ITER)
print("Runtime :", np.round(time() - start, 2), "seconds")

# Hyperparameter tuning with Optuna

## Create Diffuse Agent Class

In [None]:
class Diffuse_Agent():
    """Diffusion-based Hungry Geese agent with tunable cell scores.

    The agent paints scores onto a 7x11 board (food, own and other geese),
    diffuses them with a wrap-around convolution and moves to the adjacent
    cell with the highest diffused value.

    Args:
        food: base score of a food cell.
        head: score of our own head.
        body: score of our own body segments.
        tail: base score of our own tail.
        other_body: score of other geese's body segments (their tail gets
            half of it).
        other_head: score of other geese's heads.
        food_mult: amount subtracted from the food score per segment of
            length advantage over the longest other goose.
        tail_mult: amount added to our tail score per segment of length
            advantage.
    """

    def __init__(self, food = 3, head = 1, body = -2, tail = 0, other_body = -2, other_head = -5, food_mult = 1, tail_mult = 1):
        self.food_score = food
        self.this_head_score = head
        self.this_body_score = body
        self.this_tail_score = tail
        self.other_body_score = other_body
        self.other_head_score = other_head

        self.food_multiplier = food_mult
        self.tail_multiplier = tail_mult

        self.Actions = ["NORTH", "SOUTH", "EAST", "WEST"]
        # Flat (row-major) index inside the 3x3 window around the head -> move.
        self.Action_Map = {1:"NORTH", 3:"WEST", 5:"EAST", 7:"SOUTH"}
        self.Opposite_Action = {1:7, 7:1, 3:5, 5:3}
        self.environment_size = [7,11]

        self.prev_action = -1 #previous action taken (as window index)

    @staticmethod
    def pos(index):
        """Return the (row, column) of a flat cell index on the 11-wide board.

        Computed directly with ``divmod`` so the class does not rely on a
        module-level helper being defined.
        """
        return divmod(index, 11)

    def state_from_observation(self, observation):
        """Paint the observation onto a float score board of size (7, 11).

        Reads ``observation["geese"]``, ``observation["food"]`` and
        ``observation["index"]``. Cells are written in the order own goose,
        other geese, food, so later writes win on overlap.
        """
        food_score = self.food_score
        this_tail_score = self.this_tail_score

        geese = observation["geese"] ##array of geese bodies
        food = observation["food"] ##array for food
        index = observation["index"] ##my index
        this_goose = geese[index] #our controlled goose
        other_goose = [goose for i, goose in enumerate(geese) if i != index]

        state = torch.zeros(7,11, dtype=torch.float) ##base state space

        ## State based score change; default=0 covers a game without other geese
        other_max_length = max((len(goose) for goose in other_goose), default=0)
        length_advantage = max(0, len(this_goose) - other_max_length)
        food_score -= length_advantage * self.food_multiplier
        this_tail_score += length_advantage * self.tail_multiplier

        #Set values for our goose (head wins over tail for a one-segment goose)
        for i, body in enumerate(this_goose):
            state[self.pos(body)] = self.this_body_score
            if i == len(this_goose)-1:
                state[self.pos(body)] = this_tail_score
            if i == 0:
                state[self.pos(body)] = self.this_head_score

        ##Set values for other geese
        for goose in other_goose:
            for i, body in enumerate(goose):
                state[self.pos(body)] = self.other_body_score
                if i == len(goose)-1:
                    state[self.pos(body)] = self.other_body_score/2
                if i == 0:
                    state[self.pos(body)] = self.other_head_score

        ##Set values for food
        for f in food:
            state[self.pos(f)] = food_score

        return state

    def diffuse_state(self, state, n = 1):
        """Diffuse the board scores ``n`` times over neighbouring cells.

        Each pass averages every cell (weight 9) with its four direct
        neighbours (weight 1 each) on a board that wraps around on both
        axes. With ``n <= 0`` an unchanged copy of the board is returned.
        """
        #conv2d needs format (N, C, Dim0, Dim1): one sample, one channel
        x = state.detach().clone().reshape(1, 1, state.size(0), state.size(1))
        kernel = torch.tensor([[0,1,0], [1,9,1], [0,1,0]], dtype=torch.float).view(1, 1, 3, 3)
        kernel /= kernel.sum() ##for normalisation of kernel
        with torch.no_grad():
            for _ in range(n):
                #wrap one cell around every edge, then convolve without padding
                wrapped = nn.functional.pad(x, (1, 1, 1, 1), mode="circular")
                x = nn.functional.conv2d(wrapped, kernel)

        return x.squeeze() ##size = (7,11) again

    ##finding the local (3,3) box of the state around the point
    def values_around_point(self, point, state):
        """Return the 3x3 window of ``state`` around ``point`` (row, column).

        Indices past the last row/column wrap to the start; -1 already
        addresses the last row/column. The centre and the corners are
        replaced by -1000 so only the four adjacent cells can be chosen.
        """
        rows = [r - 7 if r > 6 else r for r in (point[0] - 1, point[0], point[0] + 1)]
        cols = [c - 11 if c > 10 else c for c in (point[1] - 1, point[1], point[1] + 1)]

        v = state[rows][:, cols] ## 3 rows, then 3 columns => size = (3,3)

        large = 1000
        mask = torch.tensor([[0,1,0], [1,0,1], [0,1,0]], dtype=torch.float) ##reachable points on local "box"
        inv_mask = (mask-torch.ones(3,3))*large ## the non-reachable points
        return (v*mask)+inv_mask

    def evaluate(self, obs):
        """Return the move ("NORTH", "WEST", "EAST" or "SOUTH") for ``obs``.

        ``obs`` must provide ``"geese"``, ``"food"`` and ``"index"``. When it
        also carries ``"step" == 0`` the remembered previous move is cleared,
        so an instance reused for several episodes does not penalise a move
        based on the previous episode.
        """
        if obs.get("step") == 0:
            self.prev_action = -1

        state = self.state_from_observation(obs) #create current state matrix

        #the number of diffusions is a tuning parameter, same for kernel values
        diff_state = self.diffuse_state(state, 5) #diffuse the state n times
        my_index = obs["index"]
        my_head = obs["geese"][my_index][0]
        my_head_pos = self.pos(my_head)

        around = self.values_around_point(my_head_pos, diff_state)  ##get values around our head

        #create and apply mask for non-allowed moves eg.: reverse
        mask_for_unallowed = torch.zeros(9)
        if self.prev_action >= 0:
            a = self.Opposite_Action[self.prev_action]
            mask_for_unallowed[a] = -1*1000000
        around = around + mask_for_unallowed.view(3,3) ##apply mask

        #argmax works on the flattened (row-major) window => 1, 3, 5 or 7
        direction = torch.argmax(around).item()

        #save previous action
        self.prev_action = direction

        #take action value and get the appropriate text
        return self.Action_Map[direction]

In [None]:
# One shared agent instance with the default scores; it keeps its previous
# move between calls.
diffuse_agent = Diffuse_Agent()
def my_agent(obs_dict, config_dict):
    """Agent callable that delegates to the shared ``diffuse_agent``; ``config_dict`` is ignored."""
    return diffuse_agent.evaluate(obs_dict)

In [None]:
from time import time
import collections

# League
NB_ITER = 10 # number of times teams play each other
# The class-based agent (index 0) against the two file agents and a "greedy" agent.
teams = [my_agent, "diffuse_agent_variable.py", "diffuse_agent_static.py", "greedy"]
current_score = evaluate(
        "hungry_geese", 
        [
            teams[0], 
            teams[1], 
            teams[2], 
            teams[3], 
        ],
        num_episodes=NB_ITER,
    )
# Retrieve results
print(current_score)
# The winner of each episode is the team with the highest score in that row.
episode_winners = np.argmax(current_score, axis=1)
episode_winner_counts = collections.Counter(episode_winners)
print(episode_winner_counts)
# Number of episodes won by team 0 (my_agent).
print(episode_winner_counts.get(0, 0))

In [None]:
import optuna
import collections

def objective(trial):
    """Optuna objective: number of episodes won by a freshly sampled agent.

    Samples the eight score parameters of ``Diffuse_Agent`` from ``trial``,
    plays 100 episodes against the two file-based diffusion agents and one
    "greedy" agent, and returns how many of them the sampled agent (team 0)
    won.
    """
    # suggest_float replaces the deprecated suggest_uniform; same ranges.
    food_score = trial.suggest_float("food score", 0, 100)
    this_head_score = trial.suggest_float("head score", -25, 100)
    this_body_score = trial.suggest_float("body score", -100, 0)
    this_tail_score = trial.suggest_float("tail score", -100, 100)
    other_body_score = trial.suggest_float("other body score", -100, 0)
    other_head_score = trial.suggest_float("other head score", -100, 0)

    food_multiplier = trial.suggest_float("food multiplier", -10, 10)
    tail_multiplier = trial.suggest_float("tail multiplier", -10, 10)

    diffuse_agent = Diffuse_Agent(food_score, this_head_score, this_body_score, this_tail_score,
                                  other_body_score, other_head_score,
                                  food_multiplier, tail_multiplier)

    def new_agent(obs_dict, config_dict):
        # Adapter to the (observation, configuration) agent signature.
        return diffuse_agent.evaluate(obs_dict)

    # League
    NB_ITER = 100 # number of times teams play each other
    teams = [new_agent, "diffuse_agent_variable.py", "diffuse_agent_static.py", "greedy"]
    current_score = evaluate("hungry_geese", teams, num_episodes=NB_ITER)

    # Retrieve results: count the episodes in which team 0 had the top score.
    episode_winners = np.argmax(current_score, axis=1)
    episode_winner_counts = collections.Counter(episode_winners)
    return episode_winner_counts.get(0, 0)

In [None]:
# Search for the score parameters that maximise the value returned by
# `objective`, using 100 trials.
study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 100)