# Halite challenge - basics

In [1]:
import gym
import numpy as np
from Environment import halite_env as Env

In [2]:
# Reload the environment module so that edits to halite_env.py are picked up
# without restarting the kernel.
from importlib import reload
reload(Env)

<module 'Environment.halite_env' from '/home/nicola/Nicola_unipd/QuartoAnno/TODO/Baiesi/RL/haliteRL/Environment/halite_env.py'>

## Setup of the environment

In [49]:
num_players = 1 # single-player game
n_actions = 5 # no dropoffs: 1 action for staying still + 4 for moving in the cardinal directions
map_size = 7 # side length of the square map (7 x 7)

In [50]:
# create the environment; presumably for `num_players` players on a
# `map_size` x `map_size` map (argument meaning inferred from the names — confirm in halite_env.py)
env = Env.HaliteEnv(num_players, map_size)

Initializing Halite Environment


In [51]:
# layer 0 of env.map: halite available on each map cell (min = 0, max = 1000)
env.map[:,:,0]

array([[918, 831, 249,  67, 114, 937, 195],
       [559, 364, 481, 367, 918, 215, 767],
       [704, 602, 907, 598, 938, 131, 821],
       [190, 346, 269,   0, 440, 157, 373],
       [ 29,  33, 664, 456, 504, 584, 727],
       [ 74, 181, 218, 770,   4, 478, 879],
       [249, 616, 393, 331, 599, 401, 617]])

In [52]:
# layer 1 of env.map: halite carried by each ship, stored at the position of that ship
# initially there is no ship, hence no carried halite either
env.map[:,:,1] 

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [53]:
# layer 2 of env.map: shipyard position (1 on the shipyard cell, 0 elsewhere)
env.map[:,:,2] 

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [54]:
# layer 3 of env.map: ship positions (all zeros here, since no ship has been spawned yet)
env.map[:,:,3] 

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [55]:
# player_halite[0] is presumably the halite balance of player 0 (the only player, since num_players = 1)
print("Initial halite: ", env.player_halite[0])

Initial halite:  [5000.]


In [56]:
# actions are passed as a map-shaped matrix: an entry is -1 where there is no ship,
# and the action 'a_i' of ship i at the position occupied by ship i
action_matrix = np.full((map_size,map_size), -1) # no ship yet, hence no action anywhere

In [57]:
# the environment already has in memory the last state, thus we don't need to resubmit it;
# the only things that we submit are the action matrix and the shipyard action
# (1 or True to spawn a ship, 0 otherwise)
shipyard_action = 1 # initially always choose to create a ship
# the first returned value is the new state, i.e. env.map; the meaning of the other
# returned values is not shown here — presumably h is the player's halite and
# finish an end-of-episode flag (TODO confirm in halite_env.py)
s, h, finish, _ = env.step(action_matrix, makeship = shipyard_action)
# state layers: s[:,:,0] -> map_halite, s[:,:,1] -> cargo_halite,
# s[:,:,2] -> shipyard_position (not used), s[:,:,3] -> ship_position
map_halite = s[:,:,0]
ship_pos_matrix = s[:,:,3]

In [58]:
def encode(v_dec, L):
    """Map a (row, column) position on an L x L grid to its flat index.

    The grid cells are numbered 0 .. L**2 - 1 in row-major order, so the
    result for in-range coordinates is ``row * L + column``.

    Parameters
    ----------
    v_dec : sequence of length 2
        Decoded position ``[row, column]``.
    L : int
        Side length of the square grid.

    Returns
    -------
    The entry of the row-major index grid found at ``[row, column]``.
    """
    row, col = v_dec[0], v_dec[1]
    # lookup table holding the flat index of every cell
    index_grid = np.arange(L**2).reshape((L, L))
    return index_grid[row, col]

def decode(v_enc, L):
    """Recover the (row, column) position of a flat index on an L x L grid.

    Inverse of the row-major encoding: the grid cells are numbered
    0 .. L**2 - 1 and the coordinates of the first cell equal to ``v_enc``
    are returned.

    Parameters
    ----------
    v_enc : int or array holding a single flat index
        Encoded position.
    L : int
        Side length of the square grid.

    Returns
    -------
    numpy.ndarray
        Array ``[row, column]``.

    Raises
    ------
    IndexError
        If ``v_enc`` does not appear in the grid.
    """
    index_grid = np.arange(L**2).reshape((L, L))
    # scan the grid once and reuse the match coordinates for both axes
    matches = np.where(v_enc == index_grid)
    first_row = matches[0][0]
    first_col = matches[1][0]
    return np.array([first_row, first_col])

def one_to_index(V,L):
    """Return the flat (row-major) indices of the non-zero entries of V.

    Parameters
    ----------
    V : numpy.ndarray of shape (L, L)
        Indicator matrix, typically with a single entry equal to 1 and the
        others equal to 0.
    L : int
        Side length of the square grid.

    Returns
    -------
    numpy.ndarray
        1-D array with one flat index per non-zero entry of V.
    """
    flat_ids = np.arange(L**2).reshape((L, L))
    occupied = V.astype(bool)
    # boolean-mask selection keeps only the indices of the marked cells
    return flat_ids[occupied]

In [59]:
# flat (encoded) index of the cell occupied by the ship
pos_enc = one_to_index(ship_pos_matrix, map_size)
print("Encoded position of the ship: ", pos_enc)
# (row, column) coordinates recovered from the encoded index
pos_dec = decode(pos_enc, map_size)
print("Decoded position of the ship: ", pos_dec)

Encoded position of the ship:  [24]
Decoded position of the ship:  [3 3]


In [60]:
# read layer 1 (carried halite) of the state at the ship's (row, column) position
ship_cargo = s[pos_dec[0],pos_dec[1],1]
print("Initial ship cargo: ", ship_cargo)

Initial ship cargo:  0


In [62]:
# functions for the agent
def greedy_policy(s, q_values):
    """Return the action with the largest Q-value in state `s`.

    Parameters
    ----------
    s : int
        Encoded state, used to index `q_values`.
    q_values : array-like, indexed as q_values[state][action]
        Table of action values.

    Returns
    -------
    Index of the first maximal entry of ``q_values[s]``.
    """
    action_values = q_values[s]
    best_action = np.argmax(action_values)
    return best_action

def e_greedy_policy(s, q_values, eps = 0.01):
    """Choose an action for state `s` with an epsilon-greedy rule.

    With probability ``1 - eps`` the action with the largest Q-value is
    returned, otherwise an action is drawn uniformly at random.

    Parameters
    ----------
    s : int
        Encoded state, used to index `q_values`.
    q_values : array-like, indexed as q_values[state][action]
        Table of action values.
    eps : float, optional
        Exploration probability in [0, 1]. ``eps = 0`` is purely greedy,
        ``eps = 1`` is purely random.

    Returns
    -------
    Encoded action, i.e. an index into ``q_values[s]``.
    """
    action_values = q_values[s]
    # np.random.rand() draws from [0, 1): comparing with `>=` makes eps = 0 always
    # greedy (a strict `>` would still explore when the draw is exactly 0.0)
    # while eps = 1 still always explores.
    if np.random.rand() >= eps:
        return np.argmax(action_values)
    return np.random.randint(0, len(action_values))
    
def update_q(s, a, r, sp, ap, q_values, gamma = 1, alpha = 1):
    """Update Q(s, a) in place towards the SARSA-style target r + gamma * Q(sp, ap).

    Parameters
    ----------
    s, a : int
        Encoded state and action whose value is updated.
    r : float
        Reward received after taking `a` in `s`.
    sp, ap : int
        Encoded next state and next action.
    q_values : numpy.ndarray, indexed as q_values[state, action]
        Table of action values; modified in place.
    gamma : float, optional
        Discount factor.
    alpha : float, optional
        Learning rate in (0, 1]. The default ``alpha = 1`` replaces the old
        estimate with the target, which is the original behaviour; smaller
        values average the target into the old estimate.

    Returns
    -------
    numpy.ndarray
        The same `q_values` object, after the update.
    """
    td_target = r + gamma*q_values[sp,ap]
    # convex combination of old estimate and target; for alpha = 1 (and a finite
    # old estimate) this reduces exactly to overwriting with the target
    q_values[s,a] = (1 - alpha)*q_values[s,a] + alpha*td_target
    return q_values

## State complexity and state approximation

We have:
- $(\text{map size})^2$ positions ($49$ in this case, up to $64^2 = 4096$);
- 1000 values of halite for each position;
- 1000 values of carried halite.



In [None]:

# one state per map position (map_size x map_size cells)
n_states = map_size**2
# NOTE(review): n_actions was set to 5 (stay still + 4 moves) in the setup cell;
# this rebinds it to 4 — confirm which value is intended before using it
n_actions = 4