In [None]:
!pip install kaggle-environments --upgrade
from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import *
import random
import numpy as np
from scipy.optimize import linear_sum_assignment
import scipy.ndimage
# Build a debug-enabled Halite test environment (10x10 board) that later cells reuse.
environment = make(
    "halite",
    configuration={"size": 10, "startingHalite": 1000},
    debug=True,
)

In [None]:
# Reset the test environment for two agents and print the resulting board.
agent_count = 2  # also reused by the later cell that runs the match
environment.reset(agent_count)
state = environment.state[0]  # entry 0 of the environment state (presumably the first agent's view - confirm)
board = Board(state.observation, environment.configuration)
print(board)

# Scaffolding: agent-agnostic constants and helpers

In [None]:
# constants
INF = int(1e9)  # large sentinel: starting "minimum" in nearest() and, negated, a forbidden reward
CFG = environment.configuration  # configuration of the test environment; dist() reads CFG.size
# Lookup table with 11 rows and 21 columns, read by num_turns_to_mine() as
# OPTIMAL_MINING_TURNS[row, travelTime]: the row is chosen from the cargo/deposit
# ratio on a log scale, the column is the travel time.
OPTIMAL_MINING_TURNS = np.array( # optimal mining turn for [Cargo/Deposit, travelTime]
  [[0, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8],
   [0, 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7],
   [0, 0, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7],
   [0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6],
   [0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6],
   [0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5],
   [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

# helpers
def dist(a: Point, b: Point) -> int:
    """Return the Manhattan distance between ``a`` and ``b`` on the wrap-around board.

    On each axis the shorter of the direct gap and the gap across the board
    edge (``CFG.size`` minus the direct gap) is counted.
    """
    gap_x = abs(a.x - b.x)
    gap_y = abs(a.y - b.y)
    wrapped_x = CFG.size - gap_x
    wrapped_y = CFG.size - gap_y
    return min(gap_x, wrapped_x) + min(gap_y, wrapped_y)

def directions_to(s: Point, t: Point) -> list:
    """List the moves that bring a ship at ``s`` closer to ``t`` on the wrap-around board.

    Returns:
        A list of 0, 1 or 2 ``ShipAction`` values: at most one EAST/WEST entry
        (only when the x coordinates differ) followed by at most one
        NORTH/SOUTH entry (only when the y coordinates differ). An empty list
        means ``s`` and ``t`` have the same coordinates. On each axis the
        direction with fewer wrapped steps is chosen; an exact tie resolves to
        EAST / NORTH.
    """
    size = CFG.size  # same board-size source that dist() uses
    candidate = []  # [E/W, N/S]
    if s.x != t.x:
        # (s.x - t.x) % size counts the steps when x decreases (treated as WEST here),
        # (t.x - s.x) % size the steps when x increases (EAST).
        candidate.append(ShipAction.WEST if (s.x - t.x) % size < (t.x - s.x) % size else ShipAction.EAST)
    if s.y != t.y:
        # Same comparison on the y axis: decreasing y is treated as SOUTH, increasing y as NORTH.
        candidate.append(ShipAction.SOUTH if (s.y - t.y) % size < (t.y - s.y) % size else ShipAction.NORTH)
    return candidate

def direction_to(s: Point, t: Point) -> ShipAction:
    """Pick one move from ``directions_to(s, t)`` at random.

    Returns ``None`` when ``directions_to`` yields no candidate move.
    """
    options = directions_to(s, t)
    if not options:
        return None
    return random.choice(options)

def nearest(shipsOrShipyards: list, pos: Point):
    """Return the item whose ``.position`` is closest to ``pos`` according to ``dist``.

    The first item wins a tie. ``None`` is returned when the collection is
    empty (or when no item is closer than ``INF``).
    """
    closest, closest_dist = None, INF
    for candidate in shipsOrShipyards:
        gap = dist(pos, candidate.position)
        if gap < closest_dist:
            closest, closest_dist = candidate, gap
    return closest

# Core Strategy

In [None]:
# globals
tasks = {} # ship -> tasks; update_tasks() clears and refills it as {ship row index: target column index}

# TUNABLE parameters
shipyardsTargetMultiplier = 8 # should be dynamic; repetition count for own shipyard cells in update_tasks()' target list
MAX_SHIPS = 20 # NOTE(review): not referenced by the code visible in this notebook - confirm intended use
MAX_CHASE_RANGE = 2 # largest dist() at which an enemy ship still earns a chase reward
CHASE_PUNISHMENT = 2 # divisor applied, together with the distance, to the chase reward
SHIPYARD_DEMOLISH_REWARD = 700 # reward for targeting an enemy shipyard, before division by distance

# core strategies

def num_turns_to_mine(C, D, travelTime): # https://www.kaggle.com/krishnaharish/optimus-mine-agent
    """Look up how many turns a ship should plan to spend mining a cell.

    Args:
        C: halite currently carried by the ship.
        D: halite deposit on the candidate cell.
        travelTime: steps to the cell and back to a shipyard.

    Returns:
        The ``OPTIMAL_MINING_TURNS`` entry whose row is the cargo/deposit
        bucket and whose column is the travel time. Both indices are clamped
        to the table, so any non-negative input yields a valid lookup.
    """
    import math  # local import: the notebook's import cell does not import math explicitly

    last_row = OPTIMAL_MINING_TURNS.shape[0] - 1
    last_col = OPTIMAL_MINING_TURNS.shape[1] - 1

    if C <= 0:
        # Nothing carried: the ratio is zero, which is the first row.
        cdRatio = 0
    elif D <= 0:
        # Empty deposit: the ratio is unbounded, which is the last valid row.
        cdRatio = last_row
    else:
        # Log-scale bucket of the cargo/deposit ratio, clamped to the table rows.
        cdRatio = int(math.log(C / D) * 2.5 + 5.5)
        cdRatio = min(max(cdRatio, 0), last_row)

    # Clamp the column for every branch, not only when both C and D are non-zero.
    travelTime = int(min(max(travelTime, 0), last_col))
    return OPTIMAL_MINING_TURNS[cdRatio, travelTime]

def halite_per_turn(cargo, deposit, travelTime, minMineTurns=1):
    """Estimate the halite gained per turn from a trip to mine one cell.

    Args:
        cargo: halite the ship already carries.
        deposit: halite on the candidate cell.
        travelTime: steps spent travelling (to the cell and back to a shipyard).
        minMineTurns: lower bound on the number of mining turns.

    Returns:
        A tuple ``(rate, turns)``: the halite per turn over travelling plus
        mining, and the number of mining turns that rate assumes.

    Note:
        The denominator is ``travelTime + turns``; it is positive whenever
        ``travelTime >= 0`` and ``minMineTurns >= 1`` (the default).
    """
    turns = max(num_turns_to_mine(cargo, deposit, travelTime), minMineTurns)
    # The model keeps 75% of the deposit after each mining turn, so `turns`
    # turns collect a (1 - 0.75**turns) share of it on top of the cargo.
    mined = cargo + (1 - .75**turns) * deposit
    return mined / (travelTime + turns), turns

def _target_cells(board):
    """Return the candidate target cells, in the column order of the reward matrix.

    Every board cell appears once; each shipyard cell of the current player is
    then appended ``shipyardsTargetMultiplier`` more times, so that several
    ships can be assigned to the same shipyard (the assignment gives each
    column to at most one ship).
    """
    me = board.current_player
    own_yard_cells = [shipyard.cell for shipyard in me.shipyards]
    return list(board.cells.values()) + own_yard_cells * shipyardsTargetMultiplier


def _target_reward(ship, cell, me, return_dist):
    """Score sending ``ship`` to ``cell``; ``return_dist`` is the cell's distance to the nearest own shipyard."""
    steps = dist(ship.position, cell.position)
    # A ship is not an obstacle on its own cell; otherwise it could never stay put to mine.
    occupant = cell.ship if cell.ship is not None and cell.ship.id != ship.id else None

    if occupant is not None:
        if occupant.player_id == me.id:  # another friendly ship
            return -INF
        # Enemy ship: chase only when it is close and carries more than this
        # ship does (compared against the ship's cargo, not the player's bank). TUNABLE
        if occupant.halite > ship.halite and steps <= MAX_CHASE_RANGE:
            return occupant.halite / (max(steps, 1) * CHASE_PUNISHMENT)
        return 0

    if cell.shipyard is None:  # mineral
        # halite_per_turn returns (rate, turns); only the rate is a reward.
        return halite_per_turn(ship.halite, cell.halite, steps + return_dist)[0]
    if cell.shipyard.player_id == me.id:  # friendly shipyard
        return ship.halite / max(steps, 0.1)  # TODO: TUNABLE?
    return SHIPYARD_DEMOLISH_REWARD / max(steps, 1)  # enemy shipyard


def update_tasks(board): # return updated tasks
    """Assign each ship of the current player to one target cell.

    Builds a ships x targets reward matrix and solves it as a maximising linear
    assignment. The module-level ``tasks`` dict is cleared and refilled as
    ``{ship index in me.ships: column index in _target_cells(board)}``.

    Args:
        board: the ``Board`` for the current turn.

    Returns:
        The updated module-level ``tasks`` dict (empty when the player has no ships).
    """
    me = board.current_player
    tasks.clear()

    ships = me.ships
    if not ships:
        return tasks

    targets = _target_cells(board)

    # Distance from every target back to the closest own shipyard. It does not
    # depend on the ship, so compute it once; use 0 while no shipyard exists.
    shipyards = me.shipyards
    return_dists = []
    for cell in targets:
        home = nearest(shipyards, cell.position)
        return_dists.append(dist(cell.position, home.position) if home is not None else 0)

    # calculate rewards
    rewards = np.zeros((len(ships), len(targets))) # reward matrix for optimization
    for i, ship in enumerate(ships):
        for j, cell in enumerate(targets):
            rewards[i, j] = _target_reward(ship, cell, me, return_dists[j])

    rows, cols = linear_sum_assignment(rewards, maximize=True) # rows[i] -> cols[i]
    for r, c in zip(rows, cols):
        tasks[int(r)] = int(c)
    return tasks


@board_agent
def agent(board):
    """Set the next action of every ship and shipyard of the current player.

    Ships step toward the cell assigned by ``update_tasks``. When the player
    has no shipyard, the first ship converts instead. Every shipyard whose
    cell holds no ship spawns.
    """
    me = board.current_player
    ships, shipyards = me.ships, me.shipyards

    update_tasks(board)
    # Same list, in the same order, as the reward-matrix columns used by update_tasks.
    targets = _target_cells(board)

    for i, ship in enumerate(ships):
        if i in tasks:
            # direction_to yields None when the ship already sits on its target, i.e. it stays.
            ship.next_action = direction_to(ship.position, targets[tasks[i]].position)
    if len(shipyards) == 0 and len(ships) > 0:
        ships[0].next_action = ShipAction.CONVERT
    for shipyard in shipyards:
        if shipyard.cell.ship is None:
            shipyard.next_action = ShipyardAction.SPAWN

In [None]:
# Reset the environment, run the agent defined above against the "random" agent,
# then render the result in the notebook.
environment.reset(agent_count)
environment.run([agent, "random"])
environment.render(mode="ipython", width=500, height=450)

In [None]:
# Bare expression: shows the environment's configuration as the cell output.
environment.configuration