In [67]:
!pip install kaggle-environments --upgrade
from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import *
import math
import random
import numpy as np
from scipy.optimize import linear_sum_assignment
import scipy.ndimage
# Shared test environment: a 10x10 Halite board created in debug mode,
# reused by the cells below.
environment = make(
    "halite",
    configuration={"size": 10, "startingHalite": 1000},
    debug=True,
)

Requirement already up-to-date: kaggle-environments in /opt/conda/lib/python3.7/site-packages (1.0.12)


In [2]:
# Start a fresh episode with `agent_count` players and print the initial board.
agent_count = 2
environment.reset(agent_count)
state = environment.state[0] # first entry of the environment's state list
board = Board(state.observation, environment.configuration)
print(board)

| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 |a0 | 0 | 0 | 0 | 0 |b0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |



# Scaffolding: helpers that are agnostic to the agent

In [95]:
# constants
INF = int(1e9) # "infinity" sentinel: starting minimum in nearest(), negated as a reward in update_tasks()
CFG = environment.configuration # environment configuration; dist() reads CFG.size from it
# Lookup table read by num_turns_to_mine(): the row is selected from the
# cargo/deposit ratio (cdRatio) and the column from travelTime. The table has
# 11 rows and 21 columns; each entry is a number of mining turns.
OPTIMAL_MINING_TURNS = np.array( # optimal mining turn for [Cargo/Deposit, travelTime]
  [[0, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8],
   [0, 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7],
   [0, 0, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7],
   [0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6],
   [0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6],
   [0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5],
   [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

# helpers
def dist(a: Point, b: Point) -> int:
    """Manhattan distance between `a` and `b` on a board of side CFG.size that wraps at the edges."""
    side = CFG.size
    gap_x = abs(a.x - b.x)
    gap_y = abs(a.y - b.y)
    # On each axis take the shorter of the direct gap and the gap going around the edge.
    return min(gap_x, side - gap_x) + min(gap_y, side - gap_y)

def directions_to(s: Point, t: Point) -> list:
    """List the single-step actions that move `s` closer to `t` on the wrap-around board.

    Args:
        s: start position.
        t: target position.

    Returns:
        A list of 0, 1 or 2 ShipAction values: at most one east/west action
        (appended first) and at most one north/south action. The list is empty
        when `s` and `t` share both coordinates.
    """
    N = environment.configuration.size
    candidates = [] # [E/W, N/S] -- horizontal action first, then vertical
    if s.x != t.x:
        # (s.x-t.x) % N counts steps going west, (t.x-s.x) % N steps going east;
        # a tie (exactly half the board away) resolves to EAST.
        candidates.append(ShipAction.WEST if (s.x-t.x) % N < (t.x-s.x) % N else ShipAction.EAST)
    if s.y != t.y:
        # Same comparison on the vertical axis; a tie resolves to NORTH.
        candidates.append(ShipAction.SOUTH if (s.y-t.y) % N < (t.y-s.y) % N else ShipAction.NORTH)
    return candidates

def direction_to(s: Point, t: Point) -> ShipAction:
    """Choose at random one of the actions from directions_to(s, t).

    Returns None when directions_to() yields no candidates.
    """
    options = directions_to(s, t)
    if not options:
        return None
    return random.choice(options)

def dry_move(s: Point, d: ShipAction) -> Point:
    """Return the position reached from `s` by action `d`, without touching the board.

    Args:
        s: current position.
        d: the action to simulate; anything other than the four compass
            moves (including None) leaves the position unchanged.

    Returns:
        The destination point, wrapped around the board edges.
    """
    N = environment.configuration.size
    if d == ShipAction.NORTH:
        offset = Point(0, 1)
    elif d == ShipAction.SOUTH:
        offset = Point(0, -1)
    elif d == ShipAction.EAST:
        offset = Point(1, 0)
    elif d == ShipAction.WEST:
        offset = Point(-1, 0)
    else:
        return s
    # Parenthesise the sum: `%` binds tighter than `+`, so `s + offset % N`
    # would wrap only the offset and leave the result off the board.
    return (s + offset) % N

def nearest(shipsOrShipyards: list, pos: Point):
    """Return the item whose `.position` is closest to `pos` according to dist().

    The first item wins ties. Returns None when the list is empty (or when no
    item is closer than INF).
    """
    closest, closest_gap = None, INF
    for candidate in shipsOrShipyards:
        gap = dist(pos, candidate.position)
        if gap < closest_gap:
            closest, closest_gap = candidate, gap
    return closest

# Core Strategy

In [101]:
# globals
tasks = {} # ship -> cell; cleared and refilled by update_tasks() on every call

# TUNABLE parameters
shipyardsTargetMultiplier = 8 # should be dynamic; number of extra copies of each friendly shipyard cell added to the target list
MAX_SHIPS = 10 # agent() does not spawn once the fleet has this many ships
MAX_CHASE_RANGE = 2 # enemy ships farther away than this get a chase reward of 0
CHASE_PUNISHMENT = 2 # multiplied by the distance to form the divisor of the chase reward
SHIPYARD_DEMOLISH_REWARD = 700 # enemy-shipyard reward before it is divided by the distance

# core strategies

def num_turns_to_mine(C, D, travelTime, minMineTurns=1): # https://www.kaggle.com/krishnaharish/optimus-mine-agent
    """Look up how many turns a ship should plan to spend mining a cell.

    Args:
        C: halite currently carried by the ship.
        D: halite deposit on the cell.
        travelTime: steps to the cell and back to a shipyard.
        minMineTurns: lower bound applied to the looked-up value.

    Returns:
        The entry of OPTIMAL_MINING_TURNS for the cargo/deposit bucket and the
        travel time, but never less than `minMineTurns`.
    """
    maxRow = OPTIMAL_MINING_TURNS.shape[0] - 1
    maxCol = OPTIMAL_MINING_TURNS.shape[1] - 1
    if C <= 0:
        cdRatio = 0
    elif D <= 0:
        cdRatio = maxRow
    else:
        cdRatio = np.clip(int(math.log(C/D)*2.5+5.5), 0, maxRow)
    # Clip for every branch: an empty ship (C == 0) or a depleted cell (D == 0)
    # far from home must not index past the last column of the table.
    travelTime = int(np.clip(travelTime, 0, maxCol))
    return max(OPTIMAL_MINING_TURNS[cdRatio, travelTime], minMineTurns)

def halite_per_turn(cargo, deposit, travelTime, minMineTurns=1):
    """Estimate the halite gained per turn for a mining trip.

    The trip lasts `travelTime` plus the number of mining turns given by
    num_turns_to_mine(); the haul is the current cargo plus the share of the
    deposit extracted in that many turns.
    """
    mineTurns = num_turns_to_mine(cargo, deposit, travelTime, minMineTurns)
    # .75 is the fraction of the deposit left after one mining turn
    # (presumably 1 - collectRate from the environment configuration -- confirm).
    haul = cargo + (1 - .75**mineTurns) * deposit
    tripLength = travelTime + mineTurns
    return haul / tripLength

def update_tasks(board): # return updated tasks
    """Rebuild the global `tasks` mapping (ship -> target cell) for the current player.

    A reward is computed for every (ship, target) pair and
    scipy's linear_sum_assignment picks the assignment with the largest total
    reward. Targets are every board cell plus `shipyardsTargetMultiplier`
    extra copies of each friendly shipyard cell, so several ships can be sent
    to the same shipyard.

    Args:
        board: the Board for the current turn.

    Returns:
        The global `tasks` dict, cleared and refilled in place. It is left
        empty when the player has no ships.
    """
    me = board.current_player
    tasks.clear()

    ships = me.ships
    if not ships: # nothing to assign
        return tasks
    shipyards = me.shipyards

    # calculate rewards
    targets = list(board.cells.values()) + [yard.cell for yard in shipyards] * shipyardsTargetMultiplier # target cells
    rewards = np.zeros((len(ships), len(targets))) # reward matrix for optimization
    for i, ship in enumerate(ships):
        for j, cell in enumerate(targets):
            if (cell.ship is None or cell.ship is ship) and cell.shipyard is None: # mineral
                d1 = dist(ship.position, cell.position)
                home = nearest(shipyards, cell.position)
                # Without any shipyard there is no return leg to account for.
                d2 = dist(cell.position, home.position) if home is not None else 0
                reward = halite_per_turn(ship.halite, cell.halite, d1 + d2)
            elif cell.ship is not None and cell.ship.player.is_current_player: # friendly ship
                reward = -INF # avoid clustering
            elif cell.ship is not None and not cell.ship.player.is_current_player: # enemy ship
                dist_ = dist(ship.position, cell.position)
                # Chase only a nearby enemy carrying more than this ship does.
                # Compare against the chasing ship's cargo, not the player's
                # banked halite (me.halite), which is unrelated to this ship.
                if cell.ship.halite > ship.halite and dist_ <= MAX_CHASE_RANGE: # TUNABLE
                    reward = cell.ship.halite / (dist_ * CHASE_PUNISHMENT)
                else:
                    reward = 0
            elif cell.shipyard is not None and cell.shipyard.player.is_current_player: # friendly shipyard
                reward = ship.halite / max(dist(ship.position, cell.position), 0.1) # TODO: TUNABLE?
            elif cell.shipyard is not None and not cell.shipyard.player.is_current_player: # enemy shipyard
                reward = SHIPYARD_DEMOLISH_REWARD / dist(ship.position, cell.position)
            else: # defensive default; the branches above cover every cell state
                reward = 0
            rewards[i, j] = reward

    rows, cols = linear_sum_assignment(rewards, maximize=True) # rows[i] -> cols[i]
    for r, c in zip(rows, cols):
        tasks[ships[r]] = targets[c]
    return tasks
    

@board_agent
def agent(board):
    """Set next_action on the current player's ships and shipyards for one turn.

    Steps, in order:
      1. With no shipyard, convert the first ship (if any) and stop.
      2. Assign targets via update_tasks() and step each ship toward its
         target, skipping moves that would land on a friendly ship.
      3. Spawn from free shipyards while halite and MAX_SHIPS allow.

    Args:
        board: the Board for the current turn.
    """
    print("turn", board.step)
    me = board.current_player

    # convert
    if len(me.shipyards) == 0:
        if len(me.ships) > 0: # with neither ships nor shipyards there is nothing to do
            me.ships[0].next_action = ShipAction.CONVERT
        return

    # mine
    update_tasks(board)
    for ship, target in tasks.items():
        candidates = directions_to(ship.position, target.position)
        for candidate in candidates:
            # board.next() is re-evaluated on purpose: presumably it reflects the
            # actions already assigned earlier in this loop -- confirm against the helpers.
            nextCell = board.next()[dry_move(ship.position, candidate)]
            if nextCell.ship is None or not nextCell.ship.player.is_current_player: # not ally ship
                ship.next_action = candidate
                break

    # spawn
    spawnCost = environment.configuration.spawnCost
    numShipToSpawn = 0 # spawns ordered so far this turn, shared across all shipyards
    for shipyard in me.shipyards:
        # Count the spawns already ordered this turn against both the halite
        # budget and the fleet cap, so several shipyards cannot overshoot either.
        canAfford = me.halite >= spawnCost * (numShipToSpawn + 1)
        hasRoom = len(me.ships) + numShipToSpawn < MAX_SHIPS
        if canAfford and hasRoom and board.next()[shipyard.position].ship is None:
            shipyard.next_action = ShipyardAction.SPAWN
            numShipToSpawn += 1

In [102]:
# Reset the environment, run our agent against the "random" opponent, and render the result inline.
environment.reset(agent_count)
environment.run([agent, "random"])
environment.render(mode="ipython", width=500, height=450)

turn 0
turn 1
turn 2
turn 3
turn 4
<kaggle_environments.envs.halite.helpers.Player object at 0x7f91a9b33750> <kaggle_environments.envs.halite.helpers.Player object at 0x7f91a98a9d90>
is ally, avoiding
turn 5
turn 6
<kaggle_environments.envs.halite.helpers.Player object at 0x7f91a9abcc50> <kaggle_environments.envs.halite.helpers.Player object at 0x7f91a98d6b10>
is ally, avoiding
turn 7
turn 8
<kaggle_environments.envs.halite.helpers.Player object at 0x7f91ab3f7b50> <kaggle_environments.envs.halite.helpers.Player object at 0x7f91a9b54990>
is ally, avoiding
turn 9
<kaggle_environments.envs.halite.helpers.Player object at 0x7f91a9b3af90> <kaggle_environments.envs.halite.helpers.Player object at 0x7f91ab612150>
is ally, avoiding
turn 10
turn 11
turn 12
turn 13
turn 14
turn 15
<kaggle_environments.envs.halite.helpers.Player object at 0x7f91a99f0390> <kaggle_environments.envs.halite.helpers.Player object at 0x7f91a9b3ae10>
turn 16
turn 17
<kaggle_environments.envs.halite.helpers.Player object

In [100]:
# Display the configuration the environment is running with.
environment.configuration

{'size': 10,
 'startingHalite': 1000,
 'episodeSteps': 400,
 'agentExec': 'LOCAL',
 'agentTimeout': 30,
 'actTimeout': 6,
 'runTimeout': 9600,
 'spawnCost': 500,
 'convertCost': 500,
 'moveCost': 0,
 'collectRate': 0.25,
 'regenRate': 0.02,
 'maxCellHalite': 500}