In [None]:
import numpy as np
import pandas as pd
from IPython.display import clear_output

from kaggle_environments import evaluate, make
# Create a Halite environment with debug output enabled and render its current state.
env = make("halite", debug=True)
env.render()

In [None]:
import shutil; import os
# Start submission.py from the shared agent code: use the copy in the working
# directory when it exists, otherwise the one in the halite-models input folder.
shutil.copyfile('shared_code_1.py' if os.path.exists('shared_code_1.py') 
                   else '../input/halite-models/shared_code_1.py', 
                'submission.py');
 

In [None]:
# Checkpoint file whose bytes get embedded into submission.py by the next cell.
model_path = '../input/halite-models/2020_07_28_1150fresh_3k_batch32_lr2_wdp003_CLEAN_95.pth'

In [None]:
import base64
# Read the checkpoint and base64-encode it so the weights can be embedded as text.
with open(model_path, 'rb') as f:
    raw_bytes = f.read()
    encoded_weights = base64.encodebytes(raw_bytes)

# Append the encoded value to submission.py as an `encoded_weights = b'...'`
# assignment (the f-string writes the repr of the bytes object).
with open('submission.py', 'a') as f:
    f.write(f'\nencoded_weights = {encoded_weights}\n')

In [None]:
%%writefile -a submission.py

import io
import base64
import torch

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Turn the embedded base64 text back into an in-memory checkpoint file and
# load it straight onto the chosen device.
decoded = base64.b64decode(encoded_weights)
buffer = io.BytesIO(decoded)
model.load_state_dict(torch.load(buffer, map_location=device))

model.to(device)

In [None]:
%%writefile -a submission.py

# Inference-only setup for the agent.
model.eval()

# `torch.no_grad()` as a bare statement only builds a context manager and
# throws it away, leaving autograd on; switch gradient tracking off globally.
torch.set_grad_enabled(False)

# os.cpu_count() may return None when the count cannot be determined.
torch.set_num_threads(os.cpu_count() or 1)

In [None]:
%%writefile -a submission.py

# main() builds its input on `device` and converts outputs with .numpy(),
# which only works for CPU tensors, so inference is pinned to the CPU.
device = 'cpu'
# The model may have been moved to CUDA when the weights were loaded; bring it
# back so model and inputs live on the same device.
model.to(device)

import scipy.optimize

def _elapsed_ms(start_time):
    """Return the wall-clock time in milliseconds elapsed since ``start_time``."""
    # timedelta.microseconds is only the sub-second component and wraps every
    # second; total_seconds() covers the whole interval.
    return (datetime.datetime.now() - start_time).total_seconds() * 1e3


def main(obs, config):
    """Pick one action for every ship and shipyard owned by the current player.

    The observation is converted with ``processStep``, featurized with
    ``getInputStack`` and passed through ``model``. The policy part of the
    output is turned into a score matrix with one row per ship/shipyard and
    one column per board cell (21 x 21) plus one private column per entity.
    ``scipy.optimize.linear_sum_assignment`` then maximises the total score,
    so no two entities are assigned the same board cell.

    Args:
        obs: current observation; read through ``obs['player']``,
            ``obs['players']`` and ``obs.step``.
        config: game configuration; ``convertCost`` and ``spawnCost`` are read
            from it and it is forwarded to ``processStep``.

    Returns:
        dict mapping ship / shipyard keys to an action string ('NORTH',
        'EAST', 'SOUTH', 'WEST', 'CONVERT' or 'SPAWN'). Entities that stay put
        or do nothing are left out of the dict.
    """
    start_time = datetime.datetime.now()

    conf = config

    # convert to game format
    step, halite, ship, base, cargo, ph, new_var, prior_actions = \
                processStep(obs, conf)

    # featurize
    input_stack = getInputStack(step, halite, ship, base, cargo, ph, new_var, prior_actions, action_map,
                               first_player = obs['player'])[0]

    input_stack   = torch.as_tensor(input_stack).unsqueeze(0).to(device)

    # predict
    output = model(input_stack)

    # extract predictions: everything but the last N_VALUE_HEADS entries is
    # the policy, laid out as N_POLICY_CHOICES planes of 21 x 21
    policy_output = output[:, :-N_VALUE_HEADS].reshape(output.shape[0], N_POLICY_CHOICES, 21, 21)

    # all planes but the last are ship actions (softmax across planes);
    # the last plane is the per-cell spawn score (sigmoid)
    ship_prediction = Softmax(1)(policy_output[:, :-1, :, :]).detach().numpy()

    base_prediction = torch.sigmoid( policy_output[:, -1, :] ).detach().numpy()

    value_prediction = output[:, -N_VALUE_HEADS:].detach().numpy()

    nn_ship_actions = ship_prediction
    nn_base_actions = base_prediction
    nn_value_prediction = value_prediction

    print("{:.0f} ms - neural network".format(_elapsed_ms(start_time)))

    actions = {}

    # my assets -- to predict which step to take
    my_ships = obs['players'][obs['player']][SHIPS]
    my_bases = obs['players'][obs['player']][BASES]
    my_halite = obs['players'][obs['player']][HALITE]

    ship_list = list(my_ships.items())
    base_list = list(my_bases.items())
    print(ship_list)
    print(base_list)
    print()

    # score matrix -- can only pick valid actions; everything starts at -100
    # and only reachable targets are overwritten below
    C = -100* np.ones((len(my_ships) + len(my_bases), 21 * 21 + len(my_ships) + len(my_bases)))

    MIN_PROB = 0.0   # move probabilities at or below this are zeroed
    PRED_PWR = 2.7   # exponent applied to the move probabilities

    # add ships to scoring matrix
    for ship_idx, (ship_key, ship_info)  in enumerate(ship_list):
        x, y = xy(ship_info[POSITION])
        ship_pred_actions = nn_ship_actions[0, :, x, y]

        print('{},{}: {}'.format(x, y, list(np.round(ship_pred_actions, 2))))

        raw_ship_pred_actions = np.copy(ship_pred_actions)

        # zero out weak moves (indices 1..4), keeping the total move mass
        restore_sum = np.sum(ship_pred_actions[1:5])
        ship_pred_actions[1:5] = np.where(ship_pred_actions[1:5] > MIN_PROB, ship_pred_actions[1:5], 0)
        if np.sum(ship_pred_actions[1:5]) > 0:
            ship_pred_actions[1:5] *= restore_sum / sum(ship_pred_actions[1:5])

        # raise the move probabilities to PRED_PWR, again keeping the total
        # move mass
        restore_sum = np.sum(ship_pred_actions[1:5])
        if restore_sum > 0:
            powered = ship_pred_actions[1:5] ** PRED_PWR
            powered_sum = sum(powered)
            # Very small probabilities can underflow to exactly 0 here; a
            # rescale by x/0 would fill the vector with NaN and the sampling
            # loop below could then never accept anything.
            if powered_sum > 0:
                ship_pred_actions[1:5] = powered * (restore_sum / powered_sum)

        print(list(np.round(ship_pred_actions, 2)))

        ship_ranked_actions = np.zeros((6,), dtype = np.float32)

        # Draw actions without replacement (rejection sampling); earlier draws
        # get a higher rank score, with the raw probability added on top.
        for rank in range(0, np.sum(ship_pred_actions > 1e-6) ):
            # `> 0` also leaves out NaN entries, which can never be accepted
            candidates = np.flatnonzero(ship_pred_actions > 0)
            if candidates.size == 0:
                break
            while True:
                action = int(random.choice(candidates))

                if random.random() < ship_pred_actions[action]:
                    ship_ranked_actions[action] = 6 - rank + raw_ship_pred_actions[action]
                    ship_pred_actions[action] = 0
                    remaining = np.sum(ship_pred_actions)
                    # nothing is left after the final pick; skip the 0/0
                    if remaining > 0:
                        ship_pred_actions = ship_pred_actions / remaining
                    break

        print(list(np.round(ship_ranked_actions, 1)))

        # index 0 = stay, 1..4 = the four neighbouring cells (decoded below as
        # NORTH, EAST, SOUTH, WEST), 5 = convert
        C[ship_idx, x + 21*y] = ship_ranked_actions[0]
        C[ship_idx, x + 21*c(y - 1)] = ship_ranked_actions[1]
        C[ship_idx, c(x + 1) + 21*y] = ship_ranked_actions[2]
        C[ship_idx, x + 21*c(y + 1)] = ship_ranked_actions[3]
        C[ship_idx, c(x - 1) + 21*y] = ship_ranked_actions[4]
        if my_halite >= conf.convertCost or ship_info[CARGO] > conf.convertCost:
            C[ship_idx, 21*21 + ship_idx]= ship_ranked_actions[5] # conversion doesn't use any squares

    SPAWN_CEIL = 0.8

    # add bases to scoring matrix
    for base_idx, (base_key, base_info) in enumerate(base_list):
        x, y = xy(base_info)
        base_pred_actions = nn_base_actions[0, x, y]
        print()
        print("{}, {}: {:.2f}".format(x, y, base_pred_actions))

        # up to and including step 20, predictions at or above SPAWN_CEIL
        # are treated as certain
        spawn_prob =  (base_pred_actions if obs.step > 20 else
                           ( base_pred_actions if base_pred_actions < SPAWN_CEIL else 1 ))
        spawn_yesno = random.random() < spawn_prob
        print(spawn_yesno)

        if my_halite >= conf.spawnCost:
            # spawning claims the base's own cell; the private column means
            # "do nothing"
            C[len(my_ships) + base_idx, x + 21*y] = spawn_yesno * 10
            C[len(my_ships) + base_idx, 21*21 + len(my_ships) + base_idx] = 5
        else:
            C[len(my_ships) + base_idx, 21*21 + len(my_ships) + base_idx] = 10

    entity_idxs, assignments = scipy.optimize.linear_sum_assignment(C, maximize=True)
    print()
    print(entity_idxs)
    print([ ( xy(assignment) if assignment < 21 * 21 else assignment - 21 * 21 ) for assignment in assignments])

    # iterate over ships, assign them action
    assigned = dict(zip(entity_idxs, assignments))
    for ship_idx, (ship_key, ship_info)  in enumerate(ship_list):
        x, y = xy(ship_info[POSITION])

        if assigned[ship_idx] >= 21 * 21:  # if assigned to convert
            if ship_info[CARGO] > conf.convertCost:
                actions[ship_key] = 'CONVERT'
            elif my_halite >= conf.convertCost:
                # `>=` matches the affordability test used when the convert
                # column was opened above
                actions[ship_key] = 'CONVERT'
                my_halite -= conf.convertCost
            else:
                print('{} wanted to convert at {},{} but no resources'.format(ship_idx, x, y))
            continue

        xt, yt = xy(assigned[ship_idx])
        if x == xt and y == yt:
            print('remaining at {},{}'.format(x, y))

        else:
            print('ship {} from {},{} to {},{}'.format(ship_idx, x, y, xt, yt))
            a = None
            if c(xt-x) == 1:
                a = 'EAST'
            elif c(yt-y) == 1:
                a = 'SOUTH'
            elif c(x-xt) == 1:
                a = 'WEST'
            elif c(y-yt) == 1:
                a = 'NORTH'
            if a is not None:
                actions[ship_key] = a
            else:
                print('   says to move but where???')

    # iterate over bases - assign them actions
    for base_idx, (base_key, base_info) in enumerate(base_list):
        if assigned[len(my_ships) + base_idx] >= 21 * 21:
            continue  # no spawn
        else:
            # my_halite may have been reduced by conversions above
            if my_halite >= conf.spawnCost:
                actions[base_key] = 'SPAWN'
                print('spawning a ship at {},{}'.format(*xy(base_info)))
            else:
                print('assigned to spawn at {}, {} but no cash on hand'.format(*xy(base_info)))

    print()
    print(actions)

    print("{:.0f} ms - total".format(_elapsed_ms(start_time)))

    return actions

### Agents

In [None]:
def show_status():
    """Print the current player's halite, base count, ship count and total cargo.

    Reads the notebook-level ``observation``; the player entry is indexed as
    [0] halite, [1] bases, [2] ships, and each ship value carries its cargo
    at index 1.
    """
    me = observation['players'][observation['player']]
    banked, bases, ships = me[0], me[1], me[2]
    cargo_total = sum([ship[1] for ship in ships.values()])
    print('   {} halite, {} bases, {} ships with {} onboard'.format(
        banked, len(bases), len(ships), cargo_total))

In [None]:
# Directories that are scanned for saved opponent bots.
# NOTE(review): the same directory is listed twice, so every bot ends up in
# SAVED_BOTS twice -- confirm this is intentional.
paths = [   '../input/halite-bots-c30',  '../input/halite-bots-c30',  ]

SAVED_BOTS = []
for path in paths:
    # endswith() rather than a substring test, so files such as 'bot.pyc' or
    # 'bot.py.bak' are not mistaken for agent scripts.
    SAVED_BOTS.extend([path + '/' + file for file in os.listdir(path) if file.endswith('.py')])
SAVED_BOTS = sorted(SAVED_BOTS)


def printBots(bots):
    """Print the file name (text after the last '/') of each bot path, one per line."""
    file_names = [bot_path.rsplit('/', 1)[-1] for bot_path in bots]
    for file_name in file_names:
        print(file_name)
        
# Substrings of bot paths that should be over-represented in the opponent pool.
EXTRAS = []

# Opponent pool: every saved bot once, plus two extra copies of each bot whose
# path contains any EXTRAS substring. With EXTRAS empty this is just the
# sorted list of saved bots.
AGENTS = sorted(SAVED_BOTS + [b for b in SAVED_BOTS if any(z in b for z in EXTRAS)] * 2)


### Live Game

In [None]:
LIVE = False
import time 
    
if LIVE:
    %run submission.py

    # reset variables
    ships_data = {};  
    max_step_time = 0 
    step_times = np.zeros(400)
    random.seed(3)

    # Play as first position against random agent.
    this_run = [None, random.choice(AGENTS), random.choice(AGENTS), random.choice(AGENTS)]
    trainer = env.train(this_run)

    observation = trainer.reset()
    print(this_run)
    
    while not env.done:
        start = datetime.datetime.now()
        show_status(); print()
            
        my_action = main(observation, env.configuration)
        step_time = (datetime.datetime.now() - start).microseconds//1e3; 
        step_times[observation.step] = step_time

        print("\nStep: {}, {:.0f}ms, My Actions: {}".format(observation.step, 
                                                           step_time, 
                                                           my_action))
        prior_obs = observation.copy(); 
        observation, reward, done, info = trainer.step(my_action); obs = observation

    print(' Longest Step Time: {:.0f}ms'.format(np.max(step_times)))

    clear_output(wait=True)
    print('Run Complete')

In [None]:
# Only meaningful after a live game has been played in the cell above.
if LIVE:
    env.render(mode = 'ipython', width = 700, height = 500)

### Games

In [None]:
def compete(runs):
    """Evaluate submission.py against three opponents drawn at random from AGENTS.

    Args:
        runs: number of episodes, passed to ``evaluate`` as ``num_episodes``.

    Returns:
        Whatever ``evaluate`` returns for the "halite" environment.
    """
    opponents = [random.choice(AGENTS) for _ in range(3)]
    lineup = ['submission.py'] + opponents
    return evaluate("halite", lineup,
                    num_episodes=runs, configuration={"agentExec": "LOCAL"})

def mean_reward(rewards):
    """Summarise how player 0 fared against every other player.

    Args:
        rewards: sequence of per-episode reward rows, one value per player
            with player 0 first. Missing rewards (``None`` / NaN) are scored
            as -500.

    Returns:
        A string such as "67% wins, 33% losses, 0% ties"; the percentages are
        taken over all (episode, opponent) pairs.
    """
    # `nan` must be passed by keyword: the second positional parameter of
    # np.nan_to_num is `copy`, so a positional -500 leaves NaN mapped to 0.0.
    rewards = np.nan_to_num(np.array(rewards, dtype = np.float32), nan = -500)

    # Every column after the first is an opponent (three in a 4-player game).
    n_opponents = rewards.shape[1] - 1
    games = n_opponents * len(rewards)

    ours = rewards[:, :1]      # kept 2-D so it broadcasts against the opponents
    theirs = rewards[:, 1:]
    wins = int(np.sum(ours > theirs))
    ties = int(np.sum(ours == theirs))
    losses = int(np.sum(ours < theirs))

    return "{:.0%} wins, {:.0%} losses, {:.0%} ties".format(
        wins/games, losses/games, ties/games )

In [None]:
# Number of evaluation games played by the loop in the next cell.
N_RUNS = 3  
# Seed control for the opponent draw: a value > 0 seeds `random` with
# CSEED + N_RUNS; otherwise the seed is taken from the clock.
CSEED = 10

In [None]:
import datetime as datetime
import random

start = datetime.datetime.now()

# CSEED > 0 makes the opponent draw reproducible; otherwise seed from the clock.
if CSEED > 0:
    random.seed(CSEED + N_RUNS)
else:
    random.seed(datetime.datetime.now().microsecond)

# Play the games one at a time so each one draws its own set of opponents.
preds = []
for i in range(0, N_RUNS):
    r = compete(1)
    preds.append(r)
    clear_output(wait=True)

print('Over {} runs:'.format(len(preds)))
# Each compete(1) call returns a list of reward rows; flatten before scoring.
print(" ", mean_reward([p for sub in preds for p in sub]))
# total_seconds() is the full elapsed time; timedelta.seconds is only the
# seconds component and wraps around after a day.
print('\nTime Elapsed: ', int((datetime.datetime.now() - start).total_seconds()), 's')