## Notebook to store states and rewards from saved episodes

In [None]:
from kaggle_environments.envs.halite.helpers import ShipAction, ShipyardAction, Board, Point, board_agent, Cell
from kaggle_environments import make, evaluate

import json
import glob
import time

import numpy as np
import random
import pickle
import math

In [None]:
# Length of the per-ship target vector. Indices as used in this notebook:
# 0 = ship stayed in place (or disappeared), 1 = CONVERT, 2-5 = the four moves.
SHIP_ACTION_SPACE_SIZE = 6
# Length of the per-shipyard target vector: 0 = no spawn, 1 = SPAWN.
SHIPYARD_ACTION_SPACE_SIZE = 2


# Precomputed lookup: maps a position to the ordered sequence of board
# positions used to lay out the 22x22 state grids built by the helpers below.
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# from a trusted dataset.
with open('../input/base-dictionary/base_dict.pickle', 'rb') as handle:
    neighbours_dict = pickle.load(handle)

In [None]:
# Get all episode files from the scraped dataset: names that contain a digit
# immediately followed by a dot (the start of the extension).
files = glob.glob('../input/halite-game-scraper/*[0-9].*')

In [None]:
def get_all_ship_positions(board, ship_pos):
    """
    Build the ship-occupancy channel of the state, relative to ``ship_pos``.

    Cells are visited in the order stored in ``neighbours_dict[ship_pos]``
    (presumably nearest cell first -- the ordering comes from the pickled
    dictionary; confirm there). Each cell is encoded as:

        1   a ship of the current player is on the cell
        -1  a ship of any opponent is on the cell
        0   no ship on the cell

    Args:
        board: Board of the step being encoded.
        ship_pos: position used as the key into ``neighbours_dict``.

    Returns:
        np.ndarray of shape (22, 22, 1).
    """
    neighbours = neighbours_dict[ship_pos]

    # Compare positions with positions: ``board.current_player.ships`` holds
    # Ship objects, so testing ``pos in ships`` would never match.
    my_ship_positions = {ship.position for ship in board.current_player.ships}

    # Gather every opponent ship position once, not once per neighbour cell.
    opp_ship_positions = {
        ship.position
        for opponent in board.opponents
        for ship in opponent.ships
    }

    ship_positions = np.array([
        int(pos in my_ship_positions) - int(pos in opp_ship_positions)
        for pos in neighbours
    ])

    return ship_positions.reshape(22, 22, 1)
    
def get_all_shipyard_positions(board, ship_pos):
    """
    Build the shipyard-occupancy channel of the state, relative to ``ship_pos``.

    Cells are visited in the order stored in ``neighbours_dict[ship_pos]``
    (presumably nearest cell first -- the ordering comes from the pickled
    dictionary; confirm there). Each cell is encoded as:

        1   a shipyard of the current player is on the cell
        -1  a shipyard of any opponent is on the cell
        0   no shipyard on the cell

    Args:
        board: Board of the step being encoded.
        ship_pos: position used as the key into ``neighbours_dict``.

    Returns:
        np.ndarray of shape (22, 22, 1).
    """
    neighbours = neighbours_dict[ship_pos]

    # Compare positions with positions: ``board.current_player.shipyards``
    # holds Shipyard objects, so ``pos in shipyards`` would never match.
    my_shipyard_positions = {
        shipyard.position for shipyard in board.current_player.shipyards
    }

    # Gather every opponent shipyard position once, not once per cell.
    opp_shipyard_positions = {
        shipyard.position
        for opponent in board.opponents
        for shipyard in opponent.shipyards
    }

    shipyard_positions = np.array([
        int(pos in my_shipyard_positions) - int(pos in opp_shipyard_positions)
        for pos in neighbours
    ])

    return shipyard_positions.reshape(22, 22, 1)
    
    
def get_all_halites(board, ship_pos):
    """
    Build the halite channel of the state, relative to ``ship_pos``.

    Cells are read in the order stored in ``neighbours_dict[ship_pos]``.
    Positive halite amounts are log10-scaled; amounts at or below zero are
    stored unchanged because log10 is undefined there.

    Returns:
        np.ndarray of shape (22, 22, 1).
    """
    halite_channel = []
    for cell_pos in neighbours_dict[ship_pos]:
        amount = board[cell_pos].halite
        if amount > 0:
            halite_channel.append(math.log10(amount))
        else:
            halite_channel.append(amount)
    return np.array(halite_channel).reshape(22, 22, 1)

In [None]:
def get_direction(current_pos, next_pos, shipyard_pos):
    """
    Infer the action index a ship took from its position in two
    consecutive steps.

    Args:
        current_pos: (x, y) of the ship in the current step.
        next_pos: (x, y) of the same ship in the next step.
        shipyard_pos: (x, y) of a shipyard; when the ship moved and its
            current position equals this value the action is reported as
            index 1. Callers pass (-1, -1) to disable this check.

    Returns:
        int action index:
            0  position unchanged
            1  current position equals ``shipyard_pos``
            2  y increased (NORTH)
            3  y decreased (SOUTH)
            4  x increased (EAST in the Kaggle Halite convention)
            5  x decreased (WEST in the Kaggle Halite convention)

    The board wraps around and a ship moves at most one cell per step, so a
    coordinate difference larger than one cell can only come from crossing
    the edge; in that case the apparent direction is inverted.
    """
    x, y = current_pos
    x1, y1 = next_pos
    shipyard_x1, shipyard_y1 = shipyard_pos

    if x == x1 and y == y1:
        return 0
    if x == shipyard_x1 and y == shipyard_y1:
        return 1

    dx = x1 - x
    dy = y1 - y
    # A jump of more than one cell means the ship crossed the wrap-around
    # edge, e.g. y 20 -> 0 is a step NORTH even though y appears to shrink.
    if abs(dy) > 1:
        dy = -dy
    if abs(dx) > 1:
        dx = -dx

    if dy > 0:
        return 2
    if dy < 0:
        return 3
    if dx > 0:
        return 4
    if dx < 0:
        return 5

In [None]:
def get_end_margin(data, config):
    """
    Return the final halite margin of the current player over the best
    opponent.

    The board is rebuilt from the first entry of the last step in
    ``data["steps"]`` together with ``config``; the result is the current
    player's halite minus the highest halite among the opponents.
    """
    # Observation of the last recorded step.
    final_obs = data["steps"][-1][0]["observation"]
    final_board = Board(final_obs, config)

    # Halite of the leading opponent, then our lead (or deficit) over it.
    best_opponent_halite = max(opp.halite for opp in final_board.opponents)
    return final_board.current_player.halite - best_opponent_halite

#def get_input_data(board, step_rewards):
def _encode_state(board, position):
    """Stack the halite, ship and shipyard channels into one (22, 22, 3) state."""
    return np.concatenate(
        (
            get_all_halites(board, position),
            get_all_ship_positions(board, position),
            get_all_shipyard_positions(board, position),
        ),
        axis=2,
    )


def get_input_data(data, board, config, len_steps, next_step_number, step_rewards):
    """
    Build training samples (state, target vector) for every ship and
    shipyard of the current player in ``board``.

    The action each unit took is inferred by comparing ``board`` with the
    board of step ``next_step_number``; the slot of the inferred action in the
    target vector is set to ``step_rewards`` and all other slots stay 0.

    Args:
        data: parsed episode JSON (uses ``data["steps"]``).
        board: Board of the current step.
        config: episode configuration used to build the next-step Board.
        len_steps: unused; kept so existing callers keep working.
        next_step_number: index of the step that follows ``board``.
        step_rewards: value written into the slot of the inferred action.

    Returns:
        (current_state_ship, current_state_shipyard, ship_actions,
        shipyard_actions) where
          - current_state_ship / current_state_shipyard are lists of
            (22, 22, 3) arrays, one per ship / shipyard;
          - ship_actions is a list of arrays of length
            SHIP_ACTION_SPACE_SIZE, one per ship;
          - shipyard_actions is a flat list holding
            SHIPYARD_ACTION_SPACE_SIZE values per shipyard.
    """
    me = board.current_player
    current_state_ship = []
    current_state_shipyard = []
    ship_actions = []
    shipyard_actions = []

    # Board of the following step, used to infer what each unit did.
    next_step_obs = data["steps"][next_step_number][0]["observation"]
    next_me = Board(next_step_obs, config).current_player

    # Look-ups built once instead of scanning the unit lists per ship/shipyard.
    next_ship_position_by_id = {s.id: s.position for s in next_me.ships}
    next_ship_positions = set(next_ship_position_by_id.values())
    next_shipyard_positions = {sy.position for sy in next_me.shipyards}

    for ship in me.ships:
        rewards_arr = np.zeros(SHIP_ACTION_SPACE_SIZE)
        current_state_ship.append(_encode_state(board, ship.position))

        next_position = next_ship_position_by_id.get(ship.id)
        if next_position is None:
            # The ship no longer exists: if one of our shipyards now stands on
            # its cell it was a CONVERT (1), otherwise it is recorded as 0.
            action_index = 1 if ship.position in next_shipyard_positions else 0
        else:
            # (-1, -1) disables the shipyard check inside get_direction.
            action_index = get_direction(ship.position, next_position, (-1, -1))

        rewards_arr[action_index] = step_rewards
        ship_actions.append(rewards_arr)

    for shipyard in me.shipyards:
        rewards_arr = np.zeros(SHIPYARD_ACTION_SPACE_SIZE)
        # The ship channel must come from get_all_ship_positions; it was
        # previously filled with a second copy of the shipyard channel.
        current_state_shipyard.append(_encode_state(board, shipyard.position))

        # One of our ships on the shipyard cell in the next step is treated
        # as a SPAWN (1); anything else as no action (0).
        action_index = 1 if shipyard.position in next_ship_positions else 0
        rewards_arr[action_index] = step_rewards
        # Kept flat (not one array per shipyard) to preserve the return shape.
        shipyard_actions += list(rewards_arr)

    return current_state_ship, current_state_shipyard, ship_actions, shipyard_actions
    

In [None]:
%%time
# Accumulators for the samples extracted from all processed episodes.
input_states_ship = []
input_states_shipyard = []
Qvalues_ship = []
Qvalues_shipyard = []

# Only the first three episode files are processed.
files = files[:3]
for file in files:
    print("processing - " + file)
    with open(file) as f:
        data = json.load(f)
    # get the configuration from root, common for all steps
    config = data["configuration"]
    # calculate end margin
    end_margin = get_end_margin(data, config)
    len_steps = len(data["steps"])

    # Signed log10 of the end margin. It depends only on the episode, so it
    # is computed once per file rather than once per step.
    if end_margin > 0:
        step_rewards = math.log10(end_margin)
    elif end_margin < 0:
        step_rewards = -math.log10(abs(end_margin))
    else:
        step_rewards = 0

    # The last step has no following step to infer actions from, hence
    # len_steps - 1.
    for step_number in range(len_steps - 1):
        # get observation for each step and combine with config to get board
        step_obs = data["steps"][step_number][0]["observation"]
        board = Board(step_obs, config)

        current_state_ship, current_state_shipyard, ship_actions, shipyard_actions = get_input_data(
            data, board, config, len_steps, step_number + 1, step_rewards)
        input_states_ship += current_state_ship
        input_states_shipyard += current_state_shipyard
        Qvalues_ship += ship_actions
        Qvalues_shipyard += shipyard_actions

input_states_ship = np.array(input_states_ship)
input_states_shipyard = np.array(input_states_shipyard)
# One row per unit; the row width is the size of the matching action space.
Qvalues_ship = np.array(Qvalues_ship).reshape(-1, SHIP_ACTION_SPACE_SIZE)
Qvalues_shipyard = np.array(Qvalues_shipyard).reshape(-1, SHIPYARD_ACTION_SPACE_SIZE)

In [None]:
# Persist the extracted states and target values as .npy files in the
# working directory.
for _out_path, _out_array in (
    ('input_states_ship.npy', input_states_ship),
    ('input_states_shipyard.npy', input_states_shipyard),
    ('Qvalues_ship.npy', Qvalues_ship),
    ('Qvalues_shipyard.npy', Qvalues_shipyard),
):
    np.save(_out_path, _out_array)