<a href="https://www.kaggle.com/code/huikang/kore-2022-feature-generator?scriptVersionId=98093277" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

This is a method to extract features and actions from gameplay data.

Feel free to ask if anything needs clarification.

# Input

The input is 54 x 21 x 21
- One for each cell on the map
- Center the shipyard on the map
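- 2n shipyard features (home/away x existence; the shipyard ship_count is written into the fleet ship_count layers, and capacity is not stored because it is a function of existence)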
- 1n kore features (amount of kore on the map)
- 4n fleet features (home/away x ship_count, cargo_kore)
- 2n+1 global features (home/away stored_kore, turn_idx)

The number of turns differs for each set of features. For most features I use n=1, just to check that the enumeration is working; for the fleet features I use n=12, to check that the flight plan is adequately represented. You can prune some layers when you train.

At the current configuration, one turn is already taking up 200 KB, and there can be up to 400 turns in a game. I save time and space by processing the inputs only for turns with launch actions.

# Action

The output is 3 x 21 x 21
- One for each cell on the map
- Center the shipyard on the map
- 3 actions for launching
  - 1 boomerang with horizontal start
  - 1 boomerang with vertical start
  - 1 build (start orientation agnostic), with a check for a fleet size of 50+
  - (acyclic and rectangle paths are considered as boomerang)
  - (assume launching always, no negative samples are taken)

I save the action data as a tuple (shipyard_x, shipyard_y, target_x, target_y, action_class, ship_amount).
The centering action will be done during training, so that we only need to store one matrix per turn to save disk space.

In [None]:
%reset -sf
%load_ext autoreload
%autoreload 2
!mkdir -p npy
!echo $KAGGLE_KERNEL_RUN_TYPE

In [None]:
!rm *.py *.pickle

from IPython.core.magic import register_cell_magic

@register_cell_magic
def writefile_and_run(line, cell):
    """Cell magic that saves the cell source to a file and then executes it.

    Usage: ``%%writefile_and_run [-a] path``. The last whitespace-separated
    token of the magic line is the target path. The file is overwritten
    unless the line is exactly ``-a path``, in which case the cell source is
    appended to it.
    """
    tokens = line.split()
    target_path = tokens[-1]
    # Append only for the exact two-token form "-a <path>"; otherwise truncate.
    append = len(tokens) == 2 and tokens[0] == '-a'
    with open(target_path, 'a' if append else 'w') as out:
        out.write(cell)
    get_ipython().run_cell(cell)

In [None]:
!cp ../input/kore-2022-match-analysis/kore_analysis.py .

In [None]:
%%writefile_and_run feature_generator.py
import os, re, json, enum, glob, math, shutil, collections, requests, pickle

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.animation
import matplotlib.patheffects
import matplotlib.pyplot as plt
import IPython.display

import kaggle_environments

from kore_analysis import KoreMatch, plot_3d_matrix, load_from_replay_json

# Load Replay

In [None]:
# Submission whose player index is looked up and whose actions are extracted below.
submission_id = 26838520
# Episode used for the single-replay walkthrough (selects the replay JSON files).
episode_id = 37973669
# Turn of that episode used for the walkthrough cells (index into `envs`).
turn_idx = 230

In [None]:
def get_player_id(path_to_info_json, submission_id):
    """Return the agent index that `submission_id` has in an episode info file.

    The file is expected to be JSON with an "agents" list whose entries carry
    "submissionId" and "index". The index of the first matching agent is
    returned, or -1 when the submission did not take part in the episode.
    """
    with open(path_to_info_json, 'r') as info_file:
        episode_info = json.load(info_file)
    matching_indices = (
        agent["index"]
        for agent in episode_info["agents"]
        if agent["submissionId"] == submission_id
    )
    return next(matching_indices, -1)
    
def load_turns_from_replay_json(path_to_json):
    """Build one `kore_fleets` environment per recorded turn of a replay.

    The replay JSON must provide "steps" (one state per turn) and
    "configuration". Each returned environment is created with a single
    recorded state as its only step, so it can be stepped forward
    independently of the other turns.
    """
    with open(path_to_json, 'r') as replay_file:
        replay = json.load(replay_file)

    turn_envs = []
    for recorded_state in replay["steps"]:
        # Clamp a negative remaining overage time to zero, in place, for both players.
        for player_id in (0, 1):
            observation = recorded_state[player_id]["observation"]
            observation["remainingOverageTime"] = max(0, observation["remainingOverageTime"])
        turn_envs.append(
            kaggle_environments.make("kore_fleets", steps=[recorded_state],
                                     configuration=replay['configuration'])
        )

    return turn_envs

In [None]:
# Replay file and metadata file of the selected episode.
path_to_json = f"../input/kore-2022-episodes/json/{episode_id}.json"
path_to_info_json = f"../input/kore-2022-episodes/json/{episode_id}_info.json"
# Agent index of the target submission in this episode (-1 if it is not in the match).
acting_player_id = get_player_id(path_to_info_json, submission_id)
# One environment per recorded turn.
envs = load_turns_from_replay_json(path_to_json)

# Shown as the cell output for a quick sanity check.
acting_player_id, len(envs), type(envs[turn_idx]), len(envs[turn_idx].steps)

# Simulate Game

For each turn, we simulate 12 turns of no action.

In [None]:
def simulate_turn(env, n_turns=12):
    """Advance `env` in place by `n_turns` turns in which no player acts.

    Args:
        env: environment exposing `done`, `step(actions)` and a `steps` list.
        n_turns: number of turns to simulate. The default of 12 matches the
            horizon read by `parse_fleet_info`.

    Returns:
        None. `env.steps` grows by exactly `n_turns` entries.
    """
    for _ in range(n_turns):
        if env.done:
            # The game is over: repeat the final state so that `env.steps`
            # still gains one entry per simulated turn.
            env.steps.append(env.steps[-1])
        else:
            env.step([[], []])

In [None]:
acting_player_id, len(envs), type(envs[turn_idx]), len(envs[turn_idx].steps)

In [None]:
# simulate_turn mutates the environment in place and returns None, so there is nothing to bind.
simulate_turn(envs[turn_idx])

In [None]:
acting_player_id, len(envs), type(envs[turn_idx]), len(envs[turn_idx].steps)

In [None]:
kore_match = KoreMatch(envs[turn_idx].steps)
kore_match.animate()
kore_match.html_animation

# Extract Features

In [None]:
%%writefile_and_run -a feature_generator.py

def existence_to_production_capacity(existence):
    """Map the number of turns a shipyard has existed to a capacity from 1 to 10."""
    # (minimum existence, capacity) pairs, highest tier first.
    tiers = (
        (294, 10), (212, 9), (147, 8), (97, 7), (60, 6),
        (34, 5), (17, 4), (7, 3), (2, 2),
    )
    for min_existence, capacity in tiers:
        if existence >= min_existence:
            return capacity
    return 1


def parse_shipyard_info(env, acting_player_id):
    """Encode how long each shipyard has existed as per-player 21 x 21 layers.

    Layer 0 holds the shipyards of the acting player and layer 1 those of the
    other player; each shipyard cell is set to `turn_existence / 400`.
    Shipyard ship counts are not written here.

    Args:
        env: environment whose `steps[0]` is the turn to encode.
        acting_player_id: index (0 or 1) of the player the features are for.

    Returns:
        Array of shape (2, 21, 21).
    """
    turns_to_simulate = 1
    res = np.zeros((turns_to_simulate * 2, 21, 21))
    # Shipyards are always read from the first step, whatever `turn_idx` is.
    players = env.steps[0][0]["observation"]["players"]
    for turn_idx in range(turns_to_simulate):
        turn_offset = 2 * turn_idx
        for player_id, (_, shipyards, _) in enumerate(players):
            # 0 for the acting player, 1 for the other player.
            player_offset = abs(acting_player_id - player_id)
            for loc_idx, _, turn_existence in shipyards.values():
                x, y = kaggle_environments.helpers.Point.from_index(int(loc_idx), 21)
                res[turn_offset + player_offset, x, y] = turn_existence / 400
    return res


def parse_kore_info(env, acting_player_id):
    """Encode the kore on the map as a single 21 x 21 layer.

    Each cell holds `kore_amount / 500`, read from the first step of `env`.
    `acting_player_id` is accepted for a uniform parser signature but is not
    used. Returns an array of shape (1, 21, 21).
    """
    turns_to_simulate = 1
    layers = np.zeros((turns_to_simulate, 21, 21))
    from_index = kaggle_environments.helpers.Point.from_index
    for step_idx in range(turns_to_simulate):
        kore_by_cell = env.steps[step_idx][0]["observation"]["kore"]
        for cell_idx, kore_amount in enumerate(kore_by_cell):
            col, row = from_index(int(cell_idx), 21)
            layers[step_idx, col, row] = kore_amount / 500
    return layers


def parse_fleet_info(env, acting_player_id):
    """Encode fleets over the simulated turns as 21 x 21 layers.

    Four layers are produced for every simulated turn `t`:
        4*t + 0: kore carried by the acting player's fleets / 500
        4*t + 1: ship count of the acting player's fleets and shipyards / 200
        4*t + 2: kore carried by the other player's fleets / 500
        4*t + 3: ship count of the other player's fleets and shipyards / 200

    Args:
        env: environment whose `steps` holds at least 12 entries, the first
            being the current turn (see `simulate_turn`).
        acting_player_id: index (0 or 1) of the player the features are for.

    Returns:
        Array of shape (48, 21, 21).
    """
    turns_to_simulate = 12
    res = np.zeros((turns_to_simulate * 4, 21, 21))
    # NOTE(review): shipyard ship counts are always taken from the first step
    # and repeated into every simulated turn - confirm this is intended.
    current_players = env.steps[0][0]["observation"]["players"]
    for turn_idx in range(turns_to_simulate):
        turn_offset = turn_idx * 4
        for player_id, (_, _, fleets) in enumerate(env.steps[turn_idx][0]["observation"]["players"]):
            # 0 for the acting player, 2 for the other player.
            player_offset = 2 * abs(acting_player_id - player_id)
            for loc_idx, kore_carried, ship_count, _, _ in fleets.values():
                x, y = kaggle_environments.helpers.Point.from_index(int(loc_idx), 21)
                res[turn_offset + player_offset + 0, x, y] = kore_carried / 500
                res[turn_offset + player_offset + 1, x, y] = ship_count / 200

        # Shipyards are written after the fleets, so on a shared cell the
        # shipyard's ship count replaces the fleet's.
        for player_id, (_, shipyards, _) in enumerate(current_players):
            player_offset = 2 * abs(acting_player_id - player_id)
            for loc_idx, ship_count, _ in shipyards.values():
                x, y = kaggle_environments.helpers.Point.from_index(int(loc_idx), 21)
                res[turn_offset + player_offset + 1, x, y] = ship_count / 200
    return res


def parse_global_info(env, acting_player_id):
    """Encode map-independent values as constant 21 x 21 layers.

    Layer 0 is filled with `log2(1 + stored kore) / 12` of the acting player,
    layer 1 with the same quantity for the other player, and the last layer
    with `step / 400`. Returns an array of shape (3, 21, 21).
    """
    turns_to_simulate = 1
    n_layers = turns_to_simulate*2*1+1
    layers = np.zeros((n_layers, 21, 21))
    for step_idx in range(turns_to_simulate):
        players = env.steps[step_idx][0]["observation"]["players"]
        for player_id, (stored_kore, _, _) in enumerate(players):
            # The acting player's layers come first, then the other player's.
            first_layer = turns_to_simulate*abs(acting_player_id - player_id)
            layers[first_layer + step_idx] = np.log2(1+stored_kore) / 12
    # NOTE(review): the step is read from `env.state`, not `env.steps[0]` like
    # the other features - confirm it is still the parsed turn after
    # `simulate_turn` has advanced the environment.
    layers[-1] = env.state[0]["observation"]["step"] / 400
    return layers


def parse_env(env, acting_player_id):
    """Stack all feature groups into one array along the layer axis.

    The groups are concatenated in the order global, kore, fleet, shipyard.
    """
    parsers = (
        parse_global_info,
        parse_kore_info,
        parse_fleet_info,
        parse_shipyard_info,
    )
    return np.concatenate([parse(env, acting_player_id) for parse in parsers], axis=0)

In [None]:
input_matrix = parse_env(envs[turn_idx], acting_player_id=acting_player_id)
input_matrix.shape

In [None]:
plot_3d_matrix(parse_global_info(envs[turn_idx], acting_player_id), scene_camera_eye=dict(x=3, y=3, z=3))

In [None]:
plot_3d_matrix(parse_kore_info(envs[turn_idx], acting_player_id), scene_camera_eye=dict(x=3, y=3, z=3))

In [None]:
plot_3d_matrix(parse_fleet_info(envs[turn_idx], acting_player_id))

In [None]:
plot_3d_matrix(parse_shipyard_info(envs[turn_idx], acting_player_id), scene_camera_eye=dict(x=3, y=3, z=3))

In [None]:
np.save(f"npy/{submission_id}_{episode_id}_{turn_idx:03d}_inputs.npy", input_matrix)

# Dataset Production

Generate and save as numpy files for imitation learning

In [None]:
import glob

# One actions DataFrame per episode that the target submission took part in.
actions_dfs = []

for path_to_info_json in glob.glob("../input/kore-2022-episodes/json/*_info.json"):
    try:
        acting_player_id = get_player_id(path_to_info_json, submission_id)
        if acting_player_id == -1:
            continue  # target not in match

        path_to_json = path_to_info_json.replace("_info", "")
        episode_id = int(path_to_json.split("/")[-1][:-5])  # strip ".json"

        env = load_from_replay_json(path_to_json)
        kore_match = KoreMatch(env.steps)
        actions_df = kore_match.extract_player_actions(acting_player_id)
        actions_df["submission_id"] = submission_id
        actions_df["episode_id"] = episode_id
        actions_dfs.append(actions_df)

        # Only turns after turn 1 with a non-negative action class get an input matrix.
        turns_to_parse = set()
        for turn_idx, action_class in zip(actions_df["turn_idx"], actions_df["action_class"]):
            if turn_idx > 1 and action_class >= 0:
                turns_to_parse.add(turn_idx)

        envs = load_turns_from_replay_json(path_to_json)
        for turn_idx in turns_to_parse:
            simulate_turn(envs[turn_idx-1])  # -1 because the action is inferred from the previous turn's observation
            input_matrix = parse_env(envs[turn_idx-1], acting_player_id=acting_player_id)
            np.save(f"npy/{submission_id}_{episode_id}_{turn_idx:03d}_inputs.npy", input_matrix)  # attributed to same idx

    except Exception as exc:
        # Best effort: report the failing episode together with its cause and
        # carry on. Catching Exception instead of using a bare `except` keeps
        # KeyboardInterrupt and SystemExit able to stop the run.
        print(path_to_info_json, repr(exc))

    if os.environ.get("KAGGLE_KERNEL_RUN_TYPE") == "Interactive" and len(actions_dfs) >= 3:
        break  # for interactive runs, only check that it is working

In [None]:
actions_df_all = pd.concat(actions_dfs)
actions_df_all = actions_df_all[actions_df_all["action_class"] >= 0]
actions_df_all.head()

In [None]:
actions_df_all["action_class"].value_counts()

In [None]:
# Scatter of the launch offsets: Gaussian jitter (std 1/8) is added to both
# coordinates so that actions sharing the same offset do not overlap exactly.
# Marker size follows ship_amount and colour follows action_class.
plt.scatter(actions_df_all["diff_x"] + np.random.randn(len(actions_df_all))/8, 
            actions_df_all["diff_y"] + np.random.randn(len(actions_df_all))/8,
            s=actions_df_all["ship_amount"], c=actions_df_all["action_class"], cmap="winter_r")
# Integer-only ticks on both axes.
plt.gca().xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
plt.gca().yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
plt.gca().set_aspect('equal')
plt.show()

In [None]:
actions_df_all.to_csv("actions_df.csv", index=False)
print(actions_df_all.shape)

In [None]:
!rm -rf __pycache__/