# Introduction
This notebook trains a LightGBM classifier on a few simple per-turn features (kore, cargo, ship count, shipyard count, etc.) to estimate each player's win rate.

In [None]:
import os
import json
import pickle

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from tqdm.auto import tqdm

# Load Data

In [None]:
from kaggle_environments.envs.kore_fleets.helpers import SPAWN_VALUES
from functools import lru_cache

@lru_cache(maxsize=None)
def max_ships_to_spawn(turns_controlled: int) -> int:
    """Map a shipyard's ``turns_controlled`` to a spawn limit via ``SPAWN_VALUES``.

    Returns the 1-based position of the first ``SPAWN_VALUES`` entry that is
    strictly greater than ``turns_controlled``; when no entry is greater, the
    result is ``len(SPAWN_VALUES) + 1``. Results are memoised per argument.
    """
    beyond_last = len(SPAWN_VALUES) + 1
    return next(
        (
            rank
            for rank, threshold in enumerate(SPAWN_VALUES, start=1)
            if turns_controlled < threshold
        ),
        beyond_last,
    )

In [None]:
# Build one training row per (episode, player, turn) from the episode replays.
data = []

for fn in tqdm(os.listdir('../input/kore-2022-episodes/json/')):
    # Files with 'info' in their name are skipped (presumably episode
    # metadata rather than replays -- confirm against the dataset layout).
    if 'info' in fn:
        continue

    # Context manager so each replay file is closed as soon as it is parsed
    # instead of leaving one open handle per episode.
    with open(f'../input/kore-2022-episodes/json/{fn}', 'rb') as replay_file:
        eps = json.load(replay_file)

    epid = eps['id']

    # Label the episode from its final step. Each player entry is laid out as
    # (kore, shipyards, fleets), the same layout unpacked in the loop below.
    state = eps['steps'][-1][0]
    p1 = state['observation']['players'][0]
    p2 = state['observation']['players'][1]

    if len(p1[1]) == 0 and len(p1[2]) == 0:
        # Player 0 has neither shipyards nor fleets left.
        winner = 1
    elif len(p2[1]) == 0 and len(p2[2]) == 0:
        # Player 1 has neither shipyards nor fleets left.
        winner = 0
    elif p1[0] > p2[0]:
        # Both still have assets: the larger kore stockpile wins.
        winner = 0
    else:
        # NOTE: an exact kore tie is also labelled as a win for player 1.
        winner = 1

    for step in eps['steps']:
        obs = step[0]['observation']
        turn = obs['step']

        for player_id, (kore, shipyards, fleets) in enumerate(obs['players']):
            cargo = 0
            ship_count = 0
            shipyard_count = len(shipyards)
            fleet_count = len(fleets)
            max_spawn = 0

            # Only the values are needed; the dict keys are never read.
            for fleet in fleets.values():
                cargo += fleet[1]
                ship_count += fleet[2]

            for shipyard in shipyards.values():
                ship_count += shipyard[1]
                max_spawn += max_ships_to_spawn(shipyard[2])

            data.append({
                'episode': epid,
                'player': player_id,
                'turn': turn,
                'cargo': cargo,
                'kore': kore,
                'ship_count': ship_count,
                'shipyard_count': shipyard_count,
                'fleet_count': fleet_count,
                'max_spawn': max_spawn,
                'win': int(winner == player_id)
            })



In [None]:
# One row per (episode, player, turn), built from the dicts collected in `data`.
df = pd.DataFrame(data)
# Bare expression so the notebook displays the frame.
df

# Train Model

In [None]:
import lightgbm as lgb

## Feature Extraction

In [None]:
# Pair every row with the opponent's row for the same episode and turn:
# flipping the player id on a copy makes the join line up each player with
# the other one. Columns suffixed `_x` are the player's own, `_y` the opponent's.
_df = df.copy()
_df_counter = df.copy()
_df_counter['player'] = _df_counter['player'].ne(1).astype('int64')
_df = _df.merge(_df_counter, on=['episode', 'player', 'turn'])

# Keep only the player's own label and drop the join keys that are not features.
dff = (
    _df
    .drop(columns=['episode', 'player', 'win_y'])
    .rename(columns={'win_x': 'win'})
)

# Own-minus-opponent difference for every base feature.
for c in ('cargo', 'kore', 'ship_count', 'shipyard_count', 'fleet_count', 'max_spawn'):
    dff[f'{c}_gap'] = dff[f'{c}_x'] - dff[f'{c}_y']

dff

## 5-Fold Training

In [None]:
# Binary classifier on the `win` label, tracking error rate and AUC.
params = {
    'objective': 'binary',
    'metric': ['binary_error', 'auc'],
    'nthread': 4
}

# Every column except the label is used as a feature.
d_train = lgb.Dataset(dff.drop('win', axis=1), label=dff['win'])

# Early stopping and periodic logging are passed as callbacks: the
# `early_stopping_rounds` / `verbose_eval` keyword arguments of `lgb.cv` are
# deprecated and were removed in LightGBM 4.0, while the callbacks work on
# both older and newer releases.
# NOTE(review): folds are not grouped by episode (the `episode` column is
# dropped before this point), so rows of one episode can end up in both the
# training and validation part of a fold -- CV metrics may be optimistic.
rst = lgb.cv(
    params,
    d_train,
    num_boost_round=1000,
    nfold=5,
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=10),
    ],
    return_cvbooster=True,
)

In [None]:
# Per-fold boosters kept by `lgb.cv` because `return_cvbooster=True` was set.
models = rst['cvbooster'].boosters

In [None]:
# Feature importance of the first fold's booster only.
lgb.plot_importance(models[0])

In [None]:
# Persist the list of fold boosters. The context manager makes sure the file
# is flushed and closed once the dump finishes.
with open('models.p', 'wb') as model_file:
    pickle.dump(models, model_file)

# Test

In [None]:
# Rebuild the per-turn feature rows for a single episode (no `win` label here).
# The context manager closes the replay file as soon as it has been parsed.
with open('../input/kore-2022-episodes/json/36599621.json', 'rb') as replay_file:
    eps = json.load(replay_file)
epid = eps['id']
data = []

for step in eps['steps']:
    obs = step[0]['observation']
    turn = obs['step']

    for player_id, (kore, shipyards, fleets) in enumerate(obs['players']):
        cargo = 0
        ship_count = 0
        shipyard_count = len(shipyards)
        fleet_count = len(fleets)
        max_spawn = 0

        # Only the values are needed; the dict keys are never read.
        for fleet in fleets.values():
            cargo += fleet[1]
            ship_count += fleet[2]

        for shipyard in shipyards.values():
            ship_count += shipyard[1]
            max_spawn += max_ships_to_spawn(shipyard[2])

        data.append({
            'episode': epid,
            'player': player_id,
            'turn': turn,
            'cargo': cargo,
            'kore': kore,
            'ship_count': ship_count,
            'shipyard_count': shipyard_count,
            'fleet_count': fleet_count,
            'max_spawn': max_spawn,
        })

In [None]:
def get_features(data):
    """Turn per-player rows into model features with opponent columns and gaps.

    Args:
        data: Records (one dict per episode/player/turn) accepted by
            ``pd.DataFrame``; must contain ``episode``, ``player``, ``turn``
            and the six base feature keys.

    Returns:
        A DataFrame indexed by ``(episode, player)``. Columns suffixed ``_x``
        hold the player's own values, ``_y`` the opponent's, and ``_gap`` the
        own-minus-opponent difference; ``turn`` is kept as a column.
    """
    own = pd.DataFrame(data)

    # Flip the player id on a copy so the join below matches each row with
    # the other player's row of the same episode and turn.
    opponent = own.copy()
    opponent['player'] = opponent['player'].ne(1).astype('int64')

    paired = own.merge(opponent, on=['episode', 'player', 'turn'])
    features = paired.set_index(['episode', 'player'])

    base_names = ('cargo', 'kore', 'ship_count', 'shipyard_count', 'fleet_count', 'max_spawn')
    for name in base_names:
        features[f'{name}_gap'] = features[f'{name}_x'] - features[f'{name}_y']

    return features

# Feature frame for the single test episode, indexed by (episode, player).
dff = get_features(data)
# Bare expression so the notebook displays the frame.
dff

In [None]:
# Player 0: average the fold boosters' predictions, then plot them by turn.
# Predictions are computed before `winr` is added, so the model only ever
# sees the feature columns.
df1 = dff.query('player == 0').copy()
p0_fold_preds = [booster.predict(df1) for booster in models]
df1['winr'] = np.mean(p0_fold_preds, axis=0)
df1 = df1.reset_index().set_index('turn')
df1['winr'].plot()

# Player 1: same procedure, drawn on the same axes.
df2 = dff.query('player == 1').copy()
p1_fold_preds = [booster.predict(df2) for booster in models]
df2['winr'] = np.mean(p1_fold_preds, axis=0)
df2 = df2.reset_index().set_index('turn')
df2['winr'].plot()

plt.legend(['p0', 'p1'])

In [None]:
import kaggle_environments

def fix_overage_time(match):
    """Clamp negative ``remainingOverageTime`` values in a replay to zero.

    Every player entry of every step in ``match["steps"]`` is updated in
    place, so the clamp is not tied to a fixed number of players.

    Args:
        match: Replay dict whose ``"steps"`` value is a list of steps, each a
            list of per-player states carrying
            ``["observation"]["remainingOverageTime"]``.

    Returns:
        The same ``match`` object, after the in-place update.
    """
    for match_state in match["steps"]:
        for player_state in match_state:
            observation = player_state["observation"]
            observation["remainingOverageTime"] = max(
                0, observation["remainingOverageTime"]
            )
    return match

def load_from_replay_json(path_to_json):
    """Build a ``kore_fleets`` environment from a saved replay JSON file.

    The replay is parsed, passed through ``fix_overage_time``, and its
    ``steps`` and ``configuration`` are handed to ``kaggle_environments.make``.

    Args:
        path_to_json: Path of the replay JSON file to load.

    Returns:
        The environment object returned by ``kaggle_environments.make``.
    """
    # JSON text is UTF-8; be explicit rather than rely on the platform default.
    with open(path_to_json, 'r', encoding='utf-8') as f:
        match = json.load(f)
    match = fix_overage_time(match)
    env = kaggle_environments.make("kore_fleets", steps=match['steps'],
                                   configuration=match['configuration'])
    return env

In [None]:
# Load the same episode that was scored above and render it inline.
env = load_from_replay_json('../input/kore-2022-episodes/json/36599621.json')
env.render(mode="ipython", width=1000, height=800)