In [None]:
!pip install kaggle-environments --upgrade

In [None]:
import gc
import os
import sys
from time import time, sleep
import json
import math
import pickle
from collections import defaultdict

import numpy as np
import numba
import pandas as pd
import seaborn as sns
import lightgbm as lgb
import matplotlib.pyplot as plt
from kaggle_environments import (
    evaluate, make, utils,
    get_episode_replay, list_episodes, list_episodes_for_submission
)

# List the entries of ../input; as the last expression of the cell the
# resulting list is shown as the cell output.
os.listdir("../input")

# Data

In [None]:
# Submission ids whose episodes are kept; every other row of the table is dropped.
recent_submissions = [19352359, 19352272, 19352181, 19352079, 19351928,
                      19335271, 19334971, 19334470, 19334131, 19334038,
                      19325005, 19324861, 19324344, 19324095, 19323940,
                      19311665, 19311618, 19311565, 19311503, 19311396,
                      19287465, 19287233, 19286948, 19286747, 19286415,
                      19278412, 19278217, 19278083, 19277961, 19277822,
                      19252525, 19252439, 19252356, 19252202, 19252130,
                      19237820, 19237695, 19237490, 19237372, 19237108,
                      19224197, 19224008, 19223754, 19223504, 19223418,
                      19208120, 19207921, 19207843, 19207741, 19207633,]
set_recent_submissions = set(recent_submissions)

# Only these three columns are read below, so do not parse/materialise the rest.
df_episode_agents = pd.read_csv(
    "../input/meta-kaggle/EpisodeAgents.csv",
    usecols=["EpisodeId", "Index", "SubmissionId"],
)
df_episode_agents = df_episode_agents[df_episode_agents["SubmissionId"].isin(set_recent_submissions)]
df_episode_agents = df_episode_agents.reset_index(drop=True)

# Sorted, de-duplicated ids of every episode that involved one of the submissions.
episodes = sorted(df_episode_agents["EpisodeId"].unique().tolist())

# (episode, player index) pairs played by the selected submissions, packed into a
# single integer as EpisodeId * 2 + Index so membership can be tested with one lookup.
set_recent_submission_episode_id_and_indexes = set((df_episode_agents["EpisodeId"] * 2 + df_episode_agents["Index"]).tolist())

# The frame is no longer needed; release it before the replays are downloaded.
del df_episode_agents
gc.collect()
print(f"len(episodes)={len(episodes)}")

In [None]:
data = []
seen_episodes = set()

# Fetch the replays of the last 3000 episode ids and flatten each usable one
# into a small dict (per-step actions, per-step reward increments, thresholds).
for episode_id in episodes[-3000:]:
    response = get_episode_replay(episode_id)
    sleep(1)  # wait one second after every request
    if not response["wasSuccessful"]:
        continue
    episode = json.loads(response["result"]["replay"])
    # Keep only episodes in which both agents ended with status DONE.
    if episode["statuses"] != ['DONE', 'DONE']:
        continue

    all_steps = episode["steps"]
    played_steps = all_steps[1:]  # entry 0 only supplies the initial observation

    # The replay stores running reward totals; prepend the starting total (0, 0)
    # so that consecutive differences give the reward earned at each step.
    totals = [[0, 0]] + [[s[0]["reward"], s[1]["reward"]] for s in played_steps]

    data.append({
        "episode_id": episode_id,
        "final_rewards": episode["rewards"],
        "rewards": [
            [after[0] - before[0], after[1] - before[1]]
            for before, after in zip(totals, totals[1:])
        ],
        "actions": [[s[0]["action"], s[1]["action"]] for s in played_steps],
        "thresholds": all_steps[0][0]["observation"]["thresholds"],
    })

In [None]:
# Report how many episodes were collected, then persist them as JSON.
n_collected = len(data)
print(n_collected)
with open("data.json", "w") as json_file:
    json.dump(data, json_file)

# Model

In [None]:
# Number of feature columns per sample (training rows carry one extra column:
# the bandit's threshold, used as the regression label).
N_FEATURES = 10
# Base of the exponential transform applied to thresholds
# (TRANSFORM_BASE ** threshold) for the "transform" model.
TRANSFORM_BASE = 1.02

In [None]:
# Shared state of the xorshift-style generator used for row sampling and for
# the train/valid split. It is a one-element uint64 array so it can be mutated
# in place, including from inside the jitted function that receives it.
_rand = np.array([42], dtype=np.uint64)

# proc: replay one episode step by step and emit sampled training rows for the
# "transform" model (EXPECTED_THRESHOLD is the likelihood-weighted mean of
# TRANSFORM_BASE ** threshold).
#
# Parameters
#   thresholds : int64[n_bandits]  threshold of each bandit; written unchanged
#                                  into the last column of every emitted row.
#   actions    : int64[n_steps, 2] bandit chosen by player 1 / player 2 per step.
#   rewards    : int64[n_steps, 2] reward gained at that step by each player
#                                  (used directly as a 0/1 column index below).
#   teams      : int64[2]          teams[k] == 1 -> emit rows seen from player k's side.
#   _rand      : uint64[1]         generator state, advanced in place.
#
# Returns
#   float32[n_rows, N_FEATURES + 1]; the column order is
#   step, opponent n_consecutive, opponent n_tried_bandits, opponent max_tried,
#   opponent jini ratio, own pulls of the bandit, own expected threshold,
#   opponent pulls of the bandit, opponent decay-weighted pulls,
#   steps since the opponent last pulled the bandit, threshold (label).
#
# NOTE(review): several sizes below are hard-coded to 100 bandits / 101
# candidate threshold values (0..100) - presumably the fixed `mab` settings.
@numba.njit("f4[:,:](i8[:],i8[:,:],i8[:,:],i8[:],u8[:])", cache=True)
def proc(thresholds, actions, rewards, teams, _rand):
    # Two xor-shift rounds on the shared state; returns the new state.
    def rand():
        _rand[0] ^= _rand[0] << np.uint64(7)
        _rand[0] ^= _rand[0] >> np.uint64(9)
        return _rand[0]
    # Worst case: every (step, bandit) pair emitted for both players.
    res = np.empty((len(actions)*100*2, N_FEATURES+1), dtype=np.float32)
    idx_res = 0
    # likelihoodsK[b, t]: running likelihood, from player K's own pulls, that
    # bandit b started with threshold t.
    likelihoods1 = np.ones((100, 101), dtype=np.float64)
    likelihoods2 = np.ones((100, 101), dtype=np.float64)
    # eK[b]: likelihood-weighted mean of TRANSFORM_BASE ** t; starts at the
    # plain mean over t = 0..100 (all likelihoods equal).
    e1 = np.full(101, (TRANSFORM_BASE**np.arange(101.0)).mean(), dtype=np.float64)
    e2 = np.full(101, (TRANSFORM_BASE**np.arange(101.0)).mean(), dtype=np.float64)
    # pK[b, r]: number of pulls of bandit b by player K that yielded reward r.
    p1 = np.zeros((100, 2), dtype=np.int64)
    p2 = np.zeros((100, 2), dtype=np.int64)
    # lastK[b]: step at which player K last pulled bandit b (-1 = never).
    last1 = np.full(100, -1, dtype=np.int64)
    last2 = np.full(100, -1, dtype=np.int64)
    last_bandit_1 = -1
    last_bandit_2 = -1
    # Number of times the player repeated its previous bandit (never reset).
    n_consecutive_1 = 0
    n_consecutive_2 = 0
    # Number of distinct bandits the player has pulled so far.
    n_tried_bandits_1 = 0
    n_tried_bandits_2 = 0
    # Largest pull count the player has on any single bandit.
    max_tried_1 = 0
    max_tried_2 = 0
    # Per-player pull counts kept in ascending order (bandit identity is dropped).
    tried_counts_sorted_1 = np.zeros(100, dtype=np.int64)
    tried_counts_sorted_2 = np.zeros(100, dtype=np.int64)
    jini_sum_1 = 0
    jini_sum_2 = 0
    # Tiny non-zero start so the ratio jini_sum / jini_sum_ideal is 0 (not 0/0)
    # before anything has been accumulated.
    jini_sum_ideal = 1e-100
    # Pull counts where each pull is weighted by 1 / decay at the time of the pull.
    tried_counts_modified_1 = np.zeros(100, dtype=np.float64)
    tried_counts_modified_2 = np.zeros(100, dtype=np.float64)
    for step in range(len(actions)):
        a1, a2 = actions[step]
        r1, r2 = rewards[step]
        # Emit rows from the state *before* this step's actions are applied.
        # A row is kept only when the generator output is divisible by 5.
        for bandit, thr in enumerate(thresholds):  # 100
            # Player 1's point of view: opponent statistics come from player 2.
            if teams[0] == 1 and rand() % 5 == 0:
                res[idx_res] = float(step), float(n_consecutive_2), float(n_tried_bandits_2), float(max_tried_2),\
                                jini_sum_2/jini_sum_ideal, float(p1[bandit].sum()), float(e1[bandit]), float(p2[bandit].sum()), float(tried_counts_modified_2[bandit]), float(step-last2[bandit]), float(thr)
                idx_res += 1
            # Player 2's point of view: opponent statistics come from player 1.
            if teams[1] == 1 and rand() % 5 == 0:
                res[idx_res] = float(step), float(n_consecutive_1), float(n_tried_bandits_1), float(max_tried_1),\
                                jini_sum_1/jini_sum_ideal, float(p2[bandit].sum()), float(e2[bandit]), float(p1[bandit].sum()), float(tried_counts_modified_1[bandit]), float(step-last1[bandit]), float(thr)
                idx_res += 1
        # Decay factor of the chosen bandit: 0.97 per earlier pull by either player.
        decay1 = 0.97 ** (p1[a1].sum() + p2[a1].sum())
        decay2 = 0.97 ** (p1[a2].sum() + p2[a2].sum())
        # Multiply in the probability of the observed outcome for every
        # candidate initial threshold.
        for thr in range(101):
            prob = np.ceil(thr * decay1) / 101.0
            likelihoods1[a1, thr] *= prob if r1 == 1 else 1 - prob
            prob = np.ceil(thr * decay2) / 101.0
            likelihoods2[a2, thr] *= prob if r2 == 1 else 1 - prob
        e1[a1] = (likelihoods1[a1] * TRANSFORM_BASE**np.arange(101.0)).sum() / likelihoods1[a1].sum()
        e2[a2] = (likelihoods2[a2] * TRANSFORM_BASE**np.arange(101.0)).sum() / likelihoods2[a2].sum()
        # First pull of this bandit by the player.
        if p1[a1].sum() == 0:
            n_tried_bandits_1 += 1
        if p2[a2].sum() == 0:
            n_tried_bandits_2 += 1
        # idx is the right-most slot of the sorted counts that equals the
        # bandit's current count; incrementing that slot keeps the array sorted.
        # (99 - idx) is that slot's distance from the top of the ordering.
        idx = np.searchsorted(tried_counts_sorted_1, p1[a1].sum(), side="right") - 1
        jini_sum_1 += 99 - idx
        tried_counts_sorted_1[idx] += 1
        idx = np.searchsorted(tried_counts_sorted_2, p2[a2].sum(), side="right") - 1
        jini_sum_2 += 99 - idx
        tried_counts_sorted_2[idx] += 1
        # Normaliser for the two sums above; the assert checks it bounds them.
        jini_sum_ideal += step % 100
        assert jini_sum_1 <= jini_sum_ideal and jini_sum_2 <= jini_sum_ideal
        tried_counts_modified_1[a1] += 1.0 / decay1
        tried_counts_modified_2[a2] += 1.0 / decay2
        # Pull counts are updated only now: everything above used the
        # pre-action counts.
        p1[a1, r1] += 1
        p2[a2, r2] += 1
        max_tried_1 = max(max_tried_1, p1[a1].sum())
        max_tried_2 = max(max_tried_2, p2[a2].sum())
        last1[a1] = step
        last2[a2] = step
        if a1 == last_bandit_1:
            n_consecutive_1 += 1
        if a2 == last_bandit_2:
            n_consecutive_2 += 1
        last_bandit_1 = a1
        last_bandit_2 = a2
    # Only the rows that were actually written.
    return res[:idx_res]


# Output buffers with a fixed upper bound on the number of rows.
# NOTE(review): with N_FEATURES = 10 these shapes request about 52.8 GB (train)
# and 14.1 GB (valid) of float32. Presumably this relies on the OS committing
# only the pages that are actually written - confirm before running on a
# machine without memory overcommit.
train = np.empty((3000*2000*100*2, N_FEATURES+1), dtype=np.float32)
valid = np.empty((800*2000*100*2, N_FEATURES+1), dtype=np.float32)
# Next free row in each buffer.
idx_train = idx_valid = 0
# Episode ids that have already been processed.
ids = set()

def rand():
    """Advance the shared generator state in `_rand[0]` and return the new value.

    Applies two xor-shift rounds (left by 7, then right by 9) to the uint64
    state and stores the result back in place.
    """
    value = _rand[0]
    value ^= value << np.uint64(7)
    value ^= value >> np.uint64(9)
    _rand[0] = value
    return _rand[0]
# Turn every distinct episode into sampled rows and append them to either the
# training buffer or the validation buffer (whole episodes are kept together).
for record in data:
    episode_id = record["episode_id"]
    if episode_id not in ids:
        ids.add(episode_id)
        # teams[k] is 1 when player k of this episode is one of the selected submissions.
        teams = np.array(
            [episode_id * 2 + player in set_recent_submission_episode_id_and_indexes
             for player in (0, 1)],
            dtype=np.int64,
        )
        d = proc(
            np.array(record["thresholds"]),
            np.array(record["actions"]),
            np.array(record["rewards"]),
            teams,
            _rand,
        )
        n_rows = len(d)
        # Generator output divisible by 5 -> validation, otherwise training.
        if rand() % 5 == 0:
            valid[idx_valid:idx_valid + n_rows] = d
            idx_valid += n_rows
        else:
            train[idx_train:idx_train + n_rows] = d
            idx_train += n_rows

# Keep only the rows that were filled in.
train, valid = train[:idx_train], valid[:idx_valid]

print(train.shape, valid.shape)

In [None]:
n_train = train.shape[0]

# Features are the first N_FEATURES columns; the regression target is
# TRANSFORM_BASE raised to the threshold stored in the last column.
X_train = train[:, :N_FEATURES]
y_train = TRANSFORM_BASE ** train[:, N_FEATURES]
X_valid = valid[:, :N_FEATURES]
y_valid = TRANSFORM_BASE ** valid[:, N_FEATURES]

# Hand LightGBM its own copies so the big buffers can be released afterwards.
lgb_train = lgb.Dataset(data=X_train.copy(), label=y_train.copy())
del X_train, y_train
lgb_valid = lgb.Dataset(data=X_valid.copy(), label=y_valid.copy(), reference=lgb_train)
del X_valid, y_valid

del train, valid
gc.collect()

In [None]:
%%time

# LightGBM hyper-parameters for the regression on TRANSFORM_BASE ** threshold.
params = {
    'boosting_type': "gbdt",
    'objective': "rmse",
    'metric': "rmse",
    'num_leaves': 4095,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.5,
    'bagging_freq': 5,
    'verbose': 1,
    "seed": 42,
}

# Early stopping is requested through the callback: the `early_stopping_rounds=`
# keyword of lgb.train() was deprecated and is rejected by LightGBM >= 4, whereas
# the callback is accepted by old and new releases alike.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=1024,
    valid_sets=[lgb_valid],
    callbacks=[lgb.early_stopping(stopping_rounds=50)],
)

In [None]:
# Column names in the same order as the feature columns produced by proc().
feature_names = "STEP, OPPONENT_N_CONSECUTIVE, OPPONENT_N_TRIED_BANDITS, OPPONENT_MAX_TRIED, OPPONENT_JINI, N_TRIED, EXPECTED_THRESHOLD, OPPONENT, OPPONENT_MODIFIED, OPPONENT_NOT_USED_TURNS".split(", ")

# One row per feature, most important first.
df_importance = pd.DataFrame({
    "importance": gbm.feature_importance(),
    "feature": feature_names,
})
df_importance = df_importance.sort_values("importance", ascending=False)

plt.figure(figsize=(10,6))
bar_colors = sns.hls_palette(10, l=0.7, s=1)
sns.barplot(data=df_importance, x="importance", y="feature", palette=bar_colors)
plt.show()

In [None]:
# Serialise the model trained on the transformed target.
model_filename = "model062_1.pickle"
with open(model_filename, mode="wb") as model_file:
    pickle.dump(gbm, model_file)

In [None]:
# proc: replay one episode step by step and emit sampled training rows for the
# "normal" model (EXPECTED_THRESHOLD is the likelihood-weighted mean of the raw
# threshold value). This definition replaces any earlier `proc` in the kernel.
#
# Parameters
#   thresholds : int64[n_bandits]  threshold of each bandit; written unchanged
#                                  into the last column of every emitted row.
#   actions    : int64[n_steps, 2] bandit chosen by player 1 / player 2 per step.
#   rewards    : int64[n_steps, 2] reward gained at that step by each player
#                                  (used directly as a 0/1 column index below).
#   teams      : int64[2]          teams[k] == 1 -> emit rows seen from player k's side.
#   _rand      : uint64[1]         generator state, advanced in place.
#
# Returns
#   float32[n_rows, N_FEATURES + 1]; the column order is
#   step, opponent n_consecutive, opponent n_tried_bandits, opponent max_tried,
#   opponent jini ratio, own pulls of the bandit, own expected threshold,
#   opponent pulls of the bandit, opponent decay-weighted pulls,
#   steps since the opponent last pulled the bandit, threshold (label).
#
# NOTE(review): several sizes below are hard-coded to 100 bandits / 101
# candidate threshold values (0..100) - presumably the fixed `mab` settings.
@numba.njit("f4[:,:](i8[:],i8[:,:],i8[:,:],i8[:],u8[:])", cache=True)
def proc(thresholds, actions, rewards, teams, _rand):
    # Two xor-shift rounds on the shared state; returns the new state.
    def rand():
        _rand[0] ^= _rand[0] << np.uint64(7)
        _rand[0] ^= _rand[0] >> np.uint64(9)
        return _rand[0]
    # Worst case: every (step, bandit) pair emitted for both players.
    res = np.empty((len(actions)*100*2, N_FEATURES+1), dtype=np.float32)
    idx_res = 0
    # likelihoodsK[b, t]: running likelihood, from player K's own pulls, that
    # bandit b started with threshold t.
    likelihoods1 = np.ones((100, 101), dtype=np.float64)
    likelihoods2 = np.ones((100, 101), dtype=np.float64)
    # eK[b]: likelihood-weighted mean threshold; 50.0 is the mean of 0..100.
    e1 = np.full(101, 50.0, dtype=np.float64)
    e2 = np.full(101, 50.0, dtype=np.float64)
    # pK[b, r]: number of pulls of bandit b by player K that yielded reward r.
    p1 = np.zeros((100, 2), dtype=np.int64)
    p2 = np.zeros((100, 2), dtype=np.int64)
    # lastK[b]: step at which player K last pulled bandit b (-1 = never).
    last1 = np.full(100, -1, dtype=np.int64)
    last2 = np.full(100, -1, dtype=np.int64)
    last_bandit_1 = -1
    last_bandit_2 = -1
    # Number of times the player repeated its previous bandit (never reset).
    n_consecutive_1 = 0
    n_consecutive_2 = 0
    # Number of distinct bandits the player has pulled so far.
    n_tried_bandits_1 = 0
    n_tried_bandits_2 = 0
    # Largest pull count the player has on any single bandit.
    max_tried_1 = 0
    max_tried_2 = 0
    # Per-player pull counts kept in ascending order (bandit identity is dropped).
    tried_counts_sorted_1 = np.zeros(100, dtype=np.int64)
    tried_counts_sorted_2 = np.zeros(100, dtype=np.int64)
    jini_sum_1 = 0
    jini_sum_2 = 0
    # Tiny non-zero start so the ratio jini_sum / jini_sum_ideal is 0 (not 0/0)
    # before anything has been accumulated.
    jini_sum_ideal = 1e-100
    # Pull counts where each pull is weighted by 1 / decay at the time of the pull.
    tried_counts_modified_1 = np.zeros(100, dtype=np.float64)
    tried_counts_modified_2 = np.zeros(100, dtype=np.float64)
    for step in range(len(actions)):
        a1, a2 = actions[step]
        r1, r2 = rewards[step]
        # Emit rows from the state *before* this step's actions are applied.
        # A row is kept only when the generator output is divisible by 5.
        for bandit, thr in enumerate(thresholds):  # 100
            # Player 1's point of view: opponent statistics come from player 2.
            if teams[0] == 1 and rand() % 5 == 0:
                res[idx_res] = float(step), float(n_consecutive_2), float(n_tried_bandits_2), float(max_tried_2),\
                                jini_sum_2/jini_sum_ideal, float(p1[bandit].sum()), float(e1[bandit]), float(p2[bandit].sum()), float(tried_counts_modified_2[bandit]), float(step-last2[bandit]), float(thr)
                idx_res += 1
            # Player 2's point of view: opponent statistics come from player 1.
            if teams[1] == 1 and rand() % 5 == 0:
                res[idx_res] = float(step), float(n_consecutive_1), float(n_tried_bandits_1), float(max_tried_1),\
                                jini_sum_1/jini_sum_ideal, float(p2[bandit].sum()), float(e2[bandit]), float(p1[bandit].sum()), float(tried_counts_modified_1[bandit]), float(step-last1[bandit]), float(thr)
                idx_res += 1
        # Decay factor of the chosen bandit: 0.97 per earlier pull by either player.
        decay1 = 0.97 ** (p1[a1].sum() + p2[a1].sum())
        decay2 = 0.97 ** (p1[a2].sum() + p2[a2].sum())
        # Multiply in the probability of the observed outcome for every
        # candidate initial threshold.
        for thr in range(101):
            prob = np.ceil(thr * decay1) / 101.0
            likelihoods1[a1, thr] *= prob if r1 == 1 else 1 - prob
            prob = np.ceil(thr * decay2) / 101.0
            likelihoods2[a2, thr] *= prob if r2 == 1 else 1 - prob
        e1[a1] = (likelihoods1[a1] * np.arange(101.0)).sum() / likelihoods1[a1].sum()
        e2[a2] = (likelihoods2[a2] * np.arange(101.0)).sum() / likelihoods2[a2].sum()
        # First pull of this bandit by the player.
        if p1[a1].sum() == 0:
            n_tried_bandits_1 += 1
        if p2[a2].sum() == 0:
            n_tried_bandits_2 += 1
        # idx is the right-most slot of the sorted counts that equals the
        # bandit's current count; incrementing that slot keeps the array sorted.
        # (99 - idx) is that slot's distance from the top of the ordering.
        idx = np.searchsorted(tried_counts_sorted_1, p1[a1].sum(), side="right") - 1
        jini_sum_1 += 99 - idx
        tried_counts_sorted_1[idx] += 1
        idx = np.searchsorted(tried_counts_sorted_2, p2[a2].sum(), side="right") - 1
        jini_sum_2 += 99 - idx
        tried_counts_sorted_2[idx] += 1
        # Normaliser for the two sums above; the assert checks it bounds them.
        jini_sum_ideal += step % 100
        assert jini_sum_1 <= jini_sum_ideal and jini_sum_2 <= jini_sum_ideal
        tried_counts_modified_1[a1] += 1.0 / decay1
        tried_counts_modified_2[a2] += 1.0 / decay2
        # Pull counts are updated only now: everything above used the
        # pre-action counts.
        p1[a1, r1] += 1
        p2[a2, r2] += 1
        max_tried_1 = max(max_tried_1, p1[a1].sum())
        max_tried_2 = max(max_tried_2, p2[a2].sum())
        last1[a1] = step
        last2[a2] = step
        if a1 == last_bandit_1:
            n_consecutive_1 += 1
        if a2 == last_bandit_2:
            n_consecutive_2 += 1
        last_bandit_1 = a1
        last_bandit_2 = a2
    # Only the rows that were actually written.
    return res[:idx_res]


# Output buffers with a fixed upper bound on the number of rows.
# NOTE(review): with N_FEATURES = 10 these shapes request about 52.8 GB (train)
# and 14.1 GB (valid) of float32. Presumably this relies on the OS committing
# only the pages that are actually written - confirm before running on a
# machine without memory overcommit.
train = np.empty((3000*2000*100*2, N_FEATURES+1), dtype=np.float32)
valid = np.empty((800*2000*100*2, N_FEATURES+1), dtype=np.float32)
# Next free row in each buffer.
idx_train = idx_valid = 0
# Episode ids that have already been processed.
ids = set()

def rand():
    """Advance the shared generator state in `_rand[0]` and return the new value.

    Applies two xor-shift rounds (left by 7, then right by 9) to the uint64
    state and stores the result back in place.
    """
    value = _rand[0]
    value ^= value << np.uint64(7)
    value ^= value >> np.uint64(9)
    _rand[0] = value
    return _rand[0]
# Turn every distinct episode into sampled rows and append them to either the
# training buffer or the validation buffer (whole episodes are kept together).
for dat in data:
    episode_id = dat["episode_id"]
    if episode_id in ids:
        continue
    ids.add(episode_id)
    thresholds = np.array(dat["thresholds"])
    actions = np.array(dat["actions"])
    rewards = np.array(dat["rewards"])
    # teams[k] is 1 when player k of this episode is one of the selected submissions.
    teams = np.array([episode_id * 2 + 0 in set_recent_submission_episode_id_and_indexes,
                      episode_id * 2 + 1 in set_recent_submission_episode_id_and_indexes], dtype=np.int64)
    d = proc(thresholds, actions, rewards, teams, _rand)
    # Generator output divisible by 5 -> validation, otherwise training.
    if rand() % 5 != 0:
        train[idx_train:idx_train+len(d)] = d
        idx_train += len(d)
    else:
        valid[idx_valid:idx_valid+len(d)] = d
        idx_valid += len(d)

# Trim the buffers to the rows that were filled in *before* reporting their
# shapes; printing first would show the pre-allocated capacity instead of the
# real number of samples.
train = train[:idx_train]
valid = valid[:idx_valid]

print(train.shape, valid.shape)

In [None]:
n_train = train.shape[0]

# Features are the first N_FEATURES columns; the regression target is the raw
# threshold stored in the last column.
X_train = train[:, :N_FEATURES]
y_train = train[:, N_FEATURES]
X_valid = valid[:, :N_FEATURES]
y_valid = valid[:, N_FEATURES]

# Hand LightGBM its own copies so the big buffers can be released afterwards.
lgb_train = lgb.Dataset(data=X_train.copy(), label=y_train.copy())
del X_train, y_train
lgb_valid = lgb.Dataset(data=X_valid.copy(), label=y_valid.copy(), reference=lgb_train)
del X_valid, y_valid

# Neither the buffers nor the parsed replays are needed from here on.
del train, valid
del data
gc.collect()

In [None]:
%%time

# LightGBM hyper-parameters for the regression on the raw threshold.
params = {
    'boosting_type': "gbdt",
    'objective': "rmse",
    'metric': "rmse",
    'num_leaves': 4095,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.5,
    'bagging_freq': 5,
    'verbose': 1,
    "seed": 42,
}

# Early stopping is requested through the callback: the `early_stopping_rounds=`
# keyword of lgb.train() was deprecated and is rejected by LightGBM >= 4, whereas
# the callback is accepted by old and new releases alike.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=1024,
    valid_sets=[lgb_valid],
    callbacks=[lgb.early_stopping(stopping_rounds=50)],
)

In [None]:
# Column names in the same order as the feature columns produced by proc().
feature_names = "STEP, OPPONENT_N_CONSECUTIVE, OPPONENT_N_TRIED_BANDITS, OPPONENT_MAX_TRIED, OPPONENT_JINI, N_TRIED, EXPECTED_THRESHOLD, OPPONENT, OPPONENT_MODIFIED, OPPONENT_NOT_USED_TURNS".split(", ")

# One row per feature, most important first.
df_importance = pd.DataFrame({
    "importance": gbm.feature_importance(),
    "feature": feature_names,
})
df_importance = df_importance.sort_values("importance", ascending=False)

plt.figure(figsize=(10,6))
bar_colors = sns.hls_palette(10, l=0.7, s=1)
sns.barplot(data=df_importance, x="importance", y="feature", palette=bar_colors)
plt.show()

In [None]:
# Serialise the model trained on the untransformed target.
model_filename = "model062_2.pickle"
with open(model_filename, mode="wb") as model_file:
    pickle.dump(gbm, model_file)

# Agent

In [None]:
!mkdir /kaggle_simulations
!mkdir /kaggle_simulations/agent

In [None]:
transform_model_filename = "model062_1.pickle"
normal_model_filename = "model062_2.pickle"

!cp {transform_model_filename} /kaggle_simulations/agent/
!cp {normal_model_filename} /kaggle_simulations/agent/

with open("main.py", "w") as f:
    f.write(f"TRANSFORM_BASE = {TRANSFORM_BASE}\n")
    f.write(f"N_FEATURES = {N_FEATURES}\n")
    f.write(f"transform_model_filename = '/kaggle_simulations/agent/{transform_model_filename}'\n")
    f.write(f"normal_model_filename = '/kaggle_simulations/agent/{normal_model_filename}'\n")

In [None]:
%%writefile -a main.py

import sys
import math
from time import time
import gzip
import base64
import random
import pickle

import numpy as np

# NOTE: pickle.load can execute arbitrary code contained in the file; these
# paths must only ever point at models produced by the training notebook.
with open(transform_model_filename, "rb") as transform_file:
    transform_model = pickle.load(transform_file)
with open(normal_model_filename, "rb") as normal_file:
    normal_model = pickle.load(normal_file)

# Per-episode working data of the agent, filled in on step 0.
state = {}

# Generator used for random tie-breaking, seeded from the wall clock (ms).
rng = np.random.default_rng(int(time()*1000))

# Column indices of the feature matrix, in the order used for training.
(
    STEP,
    OPPONENT_N_CONSECUTIVE,
    OPPONENT_N_TRIED_BANDITS,
    OPPONENT_MAX_TRIED,
    OPPONENT_JINI,
    N_TRIED,
    EXPECTED_THRESHOLD,
    OPPONENT,
    OPPONENT_MODIFIED,
    OPPONENT_NOT_USED_TURNS,
) = range(N_FEATURES)

def get_monotonic_func(a, b):
    """Build a sigmoid-shaped monotonic function normalised on [0, 1].

    The returned function evaluates sigmoid(a * (x - b)) and rescales it
    linearly using its values at x = 0 and x = 1.

    Args:
        a: Steepness; must be non-zero. A positive value gives an increasing
            function with f(0) = 0 and f(1) = 1, a negative value gives a
            decreasing function with f(0) = 1 and f(1) = 0.
        b: Horizontal offset of the sigmoid's midpoint.

    Returns:
        A one-argument callable implementing the rescaled sigmoid.

    Raises:
        AssertionError: if ``a`` is zero.
    """
    assert a != 0.0
    x_left = -b
    x_right = x_left + 1.0

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-a * x))

    left = sigmoid(x_left)
    right = sigmoid(x_right)

    # `low` is the sigmoid value mapped to 0, `high` the one mapped to 1:
    # the left end for an increasing curve, the right end for a decreasing one.
    if a > 0:
        low, high = left, right
    else:
        low, high = right, left

    def res(x):
        y = sigmoid(x + x_left)
        return (y - low) / (high - low)

    return res

# Decreasing curve (negative steepness) evaluated on the episode progress.
monotonic_func = get_monotonic_func(-10.0, 0.0)


def get_transform_model_ratio(step):
    """Return the blend weight of the transform model at the given step.

    The step is divided by 2000 and passed through ``monotonic_func``; the
    result is scaled by 1.2.
    """
    progress = step / 2000.0
    return 1.2 * monotonic_func(progress)

def agent(observation, configuration):
    """Choose the bandit to pull on this step.

    On step 0 the per-episode working data is created in the module-level
    ``state`` dict. On every later step the feature matrix ``X`` (one row per
    bandit) is updated from the previous actions and the reward gained. Both
    models then predict a threshold per bandit, the predictions are blended,
    multiplied by the bandit's decay factor, and the best bandit is returned.

    Args:
        observation: read for ``step``, ``reward`` (a running total - the
            per-step reward is obtained by differencing) and ``lastActions``.
        configuration: read for ``"banditCount"``.

    Returns:
        Index of the selected bandit.
    """
    if observation.step == 0:
        state["X"] = X = np.zeros((configuration["banditCount"], N_FEATURES), dtype=np.float32)
        X[:, OPPONENT_NOT_USED_TURNS] = 1
        # With no evidence every threshold 0..100 is weighted equally.
        X[:, EXPECTED_THRESHOLD] = (TRANSFORM_BASE**np.arange(101.0)).mean()
        state["normal_expected_threshold"] = normal_expected_threshold = np.full(configuration["banditCount"], 50.0, dtype=np.float32)
        # Pulls per bandit by either player (drives the decay factor).
        state["n_selections"] = n_selections = [0] * configuration["banditCount"]
        state["total_reward"] = 0
        state["likelihoods"] = np.ones((configuration["banditCount"], 101), dtype=np.float64)
        # NOTE(review): the arrays below are sized with a literal 100 while the
        # ones above use configuration["banditCount"] - presumably always 100.
        state["jini_sum"] = 0
        state["trial_counts"] = np.zeros(100, dtype=np.int64)
        state["trial_counts_sorted"] = np.zeros(100, dtype=np.float64)
        state["opponent_last_bandit"] = -999
        state["opponent_trial_counts"] = np.zeros(100, dtype=np.int64)
        state["opponent_jini_sum"] = 0
        # Tiny non-zero start so the jini ratio is 0 rather than 0/0 early on.
        state["jini_sum_ideal"] = 1e-100
        state["opponent_trial_counts_sorted"] = np.zeros(100, dtype=np.float64)
    else:
        X = state["X"]
        normal_expected_threshold = state["normal_expected_threshold"]
        last_bandit = state["last_bandit"]
        n_selections = state["n_selections"]
        total_reward = state["total_reward"]
        likelihoods = state["likelihoods"]
        trial_counts = state["trial_counts"]
        trial_counts_sorted = state["trial_counts_sorted"]
        opponent_trial_counts = state["opponent_trial_counts"]
        opponent_trial_counts_sorted = state["opponent_trial_counts_sorted"]
        # Reward gained by our previous pull.
        reward = observation.reward - total_reward
        state["total_reward"] = observation.reward

        # Decay factors use the pull counts from *before* the last actions.
        decay = 0.97 ** (n_selections[last_bandit])
        a1, a2 = observation.lastActions
        # lastActions holds both players' choices; removing ours leaves the opponent's.
        opponent_last_bandit = a1 + a2 - last_bandit
        opponent_decay = 0.97 ** n_selections[opponent_last_bandit]
        # Multiply in the probability of the observed outcome for every
        # candidate initial threshold of the bandit we pulled.
        for thr in range(101):
            prob = np.ceil(thr * decay) / 101.0
            likelihoods[last_bandit, thr] *= prob if reward == 1 else 1 - prob
        # Likelihood-weighted means: transformed (for the transform model) and raw.
        X[last_bandit, EXPECTED_THRESHOLD] = (likelihoods[last_bandit] * TRANSFORM_BASE**np.arange(101.0)).sum() / likelihoods[last_bandit].sum()
        normal_expected_threshold[last_bandit] = (likelihoods[last_bandit] * np.arange(101.0)).sum() / likelihoods[last_bandit].sum()

        for b in observation["lastActions"]:  # take care: counts pulls by BOTH players
            n_selections[b] += 1

        X[:, STEP] = observation.step

        X[last_bandit, N_TRIED] += 1
#         if reward == 1:
#             X[last_bandit, WIN] += 1
#         else:
#             X[last_bandit, LOSE] += 1
        # Adds 1 when the opponent repeated its previous bandit (the count is never reset).
        X[:, OPPONENT_N_CONSECUTIVE] += state["opponent_last_bandit"] == opponent_last_bandit
        if opponent_trial_counts[opponent_last_bandit] == 0:
            X[:, OPPONENT_N_TRIED_BANDITS] += 1

        # jini: idx is the right-most slot of the ascending pull counts that
        # equals the bandit's current count; incrementing it keeps the array sorted.
        idx = np.searchsorted(trial_counts_sorted, trial_counts[last_bandit], side="right") - 1
        state["jini_sum"] += 99 - idx
        trial_counts_sorted[idx] += 1
        idx = np.searchsorted(opponent_trial_counts_sorted, opponent_trial_counts[opponent_last_bandit], side="right") - 1
        state["opponent_jini_sum"] += 99 - idx
        opponent_trial_counts_sorted[idx] += 1
        # observation.step - 1 is the index of the step that was just played.
        state["jini_sum_ideal"] += (observation.step-1) % 100
        #X[:, JINI] = state["jini_sum"] / state["jini_sum_ideal"]
        X[:, OPPONENT_JINI] = state["opponent_jini_sum"] / state["jini_sum_ideal"]

        # take care: the counts are incremented only after the lookups above used the old values
        opponent_trial_counts[opponent_last_bandit] += 1  ### take care
        trial_counts[last_bandit] += 1

        # The column holds the same value in every row, so row 0 is representative.
        if X[0, OPPONENT_MAX_TRIED] < opponent_trial_counts[opponent_last_bandit]:
            X[:, OPPONENT_MAX_TRIED] = opponent_trial_counts[opponent_last_bandit]

        X[opponent_last_bandit, OPPONENT] += 1
        X[opponent_last_bandit, OPPONENT_MODIFIED] += 1.0 / opponent_decay
        # Steps since the opponent last pulled each bandit; reset to 1 for the one just pulled.
        X[:, OPPONENT_NOT_USED_TURNS] += 1
        X[opponent_last_bandit, OPPONENT_NOT_USED_TURNS] = 1
        state["opponent_last_bandit"] = opponent_last_bandit


    # Undo the TRANSFORM_BASE ** threshold target transform via log base TRANSFORM_BASE.
    y_transform = np.log(transform_model.predict(X)) / np.log(TRANSFORM_BASE)
    # The normal model sees the same features except for the raw expected threshold.
    X_normal = X.copy()
    X_normal[:, EXPECTED_THRESHOLD] = normal_expected_threshold
    y_normal = normal_model.predict(X_normal)

    # Blend the two predictions; r is the weight of the transform model.
    # NOTE(review): r looks like it can exceed 1 early in the episode, making
    # (1 - r) negative - confirm this is intended.
    r = get_transform_model_ratio(observation.step)
    y = r * y_transform + (1 - r) * y_normal

    # Arg-max of prediction * decay, scanned in random order so that ties are
    # broken randomly (strict '>' keeps the first maximum met). If no value is
    # greater than 0, bandit 0 is returned.
    order = list(range(configuration["banditCount"]))
    rng.shuffle(order)
    ama = 0
    ma = 0
    for bandit in order:
        theta = y[bandit]
        decay = 0.97 ** (n_selections[bandit])
        theta *= decay
        if theta > ma:
            ma = theta
            ama = bandit
    bandit = ama

    # Remembered so the next call can tell our action apart from the opponent's.
    state["last_bandit"] = bandit
    return bandit

In [None]:
# Bundle the agent source and both pickled models into a gzip-compressed tarball.
!tar -czvf submission.tar.gz main.py {transform_model_filename} {normal_model_filename}

In [None]:
# Copy two pickled models from the opponent's dataset into the agent directory
# (presumably the files the opponent's main.py loads - confirm against that file).
!cp ../input/santa2020-061-f057-d0128/model059.pickle /kaggle_simulations/agent/
!cp ../input/santa2020-061-f057-d0128/model060.pickle /kaggle_simulations/agent/
opponent_filename = "../input/santa2020-061-f057-d0128/main.py"

# Play one local "mab" episode of the new agent against the opponent, print the
# rewards reported by the environment and render the episode in the notebook.
env = make("mab", debug=True)
env.reset()
env.run(["main.py", opponent_filename])
print(env.toJSON()["rewards"])
env.render(mode="ipython", width=800, height=700)