# My Strategy

My strategy is a "not losing" strategy.
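* Trust the prediction when
    * the prediction score is very high (over a threshold),
    * the predictions have won consecutively (over a threshold), or
    * the predictions won the last few (e.g. 100) games at a high rate.
* Play randomly when it is worth it (i.e. when none of the conditions above hold).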

In [None]:
%%writefile predict_only_when_confirmation.py
import random
import numpy as np
import lightgbm as lgb
import pandas as pd

# --- Tunable parameters -------------------------------------------------
# Number of most recent rounds (both players' moves) used as model features;
# the agent also waits until step > USE_BACK before it starts predicting.
USE_BACK = 10
# The prediction may only be played once the step counter exceeds this value.
PRED_USE_STEP_THRES = 200
# Rule A: play the prediction when its top class probability exceeds this.
PRED_USE_SCORE_THRES = 0.9
# Rule B: play the prediction when, over the last CHECK_PAST_PERIOD rounds,
# the share of correct predictions exceeds CHECK_WINS_RATE_THRES.
CHECK_PAST_PERIOD = 100
CHECK_WINS_RATE_THRES = 0.9
# CONT_WINS_THRES = 30  (threshold of the disabled consecutive-wins rule)

# --- Mutable game state shared across agent calls -----------------------
my_actions = []  # moves this agent has played, oldest first
op_actions = []  # opponent moves observed so far, oldest first
pr_actions = []  # predicted opponent moves, one per prediction made
cont_wins = 0    # consecutive-hit counter; not updated by the active code
solutions = []   # not used by the code in this file

## ============== LIGHT GBM PREDICTION ============== ## 
def predict(my_actions, op_actions):
    """Fit a LightGBM classifier on the game history and predict the opponent's next move.

    Every training row is a window of ``USE_BACK`` consecutive rounds holding
    both players' moves (columns ``OP_0``/``MY_0`` .. ``OP_{USE_BACK-1}``/
    ``MY_{USE_BACK-1}``, oldest first); its label is the opponent move that
    came right after that window.  The most recent ``USE_BACK`` rounds form
    the single row that is scored.

    Args:
        my_actions: This agent's past moves, oldest first.  Must contain more
            than ``USE_BACK`` entries, otherwise there is nothing to train on.
        op_actions: The opponent's past moves, oldest first.  It is sliced and
            indexed relative to ``len(my_actions)``.

    Returns:
        A ``(probabilities, label)`` tuple: the class probabilities for the
        next opponent move as a list of floats (one per class seen in
        training) and the most likely class as an ``int``.
    """
    size = len(my_actions)

    # Build the training windows and the query row in one pass; column order
    # (OP_0, MY_0, OP_1, MY_1, ...) is the same in both frames.
    train_cols = {}
    query_cols = {}
    for u in range(USE_BACK):
        stop = size - (USE_BACK - u)
        train_cols[f"OP_{u}"] = op_actions[u:stop]
        train_cols[f"MY_{u}"] = my_actions[u:stop]
        query_cols[f"OP_{u}"] = [op_actions[stop]]
        query_cols[f"MY_{u}"] = [my_actions[stop]]

    X_train = pd.DataFrame(train_cols)
    X_test = pd.DataFrame(query_cols)
    # A 1-D target: a single-column DataFrame makes the label encoder emit a
    # column-vector DataConversionWarning on every call.
    y_train = pd.Series(op_actions[USE_BACK:size], name="y")

    classifier = lgb.LGBMClassifier(
        random_state=0,
        n_estimators=20,
    )
    classifier.fit(X_train, y_train)

    # Score the query row once and derive the label from the probabilities;
    # classifier.predict() would recompute the same probabilities internally.
    proba = classifier.predict_proba(X_test)[0]
    label = classifier.classes_[int(np.argmax(proba))]
    return proba.tolist(), int(label)

## ============== RANDOM(NASH EQUILIBRIUM) ============== ##
def randomize():
    """Return a uniformly random move: 0, 1 or 2."""
    move = random.randint(0, 2)
    return int(move)

## ============== PREDICT ONLY CONFIRM, OTHER RANDOM ============== ##
def predict_only_when_confirmation(observation, configuration):
    """Agent entry point: counter the predicted move only when the model looks reliable.

    Bookkeeping is kept in the module-level lists ``my_actions``,
    ``op_actions`` and ``pr_actions``.  From step ``USE_BACK + 1`` on, a
    prediction of the opponent's next move is made (and recorded) every
    round.  After step ``PRED_USE_STEP_THRES`` the prediction is acted upon
    when at least one of these holds:

    * its top class probability exceeds ``PRED_USE_SCORE_THRES``;
    * over the last ``CHECK_PAST_PERIOD`` rounds, the share of predictions
      that matched the opponent's actual move exceeds
      ``CHECK_WINS_RATE_THRES``.

    In every other case a random move is played.

    Args:
        observation: Must expose ``step`` and, for ``step != 0``,
            ``lastOpponentAction``.
        configuration: Unused; accepted to match the agent call signature.

    Returns:
        The move to play, an ``int`` in ``{0, 1, 2}``.
    """
    if observation.step == 0:
        # Start every episode from a clean slate.  If the same callable is
        # reused for another episode, leftover history would make the lists
        # disagree with the step counter and misalign the training windows.
        # On a freshly loaded module this is a no-op.
        my_actions.clear()
        op_actions.clear()
        pr_actions.clear()
    else:
        op_actions.append(observation.lastOpponentAction)

    pred = None
    trust_prediction = False
    if observation.step > USE_BACK:
        pred_proba, pred = predict(my_actions, op_actions)
        pr_actions.append(pred)

        if observation.step > PRED_USE_STEP_THRES:
            confident = max(pred_proba) > PRED_USE_SCORE_THRES

            # pr_actions already holds the prediction for the current round,
            # whose outcome is still unknown, so the window of predictions is
            # shifted back by one to line up with the observed opponent moves.
            recent_ops = op_actions[-CHECK_PAST_PERIOD:]
            recent_preds = pr_actions[-CHECK_PAST_PERIOD - 1:-1]
            hits = sum(op == guess for op, guess in zip(recent_ops, recent_preds))
            reliable = hits / CHECK_PAST_PERIOD > CHECK_WINS_RATE_THRES

            trust_prediction = confident or reliable

    if trust_prediction:
        # Play the move one above the predicted one (mod 3) - presumably the
        # counter-move under the usual 0=rock, 1=paper, 2=scissors encoding.
        my_action = (pred + 1) % 3
    else:
        my_action = randomize()

    my_actions.append(my_action)
    return my_action

# Test

I used the agent list from [RPS Dojo](https://www.kaggle.com/chankhavu/rps-dojo), which is a very useful notebook.

In [None]:
import os
from tqdm.notebook import tqdm
import pandas as pd
from kaggle_environments import make, evaluate

!pip install -q -U kaggle_environments

In [None]:
def simulate_score(player1, player2, steps):
    """Run an "rps" match between two agents and return what ``evaluate`` yields.

    Args:
        player1: First agent, in any form ``evaluate`` accepts.
        player2: Second agent, in any form ``evaluate`` accepts.
        steps: Value passed as the ``episodeSteps`` configuration entry.
    """
    agents = [player1, player2]
    settings = {"episodeSteps": steps}
    return evaluate("rps", agents, configuration=settings)

In [None]:
# Opponent agent files from the RPS Dojo dataset, grouped by belt colour.
opponent_dict = {
    "black": os.listdir("../input/rps-dojo/black_belt"),
    "blue": os.listdir("../input/rps-dojo/blue_belt"),
    "white": os.listdir("../input/rps-dojo/white_belt"),
}

# Play one 1000-step match against every opponent; each row keeps the
# opponent's file name, the first entry of the returned result, and the belt.
results = []
for belt, opponent_files in tqdm(opponent_dict.items()):
    belt_dir = f"../input/rps-dojo/{belt}_belt"
    for opponent_file in tqdm(opponent_files):
        outcome = simulate_score(
            "predict_only_when_confirmation.py",
            os.path.join(belt_dir, opponent_file),
            1000,
        )
        results.append([opponent_file, outcome[0], belt])
results_df = pd.DataFrame(results, columns=["opponent", "score", "belt"])

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, axes = plt.subplots(1, 3, figsize=(20, 5))
# "ps" is the first element of each stored score entry.
results_df["ps"] = results_df["score"].apply(lambda score: score[0])

# One panel per belt: (belt, bar colour, right end of the guide lines, title).
panels = [
    ("black", "black", 10, "black belt"),
    ("blue", "blue", 7, "blue belt"),
    ("white", "grey", 10, "white belt"),
]
for ax, (belt, bar_color, x_end, title) in zip(axes, panels):
    belt_scores = results_df[results_df["belt"] == belt].set_index("opponent")["ps"]
    belt_scores.plot(kind="bar", color=bar_color, ax=ax)
    # Bars beyond +/-100 are clipped by the fixed y-range.
    ax.set_ylim(-100, 100)
    # Guide lines at +20 / -20 and a solid zero line.
    ax.hlines(20, -1, x_end, color="red", linestyle="dashdot")
    ax.hlines(-20, -1, x_end, color="blue", linestyle="dashdot")
    ax.hlines(0, -1, x_end, color="black")
    ax.set_title(title)
plt.suptitle("1000 battle point(Cut Over 100 and Under -100)")