# Rock Paper Scissors - Multi Armed Tree Bandit

This agent records a history of previous moves, opponent moves and the correctness of our predictions. 

This dataset is passed into DecisionTreeClassifier to predict our opponent's move. The process is applied iteratively:
- first we predict the opponent's next move based purely on move history
- then we add our history of first-stage predictions to the dataset
- we repeat this process a third time, in case our opponent is trying to predict our predictions

In [None]:
# Setup cell: stage the helper agent modules next to this notebook.
# !find ../input/ -name '*.py'
# Copy every .py file from the multi-armed-stats-bandit input directory into the working directory.
!cp ../input/rock-paper-scissors-multi-armed-stats-bandit/*.py ./

# !cat ../input/rock-paper-scissors/react.py | tee reactionary.py
# !cat ../input/rock-paper-scissors-anti-rotn/anti_rotn.py | perl -p -e 's/history/rotn_history/g;' | tee anti_rotn.py
# !cat ../input/rps-roshambo-comp-iocaine-powder/submission.py | tee -a iocaine.py
# !cat ../input/rock-paper-scissors-greenberg/greenberg.py | perl -p -e 's/kaggle_agent/greenberg_agent/g' | tee greenberg.py
# !cat ../input/rock-paper-scissors-statistical-prediction/submission.py | perl -p -e 's/history/statistical_history/g;' | tee statistical.py
# !cat ../input/rock-paper-scissors-decision-tree/submission.py | perl -p -e 's/history/decision_tree_history_1/g; s/decision_tree_agent/decision_tree_agent_1/g;' > decision_tree_1.py
# !cat ../input/rock-paper-scissors-decision-tree/submission.py | perl -p -e 's/history/decision_tree_history_2/g; s/decision_tree_agent/decision_tree_agent_2/g;' > decision_tree_2.py
# !cat ../input/rock-paper-scissors-decision-tree/submission.py | perl -p -e 's/history/decision_tree_history_2/g; s/decision_tree_agent/decision_tree_agent_3/g;' > decision_tree_3.py
# Rewrite every local .py file in place, replacing each print(...) call with `pass;`
# (presumably to silence the copied agents' debug output).
!find ./ -name '*.py' | xargs -L1 perl -p -i -e 's/\bprint\(.*\)/pass;/sg;'

# !cat multi_armed_stats_bandit.py -n | grep 1351 -C 5

# Agent

In [None]:
%%writefile multi_armed_tree_bandit.py

import time
import os
import random
import numpy as np
import traceback
from typing import List, Dict
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

def random_agent(observation, configuration):
    """Return a uniformly random sign in ``[0, configuration.signs - 1]``.

    The observation is ignored.
    """
    highest_sign = configuration.signs - 1
    return random.randint(0, highest_sign)

def rock_agent(observation, configuration):
    """Always play sign 0, whatever the observation or configuration."""
    rock = 0
    return rock

def paper_agent(observation, configuration):
    """Always play sign 1, whatever the observation or configuration."""
    paper = 1
    return paper

def scissors_agent(observation, configuration):
    """Always play sign 2, whatever the observation or configuration."""
    scissors = 2
    return scissors

def sequential_agent(observation, configuration):
    """Cycle through the signs in order: play ``step`` modulo the number of signs."""
    current_step = observation.step
    sign_count = configuration.signs
    return current_step % sign_count



def get_winstats(history) -> Dict[str,int]:
    """Count wins, draws and losses over the recorded rounds.

    Args:
        history: mapping with 'action' (our moves) and 'opponent' (their moves)
            lists of signs 0, 1, 2. The two lists are paired from the most
            recent entry backwards, so a longer 'opponent' list only has its
            oldest surplus entries ignored.

    Returns:
        Dict with integer counts under the keys "wins", "draw" and "loss".
    """
    wins = 0
    draw = 0
    loss = 0
    # Pair moves from the tail so both lists line up on their latest round.
    for ours, theirs in zip(reversed(history['action']), reversed(history['opponent'])):
        # Rock-paper-scissors is cyclic: sign k beats sign (k - 1) % 3, so the
        # comparison must wrap around (rock=0 beats scissors=2).
        outcome = (ours - theirs) % 3
        if   outcome == 1: wins += 1
        elif outcome == 0: draw += 1
        else:              loss += 1
    return { "wins": wins, "draw": draw, "loss": loss }

def get_winrate(history):
    """Return wins / (wins + losses) for the recorded rounds; draws are ignored.

    Returns 0 when no round has been won or lost yet.
    """
    stats   = get_winstats(history)
    decided = stats['wins'] + stats['loss']
    if not decided:
        return 0
    return stats['wins'] / decided
    
    
# Initialize starting history: one empty, independent list per tracked series
history = {
    series: []
    for series in ("step", "prediction1", "prediction2", "expected", "action", "opponent")
}

# NOTE: adding statistics causes the DecisionTree to make random moves 
def get_statistics(values) -> List[float]:
    """Return the relative frequency of signs 0, 1 and 2 in ``values``.

    An empty input yields ``0.0`` for every sign.
    """
    samples = np.array(values)
    total   = len(samples)
    frequencies = []
    for sign in (0, 1, 2):
        if total:
            frequencies.append(np.count_nonzero(samples == sign) / total)
        else:
            frequencies.append(0.0)
    return frequencies

# mlb_expected =  dict_keys(['mlb_stats', 'random', 'reactionary', 'anti_rotn', 'iocaine', 'greenberg', 'statistical', 'statistical_expected', 'decision_tree_1', 'decision_tree_3'])

# observation   =  {'step': 1, 'lastOpponentAction': 1}
# configuration =  {'episodeSteps': 10, 'agentTimeout': 60, 'actTimeout': 1, 'runTimeout': 1200, 'isProduction': False, 'signs': 3}
def _make_classifier(classifier):
    """Return a fresh, unfitted classifier for the name 'xgb', 'forest' or 'tree'."""
    if classifier == "xgb":
        # NOTE(review): XGBClassifier is not imported in this cell; it must be
        # provided by whatever else ends up in the same namespace.
        return XGBClassifier()
    if classifier == "forest":
        return RandomForestClassifier(max_depth=100, ccp_alpha=0.05)
    if classifier == "tree":
        return DecisionTreeClassifier(max_depth=100, ccp_alpha=0.05)
    raise ValueError(f"unknown classifier: {classifier!r}")


def _build_dataset(history, mlb_values, window, n_end, last_action, prior_predictions):
    """Build training rows X, labels Y and the single query row Z for one stage.

    Each row is the flattened stack of `window`-long slices of: our actions,
    every series named in `prior_predictions` (key -> this turn's prediction),
    the opponent's moves, and every series in `mlb_values`.
    """
    offsets = list(reversed(range(1, n_end)))  # oldest sample first
    X = np.stack([
        np.array([
            history['action'][-n-window:-n],
            *[ history[key][-n-window:-n] for key in prior_predictions ],
            history['opponent'][-n-window-1:-n-1],
            *[ values[-n-window-1:-n-1] for values in mlb_values.values() ],
        ]).flatten()
        for n in offsets
    ])
    # The label is the opponent move that FOLLOWS the opponent slice above
    # (the slice ends at index -n-2, so the next move is at -n-1). Using an
    # index inside the slice would leak the label into the features.
    Y = np.array([ history['opponent'][-n-1] for n in offsets ])
    Z = np.array([
        # Our move for this turn is not chosen yet, so the last action stands in for it.
        history['action'][-window+1:] + [ last_action ],
        *[ history[key][-window+1:] + [ current ] for key, current in prior_predictions.items() ],
        history['opponent'][-window:],
        *[ values[-window:] for values in mlb_values.values() ],
    ]).flatten().reshape(1, -1)
    return X, Y, Z


# observation   =  {'step': 1, 'lastOpponentAction': 1}
# configuration =  {'episodeSteps': 10, 'agentTimeout': 60, 'actTimeout': 1, 'runTimeout': 1200, 'isProduction': False, 'signs': 3}
def multi_armed_tree_bandit(observation, configuration, window=6, stages=1, random_freq=0.0, warmup_period=10, max_samples=300, classifier="forest"): 
    """Choose a move by predicting the opponent's next move with a tree classifier.

    Each call records the opponent's last move in the module-level `history`,
    appends a new 'mlb_stats' value to the module-level `mlb_expected` (defined
    outside this cell), refits a classifier on sliding windows of the recorded
    series, and plays the sign that beats the predicted opponent move.

    Args:
        observation: object exposing `step` and `lastOpponentAction`.
        configuration: object exposing `signs`.
        window: number of consecutive steps packed into each feature row; no
            model is fitted before `window` steps have elapsed.
        stages: number of stacked prediction stages (at most three are
            implemented). Stage k adds the history of the earlier stages'
            predictions to the features.
        random_freq: currently unused; kept for interface compatibility.
        warmup_period: random moves are played while
            `step <= max(warmup_period, window)`; it also reduces the number of
            training rows.
        max_samples: upper bound on the number of training rows.
        classifier: "xgb", "forest" or "tree".

    Returns:
        The chosen action as a built-in int.

    Any exception raised while building the dataset or fitting is printed and
    swallowed; the randomly drawn default prediction is then used instead.
    """
    global history
    global mlb_expected
    mlb_excluded = [ 'statistical', 'random' ]
    mlb_included = [ 'mlb_stats', 'iocaine', 'greenberg', 'statistical_expected', 'anti_rotn', 'decision_tree_1' ]

    # Record the stats bandit's action shifted down by one sign
    # (presumably the opponent move it expects - verify against that agent).
    mlb_expected['mlb_stats'] += [ 
        (multi_armed_bandit_stats_agent(observation, configuration, average='running', window=20) - 1) % configuration.signs
    ]
    mlb_values = { key: value for key, value in mlb_expected.items() if key in mlb_included and not key in mlb_excluded }

    time_start      = time.perf_counter()
    step            = observation.step
    last_action     = history['action'][-1]          if len(history['action']) else 2
    opponent_action = observation.lastOpponentAction if observation.step > 0   else 2

    if observation.step > 0:
        history['opponent'].append(opponent_action)

    winrate  = get_winrate(history)
    winstats = get_winstats(history)

    # Default to random guesses; they stand whenever no model can be fitted.
    predictions = {
        'prediction1': random.randint(0,2),
        'prediction2': random.randint(0,2),
    }
    expected = random.randint(0,2)

    # We need at least some turns of history for the classifier to work
    if observation.step >= window:
        try:
            min_length = min(map(len,[ *history.values(), *mlb_values.values() ]))
            n_end      = min(max_samples, max(1, min_length - window - warmup_period))
            if n_end > 1:
                prior_predictions = {}  # earlier stages' predictions for this turn
                for stage in range(1, min(stages, 3) + 1):
                    X, Y, Z = _build_dataset(history, mlb_values, window, n_end, last_action, prior_predictions)
                    model = _make_classifier(classifier)  # one independent model per stage
                    model.fit(X, Y)
                    expected = model.predict(Z)[0]
                    stage_key = f'prediction{stage}'
                    if stage_key in predictions:
                        # Only stages 1 and 2 are persisted, so only they can
                        # feed a later stage.
                        predictions[stage_key]       = expected
                        prior_predictions[stage_key] = expected

        except Exception as exception:
            print(exception)
            traceback.print_exc()

    # During the warmup period, play random to get a feel for the opponent 
    if (observation.step <= max(warmup_period,window)):
        actor  = 'warmup'
        action = random_agent(observation, configuration)    

    # But mostly use the classifier's prediction and play the sign that beats it
    else:
        actor  = 'DecisionTree'
        action = (expected + 1) % configuration.signs

    # Persist state
    history['step'].append(step)
    history['prediction1'].append(predictions['prediction1'])
    history['prediction2'].append(predictions['prediction2'])
    history['expected'].append(expected)
    history['action'].append(action)
    if observation.step == 0:  # keep arrays equal length
        print('mlb_expected = ', mlb_expected.keys())
        history['opponent'].append(random.randint(0, 2))

    print('mlb_values = ', mlb_values)

    # Print debug information
    time_taken = time.perf_counter() - time_start
    print(f'{1000*time_taken:3.0f}ms | {step:4d} | opp = {opponent_action} | exp = {expected} | act = {action} | {actor:7s} | {100*winrate:5.1f}% {winstats}')    
    return int(action)

In [None]:
# Rebuild submission.py from scratch: remove any stale copy, concatenate every
# local .py file (ls -tr lists the oldest-modified first), then run the result
# inside this notebook's namespace so its agents can be called directly below.
!rm -f submission.py
!ls -tr ./*.py | xargs cat > submission.py
%run -i 'submission.py'

In [None]:
# !cat submission.py -n | grep 1351 -C 5

In [None]:
# Self-play: run submission.py against itself for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", "submission.py"])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against rock_agent for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", rock_agent])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against sequential_agent for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", sequential_agent])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against the agent in greenberg.py for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", 'greenberg.py'])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against the agent in iocaine.py for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", 'iocaine.py'])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against the statistical-prediction submission for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", '../input/rock-paper-scissors-statistical-prediction/submission.py'])
env.render(mode="ipython", width=600, height=600)

In [None]:
# Run submission.py against the agent in decision_tree_1.py for a 100-step episode and render it.
from kaggle_environments import make

env = make("rps", configuration={"episodeSteps": 100}, debug=False)
env.run(["submission.py", 'decision_tree_1.py'])
env.render(mode="ipython", width=600, height=600)

# Further Reading

This notebook is part of a series exploring Rock Paper Scissors:
- [Rock Paper Scissors - PI Bot](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-pi-bot)
- [Rock Paper Scissors - De Bruijn Sequence](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-de-bruijn-sequence)
- [Rock Paper Scissors - Random Agent](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-random-agent)
- [Rock Paper Scissors - Weighted Random Agent](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-weighted-random-agent)
- [Rock Paper Scissors - Statistical Prediction](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-statistical-prediction)
- [Rock Paper Scissors - Random Seed Search](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-random-seed-search)
- [Rock Paper Scissors - RNG Statistics](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-rng-statistics)
- [Rock Paper Scissors - XGBoost](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-xgboost)
- [Rock Paper Scissors - Decision Tree](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-decision-tree)