The crux is to provide the model with:
* A rolling history of player / opponent hands.
* A running form (win / draw / loss).
* A total count of form (wins / draws / losses).

Then a basic counter that:
* Uses form to assess whether the opponent is predicting my moves.
* If I'm getting battered... go random.

In [None]:
%%writefile 'agent_tree.py'

import random
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.tree import DecisionTreeClassifier # faster than random forest

# globals (module-level state shared across calls to agent_tree)
conf_lookback = 20 # number of past rounds encoded into each feature row
conf_min_train = 10 # rounds to wait beyond the lookback before fitting / predicting
list_agent = [] # agent hand (one entry appended per call)
list_opp = [] # opponent hand (one entry appended per call after the first round)
df_lookback = pd.DataFrame() # re-init the no of cols later (easier to debug); one training row per round
model = DecisionTreeClassifier() # refitted on df_lookback each time a prediction is made

# calc score (this probably can be done in a neat 1 liner)
def _get_score(a, o):
    if a == o: return 0 # draw
    elif ((a + 1) % 3) == o: return -1 # lose
    else: return 1 # win

# two-bit encoding of a hand, indexed by the hand value (0, 1 or 2)
_HAND_DUMMIES = ((0, 0), (0, 1), (1, 0))


def _build_features(opp_hands, agent_hands):
    """Build one feature row from aligned, most-recent-first hand windows.

    Returns a `(features, form)` tuple. `features` is the flat list
    `opponent dummies + agent dummies + form + [wins, draws, losses]`, and
    `form` is the per-round score list (1=win, 0=draw, -1=lose) from the
    agent's point of view.
    """
    opp_bits = [bit for hand in opp_hands for bit in _HAND_DUMMIES[hand]]
    agent_bits = [bit for hand in agent_hands for bit in _HAND_DUMMIES[hand]]
    form = [_get_score(a, o) for a, o in zip(agent_hands, opp_hands)]
    tally = Counter(form)  # counted once instead of once per outcome
    wdl = [tally[1], tally[0], tally[-1]]
    return opp_bits + agent_bits + form + wdl, form


# agent
def agent_tree(obs, config):
    """Pick a hand (0, 1 or 2) by predicting the opponent's next hand.

    Every round past the lookback point adds one training row describing the
    previous `conf_lookback` rounds, labelled with the opponent hand that
    followed. Once `conf_lookback + conf_min_train` rounds have been played the
    decision tree is refitted and asked for the opponent's next hand; the
    agent plays the hand that beats it, then adjusts for recent form.
    Before that point the hand is random.

    Args:
        obs: observation providing `obs['step']` and, after the first round,
            `obs['lastOpponentAction']`.
        config: unused; kept for the agent call signature.

    Returns:
        The hand to play as an int in 0..2.
    """
    global df_lookback

    # default hand
    hand = random.randint(0, 2)
    step = obs['step']

    if step == 0:
        # a new game is starting: drop any history left over from a previous
        # game in the same process so rows never span two games
        list_agent.clear()
        list_opp.clear()
        df_lookback = pd.DataFrame()

    elif step >= 1:

        # populate last opponent result
        list_opp.append(obs['lastOpponentAction'])

        # check if past lookback point (otherwise some data will be missing on early records)
        if step > conf_lookback:

            # window of rounds *before* the latest one, most recent first;
            # the latest opponent hand is the label for this row
            opp_lookback = list_opp[-(conf_lookback + 1):-1][::-1]
            agent_lookback = list_agent[-(conf_lookback + 1):-1][::-1]
            train_data, _ = _build_features(opp_lookback, agent_lookback)

            # init lookback cols
            if df_lookback.shape[1] == 0:
                df_lookback = pd.DataFrame(columns=range(len(train_data)))

            # append lookback to df
            df_lookback.loc[len(df_lookback)] = train_data

        # check if obtained enough data to make a prediction
        # (the row guard keeps a zero conf_min_train from fitting on nothing)
        if step >= (conf_lookback + conf_min_train) and len(df_lookback) > 0:

            # window including the latest round, most recent first
            opp_pred = list_opp[-conf_lookback:][::-1]
            agent_pred = list_agent[-conf_lookback:][::-1]
            pred_data, form_pred = _build_features(opp_pred, agent_pred)

            # labels: the opponent hand that followed each training row
            targets = list_opp[-len(df_lookback):]

            # fit / predict
            model.fit(df_lookback, np.array(targets))
            pred_hand = model.predict(np.array(pred_data).reshape(1, -1))

            # offset prediction to win
            hand = (int(pred_hand[0]) + 1) % 3

            # counter predict, based on the most recent form
            last_three = form_pred[:3]
            recent_half = form_pred[:int(round(len(form_pred) * 0.5, 0))]
            if last_three.count(0) == 3:
                hand = (hand + 1) % 3  # 3 draws
            elif last_three.count(-1) == 3:
                hand = (hand + 2) % 3  # 3 losses
            elif sum(recent_half) < 0:
                # getting battered > go random; uses the current form window
                # (including the latest round) like the two checks above
                hand = random.randint(0, 2)

    # append my hand
    list_agent.append(hand)

    return hand