# General Idea

The Nash Equilibrium in this game is of course playing randomly.

However, this will not lead to a good submission, as there are people uploading submissions with some "structure" in them that can be exploited. But if you get better at finding structure, then you become exploitable yourself.

This means, when you are losing, you should switch strategies.

Here, I introduce a high-level way of doing this: I have a simple prediction algorithm based on https://www.kaggle.com/daniello20000/a-simple-machine-learing-approach, but it could be any algorithm (frequency counting, pattern matching, ...), and I evaluate different patterns in parallel and count their fictitious scores. The different patterns are:

First: The opponent outguesses my prediction every time and wins. So, I change my strategy by adding one to the predicted move (modulo 3).
Second: Same as above, but now the opponent anticipates that I do that. So, I change my strategy by adding two to the predicted move (modulo 3).
Last: If nothing works, I switch to a random strategy.

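Afterward, I select the best of the four strategies (the plain prediction, the two shifted variants, and the random fallback), judged by their fictitious scores.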

In [None]:
%%writefile submission.py

import random
from sklearn.naive_bayes import MultinomialNB
import numpy as np

# Flat move log with two slots per round: strategy_selection writes my move
# to slot 2*k and the opponent's move to slot 2*k + 1 for round k.
history = [0] * (2 * 1001)

my_last_move = -1

# Number of candidate strategies that are scored side by side.
num_of_strategies = 4

# Move actually played in the previous round (None before the first round).
last_move_real = None
# Move each candidate strategy proposed in the previous round.
last_moves = [0] * num_of_strategies

# Running score of the real play and of each candidate strategy had it
# been played every round.
score_real = 0
score_fict = [0] * num_of_strategies

# Classifier trained incrementally by bayes_learn and queried by
# bayes_prediction.
clf = MultinomialNB()

def have_i_won(my_move, his_move):
    """Score one round from my point of view.

    Returns 0 when both moves are equal, 1 when my move equals
    ``(his_move + 1) % 3`` (the move that beats his), and -1 otherwise.
    """
    if my_move == his_move:
        outcome = 0
    elif my_move == (his_move + 1) % 3:
        outcome = 1
    else:
        outcome = -1
    return outcome

def bayes_prediction(history, step):
    """Propose the move that beats the classifier's predicted opponent move.

    For ``step <= 7`` a uniformly random move in 0..2 is returned instead.
    Otherwise the ten most recent ``history`` entries (the slots for steps
    ``step - 5`` through ``step - 1``) form a single feature row for the
    module-level ``clf``; its prediction is shifted by one modulo 3.
    """
    if step <= 7:
        return random.randint(0, 2)

    recent = history[2 * (step - 5):2 * step]
    features = np.array(recent).reshape(1, -1)

    predicted = int(clf.predict(features)[0])
    # Answer with the move that beats the predicted one.
    return (predicted + 1) % 3

def bayes_learn(history, step):
    """Feed one more training sample to the module-level classifier.

    Does nothing while ``step <= 7``. Otherwise the ten ``history`` entries
    for steps ``step - 6`` through ``step - 2`` are the features, and the
    odd slot of step ``step - 1`` (the opponent's move, as recorded by
    strategy_selection) is the label.
    """
    if step <= 7:
        return

    begin = 2 * (step - 6)
    end = 2 * (step - 1)

    features = np.array(history[begin:end]).reshape(1, -1)
    label = [history[end + 1]]

    # classes must be passed so partial_fit knows all labels up front.
    clf.partial_fit(features, label, classes=[0, 1, 2])
    
    
def strategy_selection(observation, configuration):
    """Agent entry point: play the candidate strategy with the best record.

    Args:
        observation: object exposing ``step`` and, once ``step > 0``,
            ``lastOpponentAction``.
        configuration: accepted for the agent interface; not used here.

    Returns:
        The move to play this round, which is also stored in the
        module-level ``last_move_real``.

    Side effects: updates the module-level ``history``, ``score_real``,
    ``score_fict`` and ``last_moves``, and trains ``clf`` via bayes_learn.
    """
    global last_move_real, score_real

    step = observation.step
    chosen = 0  # Index 0 is the random strategy and the default pick.

    if step > 0:
        opponent_move = observation.lastOpponentAction

        # Log the round that has just finished.
        slot = 2 * (step - 1)
        history[slot] = last_move_real
        history[slot + 1] = opponent_move

        # Incremental learning on the freshly logged round.
        bayes_learn(history, step)

        # Score what was actually played.
        score_real += have_i_won(last_move_real, opponent_move)

        # Score what every candidate would have earned and keep the leader.
        # The strict comparison means the lowest index wins ties.
        top_score = -10000
        for index, proposed in enumerate(last_moves):
            score_fict[index] += have_i_won(proposed, opponent_move)
            if score_fict[index] > top_score:
                top_score = score_fict[index]
                chosen = index

    # Proposals for this round: random, prediction, prediction shifted by
    # one, and prediction shifted by two.
    last_moves[0] = random.randint(0, 2)
    predicted_move = bayes_prediction(history, step)
    last_moves[1] = predicted_move
    last_moves[2] = (predicted_move + 1) % 3
    last_moves[3] = (predicted_move + 2) % 3

    last_move_real = last_moves[chosen]
    return last_move_real

In [None]:
%run -i submission.py

In [None]:
# Simple test suite:
class Test:
    """Minimal stand-in for the observation object handed to the agent."""

    # Action the opponent played in the previous round.
    lastOpponentAction = 0
    # Index of the current round.
    step = 0
    
def have_i_won_env(my_move, his_move):
    """Referee for the local test loop.

    Returns 0 for equal moves, 1 when ``my_move`` equals
    ``(his_move + 1) % 3``, and -1 in every other case.
    """
    if my_move == his_move:
        return 0

    beats_him = my_move == (his_move + 1) % 3
    return 1 if beats_him else -1


# Drive the agent for 1000 rounds against an opponent that picks uniformly
# at random, printing the cumulative score after every round.
# NOTE(review): `random` and `strategy_selection` are not defined in this
# cell; presumably they come from the earlier `%run -i submission.py`.
c=Test()

score = 0
for i in range(1000):
    c.step = i
    # The same object is passed as both observation and configuration.
    move = strategy_selection(c, c)
    #print(str(i) +': ' + str(move))
    # The opponent's move is drawn only after the agent has committed; the
    # agent reads it as lastOpponentAction on its next call.
    c.lastOpponentAction = random.randint(0,2)
    score += have_i_won_env(move, c.lastOpponentAction)
    print("Round ", i, " with score: ", score)