# **Random Agents**

In [None]:
!pip install 'kaggle-environments>=0.1.6'

In [None]:
%%writefile submission.py


import random
import operator
import numpy as np
import pandas as pd
import collections
from collections import defaultdict
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier


# Switches read by meta_sgd: 1 enables the pure-random slot / the Markov predictors.
USE_RANDOM = 1
USE_MARKOW = 1
# NOTE(review): LAST_ROUND is not referenced by any function visible in this file.
LAST_ROUND = 1000
# Round counter; meta_sgd increments it once per step after the first.
ROUND = 1
# Step size meta_sgd applies to each strategy weight after every round.
alpha = 0.01
# Number of meta_sgd strategy slots; meta_sgd adds to it on its first step.
M = 0

# Markov-chain state: T counts opponent transitions (row = previous move,
# column = next move) and P holds T normalised per row.
T = np.zeros(shape=(3, 3))
P = np.zeros(shape=(3, 3))

# Per-step records kept by markov_chain.
score_markov, opponent_actions, own_actions = [], [], []

# Conversions between the letter and the integer encoding of a move.
move2num = {
    'R': 0,
    'P': 1,
    'S': 2,
}
move2str = {number: letter for letter, number in move2num.items()}
moves = ["R", "P", "S"]

# beatedBy[x] is the move that beats x.
beatedBy = {
    "R": "P",
    "P": "S",
    "S": "R",
}

# result[a][b] is the outcome for a player showing `a` against `b`:
# 1 for a win, -1 for a loss, 0 for a tie.
result = {
    "R": {"R": 0, "P": -1, "S": 1},
    "P": {"R": 1, "P": 0, "S": -1},
    "S": {"R": -1, "P": 1, "S": 0},
}

# Lookup tables used by switching() to derive candidate moves from a letter.
beat = {
    'P': 'S',
    'S': 'R',
    'R': 'P',
}
cede = {
    'P': 'R',
    'S': 'P',
    'R': 'S',
}




def construct_local_features(rollouts):
    """Build the flat feature vector describing a short window of play.

    The vector is, in order: every step modulo 2, every step modulo 3, every
    step modulo 5, the raw steps, the own actions and the opponent actions of
    ``rollouts`` (a dict with the keys 'steps', 'actions' and 'opp-actions').
    """
    steps = rollouts['steps']
    residues = np.array([[step % k for step in steps] for k in (2, 3, 5)])
    pieces = (residues, steps, rollouts['actions'], rollouts['opp-actions'])
    # Flatten every piece before joining so the result is one 1-D vector.
    return np.concatenate([np.ravel(piece) for piece in pieces])

def construct_global_features(rollouts):
    """Return the relative frequency of each move for both players.

    The result has six entries: the share of moves 0, 1 and 2 among
    ``rollouts['actions']`` followed by the same three shares for
    ``rollouts['opp-actions']``.
    """
    frequencies = [
        np.mean([played == move for played in rollouts[key]])
        for key in ('actions', 'opp-actions')
        for move in range(3)
    ]
    return np.array(frequencies)

def construct_features(short_stat_rollouts, long_stat_rollouts):
    """Join the local features of the short window with the global move
    frequencies of the long window into a single feature vector."""
    return np.concatenate([
        construct_local_features(short_stat_rollouts),
        construct_global_features(long_stat_rollouts),
    ])

def predict_opponent_move(train_data, test_sample):
    """Fit a boosted decision-tree classifier and predict the next opponent move.

    Args:
        train_data: dict with the feature rows under 'x' and the matching
            opponent moves under 'y'.
        test_sample: 2-D array holding the feature row(s) to predict for.

    Returns:
        The array returned by ``classifier.predict(test_sample)``.
    """
    weak_learner = DecisionTreeClassifier(random_state=34)
    try:
        # scikit-learn >= 1.2 calls the weak learner `estimator`; the old
        # `base_estimator` keyword was removed in 1.4.
        classifier = AdaBoostClassifier(estimator=weak_learner, random_state=34, n_estimators=100)
    except TypeError:
        # Older scikit-learn releases only know the original keyword.
        classifier = AdaBoostClassifier(base_estimator=weak_learner, random_state=34, n_estimators=100)
    classifier.fit(train_data['x'], train_data['y'])
    return classifier.predict(test_sample)

def update_rollouts_hist(rollouts_hist, last_move, opp_last_action):
    """Append one finished round to ``rollouts_hist`` and return it.

    ``last_move`` supplies the 'step' and 'action' of our move;
    ``opp_last_action`` is the opponent's answer to it. The history dict is
    modified in place.
    """
    for hist_key, move_key in (('steps', 'step'), ('actions', 'action')):
        rollouts_hist[hist_key].append(last_move[move_key])
    rollouts_hist['opp-actions'].append(opp_last_action)
    return rollouts_hist

def warmup_strategy(observation, configuration):
    """Play a uniformly random move while recording the match history.

    On step 0 the global history is created empty; on every later step the
    previous move and the opponent's reply are appended to it. In both cases
    the global ``last_move`` is set to the move returned now.
    """
    global rollouts_hist, last_move
    action = int(np.random.randint(3))

    if observation.step != 0:
        rollouts_hist = update_rollouts_hist(rollouts_hist, last_move, observation.lastOpponentAction)
        last_move = {'step': observation.step, 'action': action}
        return action

    # First step of a match: nothing to record yet.
    rollouts_hist = {'steps': [], 'actions': [], 'opp-actions': []}
    last_move = {'step': 0, 'action': action}
    return action

def init_training_data(rollouts_hist, k):
    """Build the initial training set from the recorded history.

    A window of ``k`` consecutive rounds is slid over the history. Each window
    yields one feature row (local features of the window plus global features
    of everything up to the window's end). The row of the last window has no
    known label yet and is returned separately as the sample to predict for.

    Args:
        rollouts_hist: dict of equally long lists under 'steps', 'actions'
            and 'opp-actions'.
        k: window length in rounds.

    Returns:
        ``(data, test_sample)`` where ``data['x']`` is the list of labelled
        feature rows, ``data['y']`` the opponent moves that followed each of
        them, and ``test_sample`` the newest row reshaped to ``(1, -1)``.

    Raises:
        ValueError: if the history holds fewer than ``k`` rounds.
    """
    n_rounds = len(rollouts_hist['steps'])
    if n_rounds < k:
        raise ValueError(
            "need at least k=%d recorded rounds, got %d" % (k, n_rounds))

    # Build the rows locally instead of appending to a module-level `data`
    # dict that the caller must have created beforehand.
    rows = []
    for start in range(n_rounds - k + 1):
        stop = start + k
        short_stat_rollouts = {key: values[start:stop] for key, values in rollouts_hist.items()}
        long_stat_rollouts = {key: values[:stop] for key, values in rollouts_hist.items()}
        rows.append(construct_features(short_stat_rollouts, long_stat_rollouts))

    test_sample = rows[-1].reshape(1, -1)
    data = {
        'x': rows[:-1],
        # The label of the window ending at round i is the opponent move at i.
        'y': rollouts_hist['opp-actions'][k:],
    }
    return data, test_sample

def selectBest(s):
    """Return the index of the largest value in ``s``, breaking ties at random.

    Raises:
        IndexError: if ``s`` is empty.
    """
    if len(s) == 0:
        raise IndexError("Cannot choose from an empty sequence")
    # Compute the maximum once instead of once per element.
    top = max(s)
    return random.choice([i for i, value in enumerate(s) if value == top])

def selectBestDict(s):
    """Pick the move with the best win-minus-loss count, ties broken at random.

    ``s`` maps each move to how often it was observed. For a candidate move
    the value compared is the count of the move it beats minus the count of
    the move that beats it, both looked up through ``beatedBy``.

    Raises:
        IndexError: if ``s`` is empty.
    """
    if not s:
        raise IndexError("Cannot choose from an empty sequence")
    expected_win = {
        candidate: s[beatedBy[beatedBy[candidate]]] - s[beatedBy[candidate]]
        for candidate in s
    }
    # Compute the maximum once instead of once per key.
    top = max(expected_win.values())
    return random.choice([candidate for candidate, value in expected_win.items() if value == top])


## --- Agents --

def agent(observation, configuration):
    """Top-level agent: run every sub-strategy and play the active one's move.

    All four sub-strategies are called on every step so that each keeps its
    own history up to date. The active strategy is chosen at random on step 0
    and stored in the global ``idx_agent``. Every 50 steps, if the Markov
    strategy's own proposals lost more rounds than they won over the last 50
    rounds, the next strategy in the list becomes active.

    Returns:
        The move (0, 1 or 2) proposed by the active sub-strategy.
    """
    # The index must survive between calls, so it lives at module level.
    global idx_agent

    markov_result = markov_chain(observation, configuration)
    if isinstance(markov_result, tuple):
        markov_move, markov_scores = markov_result
    else:
        # markov_chain handed back only a move; read its per-round scores
        # from the module-level list it maintains.
        markov_move, markov_scores = markov_result, score_markov

    proposals = [
        markov_move,
        adaboost(observation, configuration),
        switching(observation, configuration),
        meta_sgd(observation, configuration),
    ]

    if observation.step == 0:
        # The proposals must exist before an index into them can be drawn.
        idx_agent = int(np.random.randint(len(proposals)))
    elif observation.step % 50 == 0:
        # The score list holds one entry per finished round, so the change in
        # total score over the last 50 rounds is the sum of its last 50 entries.
        if sum(markov_scores[-50:]) < 0:
            idx_agent = (idx_agent + 1) % len(proposals)

    return proposals[idx_agent]


def _round_score(own_action, opponent_action):
    """Return 1 if own_action beats opponent_action, -1 if it loses, 0 on a tie."""
    # With R=0, P=1, S=2 each move beats the one numbered one below it (mod 3).
    return (0, 1, -1)[(own_action - opponent_action) % 3]


def markov_chain(observation, configuration):
    """First-order Markov strategy over the opponent's moves.

    The strategy counts how often each opponent move followed each other
    opponent move, samples the opponent's next move from the observed
    transition frequencies and answers with the move that beats the sample.
    During the first five steps of every 50-step block it plays at random
    instead if its own proposals are not ahead in total score.

    Returns:
        ``(reaction, score_markov)``: the proposed move (0, 1 or 2) and the
        list holding the score of this strategy's proposal for every finished
        round (1 win, -1 loss, 0 tie). The same shape is returned on every
        path.
    """
    global T, P, score_markov, opponent_actions, own_actions

    if observation.step == 0:
        # Start of a match: drop whatever an earlier match left behind.
        T = np.zeros((3, 3))
        P = np.zeros((3, 3))
        score_markov = []
        opponent_actions = []
        own_actions = []

        reaction = int(np.random.randint(3))
        own_actions.append(reaction)
        return reaction, score_markov

    # Save opponent's previous action and score our previous proposal.
    opponent_actions.append(observation.lastOpponentAction)
    score_markov.append(_round_score(own_actions[-1], opponent_actions[-1]))

    if observation.step == 1:
        # Only one opponent move seen so far: no transition to learn from.
        reaction = int(np.random.randint(3))
    else:
        # Update the transition counts and the row-normalised probabilities.
        T[opponent_actions[-2], opponent_actions[-1]] += 1
        P = np.divide(T, np.maximum(1, T.sum(axis=1)).reshape(-1, 1))

        last_opponent = opponent_actions[-1]
        # "Losing" is judged on the running total, not on the last round alone.
        losing = sum(score_markov) <= 0

        if observation.step % 50 < 5 and losing:
            # Check every 50 moves if we are losing; if so, play random for
            # the first 5 steps of the block.
            reaction = int(np.random.randint(3))
        elif T[last_opponent].sum() > 0:
            # Test the counts rather than `sum(P[row]) == 1`, which can fail
            # through floating-point rounding.
            predicted = np.random.choice([0, 1, 2], p=P[last_opponent, :])
            reaction = int((predicted + 1) % 3)
        else:
            # No transition out of the opponent's last move observed yet.
            reaction = int(np.random.randint(3))

    own_actions.append(reaction)
    return reaction, score_markov


def adaboost(observation, configuration):
    """Predict the opponent's next move with a boosted classifier and beat it.

    While ``observation.step <= min_samples + k`` the warm-up strategy plays
    random moves and records the history. Afterwards the training set is
    extended by one row per step, a classifier is refitted on all rows, and
    the move that beats the predicted opponent move is returned.

    Returns:
        The chosen move as an int (0, 1 or 2).
    """

    # Hyperparameters
    k = 5
    min_samples = 25
    global rollouts_hist, last_move, data, test_sample

    if observation.step == 0:
        data = {'x': [], 'y': []}

    if observation.step <= min_samples + k:
        return warmup_strategy(observation, configuration)

    # update statistics
    rollouts_hist = update_rollouts_hist(rollouts_hist, last_move, observation.lastOpponentAction)

    # update training data
    if len(data['x']) == 0:
        data, test_sample = init_training_data(rollouts_hist, k)
    else:
        short_stat_rollouts = {key: rollouts_hist[key][-k:] for key in rollouts_hist}
        features = construct_features(short_stat_rollouts, rollouts_hist)
        # The previous test sample now has a known label, so it joins the
        # training rows; the labels are re-sliced to stay aligned with them.
        data['x'].append(test_sample[0])
        data['y'] = rollouts_hist['opp-actions'][k:]
        test_sample = features.reshape(1, -1)

    # predict opponents move and choose an action
    next_opp_action_pred = predict_opponent_move(data, test_sample)
    # The prediction is a one-element array; take the element explicitly
    # because int() on a non-0-d array is deprecated in NumPy >= 1.25.
    action = int((next_opp_action_pred[0] + 1) % 3)
    last_move = {'step': observation.step, 'action': action}
    return action


def switching(observation, configuration):
    """History-matching strategy that plays the best of 36 candidate moves.

    All state lives in module globals. Moves are handled as the letters
    'R', 'P' and 'S'; the returned value is converted with ``move2num``.

    On step 0 the state is initialised and a random move is returned. On
    every later step the function
      1. records the finished round in the pattern tables,
      2. scores the 36 candidates proposed on the previous step against the
         opponent's actual move,
      3. picks the index of the best-scoring candidate slot,
      4. rebuilds the 36 candidates from the pattern tables, and
      5. plays the candidate stored at the picked index.

    Returns:
        The chosen move as an int, looked up in ``move2num``.
    """

    global my_hist, both_hist, opp_hist, output, candidates, \
        performance, score_switch, both_patterns, opp_patterns, my_patterns, \
        both2_patterns, opp2_patterns, my2_patterns

    if observation.step == 0:

        # Histories as strings: both_hist holds two letters per round
        # (own move then opponent move), the other two one letter per round.
        both_hist = ""
        my_hist = ""
        opp_hist = ""

        # First-level tables: key is a history suffix, value is the
        # concatenation of every (own, opponent) letter pair played right
        # after that suffix occurred. Missing keys read as "".
        both_patterns = defaultdict(str)
        opp_patterns = defaultdict(str)
        my_patterns = defaultdict(str)

        # Second-level tables: key is a suffix of a first-level value.
        both2_patterns = defaultdict(str)
        opp2_patterns = defaultdict(str)
        my2_patterns = defaultdict(str)

        # Outcome for the first letter against the second: 1 win, -1 loss, 0 tie.
        score_switch = {'RR': 0, 'PP': 0, 'SS': 0, 'PR': 1, 'RS': 1, 'SP': 1,'RP': -1, 'SR': -1, 'PS': -1}
        output = random.choice(["R", "P", "S"])

        # With no history every candidate slot proposes the same random move.
        candidates = [output] * 36

        # Per slot: (current winning streak, cumulative score).
        performance = [(0,0)] * 36

    else:

        # Record the round just finished. The histories do not contain it
        # yet, so each key is the context that preceded the round and the
        # appended value is the pair (our move, opponent's move).
        for length in range(min(5, len(my_hist)), 0, -1):
            p = opp_patterns[opp_hist[-length:]]
            if p != "":
                # NOTE(review): len(p) counts letters while the slice below
                # is taken in pairs (2*length2) — confirm this is intended.
                for length2 in range(min(5, len(p)), 0, -1):
                    opp2_patterns[p[-2*length2:]] += output + move2str[observation.lastOpponentAction]
            opp_patterns[opp_hist[-length:]] += output + move2str[observation.lastOpponentAction]

            p = my_patterns[my_hist[-length:]]
            if p != "":
                for length2 in range(min(5, len(p)), 0, -1):
                    my2_patterns[p[-2*length2:]] += output + move2str[observation.lastOpponentAction]
            my_patterns[my_hist[-length:]] += output + move2str[observation.lastOpponentAction]

            p = both_patterns[both_hist[-2*length:]]
            if p != "":
                for length2 in range(min(5, len(p)), 0, -1):
                    both2_patterns[p[-2*length2:]] += output + move2str[observation.lastOpponentAction]
            both_patterns[both_hist[-2*length:]] += output + move2str[observation.lastOpponentAction]

        # Now extend the histories with the finished round.
        both_hist += output+move2str[observation.lastOpponentAction]
        my_hist += output
        opp_hist += move2str[observation.lastOpponentAction]

        # Score last step's candidates against the opponent's actual move:
        # the streak grows by one on a win and resets to 0 on a tie or loss;
        # the second entry accumulates the raw outcome.
        for i, c in enumerate(candidates):
            performance[i] = ({1:performance[i][0]+1, 0: 0, -1: 0}[score_switch[c+move2str[observation.lastOpponentAction]]],
                            performance[i][1]+score_switch[c+move2str[observation.lastOpponentAction]])

        # Default for every slot that the pattern tables cannot fill below.
        output = random.choice(['R', 'P', 'S'])
        candidates = [output] * 36

        # Slot whose (streak cubed + cumulative score) is largest; the first
        # such slot wins ties.
        idx = performance.index(max(performance, key=lambda x: x[0]**3+x[1]))

        # Slots 0-11: longest matching suffix of the combined history. The
        # last recorded pair after that context gives `my` and `opp`; six
        # slots are derived from them through the beat/cede tables, six more
        # from the second-level table.
        for length in range(min(5, len(my_hist)), 0, -1):
            pattern = both_patterns[both_hist[-2*length:]]
            if pattern != "":
                opp = pattern[-1]
                my = pattern[-2]
                candidates[0] = beat[opp]
                candidates[1] = cede[my]
                candidates[2] = opp
                candidates[3] = my
                candidates[4] = cede[opp]
                candidates[5] = beat[my]
                for length2 in range(min(5, len(pattern)), 0, -1):
                    pattern2 = both2_patterns[pattern[-2*length2:]]
                    if pattern2 != "":
                        my2 = pattern2[-2]
                        opp2 = pattern2[-1]
                        candidates[6] = beat[opp2]
                        candidates[7] = cede[my2]
                        candidates[8] = opp2
                        candidates[9] = my2
                        candidates[10] = cede[opp2]
                        candidates[11] = beat[my2]
                        break
                break

        # Slots 24-35: same construction keyed on our own move history.
        for length in range(min(5, len(my_hist)), 0, -1):
            pattern = my_patterns[my_hist[-length:]]
            if pattern != "":
                opp = pattern[-1]
                my = pattern[-2]
                candidates[24] = beat[opp]
                candidates[25] = cede[my]
                candidates[26] = opp
                candidates[27] = my
                candidates[28] = cede[opp]
                candidates[29] = beat[my]
                for length2 in range(min(5, len(pattern)), 0, -1):
                    pattern2 = my2_patterns[pattern[-2*length2:]]
                    if pattern2 != "":
                        my2 = pattern2[-2]
                        opp2 = pattern2[-1]
                        candidates[30] = beat[opp2]
                        candidates[31] = cede[my2]
                        candidates[32] = opp2
                        candidates[33] = my2
                        candidates[34] = cede[opp2]
                        candidates[35] = beat[my2]
                        break
                break

        # Slots 12-23: same construction keyed on the opponent's move history.
        for length in range(min(5, len(opp_hist)), 0, -1):
            pattern = opp_patterns[opp_hist[-length:]]
            if pattern != "":
                opp = pattern[-1]
                my = pattern[-2]
                candidates[12] = beat[opp]
                candidates[13] = cede[my]
                candidates[14] = opp
                candidates[15] = my
                candidates[16] = cede[opp]
                candidates[17] = beat[my]
                for length2 in range(min(5, len(pattern)), 0, -1):
                    pattern2 = opp2_patterns[pattern[-2*length2:]]
                    if pattern2 != "":
                        my2 = pattern2[-2]
                        opp2 = pattern2[-1]
                        candidates[18] = beat[opp2]
                        candidates[19] = cede[my2]
                        candidates[20] = opp2
                        candidates[21] = my2
                        candidates[22] = cede[opp2]
                        candidates[23] = beat[my2]
                        break
                break

        # Play whatever the best-performing slot proposes for this step.
        output = candidates[idx]

    return move2num[output]


def meta_sgd(observation, configuration):
    """Ensemble of Markov predictors whose scores are adapted every round.

    The ensemble has ``M`` slots. Slot 0 (when ``USE_RANDOM`` is 1) proposes a
    random move and has its score pinned to 0. When ``USE_MARKOW`` is 1 each
    order in ``markov_orders`` owns six further slots: one proposal derived
    from the counts of our own moves after the current context, one derived
    from the counts of the opponent's moves, and for each of the two the
    rotations obtained by applying ``beatedBy`` once and twice.

    After every round each slot's score is multiplied by its decay and
    increased by its weight times the outcome its proposal would have had;
    the weight itself moves by ``alpha`` times that outcome. The proposal of
    the highest-scoring slot is played (ties broken at random).

    Returns:
        The chosen move as an int, looked up in ``move2num``.
    """

    global history, weight, decay, score_meta, selected, move, M, \
            ROUND, last, markov_orders, historyCount

    if observation.step == 0:

        # Start from clean counters so that a second match in the same
        # process does not keep adding slots to M.
        M = 0
        ROUND = 1

        history = []

        if USE_RANDOM == 1:
            M += 1

        if USE_MARKOW == 1:
            markov_orders = [0,1,2,3,4,5,6]
            historyCount = {}
            M += 6 * len(markov_orders)


        weight = [1] * M
        decay = [0.85] * M

        score_meta = [0] * M
        selected = [0] * M
        move = [random.choice(moves) for i in range(M)]

    else:

        ROUND += 1
        opp_move = move2str[observation.lastOpponentAction]
        # Each history entry is (our move, opponent's move) as letters.
        history += [(last, opp_move)]
        score_meta = [ decay[i] * score_meta[i] + weight[i] * result[move[i]][opp_move] for i in range(M)]
        weight = [ weight[i] + alpha * result[move[i]][opp_move] for i in range(M)]
        index = 0

        # random optimal
        if USE_RANDOM == 1:
            move[index] = random.choice(moves)
            # adjust random optimal score to zero
            score_meta[index] = 0
            index += 1

        first_meta_index = index

        if USE_MARKOW == 1:
            # markow with meta strategies
            # Learn: count what followed the m rounds before the latest one.
            for m in markov_orders:
                if len(history) > m:
                    key = tuple(history[-m-1:-1])
                    if not (key in historyCount):
                        historyCount[key] = [{"R":0,"P":0,"S":0},{"R":0,"P":0,"S":0}]
                    historyCount[key][0][history[-1][0]] += 1
                    historyCount[key][1][history[-1][1]] += 1

            # Predict: look up the most recent m rounds as context.
            for m in markov_orders:
                if len(history) >= m:
                    # Slice from an explicit start: history[-m:] with m == 0
                    # would return the whole history instead of the empty
                    # context, so the order-0 predictor would never match.
                    key = tuple(history[len(history)-m:])
                    if key in historyCount:
                        move[index]   = selectBestDict(historyCount[key][0])
                        move[index+3] = selectBestDict(historyCount[key][1])
                    else:
                        move[index]   = random.choice(moves)
                        move[index+3] = random.choice(moves)
                else:
                    move[index]   = random.choice(moves)
                    move[index+3] = random.choice(moves)
                index += 6

        # set other meta strategies
        # Every group of three slots holds a base proposal followed by its
        # two rotations through beatedBy.
        for i in range(first_meta_index, M, 3):
            move[i+1] = beatedBy[move[i]]
            move[i+2] = beatedBy[move[i+1]]

    best = selectBest(score_meta)
    selected[best] += 1
    output = move[best]
    # Remember our move so the next step can append it to the history.
    last = output

    return move2num[output]


# Testing with other strategies

In [None]:
%%writefile random.py

import numpy as np

def random_agent(observation, configuration):
    """Baseline opponent: return a uniformly random move (0, 1 or 2)."""
    move = np.random.randint(3)
    return int(move)


In [None]:
from kaggle_environments import evaluate, make

# Create the "rps" environment with debug output enabled and show it.
env = make("rps", debug=True)
env.render()

env.reset()
# Run submission.py as the first agent against the random_agent written to
# random.py by the cell above.
# NOTE(review): a file named random.py in the working directory can shadow the
# standard-library `random` module that submission.py imports — confirm the
# import still resolves to the stdlib, or rename the file.
env.run(["submission.py", "random.py"])
env.render(mode="ipython", width=500, height=450)