# **RPS Agent**



In [None]:
!pip install 'kaggle-environments>=0.1.6'

In [None]:

from __future__ import division
from kaggle_environments import evaluate, make

import random
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
%%writefile submission.py

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 26 12:11:08 2021

@author: gas
"""


from __future__ import division
import random
import itertools
import numpy as np


# Rock-Paper-Scissors lookup tables.
# beat[m] is the move that defeats m; move2str / move2num convert between
# the numeric action codes and the one-letter move names.
beat = dict(zip('RPS', 'PSR'))
move2str = dict(enumerate('RPS'))
move2num = {letter: number for number, letter in move2str.items()}


class MarkovChain():
    """Markov-chain predictor over the recent move history.

    For every history key of length ``level`` the chain keeps a decayed
    count of which move ('R', 'P' or 'S') followed that key.

    Parameters
    ----------
    type : str
        'input_oriented' or 'output_oriented'; selects what ``predict``
        returns (see ``predict``).
    beat : dict
        Maps each move to the move that defeats it.
    level : int
        Length of the history keys.
    memory : float
        Factor the counts of a row are multiplied by before each update.
        Must differ from 1 because the initial counts are
        ``1 / (1 - memory) / 3``.
    score : float
        Initial score of the model.
    score_mem : float
        Factor the score is multiplied by on every score update.
    """

    def __init__(self, type, beat, level, memory, score=0, score_mem=0.9):
        self.type = type
        self.matrix = self.create_matrix(beat, level, memory)
        self.memory = memory
        self.level = level
        self.beat = beat
        self.score = score
        self.score_mem = score_mem
        self.prediction = ''
        self.name = 'level: {}, memory: {}'.format(self.level, self.memory)
        self.last_updated_key = ''

    @staticmethod
    def create_matrix(beat, level, memory):
        """Build the initial ``{key: {'R': c, 'P': c, 'S': c}}`` table.

        Keys are all strings of ``level`` symbols drawn from the keys of
        ``beat`` (a level below 1 is treated as 1). Every count starts at
        ``1 / (1 - memory) / 3``.
        """
        symbols = list(beat)
        # itertools.product yields the keys in the same order the previous
        # incremental expansion produced, without mutating a list while
        # it is being consumed.
        keys = [''.join(combo)
                for combo in itertools.product(symbols, repeat=max(level, 1))]

        initial = 1 / (1 - memory) / 3
        return {key: {'R': initial, 'P': initial, 'S': initial} for key in keys}

    def update_matrix(self, key_lagged, response):
        """Decay the row for ``key_lagged`` and add one count for ``response``."""
        row = self.matrix[key_lagged]
        for move in row:
            row[move] = self.memory * row[move]

        row[response] += 1
        self.last_updated_key = key_lagged

    def update_score(self, inp, out):
        """Decay the score, then add -1 / 0 / +1 for the round ``out`` vs ``inp``.

        The score drops by one when ``inp`` is the move that beats ``out``,
        is unchanged on a tie and rises by one otherwise. Both arguments
        must use the same move encoding as the keys of ``beat``.
        """
        decayed = self.score * self.score_mem
        if self.beat[out] == inp:
            self.score = decayed - 1
        elif out == inp:
            self.score = decayed
        else:
            self.score = decayed + 1

    def predict(self, key_current):
        """Predict the move following ``key_current``.

        The most frequent follower of ``key_current`` is stored in
        ``self.prediction`` (a random move when all counts are equal).
        Returns that move for 'input_oriented' models and the move that
        beats it for 'output_oriented' models; any other type returns None.
        """
        probs = self.matrix[key_current]

        if max(probs.values()) == min(probs.values()):
            # Use the instance's own table rather than a module-level global
            # so the class works wherever it is imported.
            self.prediction = random.choice(list(self.beat.keys()))
        else:
            # Ties between equal counts are broken by the larger move letter,
            # exactly like the (count, move) tuple comparison used before.
            self.prediction = max(probs.items(), key=lambda kv: (kv[1], kv[0]))[0]

        if self.type == 'input_oriented':
            return self.prediction
        elif self.type == 'output_oriented':
            return self.beat[self.prediction]


class Ensembler():
    """Accumulates score-weighted votes from other predictors.

    Parameters
    ----------
    type : str
        Label identifying the model kind (the agent uses 'ensemble').
    beat : dict
        Maps each move to the move that defeats it; its keys are the moves
        that can be voted for.
    min_score : float
        Votes whose score is below this threshold are ignored.
    score : float
        Initial score of the ensembler.
    score_mem : float
        Factor the score is multiplied by on every score update.
    """

    def __init__(self, type, beat, min_score=-10, score=0, score_mem=0.9):
        self.type = type
        self.matrix = {i: 0 for i in beat}
        self.beat = beat
        self.min_score = min_score
        self.score = score
        self.score_mem = score_mem
        self.prediction = ''

    def update_score(self, inp, out):
        """Decay the score, then add -1 / 0 / +1 for the round ``out`` vs ``inp``.

        The score drops by one when ``inp`` is the move that beats ``out``,
        is unchanged on a tie and rises by one otherwise.
        """
        decayed = self.score * self.score_mem
        if self.beat[out] == inp:
            self.score = decayed - 1
        elif out == inp:
            self.score = decayed
        else:
            self.score = decayed + 1

    def update_matrix(self, pred_dict, pred_score):
        """Add ``pred_score`` times the normalised ``pred_dict`` to the votes.

        Nothing is added when ``pred_score`` is below ``min_score`` or when
        the values of ``pred_dict`` sum to zero (no information to add).
        """
        if pred_score >= self.min_score:
            # Compute the normaliser once instead of once per key.
            total = sum(pred_dict.values())
            if total:
                for key in self.matrix:
                    self.matrix[key] = (self.matrix[key]
                                        + pred_score * (pred_dict[key] / total))

    def predict(self):
        """Return the move with the highest accumulated vote.

        A random move is returned when all votes are equal. The result is
        also stored in ``self.prediction``.
        """
        if max(self.matrix.values()) == min(self.matrix.values()):
            # Use the instance's own table rather than a module-level global.
            self.prediction = random.choice(list(self.beat.keys()))
        else:
            # Ties between equal votes are broken by the larger move letter,
            # exactly like the (vote, move) tuple comparison used before.
            self.prediction = max(self.matrix.items(),
                                  key=lambda kv: (kv[1], kv[0]))[0]

        return self.prediction


class HistoryColl():
    """Rolling record of the most recent moves as one string.

    Each round appends two characters (``inp`` then ``out``); only the
    last ten characters are kept.
    """

    def __init__(self):
        self.history = ''

    def hist_collector(self, inp, out):
        """Append ``inp`` and ``out`` and keep the ten newest characters."""
        extended = self.history + inp + out
        self.history = extended[-10:]

    def create_keys(self, level):
        """Return the trailing ``level`` characters of the history."""
        newest = slice(-level, None)
        return self.history[newest]

    def create_keys_hist(self, level):
        """Return the key preceding the latest round plus that round.

        The result is ``(key, inp, out)`` where ``key`` is the ``level``
        characters just before the last two and ``inp`` / ``out`` are the
        last two characters of the history.
        """
        lagged = slice(-level - 2, -2)
        return self.history[lagged], self.history[-2], self.history[-1]



# Opponent transition statistics used by transition_agent:
# T[i, j] counts how often the opponent played j right after i,
# P is T with every non-empty row normalised to sum to one.
T = np.zeros(shape=(3, 3))
P = np.zeros_like(T)

# a1: the opponent's action one step ago
# a2: the opponent's action two steps ago
a1 = a2 = None


def transition_agent(observation, configuration):
    """Pick the next Rock-Paper-Scissors action (0 = R, 1 = P, 2 = S).

    Step 0 resets all module-level state, builds the Markov-chain and
    ensemble models and plays a random move. On later steps the opponent's
    last action is recorded in the move history and in the transition
    counts ``T`` / probabilities ``P``. Once the history already holds ten
    characters, every model is scored, updated and asked for a prediction;
    the move countering the prediction of the best model is chosen. When no
    model has a score of at least 1, the agent falls back to sampling the
    opponent's next action from ``P`` and countering it, or to a uniformly
    random move if that row of ``T`` is still empty.

    Parameters
    ----------
    observation : object
        Must expose ``step`` and, after step 0, ``lastOpponentAction``.
    configuration : object
        Unused.

    Returns
    -------
    int
        The action to play.
    """
    global T, P, a1, a2, output, history, models

    if observation.step == 0:
        # Start every episode from a clean slate so statistics gathered
        # against a previous opponent cannot leak into this one.
        T = np.zeros((3, 3))
        P = np.zeros((3, 3))
        a1, a2 = None, None

        history = HistoryColl()

        memory = [0.5, 0.6, 0.7, 0.8, 0.9, 0.93, 0.95, 0.97, 0.99]
        level = [1, 2, 3, 4]
        ensemble_min_score = [5]

        models_inp = [MarkovChain('input_oriented', beat, i[0], i[1]) for i in itertools.product(level, memory)]
        models_out = [MarkovChain('output_oriented', beat, i[0], i[1]) for i in itertools.product(level, memory)]
        models_ens = [Ensembler('ensemble', beat, i) for i in ensemble_min_score]

        models = models_inp + models_out + models_ens

        output = random.choice(list(beat.keys()))
        return move2num[output]

    # The models compare moves as 'R' / 'P' / 'S' strings, so convert the
    # numeric action once and use the string everywhere below.
    opponent_move = move2str[observation.lastOpponentAction]

    # The models are consulted only when the history was already full
    # before this round is appended.
    history_full = len(history.history) == 10
    history.hist_collector(opponent_move, output)

    a1 = observation.lastOpponentAction
    if a2 is not None:
        # Without a previous action there is no transition to count;
        # indexing with None would instead increment a whole row of T.
        T[a2, a1] += 1
        P = np.divide(T, np.maximum(1, T.sum(axis=1)).reshape(-1, 1))
    a2 = a1

    max_score = 0

    if history_full:
        for model in models:
            is_markov = model.type in ('input_oriented', 'output_oriented')

            if is_markov:
                key_hist, inp_latest, out_latest = history.create_keys_hist(model.level)
                key_curr = history.create_keys(model.level)

            if model.prediction != '':
                # Score with the string form of the opponent's move; the raw
                # integer never matches a move letter and would count every
                # round as a win.
                # NOTE(review): for 'output_oriented' models the move that
                # would be played is beat[beat[prediction]], yet
                # beat[prediction] is scored here - confirm this is intended.
                model.update_score(opponent_move, beat[model.prediction])

            if model.type == 'input_oriented':
                model.update_matrix(key_hist, inp_latest)

            elif model.type == 'output_oriented':
                model.update_matrix(key_hist, out_latest)

            elif model.type == 'ensemble':
                for mod in models:
                    if mod.type in ('input_oriented', 'output_oriented'):
                        # NOTE(review): the ensemble's own score is passed as
                        # the vote weight; mod.score may have been intended.
                        model.update_matrix(mod.matrix[mod.last_updated_key], model.score)

            if is_markov:
                predicted_input = model.predict(key_curr)
            elif model.type == 'ensemble':
                predicted_input = model.predict()

            if model.score > max_score:
                max_score = model.score
                output = beat[predicted_input]

    if max_score < 1:
        # No model is trusted yet: fall back to the transition statistics.
        # Test the raw counts rather than comparing a float row sum with 1.
        if T[a1].sum() > 0:
            predicted_action = np.random.choice([0, 1, 2], p=P[a1, :])
            fallback = int((predicted_action + 1) % 3)
        else:
            fallback = int(np.random.randint(3))
        # Store the move actually played so the next history entry is right.
        output = move2str[fallback]

    return move2num[output]


# Writing other strategies

In [None]:
%%writefile random.py

import numpy as np

def random_agent(observation, configuration):
    """Return a random action in {0, 1, 2}; both arguments are ignored."""
    action = np.random.randint(low=0, high=3)
    return int(action)

In [None]:
%%writefile markov_chain.py

import numpy as np
import pandas as pd
import random

# T[i, j]: number of times the opponent played j directly after i.
# P: T with each non-empty row scaled to sum to one.
T, P = np.zeros(shape=(3, 3)), np.zeros(shape=(3, 3))

# a1 holds the opponent's action 1 step ago,
# a2 holds the opponent's action 2 steps ago.
a1 = None
a2 = None

def transition_agent(observation, configuration):
    """Counter the opponent's most likely next action (0 = R, 1 = P, 2 = S).

    The agent counts the opponent's action-to-action transitions in ``T``
    and keeps the row-normalised probabilities in ``P``. From step 2 on it
    samples the opponent's next action from the row of ``P`` belonging to
    the opponent's last action and returns the action that beats it. A
    random action is returned on steps 0 and 1 and whenever that row has
    no observations yet.

    Parameters
    ----------
    observation : object
        Must expose ``step`` and, after step 0, ``lastOpponentAction``.
    configuration : object
        Unused.

    Returns
    -------
    int
        The action to play.
    """
    global T, P, a1, a2

    if observation.step == 0:
        # Reset the statistics so a new episode never starts with counts
        # collected against a previous opponent.
        T = np.zeros((3, 3))
        P = np.zeros((3, 3))
        a1, a2 = None, None
        return int(np.random.randint(3))

    if observation.step == 1:
        # Only one opponent action is known: remember it, nothing to count.
        a2 = observation.lastOpponentAction
        return int(np.random.randint(3))

    a1 = observation.lastOpponentAction
    if a2 is not None:
        # Indexing T with None would increment a whole row instead of a cell.
        T[a2, a1] += 1
        P = np.divide(T, np.maximum(1, T.sum(axis=1)).reshape(-1, 1))
    a2 = a1

    # Check the raw counts: an exact "row sum == 1" test on floats can fail
    # through rounding even though the row is a valid distribution.
    if T[a1].sum() > 0:
        predicted_action = np.random.choice([0, 1, 2], p=P[a1, :])
        return int((predicted_action + 1) % 3)

    return int(np.random.randint(3))

# Testing with other strategies

In [None]:
from kaggle_environments import evaluate, make
# Create the "rps" environment with debug=True and render its initial state.
env = make("rps", debug=True)
env.render()

In [None]:
env.reset()
# Play submission.py as the first agent against the "statistical" agent.
env.run(["submission.py", "statistical"])
env.render(mode="ipython", width=500, height=450)

In [None]:
env.reset()
# Play submission.py as the first agent against the "reactionary" agent.
env.run(["submission.py", "reactionary"])
env.render(mode="ipython", width=500, height=450)

In [None]:
env.reset()
# Play submission.py as the first agent against the "counter_reactionary" agent.
env.run(["submission.py", "counter_reactionary"])
env.render(mode="ipython", width=500, height=450)

In [None]:
env.reset()
# Play submission.py as the first agent against the random agent in random.py.
env.run(["submission.py", "random.py"])
env.render(mode="ipython", width=500, height=450)

# Comparison with all strategies

In [None]:

# Display names used as row / column labels of the score table.
list_names = [
    "statistical","reactionary", "counter_reactionary", "random",
    "markov_chain", "submission"
]
# Agent specifiers passed to evaluate(), in the same order as list_names.
list_agents = [
    "statistical","reactionary", "counter_reactionary", "random.py",
    "markov_chain.py", "submission.py"
]

# scores[i, j] will hold the result of agent i when playing against agent j.
scores = np.zeros((len(list_names), len(list_names)), dtype=int)

In [None]:
# Round-robin: every unordered pair of agents plays one 1000-step episode
# and both results are written into the symmetric positions of `scores`.
for ind_agent_1, ind_agent_2 in itertools.combinations(range(len(list_names)), 2):
    current_score = evaluate(
        "rps",
        [list_agents[ind_agent_1], list_agents[ind_agent_2]],
        configuration={"episodeSteps": 1000},
    )

    scores[ind_agent_1, ind_agent_2] = current_score[0][0]
    scores[ind_agent_2, ind_agent_1] = current_score[0][1]

# Label the score matrix with the agent names on both axes.
df_scores = pd.DataFrame(scores, index=list_names, columns=list_names)

# Draw the table as an annotated heatmap, one cell per pairing.
plt.figure(figsize=(len(list_agents), len(list_agents)))
sns.heatmap(
    df_scores,
    annot=True,
    cbar=False,
    cmap='coolwarm',
    linewidths=1,
    linecolor='black',
    fmt="d",
)
plt.xticks(rotation=90, fontsize=15)
plt.yticks(fontsize=15);