# Rock Paper Scissors - Naive Bayes

This is a Naive Bayes implementation of Rock Paper Scissors.

It keeps track of previously seen move patterns, together with the opponent's move that followed each one, and computes the log-likelihood of each pattern repeating.

In [None]:
%%writefile submission.py
from collections import defaultdict
from itertools import chain, combinations
import random
import sys
from typing import *

import numpy as np
from pydash import flatten


class RPSNaiveBayes():
    """Pattern-counting (Naive Bayes style) agent for Rock Paper Scissors.

    Every turn the agent records, for each configured combination of history
    streams, which opponent move followed each recent context of moves.  To
    choose a move it looks up the contexts that are current right now, sums a
    log-likelihood score per opponent move over all of them, converts the
    scores into probabilities, samples an expected opponent move from them and
    plays the move that beats it.

    Histories are stored most-recent-first (index 0 is the latest round).
    """

    def __init__(self, max_memory=20, verbose=True):
        """Create an agent with empty history and memory.

        Args:
            max_memory: upper bound applied to the history length when
                deciding how many context lengths to generate.
            verbose: when true, ``agent`` prints a diagnostic line per step.
        """
        self.max_memory = max_memory
        self.verbose    = verbose
        self.history = {
            "opponent": [],
            "rotn":     [],
            "expected": [],
            "action":   [],
        }
        # 'rotn' and 'expected' are recorded in history but are not currently
        # used as pattern sources; add them here to include them.
        self.root_keys = ['action','opponent']
        # Every non-empty combination of root keys, joined with commas:
        # ['action', 'opponent', 'action,opponent']
        self.keys = [
            ",".join(combo)
            for n in range(1,len(self.root_keys)+1)
            for combo in combinations(self.root_keys, n)
        ]
        # memory[keys][context] -> counts of the opponent move (0, 1, 2)
        # that followed that context.
        self.memory = {
            key: defaultdict(lambda: np.array([0,0,0]))
            for key in self.keys
        }

    def __call__(self, obs, conf):
        """Alias for ``agent`` so the instance itself can be used as a callable."""
        return self.agent(obs, conf)


    # obs  {'remainingOverageTime': 60, 'step': 1, 'reward': 0, 'lastOpponentAction': 0}
    # conf {'episodeSteps': 10, 'actTimeout': 1, 'runTimeout': 1200, 'signs': 3, 'tieRewardThreshold': 20, 'agentTimeout': 60}
    def agent(self, obs, conf):
        """Update the model with the last round and return the next move.

        Args:
            obs: observation exposing ``step`` and, for ``step > 0``,
                ``lastOpponentAction``.
            conf: configuration exposing ``signs``.

        Returns:
            The chosen action as a plain ``int``.
        """
        self.update_state(obs, conf)

        views          = self.get_current_views()
        log_likelihood = self.get_log_likelihood(views)
        probability    = self.get_probability(log_likelihood)

        # Sample the opponent's expected move, then play the move that beats it.
        expected = random.choices( population=[0,1,2], weights=probability, k=1 )[0]
        action   = int(expected + 1) % conf.signs
        self.history['expected'].insert(0, expected)
        self.history['action'].insert(0, action)

        if self.verbose:
            print(f'step = {obs.step:4d} | action = {action} | expected = {expected} | probability', probability.round(3), 'log_likelihood', log_likelihood.round(3))

        return int(action)


    def update_state(self, obs, conf):
        """Record the opponent's last move and count the patterns it completes.

        On ``step > 0`` the opponent's move (modulo ``conf.signs``) is pushed
        onto the history, together with its rotation relative to our own last
        action when one is recorded.  Then, for every key combination, each
        context that preceded the latest opponent move has its counter for
        that move incremented.
        """
        if obs.step > 0:
            opponent = obs.lastOpponentAction % conf.signs
            self.history['opponent'].insert(0, opponent)
            # Only computable when we have an own action to compare against;
            # reduced modulo signs so it always lies in range(signs).
            if self.history['action']:
                rotn = (opponent - self.history['action'][0]) % conf.signs
                self.history['rotn'].insert(0, rotn)

        for keys, table in self.memory.items():
            for value, path in self.get_new_memories(keys):
                table[path][value] += 1


    def get_key_min_length(self, keys: str) -> int:
        """Return the length of the shortest history among comma-joined ``keys``."""
        return min( len(self.history[key]) for key in keys.split(',') )


    def _context_paths(self, keys: str, start: int) -> List[Tuple[int, ...]]:
        """Build contexts of increasing length beginning at history index ``start``.

        For each length ``1 .. depth - 2`` (``depth`` being the shortest
        relevant history length capped at ``max_memory``) the slices
        ``history[key][start:start + length]`` of every key are concatenated
        in key order into one tuple.
        """
        names = keys.split(',')
        depth = min(self.get_key_min_length(keys), self.max_memory)
        return [
            tuple(chain.from_iterable(
                self.history[name][start:start + length] for name in names
            ))
            for length in range(1, depth - 1)
        ]


    def get_new_memories(self, keys: str) -> List[Tuple[int, Tuple[int, ...]]]:
        """Return ``(value, context)`` pairs to be counted for ``keys``.

        ``value`` is the opponent's most recent move; each ``context`` is made
        of the moves that preceded it (history index 1 onwards).  Returns an
        empty list when no opponent move has been recorded yet.
        """
        if not self.history["opponent"]:
            return []
        value = self.history["opponent"][0]
        return [ (value, path) for path in self._context_paths(keys, start=1) ]


    def get_current_views(self) -> Dict[str, List[Tuple[int, ...]]]:
        """Return, per key combination, the contexts that are current right now.

        The views start at history index 0 and have exactly the same layout
        and lengths as the contexts stored by ``get_new_memories``, so a view
        taken now is the context that will be counted once the opponent's
        next move is known.
        """
        return {
            keys: self._context_paths(keys, start=0)
            for keys in self.memory
        }


    def get_log_likelihood(self, views: Dict[str, List[Tuple[int, ...]]]) -> np.ndarray:
        """Sum, over all views, a log-odds score for each opponent move.

        For a view with counts ``c`` the counts are scaled by
        ``3 ** len(view)`` (longer contexts weigh more), smoothed by adding 1,
        and move ``a`` scores ``log(w[a]) - log(w[b] + w[c])``.  Views that
        were never stored count as all-zero and are looked up without being
        inserted into memory.

        Args:
            views: mapping from key combination to a list of context tuples;
                combinations missing from the mapping contribute nothing.

        Returns:
            Array of three summed scores, one per opponent move.
        """
        log_likelihoods = np.zeros(3)
        unseen = np.zeros(3)
        for keys, table in self.memory.items():
            for path in views.get(keys, ()):
                counts = table.get(path, unseen)    # .get() avoids defaultdict insertion
                # Float arithmetic so large exponents cannot overflow int64.
                weighted = counts * (3.0 ** len(path)) + 1.0    # Laplacian Smoothing
                # Sum of the two other moves, written out explicitly to avoid
                # the cancellation a "total - self" formulation would suffer.
                others = weighted[[1,2,0]] + weighted[[2,0,1]]
                log_likelihoods += np.log(weighted) - np.log(others)

        return log_likelihoods


    def get_probability(self, log_likelihood: np.ndarray) -> np.ndarray:
        """Convert log-likelihood scores into probabilities that sum to 1.

        The maximum score is subtracted before exponentiating, so very large
        or very small scores neither overflow to infinity nor underflow to an
        all-zero vector.  Non-finite inputs are clamped first (NaN to 0).
        """
        limit  = np.finfo(float).max / 4
        scores = np.nan_to_num(
            np.asarray(log_likelihood, dtype=float),
            nan=0.0, posinf=limit, neginf=-limit,
        )
        weights = np.exp(scores - np.max(scores))
        return weights / np.sum(weights)
            
    
    
# One shared agent object, created when the module is loaded, so that the
# history and memory it accumulates persist from one call to the next.
instance = RPSNaiveBayes()
def kaggle_agent(obs, conf):
    """Forward the observation and configuration to the shared agent instance."""
    return instance(obs, conf)


In [None]:
%run submission.py

# Evaluation

In [None]:
from kaggle_environments import make
import random
agent = RPSNaiveBayes(verbose=False)


def cycling_opponent(obs, conf):
    """Opponent that plays the current step number modulo the number of signs."""
    return obs.step % conf.signs


# Play a 100-step episode of submission.py against the cycling opponent.
env = make("rps", configuration={"episodeSteps": 100}, debug=True)
env.run(["submission.py", cycling_opponent])
# Alternative match-ups:
# env.run(["submission.py", '../input/rock-paper-scissors-xgboost/submission.py'])
# env.run(["submission.py", 'submission.py'])
env.render(mode="ipython", width=600, height=600)

# Further Reading

This notebook is part of a series exploring Rock Paper Scissors:

Predetermined
- [PI Bot](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-pi-bot)
- [Anti-PI Bot](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-anti-pi-bot)
- [De Bruijn Sequence](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-de-bruijn-sequence)

RNG
- [Random Agent](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-random-agent)
- [Random Seed Search](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-random-seed-search)
- [RNG Statistics](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-rng-statistics)

Opponent Response
- [Anti-Rotn](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-anti-rotn)
- [Sequential Strategies](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-sequential-strategies)

Statistical 
- [Weighted Random Agent](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-weighted-random-agent)
- [Statistical Prediction](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-statistical-prediction)
- [Anti-Rotn Weighted Random](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-anti-rotn-weighted-random)

Memory Patterns
- [Naive Bayes](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-naive-bayes)
- [Memory Patterns](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-memory-patterns)

Decision Tree
- [XGBoost](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-xgboost)
- [Multi Stage Decision Tree](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-multi-stage-decision-tree)
- [Decision Tree Ensemble](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-decision-tree-ensemble)

Ensemble
- [Multi Armed Stats Bandit](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-multi-armed-stats-bandit)

RoShamBo Competition Winners
- [Iocaine Powder](https://www.kaggle.com/jamesmcguigan/rps-roshambo-comp-iocaine-powder)
- [Greenberg](https://www.kaggle.com/jamesmcguigan/rock-paper-scissors-greenberg)