# RPS - Rock Paper Scissors Agent - using PPL

In this notebook I will show an expirment of RPS game simulation.
I will use two players:
<ol>
    <li>Simple Player - playing according to Categorical seed (alpha vector)</li>
    <li> Inferencing Player - models the opponent as a Probalistic Program and by observations trying to infer the latent alpha vector.<br>
        Using this infered vector the player will try to exploit the simple player.<br>
</ol>
    

In [70]:
import numpy as np
import scipy as sp
import pymc3 as pm
import logging
logger = logging.getLogger('pymc3')
logger.setLevel(logging.ERROR)

### Simple Player
The simple player creates a categorical distribution (with dirichlet prior) and a given alpha vector and returns <num_of_samples> samples from this distirbution

### Smart Player

Infercing Player, takes the toolset of MCMC infernce. Each round this player takes the moves of the simple players as observations, and uses the same Probabalistic Model to posterior inference

### RPS Model
Probabalistic Program, describes the decision process.<br>
    $dir \sim Dirichlet(\alpha_1, \alpha_2, \alpha_3)$<br>
    $Action \sim Categorical(dir)$
    


In [71]:
def rps_player_model(alpha=[1, 1, 1], observed=None):
    with pm.Model() as model:
        dirichlet = pm.Dirichlet('dirichlet', a=alpha)
        phi = pm.Categorical('phi', p=dirichlet, observed=observed)
        return model

In [72]:
def beats(i):
    return (i + 1) % 3

## The hierarchy

### Base class of all players

In [73]:
class Player:
    def __init__(self, id):
        self.id = id

    def move(self, history):
        raise NotImplementedError()

### Naive player

In [74]:
class NaivePlayer(Player):
    """Naive player chooses a move according to fixed probabilities.
    """
    def __init__(self, id, p=[1, 1, 1]):
        Player.__init__(self, id)
        p = np.array(p)
        p = p/sum(p)
        self.p = p
    
    def move(self, history):
        return np.argmax(sp.stats.multinomial.rvs(1, self.p))

### Frequentist Players

In [75]:
class FrequentistPlayer(Player):
    """Frequentist player uses prior history 
    to choose a move
    """
    def __init__(self, id, counts=None):
        Player.__init__(self, id)
        if counts is None:
            counts = [1, 1, 1]
        self.counts = counts
    
    def stats(self, history):
        counts = self.counts[:]
        for id, m in history:
            if id != self.id:
                counts[m] += 1
        return np.array(counts)

In [76]:
class FixedFrequentistPlayer(FrequentistPlayer):
    def __init__(self, id, counts=None):
        FrequentistPlayer.__init__(self, id, counts)
        
    def move(self, history):
        counts = self.stats(history)
        return beats(np.argmax(counts))
    
# Example
# ffp = FixedFrequentistPlayer(1)
# print([ffp.move([(1, 1), (2, 1), (1, 0), (2, 1)]) for _ in range(10)])
# print([ffp.move([(1, 1), (2, 2), (1, 0), (2, 1), (1, 2), (2, 2)]) for _ in range(10)])

In [77]:
class RandomFrequentistPlayer(FrequentistPlayer):
    def __init__(self, id, counts=None):
        FrequentistPlayer.__init__(self, id, counts)
        
    def move(self, history):
        counts = self.stats(history)
        return beats(np.argmax(sp.stats.multinomial.rvs(n=1, p=counts/sum(counts))))
    
# Example
# rfp = RandomFrequentistPlayer(1)
# print([rfp.move([(1, 1), (2, 1), (1, 0), (2, 1)]) for _ in range(10)])
# print([rfp.move([(1, 1), (2, 2), (1, 0), (2, 1), (1, 2), (2, 2)]) for _ in range(10)])

In [60]:
class BayesianPlayer(Player):
    def __init__(self, id, alpha=None):
        Player.__init__(self, id)
        if alpha is None:
            alpha = 1, 1, 1
        self.alpha = np.array(alpha)
        
    def opponent_model(self, history):
        pass
    
    def select_action(samples):
        pass
    
    def infer(self, model):
        with model:
            trace = pm.sample(step=pm.Metropolis(), model=model, return_inferencedata=True, progressbar=False, cores=1)
            return trace
        
    def sample_from_posterior(self, model, trace, theta_var):
        with model:
            posterior_pred = pm.sample_posterior_predictive(trace, progressbar=False)
            return beats(self.select_action(posterior_pred[theta_var]))
        
    def opponent_history(self, history):
        moves_history = []
        for idx, move in history:
            if idx != self.id:
                moves_history.append(move)
        return moves_history
    
    def move(self, history):
        history = self.opponent_history(history)
        opponent_model = self.opponent_model(history)
        trace = self.infer(opponent_model)
        return self.sample_from_posterior(opponent_model, trace, self.get_theta_var())

In [61]:
class CategoricalBaysianPlayer(BayesianPlayer):
    def __init__(self, id, alpha = None):
        BayesianPlayer.__init__(self, id, alpha)
    
    def opponent_model(self, history):
        with pm.Model() as model:
            dirichlet = pm.Dirichlet('dirichlet', a=self.alpha)
            phi = pm.Categorical('phi', p=dirichlet, observed=history)
            return model
    
    def get_theta_var(self):
        return 'phi'

In [78]:
class FixedCategoricalBaysianPlayer(CategoricalBaysianPlayer):
    def __init__(self, id, alpha = None):
        CategoricalBaysianPlayer.__init__(self, id, alpha)
    
    def select_action(self, samples):
        samples = samples.reshape(-1)
        counts = [0, 0, 0]
        # return most common sample
        for m in samples:
            counts[m] += 1
        return np.argmax(np.array(counts))
        
# Example
# fcbp = FixedCategoricalBaysianPlayer(1)
# print([fcbp.move([(1, 1), (2, 1), (1, 0), (2, 1)]) for _ in range(10)])
# print([fcbp.move([(1, 1), (2, 2), (1, 0), (2, 1), (1, 2), (2, 2)]) for _ in range(10)])

In [79]:
class RandomCategoricalBaysianPlayer(CategoricalBaysianPlayer):
    def __init__(self, id, alpha = None):
        CategoricalBaysianPlayer.__init__(self, id, alpha)
    
    def select_action(self, samples):
        samples = samples.reshape(-1)
        counts = [0, 0, 0]
        # return most common sample
        for m in samples:
            counts[m] += 1
        counts = np.array(counts)
        return np.argmax(sp.stats.multinomial.rvs(n=1, p=counts/sum(counts)))
        
# Example
# rcbp = RandomCategoricalBaysianPlayer(1)
# print([rcbp.move([(1, 1), (2, 1), (1, 0), (2, 1)]) for _ in range(10)])
# print([rcbp.move([(1, 1), (2, 2), (1, 0), (2, 1), (1, 2), (2, 2)]) for _ in range(10)])

In [80]:
ROCK = 0
PAPER = 1
SCISSORS = 2
def score(m1, m2):
    # ROCK < PAPER < SCISSORS < ROCK
    if m1==m2:
        return 0
    score = -1
    if m1 > m2:
        m1, m2 = m2, m1
        score = -score
    if m2 - m1 == 2:
        score = -score
    return score

def summerize_score(scores):
    first_wins = scores.count(1)
    ties = scores.count(0)
    second_wins = scores.count(-1)
    return first_wins, ties, second_wins

def game(player1, player2, n=20):
    # TODO play the game with two players
    history = []
    scores = []
    for i in range(n):
        m1 = player1.move(history)
        history.append([1, m1])
        m2 = player2.move(history)
        history.append([2, m2])
        scores.append(score(m1, m2))
    return scores

### Game between random frequentist player and naive player

In [86]:
rfp = RandomFrequentistPlayer(1)
nap = NaivePlayer(2)
play_game_and_print_summery(rfp,nap)

[1, 0, -1, 0, 0, -1, 0, 1, 1, -1, 0, 1, -1, 1, 0, -1, 0, 1, -1, 0]
first wins:6 ties: 8 second wins: 6


### Game between categorical bayesian player and naive player

In [87]:
rcbp = RandomCategoricalBaysianPlayer(1)
nap = NaivePlayer(2)
play_game_and_print_summery(rcbp,nap)

The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.


[1, -1, 1, 0, 0, 0, 0, 1, -1, 0, 1, -1, -1, -1, 0, -1, 1, -1, 0, -1]
first wins:5 ties: 7 second wins: 8


### Game between random frequentist player and categorical baysian player

In [88]:
rfp = RandomFrequentistPlayer(1)
rcbp = RandomCategoricalBaysianPlayer(2)
play_game_and_print_summery(rfp,rcbp)

The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.


[0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, -1, 1, 0, 1, 0, 1, -1, -1, -1]
first wins:3 ties: 11 second wins: 6


### Game between random frequentist player and fixed frequentist player

In [89]:
rfp = RandomFrequentistPlayer(1)
ffp = FixedFrequentistPlayer(2)
play_game_and_print_summery(rfp,ffp)

[-1, 0, -1, -1, 1, -1, 1, -1, 1, 1, -1, -1, -1, -1, 0, 1, -1, 1, -1, -1]
first wins:6 ties: 2 second wins: 12


### Game between fixed baysien player and naive player

In [90]:
fcbp = FixedCategoricalBaysianPlayer(1)
nap = NaivePlayer(2)
play_game_and_print_summery(fcbp,nap)

The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The 

[1, 0, 1, 0, 0, -1, 1, 0, 1, 0, 1, 0, 0, 1, 1, -1, -1, -1, 1, -1]
first wins:8 ties: 7 second wins: 5


### Game between fixed baysien player and exploitable naive player

In [93]:
fcbp = FixedCategoricalBaysianPlayer(1)
nap = NaivePlayer(2, p=[5, 1, 1])
play_game_and_print_summery(fcbp,nap)

The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
The 

[1, 1, 1, 0, 1, 1, -1, -1, -1, 0, -1, 1, 1, 1, 1, 1, 1, -1, 1, 1]
first wins:13 ties: 2 second wins: 5


### Posterior Infernce
using PPL infernce - Metropolis Hasting Algorithm due to the fact that the distribution is discrete

In [14]:
def infer(model):
    with model:
        trace = pm.sample(step=pm.Metropolis(), model=model, return_inferencedata=True, progressbar=False)
        return trace

### Predictive Posterior Sampling
sampling from the posterior and return the most common action at each stage<br>
The smart player using this sampling to play

In [15]:
def sample_from_posterior(model, trace):
    with model:
        posterior_pred = pm.sample_posterior_predictive(trace, progressbar=False)
        median_over_samples = np.median(posterior_pred['phi'], axis=0)
        return median_over_samples

### Sampling from the model without observations
The simple player using this sampling.<br> given alpha vector it's draw samples from the distribtuion

In [16]:
def sample_from_prior(model, num_of_samples):
    with model:
        samples = pm.sample_prior_predictive(num_of_samples)['phi']
        return samples

### Simulation
In the expirement we will look at those two player playing. and will examine the results

### Some aux function for RPS
Rock Paper Scissors is popular game.
With 3 Actions (Rock , Paper , Scissors) each action lose and wins exactly other action

In [17]:
def beats(i):
    return (i + 1) % 3

In [18]:
from enum import IntEnum

class RPS(IntEnum):
    ROCK = 0,
    PAPER = 1,
    SCISSORS = 2
    
def get_result(first_player, second_player):
    if first_player == second_player:
        return 0
    elif (first_player == RPS.ROCK and second_player == RPS.SCISSORS) or (
            first_player == RPS.PAPER and second_player == RPS.ROCK) or (
            first_player == RPS.SCISSORS and second_player == RPS.PAPER):
        return 1
    else:
        return -1

### Simulator
We run <num_of_simulations> simulations.<br>
Each simulation the simple player plays number of actions from the Probalistic Distribution with alpha vector as parameter.<br>
The Smart player infer about the observations of the previous round and suggest <num_of_samples> action.<br> The Simple player does the same with constant distribution and the simulator compare the results and update the number.<br>
In the end we look at the expactation of each player to win and the ties.<br>
We check if the smart player is realy "smarter" then the simple player

In [19]:
def simulate_with_latent_alpha(num_of_simulations=10, alpha=[1, 1, 1]):
    total_smart_player_wins = 0
    total_simple_player_wins = 0
    total_ties = 0

    simple_player_observation = []

    for i in range(num_of_simulations):
        # Learning phase
        simple_player = rps_player_model(alpha=alpha)

        if len(simple_player_observation) == 0:
            simple_player_observations = sample_from_prior(simple_player, num_of_samples=10)

        # gets a list of observed values and returns the distribution of probable action
        smart_player = rps_player_model(observed=simple_player_observations)
        trace = infer(smart_player)

        smart_player_next_moves = sample_from_posterior(smart_player, trace)
        smart_player_next_moves = list(map(beats, smart_player_next_moves))

        # Evaluation phase
        simple_player_next_moves = sample_from_prior(simple_player, num_of_samples=10)

        smart_player_wins = 0
        simple_player_wins = 0
        ties = 0

        for j in range(len(simple_player_next_moves)):
            result = get_result(smart_player_next_moves[j], simple_player_next_moves[j])
            if result > 0:
                smart_player_wins += 1
            elif result < 0:
                simple_player_wins += 1
            else:
                ties += 1
        total_smart_player_wins += smart_player_wins
        total_simple_player_wins += simple_player_wins
        total_ties += ties
        print(f'in simulation {i}: wins: {smart_player_wins}, loses: {simple_player_wins}, ties: {ties}')
    print(
        f'For opponent\'s alpha vector: {alpha} averages in all simulations is wins: {total_smart_player_wins / num_of_simulations} '
        f' loses:{total_simple_player_wins / num_of_simulations} ties:{total_ties / num_of_simulations}')

### Expirements
I will check the results of different alpha played by the simple player

alpha = [1, 10, 10] (playing less Rock)

In [20]:
simulate_with_latent_alpha(alpha=[1, 10, 10])

NameError: name 'dirichlet' is not defined

In [None]:
simulate_with_latent_alpha(alpha=[10, 6, 1])

In [None]:
simulate_with_latent_alpha(alpha=[1, 6, 1])

In [None]:
simulate_with_latent_alpha(alpha=[1, 3, 5])

In [None]:
simulate_with_latent_alpha(alpha=[1, 1, 1])

### Summery
we can see from the expirments that the "smart" player able to exploit the simple opponent.<br> As the opponent is farther from complete random strategy we succeed to exploit it better<br> And when it plays complete random the smart player do the same and the results are even
