## Bayes estimator

Suppose that our opponent throws Rock=0, Paper=1, Scissors=2 according to a probability distribution that is unknown to us:
$$
X = 
\begin{cases}
0, p=p_1\\
1, p=p_2\\
2, p=1-p_1-p_2
\end{cases}
$$

Our goal is to estimate parameters $\theta=(p_1, p_2)$.

As the prior distribution $\pi(\theta)$ we could take the uniform distribution on:
$$
\Omega=\{ p_1 > 0, p_2 > 0, p_1 + p_2 \leq 1 \}
$$
 
But for simplicity, we assume prior distribution $\pi(\theta)$ is discrete - uniform on:
$$
\Omega'=\left\{ p_1 = \frac{i}{10},\ p_2 = \frac{j}{10} : i \geq 0,\ j \geq 0,\ i + j \leq 10 \right\}
$$

If we take $L(\theta, \hat{\theta}) = \mathbb{I}\{ \theta \ne \hat{\theta} \}$ as the loss function, the Bayes estimator $\theta^*=(p_1^*, p_2^*)$ is the mode of the posterior distribution:

$$
\theta^* = \operatorname*{arg\,max}_{\theta_i} \mathbb{P}\{ \theta=\theta_i \mid X_1=x_1,\dots,X_n=x_n \}
$$

We then sample a predicted opponent move $X$ from the distribution $(p_1^*, p_2^*, 1-p_1^*-p_2^*)$ and play the move that beats it, i.e. $(X + 1) \bmod 3$.

In [None]:
%%writefile bayes.py

import numpy as np

# Module-level agent state. `bayes` rebinds both names when observation.step == 0;
# until then `d` is only an all-zero placeholder, not a probability distribution.
history = []  # opponent actions appended by `bayes`, oldest first
d = np.zeros((11, 11)) # d[i, j] = P{p_1=i/10, p_2=j/10}

def get_posterior(d, history):
    '''
    Posterior over the (p_1, p_2) grid after observing the opponent's moves.

    d - numpy array, shape=(11, 11); prior with d[i, j] = P{p_1=i/10, p_2=j/10}
    history - sequence of observed actions, each 0, 1 or 2 (may be empty)

    Returns a new (11, 11) array proportional to likelihood * prior and
    normalised to sum to 1. Cells with i + j > 10 are always 0. If no cell
    has positive weight (for example an all-zero prior), the all-zero array
    is returned as is instead of dividing by zero and producing NaNs.

    Raises ValueError if history contains a value outside {0, 1, 2}.
    '''
    moves = np.asarray(history, dtype=int).ravel()
    if moves.size and (moves.min() < 0 or moves.max() > 2):
        raise ValueError('history must contain only the actions 0, 1 and 2')
    # freqs[a] = number of times action a was observed.
    freqs = np.bincount(moves, minlength=3)

    steps = np.arange(11)
    i = steps[:, None]
    j = steps[None, :]
    k = 10 - i - j          # numerator of the third probability, 1 - p_1 - p_2
    inside = k >= 0         # grid points that form a valid distribution

    p1 = i / 10.
    p2 = j / 10.
    # Clip so cells outside the simplex never feed a negative base into the
    # power; those cells are zeroed by the mask below anyway.
    p3 = np.clip(k, 0, None) / 10.

    # Multinomial likelihood of the observed counts (0**0 evaluates to 1).
    likelihood = p1 ** freqs[0] * p2 ** freqs[1] * p3 ** freqs[2]
    post_d = np.where(inside, likelihood * np.asarray(d, dtype=float), 0.0)

    total = post_d.sum()
    if total == 0:
        # Nothing to normalise: avoid the 0/0 that would fill the result with NaN.
        return post_d
    return post_d / total

def bayes(observation, configuration):
    '''
    Rock-Paper-Scissors agent that plays against the posterior-mode estimate
    of the opponent's move distribution.

    observation - object with attributes `step` and `lastOpponentAction`
    configuration - unused

    On step 0 the module-level state (`history`, `d`) is reset to an empty
    history and a uniform prior over the grid points with i + j <= 10, and a
    uniformly random move is played because nothing has been observed yet.
    On later steps the opponent's last action is recorded, the posterior is
    computed from the last 20 recorded actions, an opponent move is sampled
    from the posterior mode and the move that beats it is returned.

    Returns a Python int in {0, 1, 2}.
    '''
    global d
    global history

    if observation.step == 0:
        history = []

        # Uniform prior on every grid point (i/10, j/10) with i + j <= 10.
        grid = np.arange(11)
        d = (grid[:, None] + grid[None, :] <= 10).astype(float)
        d = d / d.sum()

        # Always hand back a real action; the original returned None here.
        return int(np.random.randint(3))

    history.append(observation.lastOpponentAction)

    post_d = get_posterior(d, history[-20:]) # we will use only last 20 rounds.

    # Posterior mode -> (p_1, p_2, 1 - p_1 - p_2).
    i, j = np.unravel_index(np.argmax(post_d), post_d.shape)
    params = np.array([i, j, 10. - (i + j)]) / 10.
    params = params * (params > 0)

    pred_action = np.random.choice(3, p=params) # sample opponent action.

    # Convert the NumPy scalar to a plain int, like the other agents return.
    # NOTE(review): presumably the environment validates actions as Python
    # ints - confirm against the kaggle-environments action schema.
    return int((pred_action + 1) % 3)

## example:

In [None]:
def get_posterior(d, history):
    '''
    Posterior over the (p_1, p_2) grid after observing the opponent's moves.

    d - numpy array, shape=(11, 11); prior with d[i, j] = P{p_1=i/10, p_2=j/10}
    history - sequence of observed actions, each 0, 1 or 2 (may be empty)

    Returns a new (11, 11) array proportional to likelihood * prior and
    normalised to sum to 1. Cells with i + j > 10 are always 0. If no cell
    has positive weight (for example an all-zero prior), the all-zero array
    is returned as is instead of dividing by zero and producing NaNs.

    Raises ValueError if history contains a value outside {0, 1, 2}.
    '''
    moves = np.asarray(history, dtype=int).ravel()
    if moves.size and (moves.min() < 0 or moves.max() > 2):
        raise ValueError('history must contain only the actions 0, 1 and 2')
    # freqs[a] = number of times action a was observed.
    freqs = np.bincount(moves, minlength=3)

    steps = np.arange(11)
    i = steps[:, None]
    j = steps[None, :]
    k = 10 - i - j          # numerator of the third probability, 1 - p_1 - p_2
    inside = k >= 0         # grid points that form a valid distribution

    p1 = i / 10.
    p2 = j / 10.
    # Clip so cells outside the simplex never feed a negative base into the
    # power; those cells are zeroed by the mask below anyway.
    p3 = np.clip(k, 0, None) / 10.

    # Multinomial likelihood of the observed counts (0**0 evaluates to 1).
    likelihood = p1 ** freqs[0] * p2 ** freqs[1] * p3 ** freqs[2]
    post_d = np.where(inside, likelihood * np.asarray(d, dtype=float), 0.0)

    total = post_d.sum()
    if total == 0:
        # Nothing to normalise: avoid the 0/0 that would fill the result with NaN.
        return post_d
    return post_d / total

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random

# Uniform prior: equal weight on every grid point (i/10, j/10) with i + j <= 10.
d = np.zeros((11, 11))
for i in range(11):
    for j in range(11 - i):
        d[i, j] = 1.
d = d / d.sum()
# Leave the loop variables as the original nested loops did.
i, j = 10, 10

# Fifteen simulated opponent moves drawn uniformly from {0, 1, 2}.
history = [random.randint(0, 2) for _ in range(15)]
# Appending one of each action guarantees all three show up in the counts;
# the extra occurrence is subtracted again.
rps_count = np.unique(np.array(history + [0, 1, 2]), return_counts=True)[1] - 1
print('RPS count:', rps_count)

# posterior:
post_d = get_posterior(d, history)

# Grid indices of the posterior mode, plus the implied third component.
params = list(np.unravel_index(np.argmax(post_d), post_d.shape))
params.append(10 - sum(params))
print('P{X=0}, P{X=1}, P{X=2} = ', np.array(params)/10.)

# Prior and posterior side by side as heat maps.
fig, axs = plt.subplots(1, 2, figsize=(10, 10))
prior_ax, posterior_ax = axs[0], axs[1]

prior_ax.imshow(d, cmap='hot')
prior_ax.set_title('prior')

posterior_ax.imshow(post_d, cmap='hot')
posterior_ax.set_title('posterior')

## Test.

In [None]:
! pip install kaggle-environments

In [None]:
%%writefile rock.py

def rock(observation, configuration):
    """Constant agent: ignore both arguments and always play action 0 (Rock)."""
    action = 0
    return action

In [None]:
%%writefile copy_opponent.py

def copy_opponent(observation, configuration):
    """Repeat the opponent's previous action; play 0 when step is not positive."""
    if not observation.step > 0:
        # No previous round to copy from.
        return 0
    return observation.lastOpponentAction

In [None]:
%%writefile random_agent.py

import random

def random_agent(observation, configuration):
    """Ignore both arguments and play a uniformly random action from {0, 1, 2}."""
    move = random.randint(0, 2)
    return move

In [None]:
from kaggle_environments import make, evaluate

In [None]:
# Constant-rock agent against the Bayes agent, with episodeSteps set to 1000.
agents = ["rock.py", "bayes.py"]
env = make("rps", configuration={"episodeSteps": 1000})
env.run(agents)
env.render(mode="ipython", width=600, height=600)

In [None]:
# Copy-opponent agent against the Bayes agent, with episodeSteps set to 1000.
agents = ["copy_opponent.py", "bayes.py"]
env = make("rps", configuration={"episodeSteps": 1000})
env.run(agents)
env.render(mode="ipython", width=600, height=600)

In [None]:
# Uniformly random agent against the Bayes agent, with episodeSteps set to 1000.
agents = ["random_agent.py", "bayes.py"]
env = make("rps", configuration={"episodeSteps": 1000})
env.run(agents)
env.render(mode="ipython", width=600, height=600)