# Purple Puppy RPS bot

![pup](https://purplepuppy.com/puphooray.png)

The main predictors are of the history matching type, similar to the `rfind` bot or variants.

We keep track of:

* the last time the latest sequence of your moves was seen.
* the last time the latest sequence of my moves was seen.
* the last time the latest sequence of a combination of my moves and your moves was seen.
* the first time the latest sequence of your moves was seen.
* the first time the latest sequence of my moves was seen.
* the first time the latest sequence of a combination of my moves and your moves was seen.

For each of these, we look at what was played next. We store the next move you played and the next move I played, as well as the distribution of moves that were played next.

Sequences are matched for lengths of 8, 4, 2, 1.

Then we have a variety of metapredictors that score the likelihood of seeing each of the 3 rotations of the predictors. These use varying types of decaying frequency counting of how much each predictor wins.

A metametapredictor then ranks the metapredictors.

## Future ideas

* We can implement a metapredictor based on history matching rather than simple decaying frequency counting.
* We can get rid of useless metapredictors
* We can consider weighted random sampling instead of picking the top one
* We can use better predictors (LSTM etc)


In [None]:
%%writefile pup.py

# for discussion, see https://daniel.lawrence.lu/programming/rps

import random

# Move histories, one list per view of the game:
#   hist[0] -- my moves, hist[1] -- your moves,
#   hist[2] -- both moves of a round folded into one number.
hist = [[] for _ in range(3)]

# Lookup tables keyed by a tuple of recent history (one dict per view above).
dicts_last = [{} for _ in range(3)]   # what was played after the latest sighting
dicts_first = [{} for _ in range(3)]  # what was played after the first sighting
dicts_freq = [{} for _ in range(3)]   # counts of what was played afterwards

# Maximum sequence length used by each family of history-matching predictors.
max_dict_keys = [8, 4, 2, 1]
last_move = 0

# 18 base predictions (3 views x 6 kinds) per entry of max_dict_keys; every
# score/prediction table below is three times as long to hold the rotations.
n_pred = 18 * len(max_dict_keys)
n_meta = 8

# p_score[m][i]: score that metapredictor m assigns to rotated predictor i.
p_score = [[5] * (n_pred * 3) for _ in range(n_meta)]
# m_score[i]: score of rotated metapredictor i.
m_score = [5] * (n_meta * 3)

# Start from random guesses until real history is available.
predictions = [random.choice([0, 1, 2]) for _ in range(n_pred * 3)]
meta_predictions = [random.choice([0, 1, 2]) for _ in range(n_meta * 3)]


def beat(x):
    """Return the next move in the cycle 0 -> 1 -> 2 -> 0.

    This is the move that is treated as beating move ``x``.
    """
    successor = x + 1
    return successor % 3


def maxind(a):
    """Return the index of the largest element of ``a`` (first one on ties).

    Raises ``ValueError`` when ``a`` is empty.
    """
    return max(range(len(a)), key=a.__getitem__)


def predict():
    """Build the full list of base predictions for the coming round.

    For every maximum length ``m`` in ``max_dict_keys`` and every history
    view ``k`` (0 = my moves, 1 = your moves, 2 = combined), the longest
    suffix of ``hist[k]`` of at most ``m`` moves that has been recorded before
    is looked up, and six predictions are derived from it:

    * slots ``6k + 0`` / ``6k + 1``: my / your move after its first sighting,
    * slots ``6k + 2`` / ``6k + 3``: my / your move after its latest sighting,
    * slots ``6k + 4`` / ``6k + 5``: my / your most frequent following move.

    Slots without a match keep a random guess.  The first ``n_pred`` entries
    are then extended with their two rotations (``beat`` applied once and
    twice), so the result has ``3 * n_pred`` entries.

    Returns:
        list[int]: predictions, each one of 0, 1, 2.
    """
    n_keys = len(max_dict_keys)
    preds = [random.choice([0, 1, 2]) for _ in range(n_pred * 3)]

    for mi, m in enumerate(max_dict_keys):
        for k in range(3):
            longest = min(m, len(hist[k]))
            # Count lengths down to 1, never 0: ``seq[-0:]`` is the *whole*
            # list, not an empty suffix, so a zero length would silently
            # look up the entire history instead of a short sequence.
            for length in range(longest, 0, -1):
                t = tuple(hist[k][-length:])
                if t not in dicts_first[k]:
                    continue
                base = 6 * k
                for j in range(2):
                    preds[n_keys * (base + j) + mi] = dicts_first[k][t][j]
                    preds[n_keys * (base + j + 2) + mi] = dicts_last[k][t][j]
                counts = dicts_freq[k][t]
                # A little noise breaks ties between equally frequent moves.
                preds[n_keys * (base + 4) + mi] = maxind(
                    [d + random.random() * 0.1 for d in counts[:3]])
                preds[n_keys * (base + 5) + mi] = maxind(
                    [d + random.random() * 0.1 for d in counts[3:]])
                break  # the longest match wins

    # Append the two rotations of every base prediction.
    for p in range(2 * n_pred):
        preds[p + n_pred] = beat(preds[p])

    return preds


def metapredict():
    """Refresh ``meta_predictions`` in place and return the favoured one.

    Metapredictors come in pairs: the even one forwards the prediction whose
    score is highest in its own ``p_score`` row, the odd one forwards
    ``beat`` of its best-scoring prediction.  The first ``n_meta`` entries are
    then extended with their two rotations, and the entry with the highest
    (slightly randomised) ``m_score`` is returned.
    """
    for even in range(0, n_meta, 2):
        odd = even + 1
        top_even = maxind(p_score[even])
        top_odd = maxind(p_score[odd])
        meta_predictions[even] = predictions[top_even]
        meta_predictions[odd] = beat(predictions[top_odd])

    # Fill in the two rotations of every metaprediction.
    for src in range(2 * n_meta):
        meta_predictions[src + n_meta] = beat(meta_predictions[src])

    # Small noise so that ties in m_score are broken at random.
    noisy_scores = [score + random.random() * 0.1 for score in m_score]
    return meta_predictions[maxind(noisy_scores)]


def update(move, op_move):
    """Record the round that was just played and rescore all predictors.

    Args:
        move: the move I played in the previous round (0, 1 or 2).
        op_move: the move the opponent played in that round; when it is
            ``None`` a random move is substituted.

    Side effects:
        * For every history view and every suffix length from 1 up to the
          largest entry of ``max_dict_keys``, stores ``(move, op_move)`` in
          ``dicts_last`` / ``dicts_first`` and bumps the counters in
          ``dicts_freq`` (indices 0-2 count my move, 3-5 the opponent's).
        * Appends the round to the three lists in ``hist``.
        * Updates ``p_score`` from the current ``predictions`` and
          ``m_score`` from the current ``meta_predictions``.
    """
    if op_move is None:
        op_move = random.choice([0, 1, 2])

    outcome = (move, op_move)
    longest = min(max(max_dict_keys), len(hist[0]))
    # Lengths start at 1: ``seq[-0:]`` is the whole list, so a zero length
    # would key the tables on the entire history instead of a short suffix.
    for length in range(1, longest + 1):
        for k in range(3):
            t = tuple(hist[k][-length:])
            dicts_last[k][t] = outcome
            dicts_first[k].setdefault(t, outcome)
            counts = dicts_freq[k].setdefault(t, [0] * 6)
            counts[move] += 1
            counts[op_move + 3] += 1

    hist[0].append(move)
    hist[1].append(op_move)
    hist[2].append(move * 3 + op_move)

    for i in range(n_pred * 3):
        pp = predictions[i]
        bpp = beat(pp)
        bbpp = beat(beat(pp))

        # Rows 0/1: exponential decay, symmetric reward/penalty, scored
        # against the opponent's move (even row) or my own move (odd row).
        p_score[0][i] = 0.9 * p_score[0][i] + ((op_move == pp) -
                                               (op_move == bbpp)) * 3
        p_score[1][i] = 0.9 * p_score[1][i] + ((move == pp) -
                                               (move == bbpp)) * 3

        # Rows 2/3: faster decay with a separate weight for each outcome.
        p_score[2][i] = 0.87 * p_score[2][i] + (op_move == pp) * 3.3 - (
            op_move == bpp) * 1.2 - (op_move == bbpp) * 2.3
        p_score[3][i] = 0.87 * p_score[3][i] + (move == pp) * 3.3 - (
            move == bpp) * 1.2 - (move == bbpp) * 2.3

        # Rows 4/5: accumulate on a hit, reset to zero when the played move
        # equals beat(beat(prediction)).
        p_score[4][i] = (p_score[4][i] +
                         (op_move == pp) * 3) * (1 - (op_move == bbpp))
        p_score[5][i] = (p_score[5][i] + (move == pp) * 3) * (1 -
                                                              (move == bbpp))

        # Rows 6/7: slow decay towards a baseline plus a small bonus.
        # NOTE(review): row 6 tests ``bbpp`` while row 7 tests ``pp``; kept
        # as in the original -- confirm the asymmetry is intended.
        p_score[6][i] = (p_score[6][i] - 1) / 1.05 + 2 + (op_move == bbpp) * 1
        p_score[7][i] = (p_score[7][i] - 1) / 1.05 + 2 + (move == pp) * 1

    for i in range(n_meta * 3):
        mp = meta_predictions[i]
        m_score[i] = 0.97 * (m_score[i] + (op_move == mp) -
                             (op_move == beat(beat(mp))))


def run(observation, configuration):
    """Agent entry point: return this bot's move (0, 1 or 2) for the round.

    Args:
        observation: object exposing ``step`` and, after the first round,
            ``lastOpponentAction``.
        configuration: unused.

    The very first move is random.  Afterwards the previous round is fed to
    ``update``, fresh predictions are computed, and the bot plays the move
    that beats the favoured metaprediction.  It plays randomly instead while
    no metapredictor scores at least 0.07, or while the history is still
    shorter than a random threshold between 20 and 40 rounds.
    """
    global last_move, predictions

    if observation.step == 0:
        last_move = random.choice([0, 1, 2])
        return last_move

    update(last_move, observation.lastOpponentAction)
    predictions = predict()
    choice = beat(metapredict())

    no_confidence = max(m_score) < 0.07
    if no_confidence or random.randint(20, 40) > len(hist[0]):
        choice = random.choice([0, 1, 2])

    last_move = choice
    return choice


We can now run our bot against a simple bot, such as this one (an original bot that I just wrote).


In [None]:
%%writefile rfind.py

import random
# Sequence of moves passed to update() as ``move`` (this bot's own moves).
hist = list()
# Maps a tuple of recent ``hist`` entries to the opponent move stored with it.
dict_last = dict()
# Upper bound on how many history entries are considered per lookup.
max_dict_key = 10
# The move this bot played most recently.
last_move = 0


def beat(x):
    """Return the move that follows ``x`` in the cycle 0 -> 1 -> 2 -> 0."""
    shifted = x + 1
    return shifted % 3


def predict():
    """Return the opponent move stored for the longest known history suffix.

    Suffixes of ``hist`` are tried from ``max_dict_key`` entries (or the
    whole history, if shorter) down to a single entry.  When none of them is
    present in ``dict_last``, a random move is returned.
    """
    longest = min(len(hist), max_dict_key)
    # Stop at length 1: ``hist[-0:]`` would be the entire history rather
    # than an empty/short suffix, so a zero length must never be used.
    for length in range(longest, 0, -1):
        t = tuple(hist[-length:])
        if t in dict_last:
            return dict_last[t]
    return random.choice([0, 1, 2])


def update(move, op_move):
    """Append ``move`` to ``hist`` and remember ``op_move`` for its suffixes.

    Every suffix of the updated history, from a single entry up to
    ``max_dict_key`` entries, is mapped to ``op_move`` in ``dict_last``,
    overwriting any earlier value.
    """
    hist.append(move)
    longest = min(len(hist), max_dict_key)
    # Lengths run 1..longest; a zero length would make ``hist[-0:]`` select
    # the whole history instead of a suffix.
    for length in range(longest, 0, -1):
        dict_last[tuple(hist[-length:])] = op_move


def run(observation, configuration):
    """Agent entry point: return this bot's move (0, 1 or 2) for the round.

    Args:
        observation: object exposing ``step`` and, after the first round,
            ``lastOpponentAction``.
        configuration: unused.

    The first move is random.  Afterwards the previous round is recorded via
    ``update`` and the bot plays the move that beats ``predict()``.
    """
    global last_move
    if observation.step == 0:
        last_move = random.choice([0, 1, 2])
        return last_move
    update(last_move, observation.lastOpponentAction)
    move = beat(predict())
    # Remember what was actually played; otherwise every later update()
    # would keep recording the opening move instead of the real history.
    last_move = move
    return move

In [None]:
from kaggle_environments import make, evaluate

# Play a series of matches between pup.py and rfind.py and tally the results
# from pup.py's point of view.
trials = 10
wins = 0
ties = 0
losses = 0
avg = 0  # running sum of score margins; divided by ``trials`` when printed

print('Running {} matches; this may take a few seconds'.format(trials))
for trial in range(trials):
    current_score = evaluate("rps", ["pup.py", "rfind.py"],
                             configuration={"episodeSteps": 1000})
    # current_score[0] is indexed by agent: [0] is pup.py, [1] is rfind.py
    # (assumed to follow the order of the agent list -- TODO confirm).
    pup_reward = current_score[0][0]
    rfind_reward = current_score[0][1]
    if pup_reward > 0:
        wins += 1
    elif rfind_reward > 0:
        losses += 1
    else:
        ties += 1
    avg += pup_reward - rfind_reward

# Summary line: counts, a bar of +/=/- characters, and the mean margin.
print(
    '{:>3} {:>3} {:>3}'.format('+' + str(wins),
                               '=' + str(ties),
                               '-' + str(losses)),
    '| {}{}{} |'.format('+' * wins, '=' * ties, '-' * losses),
    'avg score: {}'.format(avg / trials))