Simple agent based on rfind (it looks for repeating patterns in the move history). A good starting point, and definitely improvable.

Features:
* Completely deterministic;
* Multiple predictors: my history, his history and both;
* Multiple meta-strategies;
* Threat: if opponent is having a good streak, start playing around him.

Agent inspired from http://www.rpscontest.com/entry/498002.

In [None]:
%%writefile Spinoza.py

# = * = * = * = * = * = * 

# Tunable settings.
max_limit = 23  # can be modified
add_rotations = True

# Number of predictors: 6 base ones, tripled when rotations are enabled.
numPre = 6 * 3 if add_rotations else 6

# Number of meta-predictors: 4 base ones, tripled when rotations are enabled.
numMeta = 4 * 3 if add_rotations else 4

# History strings, one character appended per recorded round to each entry.
moves = [''] * 3

# beat[x] is the move that wins against x.
beat = dict(zip('RPS', 'PSR'))

# Single-digit code for every two-letter pair of moves.
dna = {
    pair: code
    for code, pair in enumerate(
        ('RP', 'PS', 'SR', 'PR', 'SP', 'RS', 'RR', 'PP', 'SS')
    )
}

# Current guess of every predictor / meta-predictor, with their scores.
p = ["P" for _ in range(numPre)]
m = ["P" for _ in range(numMeta)]
pScore = [[0 for _ in range(numPre)] for _ in range(8)]
mScore = [0 for _ in range(numMeta)]

# Number of recorded rounds, running threat level, and last move played.
length, threat = 0, 0
output = "P"


def myagent(observation, configuration):
    """Choose the next Rock-Paper-Scissors move from history-pattern predictors.

    All state lives in the module-level variables (``moves``, ``p``, ``m``,
    ``pScore``, ``mScore``, ``length``, ``threat``, ``output``), which are
    updated on every call.

    Args:
        observation: object exposing ``step`` and, from step 2 onwards,
            ``lastOpponentAction`` (used as an index into "RPS").
        configuration: unused.

    Returns:
        0, 1 or 2 for Rock, Paper or Scissors respectively.
    """
    # Only the names that are rebound need a ``global`` declaration; the
    # lists are mutated in place.
    global length, threat, output

    # Opening: the first two steps just cycle through the moves and record
    # nothing in the history.
    if observation.step < 2:
        output = beat[output]
        return "RPS".index(output)

    # - - - -

    opp_move = "RPS"[observation.lastOpponentAction]

    # Threat of opponent: outcome is +1 when our last move won and -1 when it
    # lost, so the threat level rises while we keep losing.
    outcome = (beat[opp_move] == output) - (opp_move == beat[output])
    threat = 0.9*threat - 0.1*outcome

    # Refresh pScore: +1 when the predictor's guess matched the move actually
    # played, -1 when the guess was the move that loses to it. Rows 0 and 2
    # score against the opponent's move, rows 1 and 3 against our own.
    for i in range(numPre):
        guess = p[i]
        guess_loser = beat[beat[guess]]
        opp_hit = (opp_move == guess) - (opp_move == guess_loser)
        own_hit = (output == guess) - (output == guess_loser)
        pScore[0][i] = 0.9*pScore[0][i] + 0.1*opp_hit
        pScore[1][i] = 0.9*pScore[1][i] + 0.1*own_hit
        pScore[2][i] = 0.8*pScore[2][i] + 0.3*opp_hit + \
                        0.1*(length % 3 - 1)
        pScore[3][i] = 0.8*pScore[3][i] + 0.3*own_hit + \
                        0.1*(length % 3 - 1)

    # Refresh mScore with the same hit/miss rule applied to the meta guesses.
    for i in range(numMeta):
        mScore[i] = 0.9*mScore[i] + 0.1*((opp_move == m[i]) - (opp_move == beat[beat[m[i]]])) + \
                    0.05*(length % 5 - 2)

    # Refresh moves: [0] joint history (one dna digit per round),
    # [1] opponent's moves, [2] our moves.
    moves[0] += str(dna[opp_move + output])
    moves[1] += opp_move
    moves[2] += output

    # Refresh length
    length += 1

    # New predictors: for each history, find the longest recent suffix (at
    # most max_limit long) that also occurs earlier, and look at what was
    # played right after its last earlier occurrence.
    limit = min(length, max_limit)
    for y in range(3):  # joint history, opponent's moves, our moves
        history = moves[y]
        for j in range(limit, 0, -1):
            # A single rfind both tests for and locates the earlier match;
            # end=length-1 keeps the match from being the suffix itself.
            i = history.rfind(history[length-j:length], 0, length-1)
            if i != -1:
                p[0+2*y] = moves[1][j+i]
                p[1+2*y] = beat[moves[2][j+i]]
                break

    # Rotations of predictors: the upper two thirds are the base guesses
    # shifted by one and two steps of beat[beat[.]].
    if add_rotations:
        base_pre = numPre // 3
        for i in range(base_pre, numPre):
            p[i] = beat[beat[p[i-base_pre]]]

    # New meta: best predictor per score row; odd rows take the counter.
    for i in range(0, 4, 2):
        m[i] = p[pScore[i].index(max(pScore[i]))]
        m[i+1] = beat[p[pScore[i+1].index(max(pScore[i+1]))]]

    # Rotations of meta, with bounds derived from numMeta like the
    # predictor rotations above.
    if add_rotations:
        base_meta = numMeta // 3
        for i in range(base_meta, numMeta):
            m[i] = beat[beat[m[i-base_meta]]]

    # - - -

    # Play the counter to the best-scoring meta-predictor's guess.
    output = beat[m[mScore.index(max(mScore))]]

    if threat > 0.4:
        # ah take this!
        output = beat[beat[output]]

    return "RPS".index(output)

Let's run a basic test against good opponents (thanks to the notebook https://www.kaggle.com/chankhavu/rps-dojo)

In [None]:
import os
import pandas as pd
import contextlib
with contextlib.redirect_stdout(None):
    import kaggle_environments
from datetime import datetime
import multiprocessing as pymp
from tqdm import tqdm
import ray.util.multiprocessing as raymp


# function to return score
def get_result(match_settings):
    """Play one match-up and tally its episodes.

    ``match_settings`` is indexed as ``[agent, opponent, num_episodes]``.

    Returns a tuple ``(opponent, won, lost, tie, elapsed, mean_score)``,
    where the counts and the mean are taken from the first agent's score in
    each episode and ``elapsed`` is the wall-clock time of the whole run.
    """
    started_at = datetime.now()

    contestants = [match_settings[0], match_settings[1]]
    outcomes = kaggle_environments.evaluate(
        'rps', contestants, num_episodes=match_settings[2])

    won = lost = tie = 0
    avg_score = 0.
    for outcome in outcomes:
        score = outcome[0]  # score of the first agent (index 0)
        avg_score += score
        if score > 0:
            won += 1
        elif score < 0:
            lost += 1
        else:
            tie += 1

    elapsed = datetime.now() - started_at
    mean_score = float(avg_score) / float(match_settings[2])
    return match_settings[1], won, lost, tie, elapsed, mean_score


def eval_agent_against_baselines(agent, baselines, num_episodes=10, use_ray=False):
    """Run ``agent`` against every baseline in parallel and tabulate the results.

    Args:
        agent: the agent under test, passed through to ``get_result``.
        baselines: iterable of baseline agents; each one also becomes a row
            label of the returned table.
        num_episodes: number of episodes played per baseline.
        use_ray: when true, use ``ray.util.multiprocessing.Pool`` instead of
            the standard-library ``multiprocessing.Pool``.

    Returns:
        A ``pandas.DataFrame`` with one row per baseline plus a ``"summary"``
        row holding the column means of ``wins``, ``loses``, ``ties`` and
        ``avg_score`` (``total_time`` is left unset on the summary row).
    """
    # Materialize once so any iterable works and it can be traversed twice.
    baselines = list(baselines)

    df = pd.DataFrame(
        columns=['wins', 'loses', 'ties', 'total_time', 'avg_score'],
        index=baselines + ["summary"]
    )

    matches = [[agent, baseline, num_episodes] for baseline in baselines]

    pool = raymp.Pool() if use_ray else pymp.Pool()
    try:
        results = list(tqdm(pool.imap_unordered(get_result, matches), total=len(matches)))
    finally:
        # Always release the workers; otherwise every call leaks a pool.
        pool.terminate()

    for baseline_agent, won, lost, tie, elapsed, avg_score in results:
        df.loc[baseline_agent, 'wins'] = won
        df.loc[baseline_agent, 'loses'] = lost
        df.loc[baseline_agent, 'ties'] = tie
        df.loc[baseline_agent, 'total_time'] = elapsed
        df.loc[baseline_agent, 'avg_score'] = avg_score

    # Add summary: the summary cell is still empty when each mean is taken,
    # so it is skipped by mean().
    for column in ("wins", "loses", "ties", "avg_score"):
        df.loc["summary", column] = df[column].mean()

    return df


In [None]:
# Build the list of baseline agent files for each belt directory.
blue_belt_agents = list(map(
    lambda file_name: os.path.join('../input/some-agents/blue_belt', file_name),
    os.listdir('../input/some-agents/blue_belt'),
))
black_belt_agents = list(map(
    lambda file_name: os.path.join('../input/some-agents/black_belt', file_name),
    os.listdir('../input/some-agents/black_belt'),
))

# These two take too long to simulate, so leave them out of the run.
black_belt_agents.remove("../input/some-agents/black_belt/multi_armed_bandit_v15.py")
black_belt_agents.remove("../input/some-agents/black_belt/multi_armed_bandit_v32.py")

# The agent file written by the %%writefile cell.
my_agent = "./Spinoza.py"

# Last expression of the cell, so the notebook displays the result table.
eval_agent_against_baselines(my_agent, black_belt_agents, num_episodes=10)