In [None]:
!pip install kaggle-environments --upgrade
from kaggle_environments import make
import numpy as np
import tqdm as tqdm

In [None]:
%%writefile submission.py

#Bigram with backoff probability model
import random
import numpy as np
import math
import copy
from itertools import product

# Module-level model state shared between successive agent() calls.
# transitions, cur_state and states are rebuilt whenever observation.step == 0.
last_choice = None  # the move agent() returned on its previous call
transitions = None  # dict: state key -> {sign: estimated probability the opponent plays that sign next}
cur_state = None  # '+'-joined history of 'opponentMove-ownMove' pairs, oldest first
states = None  # dict: state key -> bool, True once that state has received an update

def get_states(order,vals):
    """Enumerate every history key of the given order, each flagged as unseen.

    A single round is encoded as 'a-b' with a and b in range(vals); agent()
    writes the opponent's move first and its own move second. Histories of
    several rounds are the round strings joined with '+'.

    Args:
        order: number of rounds contained in each key.
        vals: number of distinct signs available to each player.

    Returns:
        dict mapping every possible key to False.
    """
    rounds = ['{}-{}'.format(first, second) for first in range(vals) for second in range(vals)]
    if order == 1:
        # Single-round keys are just the round strings themselves.
        return dict.fromkeys(rounds, False)
    # Longer histories: every ordered combination of `order` rounds.
    return dict.fromkeys(map('+'.join, product(rounds, repeat=order)), False)

def agent(observation, configuration,method='indexes',lamb=3,memory=0.5,ngram=2,backoff=True):
    """Predict the opponent's next sign with an n-gram model and play its counter.

    The model keeps, for every history of up to `ngram` rounds (each round is
    'opponentMove-ownMove'), an exponentially-smoothed estimate of which sign
    the opponent plays next. State lives in module globals and is rebuilt when
    observation.step == 0.

    Args:
        observation: object exposing `step` and, for step > 0,
            `lastOpponentAction`.
        configuration: object exposing `signs`, the number of playable signs.
        method: how the predicted opponent sign is picked --
            'best'    the most probable sign;
            'indexes' a rank in the probability ordering drawn from an
                      exponential distribution (rank 0 = most probable);
            'probs'   a sign drawn through the cumulative sorted
                      probabilities, using an exponential draw capped at 0.999.
        lamb: rate of the exponential draw (numpy scale is 1/lamb).
        memory: weight kept on the old estimate at each update; the new
            observation gets weight 1 - memory.
        ngram: number of past rounds forming the longest history.
        backoff: when True, drop the oldest round from the lookup history
            while that history has never been updated.

    Returns:
        int: the sign to play, (predicted opponent sign + 1) % signs.

    Raises:
        ValueError: if `method` is not one of the supported names or
            `ngram` is smaller than 1.
    """
    global last_choice, transitions, cur_state,states
    # Validate before touching any state: an unknown method previously fell
    # through every branch and crashed with UnboundLocalError after the model
    # had already been updated.
    if method not in ('best', 'indexes', 'probs'):
        raise ValueError("method must be 'best', 'indexes' or 'probs', got {!r}".format(method))
    if ngram < 1:
        raise ValueError('ngram must be at least 1, got {!r}'.format(ngram))
    signs = configuration.signs
    if observation.step == 0:
        #Store which states have been seen (all orders from 1 up to ngram)
        states = {}
        for order in range(1, ngram + 1):
            states.update(get_states(order, signs))
        #Populate the transition probabilities uniformly
        transitions = {state: {sign: 1 / signs for sign in range(signs)} for state in states}
        #Choose a random full-length current state
        cur_state = random.choice([s for s in states if s.count('+') == ngram - 1])
    else:
        #Last opponent choice
        last_oppo_choice = observation.lastOpponentAction
        history = cur_state.split('+')
        #Move every suffix of the history towards the observed opponent choice
        for start in range(ngram):
            update_state = '+'.join(history[start:])
            estimates = transitions[update_state]
            for sign in range(signs):
                target = 1.0 if sign == last_oppo_choice else 0.0
                estimates[sign] = (target * (1 - memory)) + (estimates[sign] * memory)
            states[update_state] = True
        #Update current state: drop the oldest round, append the newest one
        cur_state = '+'.join(history[1:] + [str(last_oppo_choice) + '-' + str(last_choice)])
    #Strings are immutable, so no copy is needed before shortening the lookup key
    prob_state = cur_state
    #Do backoff if the whole history has not been seen
    if backoff:
        while not states[prob_state] and '+' in prob_state:
            prob_state = prob_state.split('+', 1)[1]
    probs = [transitions[prob_state][sign] for sign in range(signs)]
    #Signs ordered from most to least probable
    ranked = np.argsort(probs)[::-1]
    if method == 'best':
        #Just pick the most likely next opponent choice
        next_oppo_choice = int(ranked[0])
    elif method == 'indexes':
        #Mostly pick the most likely choice, but draw the rank from an
        #exponential distribution to reduce predictability
        rank = min(signs - 1, math.floor(np.random.exponential(1 / lamb) * signs))
        next_oppo_choice = int(ranked[rank])
    else:
        #method == 'probs': walk the cumulative sorted probabilities until
        #they reach an exponential draw (capped so an entry always qualifies)
        sorted_probs = np.sort(probs)[::-1]
        rnd = min(0.999, np.random.exponential(1 / lamb))
        cumulative = np.cumsum(sorted_probs) / np.sum(sorted_probs)
        next_oppo_choice = int(ranked[np.where(cumulative >= rnd)[0][0]])
    #Our next choice is whatever beats the decision on the next opponent choice
    next_choice = (next_oppo_choice + 1) % signs
    last_choice = next_choice
    return next_choice

In [None]:
%%writefile bigram.py

import random
import numpy as np
import math
import copy
from itertools import product

# Module-level model state shared between successive agent() calls.
# transitions, cur_state and states are rebuilt whenever observation.step == 0.
last_choice = None  # the move agent() returned on its previous call
transitions = None  # dict: state key -> {sign: estimated probability the opponent plays that sign next}
cur_state = None  # '+'-joined history of 'opponentMove-ownMove' pairs, oldest first
states = None  # dict: state key -> bool, True once that state has received an update

def get_states(order,vals):
    """Enumerate every history key of the given order, each flagged as unseen.

    A single round is encoded as 'a-b' with a and b in range(vals); agent()
    writes the opponent's move first and its own move second. Histories of
    several rounds are the round strings joined with '+'.

    Args:
        order: number of rounds contained in each key.
        vals: number of distinct signs available to each player.

    Returns:
        dict mapping every possible key to False.
    """
    rounds = ['{}-{}'.format(first, second) for first in range(vals) for second in range(vals)]
    if order == 1:
        # Single-round keys are just the round strings themselves.
        return dict.fromkeys(rounds, False)
    # Longer histories: every ordered combination of `order` rounds.
    return dict.fromkeys(map('+'.join, product(rounds, repeat=order)), False)

def agent(observation, configuration,method='indexes',lamb=3,memory=0.5,ngram=2,backoff=False):
    """Predict the opponent's next sign with a bigram model and play its counter.

    The model keeps, for every history of up to `ngram` rounds (each round is
    'opponentMove-ownMove'), an exponentially-smoothed estimate of which sign
    the opponent plays next. State lives in module globals and is rebuilt when
    observation.step == 0. Backoff is off by default in this variant.

    Args:
        observation: object exposing `step` and, for step > 0,
            `lastOpponentAction`.
        configuration: object exposing `signs`, the number of playable signs.
        method: how the predicted opponent sign is picked --
            'best'    the most probable sign;
            'indexes' a rank in the probability ordering drawn from an
                      exponential distribution (rank 0 = most probable);
            'probs'   a sign drawn through the cumulative sorted
                      probabilities, using an exponential draw capped at 0.999.
        lamb: rate of the exponential draw (numpy scale is 1/lamb).
        memory: weight kept on the old estimate at each update; the new
            observation gets weight 1 - memory.
        ngram: number of past rounds forming the longest history.
        backoff: when True, drop the oldest round from the lookup history
            while that history has never been updated.

    Returns:
        int: the sign to play, (predicted opponent sign + 1) % signs.

    Raises:
        ValueError: if `method` is not one of the supported names or
            `ngram` is smaller than 1.
    """
    global last_choice, transitions, cur_state,states
    # Validate before touching any state: an unknown method previously fell
    # through every branch and crashed with UnboundLocalError after the model
    # had already been updated.
    if method not in ('best', 'indexes', 'probs'):
        raise ValueError("method must be 'best', 'indexes' or 'probs', got {!r}".format(method))
    if ngram < 1:
        raise ValueError('ngram must be at least 1, got {!r}'.format(ngram))
    signs = configuration.signs
    if observation.step == 0:
        #Store which states have been seen (all orders from 1 up to ngram)
        states = {}
        for order in range(1, ngram + 1):
            states.update(get_states(order, signs))
        #Populate the transition probabilities uniformly
        transitions = {state: {sign: 1 / signs for sign in range(signs)} for state in states}
        #Choose a random full-length current state
        cur_state = random.choice([s for s in states if s.count('+') == ngram - 1])
    else:
        #Last opponent choice
        last_oppo_choice = observation.lastOpponentAction
        history = cur_state.split('+')
        #Move every suffix of the history towards the observed opponent choice
        for start in range(ngram):
            update_state = '+'.join(history[start:])
            estimates = transitions[update_state]
            for sign in range(signs):
                target = 1.0 if sign == last_oppo_choice else 0.0
                estimates[sign] = (target * (1 - memory)) + (estimates[sign] * memory)
            states[update_state] = True
        #Update current state: drop the oldest round, append the newest one
        cur_state = '+'.join(history[1:] + [str(last_oppo_choice) + '-' + str(last_choice)])
    #Strings are immutable, so no copy is needed before shortening the lookup key
    prob_state = cur_state
    #Do backoff if the whole history has not been seen
    if backoff:
        while not states[prob_state] and '+' in prob_state:
            prob_state = prob_state.split('+', 1)[1]
    probs = [transitions[prob_state][sign] for sign in range(signs)]
    #Signs ordered from most to least probable
    ranked = np.argsort(probs)[::-1]
    if method == 'best':
        #Just pick the most likely next opponent choice
        next_oppo_choice = int(ranked[0])
    elif method == 'indexes':
        #Mostly pick the most likely choice, but draw the rank from an
        #exponential distribution to reduce predictability
        rank = min(signs - 1, math.floor(np.random.exponential(1 / lamb) * signs))
        next_oppo_choice = int(ranked[rank])
    else:
        #method == 'probs': walk the cumulative sorted probabilities until
        #they reach an exponential draw (capped so an entry always qualifies)
        sorted_probs = np.sort(probs)[::-1]
        rnd = min(0.999, np.random.exponential(1 / lamb))
        cumulative = np.cumsum(sorted_probs) / np.sum(sorted_probs)
        next_oppo_choice = int(ranked[np.where(cumulative >= rnd)[0][0]])
    #Our next choice is whatever beats the decision on the next opponent choice
    next_choice = (next_oppo_choice + 1) % signs
    last_choice = next_choice
    return next_choice

In [None]:
%%writefile unigram.py

import random
import numpy as np
import math
import copy
from itertools import product

# Module-level model state shared between successive agent() calls.
# transitions, cur_state and states are rebuilt whenever observation.step == 0.
last_choice = None  # the move agent() returned on its previous call
transitions = None  # dict: state key -> {sign: estimated probability the opponent plays that sign next}
cur_state = None  # '+'-joined history of 'opponentMove-ownMove' pairs, oldest first
states = None  # dict: state key -> bool, True once that state has received an update

def get_states(order,vals):
    """Enumerate every history key of the given order, each flagged as unseen.

    A single round is encoded as 'a-b' with a and b in range(vals); agent()
    writes the opponent's move first and its own move second. Histories of
    several rounds are the round strings joined with '+'.

    Args:
        order: number of rounds contained in each key.
        vals: number of distinct signs available to each player.

    Returns:
        dict mapping every possible key to False.
    """
    rounds = ['{}-{}'.format(first, second) for first in range(vals) for second in range(vals)]
    if order == 1:
        # Single-round keys are just the round strings themselves.
        return dict.fromkeys(rounds, False)
    # Longer histories: every ordered combination of `order` rounds.
    return dict.fromkeys(map('+'.join, product(rounds, repeat=order)), False)

def agent(observation, configuration,method='indexes',lamb=3,memory=0.5,ngram=1,backoff=False):
    """Predict the opponent's next sign with a unigram model and play its counter.

    The model keeps, for every history of up to `ngram` rounds (each round is
    'opponentMove-ownMove'), an exponentially-smoothed estimate of which sign
    the opponent plays next. State lives in module globals and is rebuilt when
    observation.step == 0. This variant defaults to one round of history and
    no backoff.

    Args:
        observation: object exposing `step` and, for step > 0,
            `lastOpponentAction`.
        configuration: object exposing `signs`, the number of playable signs.
        method: how the predicted opponent sign is picked --
            'best'    the most probable sign;
            'indexes' a rank in the probability ordering drawn from an
                      exponential distribution (rank 0 = most probable);
            'probs'   a sign drawn through the cumulative sorted
                      probabilities, using an exponential draw capped at 0.999.
        lamb: rate of the exponential draw (numpy scale is 1/lamb).
        memory: weight kept on the old estimate at each update; the new
            observation gets weight 1 - memory.
        ngram: number of past rounds forming the longest history.
        backoff: when True, drop the oldest round from the lookup history
            while that history has never been updated.

    Returns:
        int: the sign to play, (predicted opponent sign + 1) % signs.

    Raises:
        ValueError: if `method` is not one of the supported names or
            `ngram` is smaller than 1.
    """
    global last_choice, transitions, cur_state,states
    # Validate before touching any state: an unknown method previously fell
    # through every branch and crashed with UnboundLocalError after the model
    # had already been updated.
    if method not in ('best', 'indexes', 'probs'):
        raise ValueError("method must be 'best', 'indexes' or 'probs', got {!r}".format(method))
    if ngram < 1:
        raise ValueError('ngram must be at least 1, got {!r}'.format(ngram))
    signs = configuration.signs
    if observation.step == 0:
        #Store which states have been seen (all orders from 1 up to ngram)
        states = {}
        for order in range(1, ngram + 1):
            states.update(get_states(order, signs))
        #Populate the transition probabilities uniformly
        transitions = {state: {sign: 1 / signs for sign in range(signs)} for state in states}
        #Choose a random full-length current state
        cur_state = random.choice([s for s in states if s.count('+') == ngram - 1])
    else:
        #Last opponent choice
        last_oppo_choice = observation.lastOpponentAction
        history = cur_state.split('+')
        #Move every suffix of the history towards the observed opponent choice
        for start in range(ngram):
            update_state = '+'.join(history[start:])
            estimates = transitions[update_state]
            for sign in range(signs):
                target = 1.0 if sign == last_oppo_choice else 0.0
                estimates[sign] = (target * (1 - memory)) + (estimates[sign] * memory)
            states[update_state] = True
        #Update current state: drop the oldest round, append the newest one
        cur_state = '+'.join(history[1:] + [str(last_oppo_choice) + '-' + str(last_choice)])
    #Strings are immutable, so no copy is needed before shortening the lookup key
    prob_state = cur_state
    #Do backoff if the whole history has not been seen
    if backoff:
        while not states[prob_state] and '+' in prob_state:
            prob_state = prob_state.split('+', 1)[1]
    probs = [transitions[prob_state][sign] for sign in range(signs)]
    #Signs ordered from most to least probable
    ranked = np.argsort(probs)[::-1]
    if method == 'best':
        #Just pick the most likely next opponent choice
        next_oppo_choice = int(ranked[0])
    elif method == 'indexes':
        #Mostly pick the most likely choice, but draw the rank from an
        #exponential distribution to reduce predictability
        rank = min(signs - 1, math.floor(np.random.exponential(1 / lamb) * signs))
        next_oppo_choice = int(ranked[rank])
    else:
        #method == 'probs': walk the cumulative sorted probabilities until
        #they reach an exponential draw (capped so an entry always qualifies)
        sorted_probs = np.sort(probs)[::-1]
        rnd = min(0.999, np.random.exponential(1 / lamb))
        cumulative = np.cumsum(sorted_probs) / np.sum(sorted_probs)
        next_oppo_choice = int(ranked[np.where(cumulative >= rnd)[0][0]])
    #Our next choice is whatever beats the decision on the next opponent choice
    next_choice = (next_oppo_choice + 1) % signs
    last_choice = next_choice
    return next_choice

In [None]:
#Evaluate against the unigram baseline
#Plays repeated matches of submission.py (player 0) against unigram.py and
#summarises player 0's final observation reward for each match.
N_EPISODES = 50
env = make("rps", debug=True)
results = []
for _ in tqdm.tqdm(range(N_EPISODES)):
    env.reset()
    state = env.run(["submission.py","unigram.py"])
    #Last step, first player: the reward stored in its observation
    results.append(state[-1][0]['observation']['reward'])
#env.render(mode="ipython", width=400, height=350)
wins = [r for r in results if r>0]
losses = [r for r in results if r<0]
print('Victories: {0:d} of {1:d}'.format(len(wins),len(results)))
#np.mean of an empty list is nan and emits a RuntimeWarning, so report n/a instead
print('Average winning result: {0}'.format('{0:.0f}'.format(np.mean(wins)) if wins else 'n/a'))
print('Average losing result: {0}'.format('{0:.0f}'.format(np.mean(losses)) if losses else 'n/a'))

In [None]:
#Evaluate against the bigram baseline
#Plays repeated matches of submission.py (player 0) against bigram.py and
#summarises player 0's final observation reward for each match.
N_EPISODES = 50
env = make("rps", debug=True)
results = []
for _ in tqdm.tqdm(range(N_EPISODES)):
    env.reset()
    state = env.run(["submission.py","bigram.py"])
    #Last step, first player: the reward stored in its observation
    results.append(state[-1][0]['observation']['reward'])
#env.render(mode="ipython", width=400, height=350)
wins = [r for r in results if r>0]
losses = [r for r in results if r<0]
print('Victories: {0:d} of {1:d}'.format(len(wins),len(results)))
#np.mean of an empty list is nan and emits a RuntimeWarning, so report n/a instead
print('Average winning result: {0}'.format('{0:.0f}'.format(np.mean(wins)) if wins else 'n/a'))
print('Average losing result: {0}'.format('{0:.0f}'.format(np.mean(losses)) if losses else 'n/a'))