Implementation of the FEWA (Filtering on Expanding Window Average) algorithm described in [Seznec *et al.*, 2019](https://hal.inria.fr/hal-01936894v2/document).


In [None]:
!pip install kaggle-environments --upgrade -q

import numpy as np
from kaggle_environments import make
# Create a multi-armed-bandit ("mab") environment with debug output enabled.
# NOTE: `best_of` below builds its own environment, so this instance is only
# useful for ad-hoc experiments in the notebook.
env = make("mab", debug=True)

In [None]:
def best_of(n, file1, file2):
    """Play `n` matches between two agents and print a summary.

    For every round the final reward of each player is printed; a round is
    credited to the player with the strictly higher reward and drawn rounds
    are credited to nobody. After all rounds the player with more round wins
    is reported (or a tie if both won equally often).

    Args:
        n: number of rounds to play.
        file1: agent passed to the environment as player 1.
        file2: agent passed to the environment as player 2.
    """
    env = make("mab", debug=True)

    winners = []
    for i in range(n):
        env.run([file1, file2])
        final_state = env.steps[-1]
        p1_score = final_state[0]['reward']
        p2_score = final_state[1]['reward']

        # Draws are intentionally not recorded.
        if p1_score > p2_score:
            winners.append(1)
        elif p1_score < p2_score:
            winners.append(2)

        env.reset()
        print(f"Round {i+1}: {p1_score} - {p2_score}")

    # Summary of round wins.
    print()
    p1_wins = winners.count(1)
    p2_wins = winners.count(2)
    if p1_wins == p2_wins:
        print(f'P1 and P2 ties with {p1_wins} wins out of {n}')
    elif p1_wins > p2_wins:
        print(f'P1 wins {p1_wins} out of {n}')
    else:
        print(f'P2 wins {p2_wins} out of {n}')

In [None]:
%%writefile fewa.py
# fewa
import random
import numpy as np

# seed
# Seed both the stdlib and NumPy generators so random tie-breaks in `agent`
# are reproducible.
random.seed(2020)
np.random.seed(2020)

# global vars
# State shared between successive calls of `agent`; the array-valued entries
# are (re)created by `agent` when obs.step == 0.
decay = .97  # NOTE(review): not referenced anywhere in the visible code
n_ag = None  # per-bandit count of this agent's own pulls
history = None  # stacked one-hot rows of recorded actions (one row per step)
last_a_ag = None  # action this agent returned on the previous step
last_a_op = None  # opponent's action read from obs.lastActions
rewards = None  # stacked per-step reward rows, one column per bandit
total_reward = 0  # last seen obs.reward, used to derive the per-step reward
alpha = 0.1  # exponent of (t + 1) in the delta_t schedule
delta0 = 1  # numerator of the delta_t schedule

# filter
def filter_step(k, h, t, delta_t, rewards, sigma2=1):
    """Drop bandits whose windowed mean reward trails the best one too far.

    The mean of each candidate column is taken over rows ``t - h - 1`` to
    ``t - 1`` of `rewards`. A candidate survives when its gap to the best
    windowed mean is at most twice the confidence width
    ``sqrt(2 * sigma2 / (h + 1) * log(1 / delta_t))``.

    Args:
        k: list of candidate bandit indices (columns of `rewards`).
        h: window parameter; the window spans ``h + 1`` rows.
        t: row index that ends the window (exclusive).
        delta_t: confidence parameter fed into the log term.
        rewards: 2-D array with one row per step and one column per bandit.
        sigma2: variance proxy scaling the confidence width.

    Returns:
        list: the elements of `k` that pass the filter, in their original order.
    """
    # Confidence width for a window of h + 1 rows.
    half_width = np.sqrt((2 * sigma2 / (h + 1)) * np.log(1 / delta_t))
    threshold = 2 * half_width

    # Windowed mean of every candidate and its gap to the best candidate.
    window = rewards[(t - h - 1):t, k]
    window_mean = np.mean(window, axis=0)
    gaps = (np.max(window_mean) - window_mean).reshape(-1,)

    keep = gaps <= threshold
    return [arm for arm, ok in zip(k, keep) if ok]

# agent
def agent(obs, conf):
    """Choose the next bandit to pull using the FEWA filtering rule.

    All bookkeeping lives in module-level globals, so the function is meant
    to be called once per step; the state is rebuilt whenever ``obs.step``
    is 0. During the first ``conf.banditCount`` steps every bandit is pulled
    once in index order. Afterwards candidates are filtered over growing
    windows until one surviving bandit has been pulled exactly ``h`` times;
    ties between such bandits are broken at random.

    Args:
        obs: observation exposing ``step``, ``reward`` (treated as this
            agent's running total), ``agentIndex`` and ``lastActions``.
        conf: configuration exposing ``banditCount``.

    Returns:
        int: index of the bandit to pull on this step.
    """
    global n_ag, last_a_ag, last_a_op, rewards, total_reward, history

    t = obs.step
    n_bandits = conf.banditCount
    arms = np.arange(n_bandits)

    if t == 0:
        # (Re)initialise the per-episode state. `np.int` was removed in
        # NumPy 1.24, so the builtin `int` is used as the dtype instead.
        n_ag = np.zeros(n_bandits, dtype=int)
        history = np.zeros(n_bandits)
        rewards = np.zeros(n_bandits)
        # Reset the running total so that a new episode in the same process
        # does not compute its first reward delta against stale state.
        total_reward = 0
        # First warm-up pull.
        action = int(t)
        history = np.vstack((history, 1 * (arms == action)))
    else:
        # Opponent's previous action (the entry that is not ours).
        op_ix = (obs.agentIndex + 1) % len(obs.lastActions)
        last_a_op = obs.lastActions[op_ix]
        # Count our own previous pull.
        n_ag[last_a_ag] += 1
        # History records the opponent's action from step 1 onwards.
        history = np.vstack((history, 1 * (arms == last_a_op)))
        # The increase of our running total is the reward of OUR previous
        # pull, so it is credited to `last_a_ag` (not the opponent's arm).
        r = obs.reward - total_reward
        rewards = np.vstack((rewards, r * (arms == last_a_ag)))
        total_reward = obs.reward

        if t < n_bandits:
            # Warm-up: pull each bandit once, in index order.
            action = int(t)
        else:
            # FEWA: the confidence parameter shrinks as t grows.
            delta_t = delta0 / ((t + 1) ** alpha)
            h = 0
            k = list(range(n_bandits))
            action = None
            while action is None:
                # Filter the candidates on the current window, then widen it.
                k = filter_step(k, h, t, delta_t, rewards)
                h += 1
                # Surviving bandits that we have pulled exactly h times.
                due = np.array(k)[n_ag[k] == h]
                if due.size > 1:
                    # Break ties randomly.
                    action = int(np.random.choice(due))
                elif due.size == 1:
                    action = int(due[0])

    # Remember our action for the next call's bookkeeping.
    last_a_ag = action
    return action

In [None]:
# Play five rounds: the submission from the input directory (player 1)
# against the FEWA agent written to fewa.py (player 2).
best_of(5, "../input/santa-2020/submission.py", "fewa.py")