# Spot the best shooter
Data Science challenge to identify the best shooter using a Bayesian multi-armed bandit approach. The exploitation vs. exploration tradeoff is controlled by the temperature, which can be parametrized as a function of the round number.

In [13]:
from random import choices, random, seed

import numpy as np
from scipy.special import betainc, comb, gamma
from scipy.stats import binom

class Player:
    """A shooter with a hidden success rate and a Bayesian "is special" belief.

    Two hypotheses are compared for each player:

    * special  -- the success rate equals ``p_target``;
    * ordinary -- the success rate is uniformly distributed on
      ``[p_min, p_max]``.

    The prior probability of the "special" hypothesis is ``1 / n_players``.
    After every shot the posterior probability of that hypothesis is
    recomputed from the cumulative record (``k`` hits out of ``n`` shots).

    Parameters
    ----------
    p : float or None
        True success rate. ``None`` draws one uniformly from
        ``[p_min, p_max)``.
    n_players : int
        Number of players in the pool; sets the prior ``1 / n_players``.
    p_min, p_max : float
        Bounds of the uniform distribution assumed for ordinary players.
    p_target : float
        Success rate assumed for the special player.
    """

    def __init__(self, p=None, n_players=100, p_min=0, p_max=0.6, p_target=0.7):
        self.n = self.k = 0  # shots taken / shots made so far
        self.n_players = n_players
        self.x0, self.x1 = p_min, p_max
        self.prior = self.posterior = 1/self.n_players
        self.p_target = p_target
        self.set_succes_proba(p)

    def set_succes_proba(self, p):
        """Set the true success rate; ``None`` draws it from ``[x0, x1)``."""
        # Compare against None explicitly: a plain truthiness test would
        # silently replace a legitimate rate of 0 with a random draw.
        if p is not None:
            self.p = p
        else:
            self.p = self.x0 + np.random.random()*(self.x1 - self.x0)

    def shoot(self):
        """Take one shot (Bernoulli trial with rate ``p``) and update the belief."""
        self.k += np.random.binomial(1, self.p)
        self.n += 1
        self.update_posterior()

    def beteainc_diff(self, a, b, x0, x1):
        """Return ``I_x1(a, b) - I_x0(a, b)`` for the regularized incomplete beta ``I``."""
        return betainc(a, b, x1) - betainc(a, b, x0)

    def update_posterior(self):
        """Recompute the posterior probability that this player is the special one.

        Bayes' rule with the two hypotheses described on the class:

            posterior = prior * L_special
                        / (prior * L_special + (1 - prior) * L_ordinary)

        where ``L_special = Binom(k; n, p_target)`` and ``L_ordinary`` is the
        binomial likelihood averaged over a uniform rate on ``[x0, x1]``:

            L_ordinary = [I_x1(k+1, n-k+1) - I_x0(k+1, n-k+1)]
                         / ((x1 - x0) * (n + 1))

        (the binomial coefficient times the Beta function reduces to
        ``1 / (n + 1)``). The prior is never overwritten because the
        cumulative counts ``k`` and ``n`` already carry all the evidence.
        """
        k, n, x0, x1 = self.k, self.n, self.x0, self.x1

        special_term = self.prior * binom.pmf(k, n, self.p_target)
        ordinary_term = (
            (1 - self.prior) * 1/(x1 - x0) * 1/(n + 1)
            * self.beteainc_diff(k + 1, n - k + 1, x0, x1)
        )
        den = special_term + ordinary_term

        # If both likelihoods underflow to zero the ratio would be 0/0 (NaN),
        # which would poison any later ranking or weighting by posterior.
        # In that case keep the previous posterior instead.
        if den > 0:
            self.posterior = special_term / den


In [110]:
# Set problem parameters
n_players = 100
rounds = 14000
min_p, max_p = 0.5, 0.6  # bounds for the success rate of ordinary players
target_p = max_p + 0.1  # success rate of the special player
rng_seed = 42  # fixed so that Restart & Run All reproduces the same simulation

# Softmax temperature schedule: decreases linearly from `temp_start` by
# `temp_drop` over the run (high temperature = exploration, low = exploitation).
temp_start, temp_drop = 1.25, 1.24

# Seed both generators in use: the stdlib one drives `choices`,
# NumPy's global one drives the draws made inside `Player`.
seed(rng_seed)
np.random.seed(rng_seed)

# Spawn players
players = [Player(n_players=n_players, p_min=min_p, p_max=max_p, p_target=target_p)
           for _ in range(n_players)]

# Choose special player
players[0].set_succes_proba(target_p)

# Run simulation
for i_round in range(rounds):
    temp = temp_start - i_round/rounds * temp_drop  # exploitation vs exploration control
    weights = [np.exp(pl.posterior/temp) for pl in players]  # unnormalised softmax
    shooter = choices(players, weights=weights)[0]
    shooter.shoot()

# Choose our candidate: the player with the maximum posterior probability
candidate = max(players, key=lambda pl: pl.posterior)

In [111]:
# Report the outcome: the special player is the only one whose success
# rate was set to exactly `target_p`.
print(
    "Player was correclty identified!"
    if candidate.p == target_p
    else f"Player incorrectly selected, succes rate is {candidate.p}"
)

Player was correclty identified!
