In [93]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kendalltau
% matplotlib notebook
plt.style.use(['ggplot','seaborn-poster'])

Let us set $\delta$ to be one for all paths (i.e. all arms start the same)

Then $\beta$ is redundant, and `psi(a,b) == tau[a,b]`; we'll just use `tau` for speed.

# Set parameters

## MAB

In [78]:
ARMS = 10
THETAS_TRUE = np.random.uniform(size=ARMS)

## ACS

In [94]:
ITERATIONS = 100
ANTS = 100
Q_0 = 0.9
RHO = 0.1
ALPHA = 0.1
TAU_0 = 1.0 / ( ARMS * ( ARMS - 1 ) )

# Definitions

In [83]:
def go_to_next_city(ant):
    
    q = np.random.uniform()
    unvisited_cities = np.where(Us[ant,:])[0]
    current_position = xs[ant]
    feasible_taus = taus[current_position, unvisited_cities]
    
    # choose next city
    if q < Q_0: # deterministic exploitation
        x_new = feasible_taus.argmax()
    else: # stochastic exploration
        x_new = np.random.choice(unvisited_cities, p=feasible_taus / feasible_taus.sum())
        
    # do local update
    # pull the arm (corresponds to next city)
    reward = np.random.binomial(1, THETAS_TRUE[x_new])
    # local update rule
    taus[current_position, x_new] = (1 - RHO) * taus[current_position, x_new] + RHO * reward
    
    # clean up
    xs[ant] = x_new
    Us[ant, x_new] = 0

# Run ze simulations

In [97]:
taus = TAU_0 * np.ones([ARMS, ARMS])
for iteration in xrange(ITERATIONS):
    xs = np.random.choice(xrange(ARMS), size=ANTS)
    Us = np.ones([ANTS, ARMS])
    # remove all cities the ants are starting from
    Us[xrange(ANTS),xs] = 0
    for step in xrange(ARMS - 1):
        for ant in xrange(ANTS):
            go_to_next_city(ant)

In [98]:
kendalltau(taus.sum(axis=0), THETAS_TRUE)

KendalltauResult(correlation=0.86666666666666659, pvalue=0.00048616424374551593)

In [99]:
taus.sum(axis=0).argmax()

7

In [100]:
THETAS_TRUE.argmax()

7