Install the requirements first: `pip install -r requirements.txt`

Run each algorithm (Uniform Explorer, CGE, Track-n-Stop) once on a simple instance with three arms and one constraint.

In [13]:
import numpy as np
from constraint_explore import TnS, CGE, UniformExplorer, run_exploration_experiment, get_policy, GaussianBandit, run_imdb_exp

In [11]:
# Compare the three explorers once each on a small three-arm instance.
# Every bandit is built with the same seed so that the runs are reproducible
# and all algorithms face the same reward stream.
seed = 0
delta = 0.1

# Instance definition: `mu` is handed to the bandit and to get_policy;
# `A` and `b` define the single constraint
# (presumably A @ policy <= b -- confirm against get_policy).
mu = np.array([0.92, 0.91, 0.8])
A = np.array([[5, 1, 0]])
b = np.array([0.5])

# Reference policy computed from the true `mu`.
optimal_policy, _ = get_policy(mu, A, b)
print(f' Optimal Policy: {optimal_policy}')

dist_type = 'Gaussian'

# --- Uniform explorer -------------------------------------------------------
explorer = UniformExplorer(len(mu), A=A, b=b, delta=delta, restricted_exploration=True, seed=seed, dist_type=dist_type)
bandit = GaussianBandit(mu, noise=1, seed=seed)
t, correct, _, _, avg_t = run_exploration_experiment(bandit, explorer, A, b)
print(f'Uniform Explorer: Stopped at {t} with Correct Policy: {correct} Average time: {avg_t}')

# --- CGE (game-based explorer) ----------------------------------------------
# NOTE(review): CGE/TnS are not given `seed` because their constructors'
# support for it is not visible in this notebook -- pass it too if available.
explorer = CGE(len(mu), A=A, b=b, delta=delta, restricted_exploration=True, dist_type=dist_type)
# Seed the bandit exactly like the first run instead of leaving it unseeded.
bandit = GaussianBandit(mu, noise=1, seed=seed)
t, correct, _, _, avg_t = run_exploration_experiment(bandit, explorer, A, b)
print(f'Game Explorer: Stopped at {t} with Correct Policy: {correct} Average time: {avg_t}')

# --- Track-n-Stop -----------------------------------------------------------
explorer = TnS(len(mu), A=A, b=b, delta=delta, restricted_exploration=True, dist_type=dist_type)
# Same seeded bandit as above for a like-for-like comparison.
bandit = GaussianBandit(mu, noise=1, seed=seed)
t, correct, _, _, avg_t = run_exploration_experiment(bandit, explorer, A, b)
print(f'Track-n-Stop: Stopped at {t} with Correct Policy: {correct} Average time: {avg_t}')

 Optimal Policy: [0.  0.5 0.5]
Uniform Explorer: Stopped at 7381 with Correct Policy: True Average time: 0.0009160975004143709
Game Explorer: Stopped at 3485 with Correct Policy: True Average time: 0.004437300464515194
Track-n-Stop: Stopped at 4105 with Correct Policy: True Average time: 0.005503986988230251


Example with the IMDB environment and four arms (movies)

In [34]:
from IMDB.imdb_utils import get_env

# Experiment settings for the IMDB example.
n_movies, seed, delta = 4, 1, 0.001

# Per-genre constraint specification handed to get_env
# (presumably genre -> (comparison, threshold) -- confirm against get_env).
spec = {
    'action': ('<', 0.3),
    'drama': ('>', 0.3),
    'family': ('>', 0.3),
}

# Build the environment; get_env also returns the constraint pair (A, b).
imdb, A, b = get_env(n_movies, spec=spec, seed=seed)

# Environment statistics: the means feed get_policy here, and sigma is kept
# for the explorers created in the next cell.
means, sigma = imdb.get_means(), imdb.get_std()

# Reference policy computed from the environment's means.
optimal_policy, aux = get_policy(means.values, A=A, b=b)
print('Optimal Policy: ', optimal_policy)

Optimal Policy:  [0.3 0.7 0.  0. ]


In [35]:
# Keyword arguments shared by all three explorers in this comparison.
explorer_kwargs = dict(A=A, b=b, delta=delta, restricted_exploration=False, sigma=sigma)

# Run 1: UniformExplorer on the IMDB environment.
uniform = UniformExplorer(n_movies, **explorer_kwargs)
t, correct, optimal_policy, policy, _ = run_imdb_exp(imdb, uniform, A, b)
print(f'Uniform: Stopped at {t} with correct policy {correct}')

# Run 2: CGE with the same configuration.
cge = CGE(n_movies, **explorer_kwargs)
t, correct, _, _, _ = run_imdb_exp(imdb, cge, A, b)
print(f'CGE: Stopped at {t} with correct policy {correct}')

# Run 3: TnS (Track-n-Stop) with the same configuration.
tns = TnS(n_movies, **explorer_kwargs)
t, correct, optimal_policy, policy, _ = run_imdb_exp(imdb, tns, A, b)
print(f'Track-n-Stop: Stopped at {t} with correct policy {correct}')

Uniform: Stopped at 15844 with correct policy True
CGE: Stopped at 9068 with correct policy True
Track-n-Stop: Stopped at 5552 with correct policy True
