In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
sns.set(font_scale=2.5, style = 'whitegrid')

In [None]:
from scripts.data.data_sampler import sample_adult_data
from scripts.data.data_sampler import sample_census_data
from scripts.data.data_sampler import sample_mushroom_data
from scripts.core.contextual_bandit import run_contextual_bandit, run_contextual_bandit_single
from scripts.algorithms.linear_thompson_sampling import LinTS
from scripts.algorithms.posterior_bnn_sampling import PosteriorBNNSampling
from scripts.algorithms.linucb import LinUcb

In [None]:
# Root folder: the current working directory at the time this cell runs.
base_route = os.getcwd()

# Sub-folder of base_route that sample_data() reads the dataset files from.
data_route = 'datasets'

# Folder passed to the algorithm constructors further down.
output = os.path.join(base_route, 'results')

In [None]:
def sample_data(data_type, num_contexts=None):
    """Sample a contextual bandit dataset of the requested type.

    Args:
        data_type: Dataset from which to sample: 'mushroom', 'census' or
            'adult'. Any other value falls back to the adult dataset (the
            historical behaviour, kept for backward compatibility) and emits
            a warning so that a typo does not go unnoticed.
        num_contexts: Number of contexts to sample. For 'census' and 'adult'
            the value is capped at 150000 and 45222 respectively, and None
            selects that cap. For 'mushroom' the value is passed to
            sample_mushroom_data unchanged.
    Returns:
        dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
        opt_rewards: Vector of expected optimal reward for each context.
        opt_actions: Vector of optimal action for each context.
        num_actions: Number of available actions.
        context_dim: Dimension of each context.
    """
    import warnings

    if data_type == 'mushroom':
        # The mushroom sampler returns the stacked dataset directly, so the
        # action count and context dimension are fixed here.
        num_actions = 2
        context_dim = 117
        file_name = os.path.join(base_route, data_route, 'mushrooms.csv')
        dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
        opt_rewards, opt_actions = opt_mushroom
        return dataset, opt_rewards, opt_actions, num_actions, context_dim

    # Census and adult share the same pipeline; only the sampler, the file,
    # the number of actions and the cap on num_contexts differ.
    if data_type == 'census':
        sampler = sample_census_data
        data_file = 'USCensus1990.data.txt'
        num_actions = 9
        max_contexts = 150000
    else:
        if data_type != 'adult':
            warnings.warn(
                "Unknown data_type %r; falling back to 'adult'." % (data_type,),
                stacklevel=2)
        sampler = sample_adult_data
        data_file = 'adult.data'
        num_actions = 14
        max_contexts = 45222

    # min(cap, None) raises TypeError on Python 3, so the documented default
    # of None has to be resolved before capping.
    if num_contexts is None:
        num_contexts = max_contexts
    else:
        num_contexts = min(max_contexts, num_contexts)

    file_name = os.path.join(base_route, data_route, data_file)
    sampled_vals = sampler(file_name, num_contexts, shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals

    # Rows of the dataset are the context followed by the per-action rewards.
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]

    return dataset, opt_rewards, opt_actions, num_actions, context_dim
        
    


In [None]:
# Number of contexts requested from the dataset sampler.
num_contexts = 2000

# Dataset to run on. sample_data() handles 'mushroom' and 'census' explicitly
# and treats every other value as 'adult'; the other dataset names from the
# original benchmark (linear, wheel, jester, ...) are not available here.
data_type = 'mushroom'

# Create dataset
sampled_vals = sample_data(data_type, num_contexts)
dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

In [None]:
# Hyperparameters for each algorithm compared below.
# NOTE: tf.contrib (and with it HParams) is only available in TensorFlow 1.x.

# Used by the 'LinUCB' algorithm.
hparams_ucb = tf.contrib.training.HParams(
    num_actions=num_actions,
    context_dim=context_dim,
    alpha=10.0,
)

# Used by the 'LinFullPost' (LinTS) algorithm.
hparams_linear = tf.contrib.training.HParams(
    num_actions=num_actions,
    context_dim=context_dim,
    a0=6,
    b0=6,
    lambda_prior=0.25,
    initial_pulls=2,
)

# Used by the 'BBB' (PosteriorBNNSampling, 'Variational') algorithm.
hparams_bbb = tf.contrib.training.HParams(
    num_actions=num_actions,
    context_dim=context_dim,
    init_scale=0.3,
    activation=tf.nn.relu,
    layer_sizes=[50],
    batch_size=512,
    activate_decay=True,
    initial_lr=0.1,
    max_grad_norm=5.0,
    show_training=False,
    freq_summary=1000,
    buffer_s=-1,
    initial_pulls=2,
    optimizer='RMS',
    use_sigma_exp_transform=True,
    cleared_times_trained=10,
    initial_training_steps=100,
    noise_sigma=0.1,
    reset_lr=False,
    training_freq=50,
    training_epochs=100,
)

# Used by the 'MultitaskGP' (PosteriorBNNSampling, 'GP') algorithm.
hparams_gp = tf.contrib.training.HParams(
    num_actions=num_actions,
    num_outputs=num_actions,
    context_dim=context_dim,
    reset_lr=False,
    learn_embeddings=True,
    max_num_points=1000,
    show_training=False,
    freq_summary=1000,
    batch_size=512,
    keep_fixed_after_max_obs=True,
    training_freq=50,
    initial_pulls=2,
    training_epochs=100,
    lr=0.01,
    buffer_s=-1,
    initial_lr=0.001,
    lr_decay_rate=0.0,
    optimizer='RMS',
    task_latent_dim=5,
    activate_decay=False,
)

# Used by the 'BootRMS' (BootstrappedBNNSampling) algorithm.
hparams_rms = tf.contrib.training.HParams(
    num_actions=num_actions,
    context_dim=context_dim,
    init_scale=0.3,
    activation=tf.nn.relu,
    layer_sizes=[50],
    batch_size=512,
    activate_decay=True,
    initial_lr=0.1,
    max_grad_norm=5.0,
    show_training=False,
    freq_summary=1000,
    buffer_s=-1,
    initial_pulls=2,
    optimizer='RMS',
    reset_lr=True,
    lr_decay_rate=0.5,
    training_freq=50,
    training_epochs=100,
    p=0.95,
    q=3,
)


In [None]:
# Algorithms to compare. The first constructor argument is presumably the
# name later read back as `.name` for the plot legends.
# NOTE(review): BootstrappedBNNSampling is not imported in any cell visible in
# this notebook; import it before running or this cell raises NameError.
algos = [
    LinUcb('LinUCB', hparams_ucb, output),
    PosteriorBNNSampling('BBB', hparams_bbb, output, 'Variational'),
    LinTS('LinFullPost', hparams_linear),
    BootstrappedBNNSampling('BootRMS', hparams_rms, output),
    PosteriorBNNSampling('MultitaskGP', hparams_gp, output, 'GP'),
]

In [None]:
# Run every algorithm in `algos` on the sampled dataset.
results = run_contextual_bandit(
    context_dim,
    num_actions,
    dataset,
    algos,
)

In [None]:
 _, h_rewards = results

## Cumulative reward

Running total of the reward obtained by each algorithm over the rounds.

In [None]:
# One x position per recorded round, taken from the results themselves so the
# plot stays valid when the number of sampled contexts changes.
rounds = np.arange(h_rewards.shape[0])

fig, ax = plt.subplots(figsize=(12, 10))
for idx, algo in enumerate(algos):
    # Column idx of h_rewards belongs to algos[idx].
    ax.plot(rounds, np.cumsum(h_rewards[:, idx]), label=algo.name)
ax.legend(loc='best')
ax.set_xlabel('rounds')
ax.set_ylabel('cumulative reward')
plt.show()

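## Cumulative regret

Regret in a round is the expected optimal reward for that context (`opt_rewards`) minus the reward the algorithm actually obtained; the curves show its running total.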

In [None]:
# One x position per recorded round, taken from the results themselves so the
# plot stays valid when the number of sampled contexts changes.
rounds = np.arange(h_rewards.shape[0])

fig, ax = plt.subplots(figsize=(12, 10))
for idx, algo in enumerate(algos):
    # Per-round regret: optimal expected reward minus the reward obtained by
    # algos[idx] (column idx of h_rewards).
    regret = opt_rewards - h_rewards[:, idx]
    ax.plot(rounds, np.cumsum(regret), label=algo.name)
ax.legend(loc='best')
ax.set_xlabel('rounds')
ax.set_ylabel('cumulative Regret')
plt.show()