<a href="https://colab.research.google.com/github/q8888620002/Contextual_bandit/blob/master/LMRL_2020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img height="45px" src="https://colab.research.google.com/img/colab_favicon.ico" align="left" hspace="10px" vspace="0px">

<h1>Welcome to Colaboratory!</h1>

Colaboratory is a free Jupyter notebook environment that requires no setup and runs entirely in the cloud.

With Colaboratory you can write and execute code, save and share your analyses, and access powerful computing resources, all for free from your browser.

In [2]:
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Simple example of contextual bandits simulation.

Code corresponding to:
Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks
for Thompson Sampling, by Carlos Riquelme, George Tucker, and Jasper Snoek.
https://arxiv.org/abs/1802.09127
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
from absl import app, flags
import os
import tensorflow as tf
import sys
import pickle as pkl
import numpy as np
import pandas as pd
from google.colab import drive


# Mount Google Drive, then make the project's `bandits` package (stored on
# the Drive) importable for the imports that follow.
drive.mount('/content/drive')
sys.path.append('/content/drive/My Drive/contextual_bandit_onco/deep_contextual_bandits')


from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling
from bandits.core.contextual_bandit import run_contextual_bandit
from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling
from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling
from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling
from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling
from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling
from bandits.data.synthetic_data_sampler import sample_linear_data
from bandits.data.synthetic_data_sampler import sample_sparse_linear_data
from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data
from bandits.algorithms.uniform_sampling import UniformSampling

############################################################################################################
############################################################################################################

# NOTE: the action index needs to be modified (presumably inside ClinicalGuideLine, imported below).

from bandits.algorithms.clinical_guideline import ClinicalGuideLine

############################################################################################################
############################################################################################################



# File routes to the data files: every dataset flag below defaults to
# <current working directory>/<data_route>/<file name>.
base_route = os.getcwd()
data_route = 'contextual_bandits/datasets'

FLAGS = flags.FLAGS
FLAGS.set_default('alsologtostderr', True)
flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory to save output')

# One entry per dataset flag: (flag name, file name, help text).
_DATASET_FLAGS = (
    ('mushroom_data', 'mushroom.data',
     'Directory where Mushroom data is stored.'),
    ('financial_data', 'raw_stock_contexts',
     'Directory where Financial data is stored.'),
    ('jester_data', 'jester_data_40jokes_19181users.npy',
     'Directory where Jester data is stored.'),
    ('statlog_data', 'shuttle.trn',
     'Directory where Statlog data is stored.'),
    ('adult_data', 'adult.full',
     'Directory where Adult data is stored.'),
    ('covertype_data', 'covtype.data',
     'Directory where Covertype data is stored.'),
    ('census_data', 'USCensus1990.data.txt',
     'Directory where Census data is stored.'),
)

for _flag_name, _file_name, _help_text in _DATASET_FLAGS:
  flags.DEFINE_string(
      _flag_name,
      os.path.join(base_route, data_route, _file_name),
      _help_text)


Mounted at /content/drive


ModuleNotFoundError: ignored

In [None]:
def sample_data(num_contexts=None):
  """Loads the clinical CSV and turns it into a contextual bandit problem.

  The CSV path, the number of actions and the context dimension are
  hard-coded below. The rows are passed to `sample_cancer_data` with row and
  column shuffling disabled.

  Args:
    num_contexts: Number of contexts (rows) to sample. Values above 896 are
      capped at 896. When None, the cap of 896 is used.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
    opt_rewards: Vector with the highest available reward for each context.
    opt_actions: Vector with the index of the best action for each context.
    num_actions: Number of available actions.
    context_dim: Dimension of each context.
  """
  # Change the file name here to run on a different dataset.
  data = pd.read_csv("/content/drive/My Drive/contextual_bandit_onco/clinic_percentile.csv").values

  # context_dim has to be changed (e.g. to 20) when the CSV holds a different
  # number of input features; context_dim + num_actions must equal the number
  # of columns, which sample_cancer_data asserts.
  num_actions = 7
  context_dim = 7

  # Upper bound on the number of contexts used. The default argument value
  # (None) would make min() raise a TypeError, so it falls back to the bound.
  max_contexts = 896
  if num_contexts is None:
    num_contexts = max_contexts
  else:
    num_contexts = min(max_contexts, num_contexts)

  dataset, opt_cancer = sample_cancer_data(data, context_dim,
                                           num_actions, num_contexts,
                                           shuffle_rows=False,
                                           shuffle_cols=False)
  opt_rewards, opt_actions = opt_cancer

  return dataset, opt_rewards, opt_actions, num_actions, context_dim



def sample_cancer_data(dataset, context_dim, num_actions, num_contexts,
                       shuffle_rows=False, shuffle_cols=False,
                       max_contexts=896):
  """Builds a bandit problem from a (context | rewards) matrix.

  Each row of `dataset` holds `context_dim` context values followed by
  `num_actions` reward values, one per action. The caller's array is never
  modified: any shuffling is applied to a copy.

  Args:
    dataset: 2-D NumPy array with context_dim + num_actions columns.
    context_dim: Number of leading columns that form the context.
    num_actions: Number of trailing columns that hold the per-action rewards.
    num_contexts: Number of rows to keep (after optional shuffling). None
      keeps as many rows as `max_contexts` allows.
    shuffle_rows: If True, rows are shuffled before the first rows are taken.
    shuffle_cols: If True, all columns (context and reward columns alike) are
      randomly permuted.
    max_contexts: Upper bound on the number of rows kept. Defaults to 896,
      the limit that used to be hard-coded. None disables the bound.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_k).
    opt_vals: Tuple (opt_rewards, opt_actions) with, for each row, the largest
      reward and the index of the action that attains it.

  Raises:
    AssertionError: If context_dim + num_actions differs from the number of
      columns in `dataset`.
  """
  print(dataset.shape)

  if shuffle_cols:
    # Fancy indexing already yields a new array, so the input stays intact.
    dataset = dataset[:, np.random.permutation(dataset.shape[1])]
  if shuffle_rows:
    # np.random.shuffle reorders in place; work on a copy so the caller's
    # array is not silently shuffled as a side effect.
    dataset = dataset.copy()
    np.random.shuffle(dataset)

  # Combine the requested size with the optional upper bound.
  limit = num_contexts
  if max_contexts is not None:
    limit = max_contexts if limit is None else min(max_contexts, limit)
  dataset = dataset[:limit, :]

  assert context_dim + num_actions == dataset.shape[1], 'Wrong data dimensions.'

  # Best action per row, and the reward it yields, without a Python loop.
  rewards = dataset[:, context_dim:]
  opt_actions = np.argmax(rewards, axis=1)
  opt_rewards = rewards[np.arange(rewards.shape[0]), opt_actions]

  return dataset, (opt_rewards, opt_actions)



In [None]:
def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, actions):
  """Prints a ranked performance summary and returns it as two DataFrames.

  Args:
    algos: Sequence of algorithm objects; only their `name` attribute is read.
    opt_rewards: Optimal reward for each context; summed for the report.
    opt_actions: Optimal action for each context; used for the frequency
      report.
    h_rewards: 2-D array indexed as [step, algorithm]; column j holds the
      rewards obtained by algos[j].
    t_init: Start time as returned by time.time(), used to report the
      elapsed time.
    actions: Sequence with one entry per step; entry[j] is the action chosen
      by algos[j] at that step.

  Returns:
    reward_table: One-row DataFrame with one column per algorithm holding its
      total reward.
    action_dis: DataFrame with one row per step and one column per algorithm
      holding the chosen actions.
    Columns of both frames are ordered by decreasing total reward.
  """
  print('---------------------------------------------------')
  print('---------------------------------------------------')
  print(' bandit completed after {} seconds.'.format(time.time() - t_init))
  print('---------------------------------------------------')

  # Size the action table from the data instead of a fixed 896 rows, so runs
  # with any number of steps can be recorded.
  action_dis = pd.DataFrame(index=range(len(actions)))
  reward_table = pd.DataFrame(index=range(0, 1))

  print(len(algos))
  performance_pairs = [
      (algo.name, np.sum(h_rewards[:, j]), [step[j] for step in actions])
      for j, algo in enumerate(algos)
  ]

  # Rank algorithms from highest to lowest total reward.
  performance_pairs = sorted(performance_pairs,
                             key=lambda elt: elt[1],
                             reverse=True)
  for i, (name, reward, algo_actions) in enumerate(performance_pairs):
    print('{:3}) {:20}| \t \t total reward = {:10}.'.format(i, name, reward))

    # Record the total reward and the per-step actions of this algorithm.
    reward_table[name] = reward
    action_dis[name] = algo_actions

  print('---------------------------------------------------')
  print('Optimal total reward = {}.'.format(np.sum(opt_rewards)))
  print('Frequency of optimal actions (action, frequency):')
  opt_action_list = list(opt_actions)
  print([[elt, opt_action_list.count(elt)] for elt in set(opt_actions)])
  print('---------------------------------------------------')
  print('---------------------------------------------------')

  return reward_table, action_dis
  

In [None]:
def main(_):
  """Runs all bandit algorithms on the sampled dataset and saves the results.

  Samples the dataset, builds the hyperparameters and algorithm instances,
  runs the contextual bandit simulation, prints a summary and writes the
  reward and action tables to CSV files on the mounted Drive.

  Args:
    _: Unused positional argument supplied by the app runner.
  """
  # Problem parameters.
  num_contexts = 2000

  # Create dataset.
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
      num_contexts)

  make_hparams = tf.contrib.training.HParams

  def network_hparams(**extra):
    """Returns HParams with the settings shared by the neural algorithms.

    The shared settings come first and `extra` is appended after them. A new
    dict (and a new `layer_sizes` list) is built on every call so that no
    mutable value is shared between the returned objects.
    """
    settings = dict(num_actions=num_actions,
                    context_dim=context_dim,
                    init_scale=0.3,
                    activation=tf.nn.relu,
                    layer_sizes=[50],
                    batch_size=512,
                    activate_decay=True,
                    initial_lr=0.1,
                    max_grad_norm=5.0,
                    show_training=False,
                    freq_summary=1000,
                    buffer_s=-1,
                    initial_pulls=2)
    settings.update(extra)
    return make_hparams(**settings)

  # Define hyperparameters.
  hparams = make_hparams(num_actions=num_actions)

  hparams_linear = make_hparams(num_actions=num_actions,
                                context_dim=context_dim,
                                a0=6,
                                b0=6,
                                lambda_prior=0.25,
                                initial_pulls=2)

  hparams_rms = network_hparams(optimizer='RMS',
                                reset_lr=True,
                                lr_decay_rate=0.5,
                                training_freq=50,
                                training_epochs=100,
                                p=0.95,
                                q=3)

  hparams_dropout = network_hparams(optimizer='RMS',
                                    reset_lr=True,
                                    lr_decay_rate=0.5,
                                    training_freq=50,
                                    training_epochs=100,
                                    use_dropout=True,
                                    keep_prob=0.80)

  hparams_bbb = network_hparams(optimizer='RMS',
                                use_sigma_exp_transform=True,
                                cleared_times_trained=10,
                                initial_training_steps=100,
                                noise_sigma=0.1,
                                reset_lr=False,
                                training_freq=50,
                                training_epochs=100)

  hparams_nlinear = network_hparams(reset_lr=True,
                                    lr_decay_rate=0.5,
                                    training_freq=1,
                                    training_freq_network=50,
                                    training_epochs=100,
                                    a0=6,
                                    b0=6,
                                    lambda_prior=0.25)

  # Identical to hparams_nlinear except for training_freq.
  hparams_nlinear2 = network_hparams(reset_lr=True,
                                     lr_decay_rate=0.5,
                                     training_freq=10,
                                     training_freq_network=50,
                                     training_epochs=100,
                                     a0=6,
                                     b0=6,
                                     lambda_prior=0.25)

  hparams_pnoise = network_hparams(optimizer='RMS',
                                   reset_lr=True,
                                   lr_decay_rate=0.5,
                                   training_freq=50,
                                   training_epochs=100,
                                   noise_std=0.05,
                                   eps=0.1,
                                   d_samples=300)

  hparams_alpha_div = network_hparams(optimizer='RMS',
                                      use_sigma_exp_transform=True,
                                      cleared_times_trained=10,
                                      initial_training_steps=100,
                                      noise_sigma=0.1,
                                      reset_lr=False,
                                      training_freq=50,
                                      training_epochs=100,
                                      alpha=1.0,
                                      k=20,
                                      prior_variance=0.1)

  # The GP settings do not share the common network settings, so they are
  # spelled out in full.
  hparams_gp = make_hparams(num_actions=num_actions,
                            num_outputs=num_actions,
                            context_dim=context_dim,
                            reset_lr=False,
                            learn_embeddings=True,
                            max_num_points=1000,
                            show_training=False,
                            freq_summary=1000,
                            batch_size=512,
                            keep_fixed_after_max_obs=True,
                            training_freq=50,
                            initial_pulls=2,
                            training_epochs=100,
                            lr=0.01,
                            buffer_s=-1,
                            initial_lr=0.001,
                            lr_decay_rate=0.0,
                            optimizer='RMS',
                            task_latent_dim=5,
                            activate_decay=False)

  # Set guideline_only to True if using only the guideline data.
  hparams_clinical = make_hparams(num_actions=num_actions,
                                  guideline_only=True)

  # Algorithms to compare; column j of the result arrays belongs to algos[j].
  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      # RMS net.
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      # Dropout.
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      # Stochastic variational inference.
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      # Neural linear.
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      # Bayesian linear.
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      # Bootstrapped network.
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
      # Comment out ClinicalGuideLine if the clinical guideline is not among
      # the input features, e.g. df_percentile.csv, df_rank.csv and
      # df_base.csv.
      ClinicalGuideLine('Clinical Guideline ', hparams_clinical),
  ]

  # Run contextual bandit problem.
  t_init = time.time()
  h_actions, h_rewards = run_contextual_bandit(
      context_dim, num_actions, dataset, algos)

  # Display results.
  reward_table, action_table = display_results(
      algos, opt_rewards, opt_actions, h_rewards, t_init, h_actions)

  # Store the tables in the action folder.
  reward_table.to_csv("/content/drive/My Drive/contextual_bandit_onco/action_folder/reward_clinic_only_percentile_1.csv")
  action_table.to_csv("/content/drive/My Drive/contextual_bandit_onco/action_folder/action_clinic_only_percentile_1.csv")
  

if __name__ == "__main__":
  # Fix the NumPy and TensorFlow seeds before anything is sampled.
  np.random.seed(5225)
  tf.set_random_seed(5225)

  # Hand control to the TensorFlow app runner, which calls main.
  tf.app.run(main)