In [None]:
%run algorithms.ipynb

In [None]:
import ctypes
from copy import deepcopy
from functools import partial
from itertools import product
import multiprocessing
from multiprocessing.managers import BaseManager
import numpy as np
import os
import sys
import time

In [None]:
class SharedState(object):
  """Holds the worlds and the per-simulation result matrices for one experiment.

  The regret and reward matrices have shape (simulation_count, horizon), where both
  sizes are module-level globals read at construction time. Each matrix is a numpy
  view over a multiprocessing.Array of C doubles.

  Args:
    slot_count: Passed to World. If this or level_count_per_slot is None, the fixed
      3x3 world with hard-coded input marginals is used instead.
    level_count_per_slot: Passed to World (see slot_count).
    additive: Passed to World when worlds are generated per simulation; unused for
      the fixed world.
  """

  def __init__(self, slot_count=None, level_count_per_slot=None, additive=None):
    result_shape = (simulation_count, horizon)
    cell_count = simulation_count * horizon

    # One row per simulation, one column per period.
    self.regret_shared_array_base = multiprocessing.Array(ctypes.c_double, cell_count)
    regret_flat = np.ctypeslib.as_array(self.regret_shared_array_base.get_obj())
    self.regret = regret_flat.reshape(result_shape)

    self.reward_shared_array_base = multiprocessing.Array(ctypes.c_double, cell_count)
    reward_flat = np.ctypeslib.as_array(self.reward_shared_array_base.get_obj())
    self.reward = reward_flat.reshape(result_shape)

    if slot_count is not None and level_count_per_slot is not None:
      # A separately constructed world for every simulation.
      self.worlds = [
        World(slot_count=slot_count, level_count_per_slot=level_count_per_slot, additive=additive)
        for _ in range(simulation_count)
      ]
      return

    # Fixed world: a single World object referenced from every list entry.
    fixed_marginals = np.array([[0.08, 0.10, 0.09], [0.11, 0.11, 0.06], [0.05, 0.16, 0.07]])
    fixed_world = World(slot_count=3, level_count_per_slot=3, input_marginals=fixed_marginals)
    self.worlds = [fixed_world] * simulation_count

  def get_world(self, s):
    """Return the world assigned to simulation index s."""
    return self.worlds[s]

  def update_regret(self, s, regret_per_period):
    """Overwrite row s of the regret matrix with regret_per_period."""
    self.regret[s] = regret_per_period[:]

  def get_regret(self):
    """Return the (simulation_count, horizon) regret matrix."""
    return self.regret

  def update_reward(self, s, reward_per_period):
    """Overwrite row s of the reward matrix with reward_per_period."""
    self.reward[s] = reward_per_period[:]

  def get_reward(self):
    """Return the (simulation_count, horizon) reward matrix."""
    return self.reward
   
# Register SharedState under the type id 'SharedState' so that a started BaseManager
# exposes manager.SharedState(...), which builds the object in the manager's server
# process and returns a proxy to it.
BaseManager.register('SharedState', SharedState)
 

def single_simulation(s, agent_type, shared_state):
  """Run one simulation with the requested agent and record its per-period results.

  Reads the module-level globals `horizon` and, for the logistic-regression agents,
  `regularization_parameter`.

  Args:
    s: Simulation index; selects the world and the result row in shared_state.
    agent_type: Name of the agent class to run (one of the strings handled below).
    shared_state: SharedState instance, or a manager proxy to one, providing
      get_world, update_regret and update_reward.

  Returns:
    None. For an unsupported agent_type a message is printed and nothing is
    recorded, so row s of the shared matrices keeps whatever it held before.
  """
  # NOTE(review): pool workers forked from one parent may start with identical numpy
  # RNG state; if the agents draw from np.random, consider seeding per simulation.
  # Confirm against algorithms.ipynb.
  world = shared_state.get_world(s)

  # An if-chain (rather than a lookup table) keeps class names resolved lazily, so
  # only the requested agent class has to be defined.
  if agent_type == "IndependentBernoulliArmsTSAgent":
    agent = IndependentBernoulliArmsTSAgent(world = world, horizon = horizon)
  elif agent_type == "MarginalPosteriorTSAgent":
    agent = MarginalPosteriorTSAgent(world = world, horizon = horizon)
  elif agent_type == "MarginalPosteriorUCBAgent":
    # Previously built MarginalPosteriorTSAgent here, which stored Thompson-sampling
    # results under the UCB label.
    agent = MarginalPosteriorUCBAgent(world = world, horizon = horizon)
  elif agent_type == "LogisticRegressionTSAgent":
    agent = LogisticRegressionTSAgent(world = world, horizon = horizon,
                                      regularization_parameter = regularization_parameter)
  elif agent_type == "LogisticRegressionUCBAgent":
    agent = LogisticRegressionUCBAgent(world = world, horizon = horizon,
                                       regularization_parameter = regularization_parameter)
  else:
    print("This agent_type is not supported.")
    return

  agent.run()
  shared_state.update_regret(s, agent.regret_per_period)
  shared_state.update_reward(s, agent.reward_per_period)


def run(agent_types, output_prefix, slot_count=None, level_count_per_slot=None, additive=None):
  """Run every agent type over all simulations in parallel and save summary statistics.

  Reads the module-level globals `seed`, `core_count`, `simulation_count`, `horizon`
  and `regularization_parameter`.

  Args:
    agent_types: Iterable of agent type names understood by single_simulation.
    output_prefix: Prefix for the result file names written under Results/.
    slot_count: Forwarded to SharedState (None selects its fixed world).
    level_count_per_slot: Forwarded to SharedState (None selects its fixed world).
    additive: Forwarded to SharedState, which passes it on to World.

  Side effects:
    For each agent type, writes <name>_Regret.npy (mean over simulations),
    <name>_RegretVar.npy (variance over simulations) and <name>_ET.npy (elapsed
    seconds per simulation) under Results/, and prints the elapsed time.
  """
  if (seed > 0):
    np.random.seed(seed)

  manager = BaseManager()
  manager.start()
  try:
    pool = multiprocessing.Pool(core_count)
    try:
      # `additive` was previously dropped here, so the requested world type never
      # reached World.
      shared_state = manager.SharedState(slot_count=slot_count,
                                         level_count_per_slot=level_count_per_slot,
                                         additive=additive)

      for agent_type in agent_types:
        t0 = time.time()
        func = partial(single_simulation, agent_type = agent_type, shared_state = shared_state)
        pool.map(func, range(simulation_count))
        t1 = time.time()
        print("{} Elapsed Time: {}".format(agent_type, (t1 - t0)))

        parameters = ""
        if (agent_type == "LogisticRegressionTSAgent" or agent_type == "LogisticRegressionUCBAgent"):
          parameters = "_R{}".format(regularization_parameter)

        if not os.path.exists("Results/"):
          os.makedirs("Results/")
        filename = "Results/{}_{}_H{}_S{}{}".format(output_prefix, agent_type, horizon,
                                                     simulation_count, parameters)

        # Each proxy call copies the whole matrix out of the manager process, so
        # fetch it once and reuse it for both statistics.
        regret = shared_state.get_regret()
        np.save(filename + "_Regret.npy", np.mean(regret, axis = 0))
        np.save(filename + "_RegretVar.npy", np.var(regret, axis = 0))
        np.save(filename + "_ET.npy", (t1-t0) / simulation_count)
    finally:
      # pool.map is synchronous, so no task is pending on the success path; on an
      # error, terminate() avoids waiting for the remaining simulations.
      pool.terminate()
      pool.join()
  finally:
    # Stop the manager's server process; otherwise every call to run() leaves one behind.
    manager.shutdown()

In [None]:
# Experiment configuration. These module-level names are read as globals by
# SharedState, single_simulation and run.
horizon = 50000
simulation_count = 1000
regularization_parameter = 10
seed = 1704
core_count = multiprocessing.cpu_count()

agent_types = ["IndependentBernoulliArmsTSAgent", "MarginalPosteriorTSAgent", "LogisticRegressionTSAgent"]
slot_count_list = [2, 3, 4]
level_count_per_slot_list = [2, 3, 4, 5]
additive_world_type_list = [True, False]

# Sweep every combination of slot count, level count and world type.
for slot_count, level_count_per_slot, additive_world_type in product(slot_count_list, level_count_per_slot_list,
                                                                     additive_world_type_list):
  output_prefix = "SLOT{}LEVEL{}_ADD{}".format(slot_count, level_count_per_slot, additive_world_type)
  print(output_prefix)
  # Pass the world type through to run(); without it the ADDTrue and ADDFalse runs
  # are configured identically even though their output prefixes differ.
  run(agent_types = agent_types, output_prefix=output_prefix,
      slot_count=slot_count, level_count_per_slot=level_count_per_slot,
      additive=additive_world_type)