In [1]:
from seldonian.spec import createRLSpec
from seldonian.dataset import RLDataSet
from seldonian.utils.io_utils import load_pickle



In [2]:
# Load the episodes stored in the pickle file and wrap them in an RLDataSet
# for use when building the spec.
# NOTE(review): load_pickle presumably unpickles the file; unpickling can run
# arbitrary code, so only point this at files from a trusted source.
episodes_file = './bolus_simglucose_2000_episodes_322_actions.pkl'
episodes = load_pickle(episodes_file)
dataset = RLDataSet(episodes=episodes)

In [3]:
# Sanity check: display how many episodes were loaded.
len(episodes)

2000

In [4]:
import numpy as np
from itertools import product

# Candidate values for the two settings that make up an action.
# (CR / CF presumably stand for carb ratio / correction factor -- confirm
# against generate_data.py.)
CR_ACTIONS = np.arange(3, 30, 2)
CF_ACTIONS = np.arange(5, 50, 2)

# Every (CR, CF) combination is one discrete action; the position of a pair
# in this list serves as its action index.
ACTIONS = list(product(CR_ACTIONS, CF_ACTIONS))

# The Cartesian product must contain exactly |CR| * |CF| pairs.
assert len(ACTIONS) == CR_ACTIONS.size * CF_ACTIONS.size
len(ACTIONS)

322

In [5]:
from seldonian.RL.Agents.Policies.Softmax import DiscreteSoftmax
from seldonian.RL.Env_Description.Env_Description import Env_Description
from seldonian.RL.Env_Description.Spaces import Discrete_Space

In [6]:
# Describe the environment: the observation space covers indices
# 0..NUM_STATES-1 (a single state here) and the action space covers one index
# per entry in ACTIONS.
NUM_STATES = 1
observation_space = Discrete_Space(0, NUM_STATES - 1)

NUM_ACTIONS = len(ACTIONS)    # NUM_SETTINGS in generate_data.py
action_space = Discrete_Space(0, NUM_ACTIONS - 1)

env_description = Env_Description(observation_space, action_space)

# Softmax policy over the discrete actions, with no extra hyperparameters.
policy = DiscreteSoftmax(hyperparam_and_setting_dict={}, env_description=env_description)
env_kwargs = {'gamma': 1.0}   # presumably the discount factor -- TODO confirm
save_dir = '.'

# Behavioral constraint: J_pi_new (presumably the expected return of the new
# policy -- TODO confirm) must be at least -5.0; the bound was determined by
# looking at certain P choices.
# NOTE(review): an earlier revision of this comment quoted -10; confirm that
# -5.0 is the intended threshold.
constraint_strs = ['J_pi_new >= -5.0']
deltas = [0.05]   # one delta per constraint string

# Build the RL spec and save it into save_dir (the run log reports spec.pkl)
# so that it can be reloaded later.
spec = createRLSpec(
    dataset=dataset,
    policy=policy,
    constraint_strs=constraint_strs,
    deltas=deltas,
    env_kwargs=env_kwargs,
    save=True,
    save_dir=save_dir,   # use the variable defined above instead of a duplicate literal
    verbose=True)

Saved .\spec.pkl



In [7]:
from seldonian.seldonian_algorithm import SeldonianAlgorithm
from seldonian.utils.io_utils import load_pickle

In [8]:
# Load the saved spec from disk and override a few optimization
# hyperparameters before running.
specfile = './spec.pkl'
spec = load_pickle(specfile)
spec.optimization_hyperparams.update({
    'num_iters': 10,
    'alpha_theta': 0.01,
    'alpha_lamb': 0.01,
})

# Run the Seldonian algorithm and report whether the safety test passed.
SA = SeldonianAlgorithm(spec)
passed_safety, solution = SA.run()
if not passed_safety:
    print("No Solution Found")
else:
    print("Passed safety test!")
    print("The solution found is:")
    print(solution)

Safety dataset has 1200 episodes
Candidate dataset has 800 episodes
Iteration 0
Passed safety test!
The solution found is:
[[-0.20138858 -0.20157812 -0.20159582 -0.20135109 -0.20141401 -0.20181047
  -0.20149462 -0.20138177 -0.20180519 -0.20186632 -0.20154517 -0.20214061
  -0.20221531 -0.20152668  0.20124616 -0.20142525  0.20111339  0.20081337
  -0.20290821 -0.20214356 -0.2015822  -0.20157842 -0.2027144  -0.20162168
  -0.2013908   0.2015394   0.19995398 -0.20149671 -0.20144434 -0.20147368
  -0.20141022 -0.20148044 -0.20148253  0.20152648  0.20155766  0.20126854
   0.20140847 -0.20305107  0.2006453   0.20121863 -0.20145545 -0.20221852
  -0.20146276 -0.20140854  0.20142174 -0.20172684  0.20066607  0.20153504
  -0.20192574 -0.20154058 -0.20139563  0.19356412 -0.2019346   0.20158395
  -0.20163229  0.20151856 -0.20207836 -0.20143443 -0.20164711 -0.20140257
  -0.20142566 -0.20145171 -0.20162942  0.19835375 -0.20179402 -0.20199241
  -0.20140095 -0.2014706  -0.20145738 -0.20145844 -0.20250574  

In [9]:
# Report the primary objective of the returned solution, evaluated on the
# 'safety_test' branch. The header is printed first so any messages emitted
# during evaluation appear after it.
print("Primary objective evaluated on safety dataset:")
print(
    SA.evaluate_primary_objective(
        theta=solution,
        branch='safety_test',
    )
)

Primary objective evaluated on safety dataset:
2.4126129772553906


  File 'C:\Users\faria\anaconda3\lib\site-packages\seldonian\seldonian_algorithm.py', line 271
    elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
