-
Notifications
You must be signed in to change notification settings - Fork 6
/
simglucose_custom_fixedarea_random_agent.py
69 lines (59 loc) · 2.68 KB
/
simglucose_custom_fixedarea_random_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from seldonian.RL.Agents.Agent import *
from seldonian.RL.Agents.Function_Approximators.Table import *
from seldonian.RL.Agents.Policies.Softmax import *
from seldonian.RL.Agents.Policies.SimglucosePolicyFixedArea import (
SigmoidPolicyFixedArea,
)
class SimglucoseFixedAreaAgent(Agent):
    """Agent for the simglucose problem that samples (CR, CF) actions at random.

    All parameter handling is delegated to a ``SigmoidPolicyFixedArea``
    instance stored on ``self.policy``.
    """

    def __init__(
        self,
        bb_crmin,
        bb_crmax,
        bb_cfmin,
        bb_cfmax,
        cr_shrink_factor,
        cf_shrink_factor,
    ):
        """
        Build the agent used for the simglucose problem studied in this
        example: https://seldonian.cs.umass.edu/Tutorials/examples/diabetes/

        The arguments are forwarded, unchanged and in this order, to
        ``SigmoidPolicyFixedArea``.

        :param bb_crmin: The bounding box minimum value in CR space.
        :type bb_crmin: float
        :param bb_crmax: The bounding box maximum value in CR space.
        :type bb_crmax: float
        :param bb_cfmin: The bounding box minimum value in CF space.
        :type bb_cfmin: float
        :param bb_cfmax: The bounding box maximum value in CF space.
        :type bb_cfmax: float
        :param cr_shrink_factor: How much to shrink the CR size by
        :param cf_shrink_factor: How much to shrink the CF size by
        """
        self.policy = SigmoidPolicyFixedArea(
            bb_crmin,
            bb_crmax,
            bb_cfmin,
            bb_cfmax,
            cr_shrink_factor,
            cf_shrink_factor,
        )

    def choose_action(self, obs):
        """Sample a (CR, CF) pair from two independent uniform distributions.

        The sampling bounds come from passing the policy's current
        parameters (theta) through ``self.policy.theta2crcf``. The
        observation is not consulted.

        :param obs: The current observation of the agent,
            type depends on environment (unused here)

        :return: tuple ``(cr, cf)`` of the sampled values
        """
        weights = self.policy.get_params()
        cr_low, cr_high, cf_low, cf_high = self.policy.theta2crcf(weights)
        # CR is drawn before CF; keep this order so that a seeded
        # numpy RNG yields the same sequence of actions.
        return (
            np.random.uniform(cr_low, cr_high),
            np.random.uniform(cf_low, cf_high),
        )

    def get_prob_this_action(self, observation, action):
        """Return the probability of taking ``action`` given ``observation``.

        The action space is continuous, so every individual action has
        probability 0; both arguments are ignored.

        :param observation: The current observation of the agent (unused)
        :param action: The action whose probability is requested (unused)

        :return: always 0
        """
        return 0

    def update(self, observation, next_observation, reward, terminated):
        """
        Do nothing. This method is a no-op, but it must be implemented.

        :param observation: The current observation of the agent,
            type depends on environment.
        :param next_observation: The observation of the agent after
            an action is taken
        :param reward: The reward for taking the action
        :param terminated: Whether next_observation is the
            terminal observation
        :type terminated: bool
        """

    def set_new_params(self, new_params):
        """Set the parameters of the agent by handing them to its policy.

        :param new_params: array of weights
        """
        self.policy.set_new_params(new_params)