Commit

update code
karineML committed Jul 28, 2023
1 parent 7f6e849 commit 4edced0
Showing 5 changed files with 54 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -8,6 +8,7 @@ This paper was accepted at the Conference on Uncertainty in Artificial Intelligence

+ UAI 2023 link: https://proceedings.mlr.press/v216/karine23a.html

+ UAI 2023 poster: https://github.com/reml-lab/rl_jitai_simulation/tree/main/rl_jitai_simulation_poster.pdf

## Examples

36 changes: 36 additions & 0 deletions examples/README.md
@@ -0,0 +1,36 @@
# RL for JITAI optimization using simulated environments

This repository contains the official implementation for the paper: **Assessing the Impact of Context Inference Error and Partial Observability on RL Methods for Just-In-Time Adaptive Interventions**, Karine Karine, Predrag Klasnja, Susan A. Murphy, Benjamin M. Marlin, UAI 2023.

This paper was accepted at the Conference on Uncertainty in Artificial Intelligence, UAI 2023.

+ arXiv link: https://arxiv.org/abs/2305.09913

+ UAI 2023 link: https://proceedings.mlr.press/v216/karine23a.html

+ UAI 2023 poster: https://github.com/reml-lab/rl_jitai_simulation/tree/main/rl_jitai_simulation_poster.pdf

## Examples

See the [Examples](https://github.com/reml-lab/rl_jitai_simulation/tree/main/examples) directory for a list of examples that can be run locally or launched in Google Colab.

For example:

+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/reml-lab/rl_jitai_simulation/blob/main/examples/rl_jitai_simulation_quickstart.ipynb) RL for JITAI Quickstart

+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/reml-lab/rl_jitai_simulation/blob/main/examples/rl_jitai_simulation_experiments.ipynb) RL for JITAI Examples


## Citing this paper

```bibtex
@inproceedings{RLJITAI2023,
title = {Assessing the Impact of Context Inference Error and Partial Observability on RL Methods for Just-In-Time Adaptive Interventions},
author = {Karine, Karine and Klasnja, Predrag and Murphy, Susan A. and Marlin, Benjamin M.},
booktitle = {Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence},
pages = {1047--1057},
year = {2023},
volume = {216},
}
```

Empty file added rl_jitai_simulation/__init__.py
Empty file.
32 changes: 17 additions & 15 deletions rl_jitai_simulation/utils.py
@@ -28,29 +28,31 @@ def run_RL_loop_DQN(env, agent, n_episodes, b_train, b_plot=False, plot_title=''
         plt.ylim(y_lim); plt.xlabel('episode'); plt.ylabel('return'); plt.grid(); plt.show()
     return return_values
 
-def run_RL_loop_REINFORCE(env, agent, n_episodes, b_train, b_plot=False, plot_title='', y_lim=(-200,3500), color='C0'):
+def run_RL_loop_REINFORCE(env, agent, M, n_episodes, b_train, b_plot=False, plot_title='', y_lim=(-200,3500)):
     return_values = []
     for i in tqdm(range(n_episodes), desc="REINFORCE {} σ={} duration".format(env.chosen_obs_names_str, env.sigma)):
         if b_train: agent.init_grad()
-        obs = env.reset()
-        return_sample = 0
-        done = False
-        while not done:
-            action = agent.choose_action(obs)
-            obs_, reward, done, info = env.step(action)
-            agent.store_rewards(reward)
-            obs = obs_
-            return_sample += reward
-        if b_train: agent.compute_grad()
+        return_samples = []
+        for m in range(M):
+            obs = env.reset()
+            return_sample = 0
+            done = False
+            while not done:
+                action = agent.choose_action(obs)
+                obs_, reward, done, info = env.step(action)
+                agent.store_rewards(reward)
+                obs = obs_
+                return_sample += reward
+            return_samples.append(return_sample)
+            if b_train: agent.compute_grad()
         if b_train: agent.take_step(env.max_episode_length)
-        return_values.append(return_sample)
+        return_values.append(np.mean(return_samples))
         if b_plot:
             plt.figure(figsize=(3,2))
-            plt.plot(return_values, color=color)
+            plt.plot(return_values)
             if len(plot_title) < 1:
                 plot_title = ' (σ={})'.format(env.sigma)
-            plot_detail = 'train' if b_train else 'perf'
-            plt.title('REINFORCE {} {}{}'.format(env.chosen_obs_names_str, plot_detail, plot_title))
+            plt.title('REINFORCE {} learning{}'.format(env.chosen_obs_names_str, plot_title))
             plt.ylim(y_lim); plt.xlabel('episode'); plt.ylabel('return'); plt.grid(); plt.show()
     return return_values
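The change above adds an inner loop that samples `M` episodes per gradient update and records their mean return, rather than one episode per update. The refactored control flow can be sketched with minimal stand-in classes (the `ToyEnv` and `ToyAgent` stubs below are hypothetical placeholders for illustration only, not the repository's actual environment or agent):

```python
import numpy as np

class ToyEnv:
    """Stand-in environment: 5-step episodes, reward 1.0 per step."""
    max_episode_length = 5
    def reset(self):
        self.t = 0
        return 0.0
    def step(self, action):
        self.t += 1
        done = self.t >= self.max_episode_length
        return 0.0, 1.0, done, {}

class ToyAgent:
    """Stand-in agent exposing the hooks the updated loop assumes."""
    def init_grad(self): self.rewards = []
    def choose_action(self, obs): return 0
    def store_rewards(self, reward): self.rewards.append(reward)
    def compute_grad(self): pass           # would accumulate the policy gradient
    def take_step(self, max_len): pass     # would apply the averaged update

def run_RL_loop_REINFORCE(env, agent, M, n_episodes, b_train):
    # Mirrors the updated structure: M sampled episodes per gradient step,
    # with the mean of the M returns recorded per outer iteration.
    return_values = []
    for i in range(n_episodes):
        if b_train: agent.init_grad()
        return_samples = []
        for m in range(M):
            obs = env.reset()
            return_sample, done = 0.0, False
            while not done:
                obs, reward, done, info = env.step(agent.choose_action(obs))
                agent.store_rewards(reward)
                return_sample += reward
            return_samples.append(return_sample)
            if b_train: agent.compute_grad()
        if b_train: agent.take_step(env.max_episode_length)
        return_values.append(np.mean(return_samples))
    return return_values

returns = run_RL_loop_REINFORCE(ToyEnv(), ToyAgent(), M=3, n_episodes=4, b_train=True)
print(returns)  # four entries, each 5.0 (mean of 3 episodes of 5 unit rewards)
```

Averaging `M` Monte Carlo returns per update is a standard variance-reduction choice for REINFORCE-style gradient estimates; the single-sample behavior of the old code corresponds to `M=1`.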

Binary file added rl_jitai_simulation_poster.pdf
Binary file not shown.
