In [9]:
import numpy as np
import gym
from gym_CTgraph import CTgraph_env
from gym_CTgraph.CTgraph_plot import CTgraph_plot
from gym_CTgraph.CTgraph_conf import CTgraph_conf
from gym_CTgraph.CTgraph_images import CTgraph_images
import json

from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [10]:
def printout(p_obs, p_reward, p_act, p_done, p_info, p_counter):
    """Report the navigation variables of a single step on standard output.

    One labelled line is printed per variable, in this order: the action that
    was fed, the step counter, the observation, the reward, the done flag and
    the info value.

    Args:
        p_obs: observation to display.
        p_reward: reward to display.
        p_act: action that was fed for this step.
        p_done: episode-termination flag.
        p_info: info value to display.
        p_counter: step index.

    Returns:
        None.
    """
    # (label, value) pairs in display order; each label is passed to print()
    # as a separate argument, so print() inserts one extra space after it.
    report = (
        ("Feeding action: ", p_act),
        ("Step:", p_counter),
        ("Observation: ", p_obs),
        ("Reward: ", p_reward),
        ("Done: ", p_done),
        ("Info: ", p_info),
    )
    for label, value in report:
        print(label, value)

In [11]:
# Read the experiment parameters from the JSON configuration file and echo
# them, pretty-printed, so the run records the exact settings used.
configuration = CTgraph_conf("graph.json")
conf_data = configuration.getParameters()
print(json.dumps(conf_data, indent=3))

# Create the environment registered under the 'CTgraph-v0' id.
env = gym.make('CTgraph-v0')

# When True, the interactive loop further down prints every step via printout().
print_results = True

# Create the image dataset object from the configuration and initialise the
# environment with it; the value returned by init() is used as the first
# observation.
imageDataset = CTgraph_images(conf_data)
observation = env.init(conf_data, imageDataset)

---------------------------------------------------
             The CT-graph environments             
---------------------------------------------------
Reading configuration parameters from  graph.json
{
   "general_seed": 3,
   "graph_shape": {
      "depth": 2,
      "branching_factor": 2,
      "wait_prob": 0.5
   },
   "reward": {
      "high_reward_value": 1.0,
      "crash_reward_value": -1.0,
      "stochastic_sampling": false,
      "reward_std": 0.1,
      "min_static_reward_episodes": 30,
      "max_static_reward_episodes": 70,
      "reward_distribution": "needle_in_haystack"
   },
   "observations": {
      "MDP_decision_s": true,
      "MDP_wait_s": false,
      "wait_states": [
         2,
         100
      ],
      "decision_states": [
         101,
         103
      ],
      "graph_ends": [
         104,
         107
      ]
   },
   "image_dataset": {
      "seed": 1,
      "1D": false,
      "nr_of_images": 200,
      "noise_on_images_on_read": 99,
      "small_

In [12]:
# Draw one branch choice per level of the graph (depth entries).
# np.random.randint excludes its upper bound, so the bound must be
# branching_factor + 1 for the last branch to be selectable; a bound of
# branching_factor always yields 1 when branching_factor is 2 and raises
# ValueError when it is 1.
# NOTE(review): assumes branch actions are numbered 1..branching_factor, with 0
# not being a branch choice -- confirm against the CT-graph action encoding.
# randint already returns an ndarray, so no extra np.array() wrapper is needed.
high_reward_path = np.random.randint(
    1,
    conf_data['graph_shape']['branching_factor'] + 1,
    size=conf_data['graph_shape']['depth'],
)
env.set_high_reward_path(high_reward_path)

print("*--- Testing script ----*")
print("We set the high reward path to: ", env.get_high_reward_path() )
print("*--- Running test ------*")

# Initial values of the per-step variables, reported as step 0 below and then
# overwritten by the interactive loop.
action = 0
counter = 0
reward = 0
done = False
info = {'state': 'Home'}

# Show the starting state (the observation returned when the environment was
# initialised) before any action is taken.
printout(observation, reward, action, done, info, counter)

*--- Testing script ----*
We set the high reward path to:  [1 1]
*--- Running test ------*
Feeding action:  0
Step: 0
Observation:  [[  0   0   6  18  76 113 102 132 116  14   0   0]
 [  0   8  48  30  78 101 139 116 126 123  24   0]
 [  6  47  57  38  71  66  66 109 133 157 167  29]
 [ 64  58  65  32  59  81  32  63 201 172 169 157]
 [ 91  48  81  14  35  62  26  32 105 221 174 183]
 [158  56  60  22  24  58  72  63  68 108 193 216]
 [131 146 106  57  78  66  75  47  62  83 141 197]
 [118 147 135  89  94  83  82  89  75  85  74  98]
 [102 148 110  96 104 124 121 133 104  78  56  49]
 [ 23 127 102  70  40  97 128 172 174 134  25   3]
 [  0  17  73  45  55  82 133 162 181 160  17   0]
 [  0   0   7  41  73 100 104 154 177  30   0   0]]
Reward:  0
Done:  False
Info:  {'state': 'Home'}


In [13]:
# Interactive episode: read one action per step from the user and feed it to
# the environment until the environment reports the episode as done.
while not done:
    try:
        action = int(input("Action: "))
    except ValueError:
        # A non-integer entry would otherwise abort the whole episode with a
        # traceback; ask again instead, without consuming a step.
        print("Invalid action: please enter an integer.")
        continue
    observation, reward, done, info = env.step(action)
    counter += 1
    if print_results:
        printout(observation, reward, action, done, info, counter)

Action: 0
Feeding action:  0
Step: 1
Observation:  [[  0   0   1 115 157 179 170 155 167  61   0   0]
 [  0  12  91 199 238 206 184 213 173 109  22   0]
 [ 22  76  84 177 212 195 169 182 149 126  92   1]
 [ 33  16  74 239 234 191 116 132  80 135 125  76]
 [ 60  32  98 195 241 181 105 122 137 136 129 113]
 [ 47  46  75 163 202 184 144 105 132 112 124 100]
 [ 38  45  83  87  24  90  98 101 114  83 111  89]
 [ 41  97  82  61  38  56  74  93 119 160 134 128]
 [ 12  47  55  47  64  69  95  74  61 141 130 142]
 [  1  49  95 103  59  85  67  36  70 111 195  65]
 [  0  19 163 111  40  29  48  18  68  92  32   0]
 [  0   0  56 107  92  48  58  49  22   1   0   0]]
Reward:  0.0
Done:  False
Info:  {'state': '1'}
Action: 0
Feeding action:  0
Step: 2
Observation:  [[ 15  16  46  80 112 147  96  61  45  11  52  64]
 [ 40  84 123 101 123  79  47  37  62  37  43  62]
 [ 87 146 134 146 133  97  67  71  66 125 115  61]
 [151 181 164 105 102 132  85 105 151 135 155  99]
 [166 221 164 120  87 114 119 142