In [1]:
import numpy as np
import gym
from gym_CTMaze.envs import CTMaze_env
from gym_CTMaze.envs.CTMaze_conf import CTMaze_conf
import json

from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [2]:
def printout(p_obs, p_reward, p_act, p_done, p_info, p_counter):
    """Dump the state of one navigation step to stdout, one labelled line per field.

    Args:
        p_obs: Observation obtained for this step.
        p_reward: Reward obtained for this step.
        p_act: Action that was fed to the environment.
        p_done: Episode-termination flag.
        p_info: Additional info value reported by the environment.
        p_counter: Index of the step being reported.
    """
    # Labels keep their own trailing space where they had one; print() then
    # adds its default separator, so those fields show two spaces after the colon.
    labelled_fields = (
        ("Feeding action: ", p_act),
        ("Step:", p_counter),
        ("Observation: ", p_obs),
        ("Reward: ", p_reward),
        ("Done: ", p_done),
        ("Info: ", p_info),
    )
    for label, value in labelled_fields:
        print(label, value)

In [10]:
# Load the maze parameters from the JSON configuration file "maze.json".
# NOTE(review): CTMaze_conf is project code not shown here; getParameters()
# presumably returns a JSON-serialisable mapping (it is fed to json.dumps
# below and indexed by key in a later cell) -- confirm.
configuration = CTMaze_conf("maze.json")
conf_data = configuration.getParameters()

# Create the 'CTMaze-v0' environment via gym, then echo the loaded parameters.
env = gym.make('CTMaze-v0')
print(json.dumps(conf_data, indent=3))

# Switch for the per-step printout in the interactive stepping loop.
print_results = True

# Hand the parameters to the environment; the call returns the initial
# (observation, reward, done, info) tuple used by the following cells.
# NOTE(review): env.init() is a project-specific method rather than one of
# the usual gym.Env methods -- verify its contract against CTMaze_env.
observation, reward, done, info = env.init(conf_data)

Reading configuration parameters from  maze.json
---------------------------------------------------
             The CT-Maze environments              
---------------------------------------------------
{
   "maze_shape": {
      "depth": 4,
      "branching_factor": 4,
      "corridor_min_length": 2,
      "corridor_max_length": 5
   },
   "reward": {
      "high_reward_value": 1.0,
      "crash_reward_value": -1.0,
      "stochastic_sampling": false,
      "reward_std": 0.1,
      "min_static_reward_episodes": 30,
      "max_static_reward_episodes": 70,
      "reward_distribution": "needle_in_haystack"
   },
   "observations": {
      "image_set": "standard",
      "observation_res": 12,
      "noise_on_images": 64
   },
   "actions": {
      "action_space_size": 10,
      "corridor_actions": [
         5,
         6,
         7,
         8,
         9
      ],
      "turning_point_actions": [
         0,
         1,
         2,
         3,
         4
      ]
   }
}
---------------

In [11]:
# Draw one random integer per maze depth level and install the result as the
# high-reward path. randint's upper bound is exclusive, so every entry lies
# in [1, branching_factor - 1].
# NOTE(review): confirm that never drawing 0 or `branching_factor` is intended.
high_reward_path = np.random.randint(
    low=1,
    high=conf_data['maze_shape']['branching_factor'],
    size=conf_data['maze_shape']['depth'],
)
env.set_high_reward_path(high_reward_path)

print("*--- Testing script ----*")
print("We set the high reward path to: ", env.get_high_reward_path() )
print("*--- Running test ------*")

# Step 0 is reported with a placeholder action of 0: no action has been
# fed to the environment yet.
action, counter = 0, 0

printout(observation, reward, action, done, info, counter)

*--- Testing script ----*
We set the high reward path to:  [1 1 3 1]
*--- Running test ------*
Feeding action:  0
Step: 0
Observation:  [[224. 232. 237. 207. 200. 197. 198. 234. 192. 211. 217. 218.]
 [224.   9.  48.  32.   4.  41.   4.  55.   1.  44.  58. 232.]
 [198.  50.  52.  39.   6.  60.  22.  62.  34.  59.  45. 216.]
 [239.  54.  12. 200. 226. 240. 239. 244. 225.  40.  43. 230.]
 [216.   4.  19. 219. 253. 235. 227. 225. 230.  46.   2. 202.]
 [199.   9.   7. 228. 240. 200. 212. 251. 221.  17.   0. 198.]
 [252.   1.  24. 215. 235. 230. 192. 221. 234.  47.  51. 226.]
 [194.  49.  51. 226. 227. 211. 211. 194. 225.   9.   1. 240.]
 [231.  32.  23. 245. 245. 213. 199. 228. 224.  11.  55. 210.]
 [190.  42.  45.  60.  36.  28.  28.   9.  23.  35.  59. 212.]
 [239.  38.  48.  42.   7.  27.  42.  41.  61.  52.  22. 227.]
 [208. 204. 248. 236. 245. 212. 200. 253. 195. 242. 194. 234.]]
Reward:  0.0
Done:  False
Info:  Home


In [12]:
# Interactive episode: keep prompting for an action and stepping the
# environment until it reports that the episode is done.
while not done:
    raw_action = input("Action: ")
    try:
        action = int(raw_action)
    except ValueError:
        # A typo or empty line must not abort the running episode: ask again
        # without stepping the environment or advancing the step counter.
        print("Invalid action, please enter an integer:", repr(raw_action))
        continue
    observation, reward, done, info = env.step(action)
    counter += 1
    if print_results:
        printout(observation, reward, action, done, info, counter)

Action: 6
Feeding action:  6
Step: 1
Observation:  [[ 25.  23. 218. 239.  24.  33.  19.  53.  21.  28.   8.  54.]
 [ 29.  19. 242. 211.   9.  27.  57.  24.  21.  46.   1.  48.]
 [ 33.  22. 213. 215.  46.   0.   6.  42.  31.  26.  13.  54.]
 [ 19.  60. 247. 207.  41.   9.  54.  60.  25.  45.  43.  37.]
 [ 54.  20. 202. 192.  53.  16.  30.  33.  23.  60.  20.  50.]
 [193. 239. 253. 248. 194. 200. 226. 199. 251. 191. 245. 215.]
 [213. 215. 239. 248. 231. 240. 224. 202. 214. 218. 212. 249.]
 [ 48.  18. 226. 231.  18.  52.  39.  43.  11.   2.  15.   6.]
 [  9.   6. 249. 192.  38.  39.  27.  14.  18.  55.  57.   2.]
 [ 17.  14. 226. 211.  51.  43.  55.  51.  11.   3.  46.   2.]
 [ 62.  60. 219. 248.  52.  29.  39.   8.   1.  26.  30.  36.]
 [ 11.  55. 210. 204.  58.  22.  50.  50.  36.   9.  60.   5.]]
Reward:  0.0
Done:  False
Info:  ('OK/corridor->corridor, steps left', 3)
Action: 6
Feeding action:  6
Step: 2
Observation:  [[ 50.  11.  42.   1.  54.  44.  25.  21. 219. 213.   1.   9.]
 [  