In [1]:
import gym
import random, math
import numpy as np
import arcade
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
from PIL import Image

        
from LightEnvCopy import LightEnv

import gym.spaces
from gym.spaces import Discrete, Box

from ray.rllib.env.env_context import EnvContext
from ray.rllib.models import ModelCatalog

from collections import namedtuple

# Do the math to figure out our screen dimensions
SCREEN_WIDTH = 800
SCREEN_HEIGHT = 600
SCREEN_TITLE = "Game 1: Let There Be Light!"

# COnvenient data structure to hold information about actions
Action = namedtuple('Action', 'name index delta_i delta_j')

up = Action('up', 0, -1, 0)    
down = Action('down', 1, 1, 0)    
left = Action('left', 2, 0, -1)    
right = Action('right', 3, 0, 1)    

index_to_actions = {}
for action in [up, down, left, right]:
    index_to_actions[action.index] = action
# print(index_to_actions[0].name)
str_to_actions = {}
for action in [up, down, left, right]:
    str_to_actions[action.name] = action
#TF End - Adding in actions for action conversion


class LightEnvWrapper(gym.Env, LightEnv):
    """Class that wraps the Lights Environment to make it 
    compatible with RLLib."""

    metadata = {"render.modes": ["rgb_array", "state_pixels"]}
    
    def __init__(self, config: EnvContext):
        super().__init__(SCREEN_WIDTH, SCREEN_HEIGHT, SCREEN_TITLE)
        self.torch_collected = False
        self.torch_collected_count = []
        self.mygame = LightEnv
        self.steps_taken = 0
        #The action space is a choice of 4 actions: U/D/L/R.
        self.action_space = Discrete(4)
        
        #The observation space is a fixed image of the current game screen
        self.observation_space = Box(low=0, high=255, shape=(84,84, 4), dtype=np.uint8)
        
    def reset(self):
        print("resetting in wrapper")
        
        if self.torch_collected == 1:
            print("Torch was collected this episode!")
        else:
            print("Torch was not collected this episode...")
        self.torch_collected_count.append(self.torch_collected)
        print(self.torch_collected_count)

        self.render(self)
        #Resets the state of the environment for a new episode and an initial observation.
        obs_mygame = self.mygame.reset(self)
        
        #Open up the resetted image to verify working correctly.
        obs_mygame.show()
        
        self.mygame.on_draw(self)
        #Convert observation to 84x84 resolution and np array for rllib.
        obs = self.convert_observations(obs_mygame)
        
        self.steps_taken = 0
        return obs

    def step(self, action):
        self.steps_taken += 1
        
        #Making sure an action is chosen, either: 0, 1, 2, 3.
        assert action in [0, 1, 2, 3] #0-up,1-down,2-left,3-right.
        
        #Convert the numeric action to a keyword: up, down, left, right.
        actions_myenv = index_to_actions[action].name #returns a word, one of: up/down/left/right
#         print(f"action taken: {actions_myenv}")
        
        #Update the window with on_update()
        self.render(self)
#         print("env rendered")
        #Compute observation extracted from the window (800x600), with reward and done flag.
        obs, reward, done, torch_collected, fps_check = self.mygame.step(self,actions_myenv)
        if torch_collected == True:
            self.torch_collected = 1
        else:
            self.torch_collected = 0
                    
        if self.steps_taken % 100 == 0: #33 steps roughly equates to 1 second in game time
            print(f"total score is {self.score} at time: {self.mygame.time_taken_reported(self)}")
            print(f"FPS is currently: {fps_check}")
        #Convert observation to 84x84 resolution and np array for rllib.
        obs_mygame = self.convert_observations(obs)
        
        #If the reward has been obtained, reset the environment and start again
        if done == True:
            print(f"done is {done}, resetting environment in wrapper.")
            print(f"steps taken: {self.steps_taken}")
            obs.show()
            self.reset()
        
        return obs_mygame, reward, done, {}

    def seed(self, seed=None):
        random.seed(seed)

    def convert_observations(self, obs_mygame): #resizing and converting to array for rllib processing
        # We normalize and concatenate observations
        obs = obs_mygame
        obs_resized = obs.resize((84,84))
        obsarray = np.array(obs_resized)
        return obsarray
    
    def render(self, mode='state_pixels'):
#         self.mygame.update(self)
        self.mygame.on_draw(self)
        test = self.mygame.time_taken_reported(self)
        

  if (distutils.version.LooseVersion(tf.__version__) <


### Now run the rllib script to train the agent

### Manual Grid Search

#### lr

In [2]:
import gym
import ray.rllib.agents.ppo.ppo as ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray import air
from ray import tune
import os


config = PPOConfig()


from ray.rllib.algorithms.ppo import PPOConfig
RAY_DISABLE_MEMORY_MONITOR = 1
config = PPOConfig().training(gamma=0.99, lr=0.001, kl_coeff=0.2, entropy_coeff=1,
#                               entropy_coeff_schedule=[[0,1],[1000,0]],
                             sgd_minibatch_size=128, num_sgd_iter=60)\
            .resources(num_gpus=0)\
            .rollouts(num_envs_per_worker=1, num_rollout_workers=2, recreate_failed_workers=True, horizon=30000)
config.normalize_actions=False
config.env=LightEnvWrapper
config.clip_actions=False
print(config.to_dict())
# Build a Algorithm object from the config
trainer = ppo.PPOTrainer(config=config)

avg_rewards = []
num_iterations = []
for episode in range(100):
    print("Starting episode ", episode)
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    #print(pretty_print(result))
    print("episode reward mean: ", result['episode_reward_mean'])
    avg_rewards.append(result['episode_reward_mean'])
    num_iterations.append(episode)
    if episode % 10 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)
    print("End of episode ", episode)



    

{'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': False, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'env': <class '__main__.LightEnvWrapper'>, 'env_config': {}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': False, 'clip_actions': False, 'disable_env_checking': False, 'num_workers': 2, 'num_envs_per_worker': 1, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollecto

2022-09-27 21:46:37,332	INFO worker.py:1518 -- Started a local Ray instance.
[2m[36m(pid=21952)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=21952)[0m 
[2m[36m(pid=5152)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=5152)[0m 
[2m[36m(pid=21952)[0m   if (distutils.version.LooseVersion(tf.__version__) <
[2m[36m(pid=5152)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=21952)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=21952)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=21952)[0m [False]
[2m[36m(RolloutWorker pid=5152)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=5152)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=5152)[0m [False]
[2m[36m(RolloutWorker pid=21952)[0m resetting
[2m[36m(RolloutWorker pid=5152)[0m resetting


2022-09-27 21:47:01,755	INFO trainable.py:160 -- Trainable.setup took 29.034 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Starting episode  0
[2m[36m(RolloutWorker pid=21952)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=21952)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=21952)[0m [False, 0]
[2m[36m(RolloutWorker pid=21952)[0m resetting
[2m[36m(RolloutWorker pid=5152)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=5152)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=5152)[0m [False, 0]
[2m[36m(RolloutWorker pid=5152)[0m resetting
[2m[36m(RolloutWorker pid=5152)[0m total score is -4 at time: 2
[2m[36m(RolloutWorker pid=5152)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=21952)[0m total score is -4 at time: 2
[2m[36m(RolloutWorker pid=21952)[0m FPS is currently: 80
[2m[36m(RolloutWorker pid=5152)[0m total score is -5 at time: 3
[2m[36m(RolloutWorker pid=5152)[0m FPS is currently: 93
[2m[36m(RolloutWorker pid=21952)[0m total score is -5 at time: 3
[2m[36m(RolloutWorker pid=21952)[0m FPS is currently: 91


2022-09-27 21:47:39,567	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

2022-09-27 21:47:41,286	ERROR worker_set.py:728 -- Worker 2 is faulty.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36mray::RolloutWorker.sample()[39m (pid=21

[2m[36m(pid=940)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=940)[0m 
[2m[36m(pid=27112)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=27112)[0m 
[2m[36m(pid=940)[0m   if (distutils.version.LooseVersion(tf.__version__) <
[2m[36m(pid=27112)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=940)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=940)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=940)[0m [False]
[2m[36m(RolloutWorker pid=27112)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=27112)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=27112)[0m [False]
[2m[36m(RolloutWorker pid=940)[0m resetting
[2m[36m(RolloutWorker pid=27112)[0m resetting
[2m[36m(RolloutWorker pid=27112)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=27112)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=27112)[0m [False, 0]
[2m[36m(RolloutWorker pid=27112)[0m resetting
[2m[36m(RolloutWorker pid=940)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=940)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=940)[0m [False, 0]
[2m[36m(RolloutWorker pid=940)[0m resetting
[2m[36m(RolloutWorker pid=27112)[0m Game completed with a score of:

2022-09-27 21:48:30,411	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(RolloutWorker pid=27112)[0m total score is -13 at time: 13
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 78
[2m[36m(RolloutWorker pid=27112)[0m total score is -15 at time: 15
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 82
[2m[36m(RolloutWorker pid=940)[0m total score is -15 at time: 15
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 57


2022-09-27 21:48:36,096	ERROR worker.py:399 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.sample()[39m (pid=940, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000015EF643CF40>)
  File "python\ray\_raylet.pyx", line 620, in ray._raylet.execute_task
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\memory_monitor.py", line 162, in raise_if_low_memory
    raise RayOutOfMemoryError(
ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-BKAPO4O is used (7.35 / 7.73 GB). The top 10 memory consumers are:

PID	MEM	COMMAND
27112	0.77GiB	C:\Users\Tim\Anaconda3\envs\rllib\python.exe C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray
17236	0.44GiB	C:\Users\Tim\AppData\Local\Microsoft\OneDrive\OneDrive.exe /background
940	0.41GiB	C:\Users\Tim\Anaconda3\envs\rllib\python.exe C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray
29420	0.29GiB	C:\Prog

[2m[36m(RolloutWorker pid=940)[0m total score is -17 at time: 17
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 72
[2m[36m(RolloutWorker pid=27112)[0m total score is -17 at time: 17
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 102
[2m[36m(RolloutWorker pid=27112)[0m total score is -18 at time: 18
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=940)[0m total score is -18 at time: 18
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 65
[2m[36m(RolloutWorker pid=940)[0m total score is -20 at time: 20
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 67
[2m[36m(RolloutWorker pid=27112)[0m total score is -20 at time: 20
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 72
[2m[36m(RolloutWorker pid=27112)[0m total score is -22 at time: 22
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 77
[2m[36m(RolloutWorker pid=940)[0m total score is -22 at time: 22
[2m[36m(RolloutWorker pid=940)

2022-09-27 21:49:16,118	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(RolloutWorker pid=27112)[0m total score is -37 at time: 37
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 100
[2m[36m(RolloutWorker pid=27112)[0m total score is -38 at time: 38
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 71
[2m[36m(RolloutWorker pid=940)[0m total score is -38 at time: 38
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 64


2022-09-27 21:49:21,486	ERROR worker.py:399 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.sample()[39m (pid=940, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000015EF643CF40>)
  File "python\ray\_raylet.pyx", line 620, in ray._raylet.execute_task
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\memory_monitor.py", line 162, in raise_if_low_memory
    raise RayOutOfMemoryError(
ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-BKAPO4O is used (7.37 / 7.73 GB). The top 10 memory consumers are:

PID	MEM	COMMAND
27112	0.75GiB	C:\Users\Tim\Anaconda3\envs\rllib\python.exe C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray
17236	0.46GiB	C:\Users\Tim\AppData\Local\Microsoft\OneDrive\OneDrive.exe /background
940	0.41GiB	C:\Users\Tim\Anaconda3\envs\rllib\python.exe C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray
29420	0.28GiB	C:\Prog

[2m[36m(RolloutWorker pid=940)[0m total score is -40 at time: 40
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 67
[2m[36m(RolloutWorker pid=27112)[0m total score is -40 at time: 40
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 93
[2m[36m(RolloutWorker pid=940)[0m total score is -42 at time: 42
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=27112)[0m total score is -42 at time: 42
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=940)[0m total score is -43 at time: 43
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 59
[2m[36m(RolloutWorker pid=27112)[0m total score is -43 at time: 43
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 80
[2m[36m(RolloutWorker pid=940)[0m total score is -45 at time: 45
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 72
[2m[36m(RolloutWorker pid=27112)[0m total score is -45 at time: 45
[2m[36m(RolloutWorker pid=27112)[

[2m[36m(RolloutWorker pid=27112)[0m total score is -104 at time: 92
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=940)[0m total score is -105 at time: 93
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=27112)[0m total score is -105 at time: 93
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -108 at time: 95
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -108 at time: 95
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -110 at time: 97
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -111 at time: 97
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 95
[2m[36m(RolloutWorker pid=940)[0m total score is -111 at time: 98
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=940)[0m total score is -168 at time: 145
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 95
[2m[36m(RolloutWorker pid=27112)[0m total score is -165 at time: 145
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=940)[0m total score is -170 at time: 147
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -167 at time: 147
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -173 at time: 148
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 75
[2m[36m(RolloutWorker pid=27112)[0m total score is -168 at time: 148
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 74
[2m[36m(RolloutWorker pid=940)[0m total score is -175 at time: 150
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 70
[2m[36m(RolloutWorker pid=27112)[0m total score is -170 at time: 150
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=27112)[0m total score is -225 at time: 197
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=940)[0m total score is -228 at time: 198
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -226 at time: 198
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=940)[0m total score is -230 at time: 200
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=27112)[0m total score is -228 at time: 200
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=940)[0m total score is -232 at time: 202
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 66
[2m[36m(RolloutWorker pid=27112)[0m total score is -230 at time: 202
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 68
[2m[36m(RolloutWorker pid=940)[0m total score is -233 at time: 203
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=27112)[0m total score is -280 at time: 248
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 97
[2m[36m(RolloutWorker pid=940)[0m total score is -282 at time: 250
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=27112)[0m total score is -282 at time: 250
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 82
[2m[36m(RolloutWorker pid=940)[0m total score is -285 at time: 252
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=27112)[0m total score is -284 at time: 252
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 93
[2m[36m(RolloutWorker pid=940)[0m total score is -286 at time: 253
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=27112)[0m total score is -285 at time: 253
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -288 at time: 255
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=940)[0m total score is -336 at time: 302
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 77
[2m[36m(RolloutWorker pid=27112)[0m total score is -334 at time: 302
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=940)[0m total score is -337 at time: 303
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=27112)[0m total score is -335 at time: 303
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -339 at time: 305
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 94
[2m[36m(RolloutWorker pid=27112)[0m total score is -337 at time: 305
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 95
[2m[36m(RolloutWorker pid=940)[0m total score is -341 at time: 307
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
episode reward mean:  99.0
End of episode  7
Starting episode  8
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=940)[0m total score is -391 at time: 353
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 79
[2m[36m(RolloutWorker pid=27112)[0m total score is -386 at time: 353
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 80
[2m[36m(RolloutWorker pid=940)[0m total score is -394 at time: 355
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=27112)[0m total score is -388 at time: 355
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=940)[0m total score is -396 at time: 357
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -390 at time: 357
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=940)[0m total score is -397 at time: 358
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -391 at time: 358
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=27112)[0m total score is -436 at time: 402
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 68
[2m[36m(RolloutWorker pid=940)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=940)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=940)[0m [False, 0, 0, False]
[2m[36m(RolloutWorker pid=940)[0m resetting
[2m[36m(RolloutWorker pid=940)[0m total score is -6 at time: 2
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=27112)[0m total score is -438 at time: 403
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 76
[2m[36m(RolloutWorker pid=940)[0m total score is -7 at time: 3
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 72
[2m[36m(RolloutWorker pid=27112)[0m total score is -440 at time: 405
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 79
episode reward mean:  -121.0
checkpoint saved at C:\Users\Tim/ray_results\PPO_LightEnvWrapper_2022-09-27_21-46-32itz

[2m[36m(RolloutWorker pid=27112)[0m total score is -489 at time: 452
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=940)[0m total score is -64 at time: 52
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 69
[2m[36m(RolloutWorker pid=27112)[0m total score is -490 at time: 453
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 94
[2m[36m(RolloutWorker pid=940)[0m total score is -65 at time: 53
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 69
[2m[36m(RolloutWorker pid=27112)[0m total score is -492 at time: 455
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 63
[2m[36m(RolloutWorker pid=940)[0m total score is -67 at time: 55
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 86
[2m[36m(RolloutWorker pid=27112)[0m total score is -494 at time: 457
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -69 at time: 57
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=27112)[0m total score is -538 at time: 500
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 109
[2m[36m(RolloutWorker pid=27112)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=27112)[0m Torch was collected this episode!
[2m[36m(RolloutWorker pid=27112)[0m [False, 0, 0, False, 1]
[2m[36m(RolloutWorker pid=27112)[0m resetting
[2m[36m(RolloutWorker pid=940)[0m total score is -8 at time: 3
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 122
[2m[36m(RolloutWorker pid=27112)[0m total score is -6 at time: 2
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 114
[2m[36m(RolloutWorker pid=940)[0m total score is -11 at time: 5
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 103
[2m[36m(RolloutWorker pid=27112)[0m total score is -9 at time: 3
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 109
[2m[36m(RolloutWorker pid=940)[0m total score is -15 at time: 7
[2m[36m(RolloutWorker pid=940)[0m FPS is cu

[2m[36m(RolloutWorker pid=940)[0m total score is -74 at time: 53
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=27112)[0m total score is -70 at time: 52
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 80
[2m[36m(RolloutWorker pid=940)[0m total score is -76 at time: 55
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -72 at time: 53
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -78 at time: 57
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -74 at time: 55
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=940)[0m total score is -79 at time: 58
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=27112)[0m total score is -76 at time: 57
[2m[36m(RolloutWorker pid=27112)[

[2m[36m(RolloutWorker pid=27112)[0m total score is -126 at time: 100
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 94
[2m[36m(RolloutWorker pid=940)[0m total score is -40 at time: 33
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -128 at time: 102
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 95
[2m[36m(RolloutWorker pid=940)[0m total score is -42 at time: 35
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 78
[2m[36m(RolloutWorker pid=27112)[0m total score is -129 at time: 103
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -45 at time: 37
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 84
[2m[36m(RolloutWorker pid=27112)[0m total score is -131 at time: 105
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=940)[0m total score is -46 at time: 38
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=940)[0m total score is -97 at time: 85
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -181 at time: 153
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -99 at time: 87
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=27112)[0m total score is -183 at time: 155
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -100 at time: 88
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=27112)[0m total score is -185 at time: 157
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=940)[0m total score is -102 at time: 90
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 99
[2m[36m(RolloutWorker pid=27112)[0m total score is -186 at time: 158
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=27112)[0m total score is -237 at time: 205
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 97
[2m[36m(RolloutWorker pid=940)[0m total score is -154 at time: 138
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
episode reward mean:  -156.2
End of episode  19
Starting episode  20
[2m[36m(RolloutWorker pid=27112)[0m total score is -239 at time: 207
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 111
[2m[36m(RolloutWorker pid=940)[0m total score is -156 at time: 140
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 115
[2m[36m(RolloutWorker pid=27112)[0m total score is -240 at time: 208
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 111
[2m[36m(RolloutWorker pid=940)[0m total score is -159 at time: 142
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 110
[2m[36m(RolloutWorker pid=27112)[0m total score is -242 at time: 210
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 101
[2m[36m(Roll

[2m[36m(RolloutWorker pid=940)[0m total score is -211 at time: 188
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=27112)[0m total score is -291 at time: 257
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=940)[0m total score is -213 at time: 190
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=27112)[0m total score is -292 at time: 258
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 82
[2m[36m(RolloutWorker pid=940)[0m total score is -215 at time: 192
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=27112)[0m total score is -294 at time: 260
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=940)[0m total score is -216 at time: 193
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=27112)[0m total score is -296 at time: 262
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=940)[0m total score is -266 at time: 240
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 106
[2m[36m(RolloutWorker pid=27112)[0m total score is -343 at time: 308
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 109
[2m[36m(RolloutWorker pid=940)[0m total score is -268 at time: 242
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 97
[2m[36m(RolloutWorker pid=27112)[0m total score is -345 at time: 310
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 104
[2m[36m(RolloutWorker pid=940)[0m total score is -269 at time: 243
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 109
[2m[36m(RolloutWorker pid=27112)[0m total score is -347 at time: 312
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 103
[2m[36m(RolloutWorker pid=940)[0m total score is -271 at time: 245
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 106
[2m[36m(RolloutWorker pid=27112)[0m total score is -348 at time: 313
[2m[36m(Roll

[2m[36m(RolloutWorker pid=27112)[0m total score is -397 at time: 360
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -322 at time: 293
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 97
[2m[36m(RolloutWorker pid=27112)[0m total score is -399 at time: 362
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=940)[0m total score is -324 at time: 295
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=27112)[0m total score is -400 at time: 363
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 96
[2m[36m(RolloutWorker pid=940)[0m total score is -326 at time: 297
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=27112)[0m total score is -402 at time: 365
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 102
[2m[36m(RolloutWorker pid=940)[0m total score is -327 at time: 298
[2m[36m(Rollout

[2m[36m(RolloutWorker pid=27112)[0m total score is -450 at time: 412
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 106
[2m[36m(RolloutWorker pid=940)[0m total score is -378 at time: 345
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 104
[2m[36m(RolloutWorker pid=27112)[0m total score is -451 at time: 413
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=940)[0m total score is -380 at time: 347
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 89
[2m[36m(RolloutWorker pid=27112)[0m total score is -453 at time: 415
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 78
[2m[36m(RolloutWorker pid=940)[0m total score is -381 at time: 348
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 97
[2m[36m(RolloutWorker pid=27112)[0m total score is -455 at time: 417
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=940)[0m total score is -383 at time: 350
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=27112)[0m total score is -500 at time: 460
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 86
[2m[36m(RolloutWorker pid=940)[0m total score is -37 at time: 27
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=27112)[0m total score is -502 at time: 462
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 101
[2m[36m(RolloutWorker pid=940)[0m total score is -39 at time: 28
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 90
[2m[36m(RolloutWorker pid=27112)[0m total score is -503 at time: 463
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=940)[0m total score is -42 at time: 30
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=27112)[0m total score is -505 at time: 465
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 88
[2m[36m(RolloutWorker pid=940)[0m total score is -44 at time: 32
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=27112)[0m total score is -14 at time: 12
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 106
[2m[36m(RolloutWorker pid=940)[0m total score is -95 at time: 78
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 112
[2m[36m(RolloutWorker pid=27112)[0m total score is -15 at time: 13
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 102
[2m[36m(RolloutWorker pid=940)[0m total score is -97 at time: 80
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 110
[2m[36m(RolloutWorker pid=27112)[0m total score is -17 at time: 15
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -99 at time: 82
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -19 at time: 17
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 92
[2m[36m(RolloutWorker pid=940)[0m total score is -101 at time: 83
[2m[36m(RolloutWorker pid=9

[2m[36m(RolloutWorker pid=940)[0m total score is -143 at time: 122
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 91
[2m[36m(RolloutWorker pid=27112)[0m total score is -27 at time: 20
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 99
[2m[36m(RolloutWorker pid=940)[0m total score is -145 at time: 123
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 81
[2m[36m(RolloutWorker pid=27112)[0m total score is -30 at time: 22
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 85
[2m[36m(RolloutWorker pid=940)[0m total score is -147 at time: 125
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 95
[2m[36m(RolloutWorker pid=27112)[0m total score is -31 at time: 23
[2m[36m(RolloutWorker pid=27112)[0m FPS is currently: 87
[2m[36m(RolloutWorker pid=940)[0m total score is -150 at time: 127
[2m[36m(RolloutWorker pid=940)[0m FPS is currently: 83
[2m[36m(RolloutWorker pid=27112)[0m total score is -33 at time: 25
[2m[36m(RolloutWorker pid=

  mo = re.match("state_in_(\d+)", view_col)


KeyboardInterrupt: 