In [1]:
import gym
import random, math
import numpy as np
import arcade
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
from PIL import Image

        
from LightEnvCopy import LightEnv

import gym.spaces
from gym.spaces import Discrete, Box

from ray.rllib.env.env_context import EnvContext
from ray.rllib.models import ModelCatalog

from collections import namedtuple

# Do the math to figure out our screen dimensions
SCREEN_WIDTH = 800
SCREEN_HEIGHT = 600
SCREEN_TITLE = "Game 1: Let There Be Light!"

# COnvenient data structure to hold information about actions
Action = namedtuple('Action', 'name index delta_i delta_j')

up = Action('up', 0, -1, 0)    
down = Action('down', 1, 1, 0)    
left = Action('left', 2, 0, -1)    
right = Action('right', 3, 0, 1)    

index_to_actions = {}
for action in [up, down, left, right]:
    index_to_actions[action.index] = action
# print(index_to_actions[0].name)
str_to_actions = {}
for action in [up, down, left, right]:
    str_to_actions[action.name] = action
#TF End - Adding in actions for action conversion


class LightEnvWrapper(gym.Env, LightEnv):
    """Class that wraps the Lights Environment to make it 
    compatible with RLLib."""

    metadata = {"render.modes": ["rgb_array", "state_pixels"]}
    
    def __init__(self, config: EnvContext):
        super().__init__(SCREEN_WIDTH, SCREEN_HEIGHT, SCREEN_TITLE)
        self.counting = 0    
        self.torch_collected = False
        self.torch_collected_count = []
        self.mygame = LightEnv
        
        #The action space is a choice of 4 actions: U/D/L/R.
        self.action_space = Discrete(4)
        
        #The observation space is a fixed image of the current game screen
        self.observation_space = Box(low=0, high=255, shape=(84,84, 4), dtype=np.uint8)
        
        self.counting = 0

    def reset(self):
        print("resetting in wrapper")
        
        if self.torch_collected == 1:
            print("Torch was collected this episode!")
        else:
            print("Torch was not collected this episode...")
        self.torch_collected_count.append(self.torch_collected)
        print(self.torch_collected_count)

        self.render(self)
        #Resets the state of the environment for a new episode and an initial observation.
        obs_mygame = self.mygame.reset(self)
        
        #Open up the resetted image to verify working correctly.
        obs_mygame.show()
        
        #Convert observation to 84x84 resolution and np array for rllib.
        obs = self.convert_observations(obs_mygame)
        
        return obs

    def step(self, action):
        self.counting += 1
        
        #Making sure an action is chosen, either: 0, 1, 2, 3.
        assert action in [0, 1, 2, 3] #0-up,1-down,2-left,3-right.
        
        #Convert the numeric action to a keyword: up, down, left, right.
        actions_myenv = index_to_actions[action].name #returns a word, one of: up/down/left/right
#         print(f"action taken: {actions_myenv}")
        
        #Update the window with on_update()
        self.render(self)
        
        #Compute observation extracted from the window (800x600), with reward and done flag.
        obs, reward, done, torch_collected = self.mygame.step(self,actions_myenv)
        if torch_collected == True:
            self.torch_collected = 1
        else:
            self.torch_collected = 0
                    
        if self.counting % 33 == 0: #33 steps roughly equates to 1 second in game time
            print(f"total score is {self.score} at time: {self.mygame.time_taken_reported(self)}")
        
        #Convert observation to 84x84 resolution and np array for rllib.
        obs_mygame = self.convert_observations(obs)
        
        #If the reward has been obtained, reset the environment and start again
        if done == True:
            print(f"done is {done}, resetting environment in wrapper.")
            self.reset()
        
        return obs_mygame, reward, done, {}

    def seed(self, seed=None):
        random.seed(seed)

    def convert_observations(self, obs_mygame): #resizing and converting to array for rllib processing
        # We normalize and concatenate observations
        obs = obs_mygame
        obs_resized = obs.resize((84,84))
        obsarray = np.array(obs_resized)
        return obsarray
    
    def render(self, mode='state_pixels'):
        self.mygame.on_update(self, 1/60)
        self.mygame.on_draw(self)
        test = self.mygame.time_taken_reported(self)
        

  if (distutils.version.LooseVersion(tf.__version__) <


### Now run the rllib script to train the agent

### Manual Grid Search (Running for 50 iterations)

#### Entropy coeff 0.03

In [2]:
import gym
import ray.rllib.agents.ppo.ppo as ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray import air
from ray import tune
config = PPOConfig()


from ray.rllib.algorithms.ppo import PPOConfig
RAY_DISABLE_MEMORY_MONITOR = 1

config = PPOConfig().training(gamma=0.99, lr=0.01, kl_coeff=0.2, entropy_coeff=0.03,
#                               entropy_coeff_schedule=[[0,1],[1000,0]],
                             sgd_minibatch_size=128, num_sgd_iter=60)\
            .resources(num_gpus=0)\
            .rollouts(num_envs_per_worker=1, num_rollout_workers=1, recreate_failed_workers=True)
config.normalize_actions=False
config.env=LightEnvWrapper
config.clip_actions=False
print(config.to_dict())
# Build a Algorithm object from the config
trainer = ppo.PPOTrainer(config=config)

avg_rewards = []
num_iterations = []
for episode in range(50):
    print("Starting episode ", episode)
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    #print(pretty_print(result))
    print("episode reward mean: ", result['episode_reward_mean'])
    avg_rewards.append(result['episode_reward_mean'])
    num_iterations.append(episode)
    if episode % 10 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)
    print("End of episode ", episode)



    

{'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': False, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'env': <class '__main__.LightEnvWrapper'>, 'env_config': {}, 'observation_space': None, 'action_space': None, 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': False, 'clip_actions': False, 'disable_env_checking': False, 'num_workers': 1, 'num_envs_per_worker': 1, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollecto

2022-09-26 12:37:15,973	INFO worker.py:1518 -- Started a local Ray instance.
[2m[36m(pid=16588)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=16588)[0m 
[2m[36m(pid=16588)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=16588)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=16588)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=16588)[0m [False]
[2m[36m(RolloutWorker pid=16588)[0m resetting


2022-09-26 12:37:32,507	INFO trainable.py:160 -- Trainable.setup took 19.476 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Starting episode  0
[2m[36m(RolloutWorker pid=16588)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=16588)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=16588)[0m [False, 0]
[2m[36m(RolloutWorker pid=16588)[0m resetting
[2m[36m(RolloutWorker pid=16588)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=16588)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=16588)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=16588)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=16588)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=16588)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=16588)[0m total score is -4 at time: 4
[2m[36m(RolloutWorker pid=16588)[0m total score is -4 at time: 4
[2m[36m(RolloutWorker pid=16588)[0m total score is -5 at time: 5
[2m[36m(RolloutWorker pid=16588)[0m total score is -5 at time: 5
[2m[36m(RolloutWorker pid=16588)[0m total score is -6 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -63 at time: 63
[2m[36m(RolloutWorker pid=16588)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=16588)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=16588)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=16588)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=16588)[0m total score is -66 at time: 66
[2m[36m(RolloutWorker pid=16588)[0m total score is -67 at time: 67
episode reward mean:  nan
checkpoint saved at C:\Users\Tim/ray_results\PPO_LightEnvWrapper_2022-09-26_12-37-12w8fmcxi1\checkpoint_000001
End of episode  0
Starting episode  1
[2m[36m(RolloutWorker pid=16588)[0m total score is -67 at time: 67
[2m[36m(RolloutWorker pid=16588)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker pid=16588)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker pid=16588)[0m total score is -69 at time: 69
[2m[36m(RolloutWorker pid=16588)[0m total score is -

[2m[36m(RolloutWorker pid=16588)[0m total score is -126 at time: 126
[2m[36m(RolloutWorker pid=16588)[0m total score is -126 at time: 126
[2m[36m(RolloutWorker pid=16588)[0m total score is -127 at time: 127
[2m[36m(RolloutWorker pid=16588)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=16588)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=16588)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=16588)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=16588)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=16588)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=16588)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=16588)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=16588)[0m total score is -132 at time: 132
[2m[36m(RolloutWorker pid=16588)[0m total score is -133 at time: 133
[2m[36m(RolloutWorker pid=16588)[0m total score is -133 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -188 at time: 188
[2m[36m(RolloutWorker pid=16588)[0m total score is -189 at time: 189
[2m[36m(RolloutWorker pid=16588)[0m total score is -189 at time: 189
[2m[36m(RolloutWorker pid=16588)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=16588)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=16588)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=16588)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=16588)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=16588)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=16588)[0m total score is -193 at time: 193
[2m[36m(RolloutWorker pid=16588)[0m total score is -194 at time: 194
[2m[36m(RolloutWorker pid=16588)[0m total score is -194 at time: 194
[2m[36m(RolloutWorker pid=16588)[0m total score is -195 at time: 195
[2m[36m(RolloutWorker pid=16588)[0m total score is -195 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -250 at time: 250
[2m[36m(RolloutWorker pid=16588)[0m total score is -251 at time: 251
[2m[36m(RolloutWorker pid=16588)[0m total score is -251 at time: 251
[2m[36m(RolloutWorker pid=16588)[0m total score is -252 at time: 252
[2m[36m(RolloutWorker pid=16588)[0m total score is -252 at time: 252
[2m[36m(RolloutWorker pid=16588)[0m total score is -253 at time: 253
[2m[36m(RolloutWorker pid=16588)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=16588)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=16588)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=16588)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=16588)[0m total score is -256 at time: 256
[2m[36m(RolloutWorker pid=16588)[0m total score is -256 at time: 256
[2m[36m(RolloutWorker pid=16588)[0m total score is -257 at time: 257
[2m[36m(RolloutWorker pid=16588)[0m total score is -257 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -312 at time: 312
[2m[36m(RolloutWorker pid=16588)[0m total score is -313 at time: 313
[2m[36m(RolloutWorker pid=16588)[0m total score is -313 at time: 313
[2m[36m(RolloutWorker pid=16588)[0m total score is -314 at time: 314
[2m[36m(RolloutWorker pid=16588)[0m total score is -315 at time: 315
[2m[36m(RolloutWorker pid=16588)[0m total score is -315 at time: 315
[2m[36m(RolloutWorker pid=16588)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=16588)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=16588)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=16588)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=16588)[0m total score is -318 at time: 318
[2m[36m(RolloutWorker pid=16588)[0m total score is -318 at time: 318
[2m[36m(RolloutWorker pid=16588)[0m total score is -319 at time: 319
[2m[36m(RolloutWorker pid=16588)[0m total score is -320 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -375 at time: 375
[2m[36m(RolloutWorker pid=16588)[0m total score is -375 at time: 375
[2m[36m(RolloutWorker pid=16588)[0m total score is -376 at time: 376
[2m[36m(RolloutWorker pid=16588)[0m total score is -376 at time: 376
[2m[36m(RolloutWorker pid=16588)[0m total score is -377 at time: 377
[2m[36m(RolloutWorker pid=16588)[0m total score is -377 at time: 377
[2m[36m(RolloutWorker pid=16588)[0m total score is -378 at time: 378
[2m[36m(RolloutWorker pid=16588)[0m total score is -378 at time: 378
[2m[36m(RolloutWorker pid=16588)[0m total score is -379 at time: 379
[2m[36m(RolloutWorker pid=16588)[0m total score is -379 at time: 379
[2m[36m(RolloutWorker pid=16588)[0m total score is -380 at time: 380
[2m[36m(RolloutWorker pid=16588)[0m total score is -381 at time: 381
[2m[36m(RolloutWorker pid=16588)[0m total score is -381 at time: 381
[2m[36m(RolloutWorker pid=16588)[0m total score is -382 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -437 at time: 437
[2m[36m(RolloutWorker pid=16588)[0m total score is -437 at time: 437
[2m[36m(RolloutWorker pid=16588)[0m total score is -438 at time: 438
[2m[36m(RolloutWorker pid=16588)[0m total score is -438 at time: 438
[2m[36m(RolloutWorker pid=16588)[0m total score is -439 at time: 439
[2m[36m(RolloutWorker pid=16588)[0m total score is -439 at time: 439
[2m[36m(RolloutWorker pid=16588)[0m total score is -440 at time: 440
[2m[36m(RolloutWorker pid=16588)[0m total score is -441 at time: 441
[2m[36m(RolloutWorker pid=16588)[0m total score is -441 at time: 441
[2m[36m(RolloutWorker pid=16588)[0m total score is -442 at time: 442
[2m[36m(RolloutWorker pid=16588)[0m total score is -442 at time: 442
[2m[36m(RolloutWorker pid=16588)[0m total score is -443 at time: 443
[2m[36m(RolloutWorker pid=16588)[0m total score is -443 at time: 443
[2m[36m(RolloutWorker pid=16588)[0m total score is -444 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -499 at time: 499
[2m[36m(RolloutWorker pid=16588)[0m total score is -499 at time: 499
[2m[36m(RolloutWorker pid=16588)[0m total score is -500 at time: 500
[2m[36m(RolloutWorker pid=16588)[0m total score is -500 at time: 500
[2m[36m(RolloutWorker pid=16588)[0m total score is -501 at time: 501
[2m[36m(RolloutWorker pid=16588)[0m total score is -502 at time: 502
[2m[36m(RolloutWorker pid=16588)[0m total score is -502 at time: 502
[2m[36m(RolloutWorker pid=16588)[0m total score is -503 at time: 503
[2m[36m(RolloutWorker pid=16588)[0m total score is -503 at time: 503
[2m[36m(RolloutWorker pid=16588)[0m total score is -504 at time: 504
[2m[36m(RolloutWorker pid=16588)[0m total score is -504 at time: 504
[2m[36m(RolloutWorker pid=16588)[0m total score is -505 at time: 505
[2m[36m(RolloutWorker pid=16588)[0m total score is -505 at time: 505
[2m[36m(RolloutWorker pid=16588)[0m total score is -506 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -561 at time: 561
[2m[36m(RolloutWorker pid=16588)[0m total score is -562 at time: 562
[2m[36m(RolloutWorker pid=16588)[0m total score is -562 at time: 562
[2m[36m(RolloutWorker pid=16588)[0m total score is -563 at time: 563
[2m[36m(RolloutWorker pid=16588)[0m total score is -563 at time: 563
[2m[36m(RolloutWorker pid=16588)[0m total score is -564 at time: 564
[2m[36m(RolloutWorker pid=16588)[0m total score is -564 at time: 564
[2m[36m(RolloutWorker pid=16588)[0m total score is -565 at time: 565
[2m[36m(RolloutWorker pid=16588)[0m total score is -565 at time: 565
[2m[36m(RolloutWorker pid=16588)[0m total score is -566 at time: 566
[2m[36m(RolloutWorker pid=16588)[0m total score is -566 at time: 566
[2m[36m(RolloutWorker pid=16588)[0m total score is -567 at time: 567
[2m[36m(RolloutWorker pid=16588)[0m total score is -568 at time: 568
[2m[36m(RolloutWorker pid=16588)[0m total score is -568 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -623 at time: 623
[2m[36m(RolloutWorker pid=16588)[0m total score is -624 at time: 624
[2m[36m(RolloutWorker pid=16588)[0m total score is -624 at time: 624
[2m[36m(RolloutWorker pid=16588)[0m total score is -625 at time: 625
[2m[36m(RolloutWorker pid=16588)[0m total score is -625 at time: 625
[2m[36m(RolloutWorker pid=16588)[0m total score is -626 at time: 626
[2m[36m(RolloutWorker pid=16588)[0m total score is -626 at time: 626
[2m[36m(RolloutWorker pid=16588)[0m total score is -627 at time: 627
[2m[36m(RolloutWorker pid=16588)[0m total score is -628 at time: 628
[2m[36m(RolloutWorker pid=16588)[0m total score is -628 at time: 628
[2m[36m(RolloutWorker pid=16588)[0m total score is -629 at time: 629
[2m[36m(RolloutWorker pid=16588)[0m total score is -629 at time: 629
[2m[36m(RolloutWorker pid=16588)[0m total score is -630 at time: 630
[2m[36m(RolloutWorker pid=16588)[0m total score is -630 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -685 at time: 685
[2m[36m(RolloutWorker pid=16588)[0m total score is -686 at time: 686
[2m[36m(RolloutWorker pid=16588)[0m total score is -686 at time: 686
[2m[36m(RolloutWorker pid=16588)[0m total score is -687 at time: 687
[2m[36m(RolloutWorker pid=16588)[0m total score is -687 at time: 687
[2m[36m(RolloutWorker pid=16588)[0m total score is -688 at time: 688
[2m[36m(RolloutWorker pid=16588)[0m total score is -689 at time: 689
[2m[36m(RolloutWorker pid=16588)[0m total score is -689 at time: 689
[2m[36m(RolloutWorker pid=16588)[0m total score is -690 at time: 690
[2m[36m(RolloutWorker pid=16588)[0m total score is -690 at time: 690
[2m[36m(RolloutWorker pid=16588)[0m total score is -691 at time: 691
[2m[36m(RolloutWorker pid=16588)[0m total score is -691 at time: 691
[2m[36m(RolloutWorker pid=16588)[0m total score is -692 at time: 692
[2m[36m(RolloutWorker pid=16588)[0m total score is -692 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -747 at time: 747
[2m[36m(RolloutWorker pid=16588)[0m total score is -747 at time: 747
[2m[36m(RolloutWorker pid=16588)[0m total score is -748 at time: 748
[2m[36m(RolloutWorker pid=16588)[0m total score is -749 at time: 749
[2m[36m(RolloutWorker pid=16588)[0m total score is -749 at time: 749
[2m[36m(RolloutWorker pid=16588)[0m total score is -750 at time: 750
[2m[36m(RolloutWorker pid=16588)[0m total score is -750 at time: 750
[2m[36m(RolloutWorker pid=16588)[0m total score is -751 at time: 751
[2m[36m(RolloutWorker pid=16588)[0m total score is -751 at time: 751
[2m[36m(RolloutWorker pid=16588)[0m total score is -752 at time: 752
[2m[36m(RolloutWorker pid=16588)[0m total score is -752 at time: 752
[2m[36m(RolloutWorker pid=16588)[0m total score is -753 at time: 753
[2m[36m(RolloutWorker pid=16588)[0m total score is -753 at time: 753
[2m[36m(RolloutWorker pid=16588)[0m total score is -754 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -809 at time: 809
[2m[36m(RolloutWorker pid=16588)[0m total score is -810 at time: 810
[2m[36m(RolloutWorker pid=16588)[0m total score is -810 at time: 810
[2m[36m(RolloutWorker pid=16588)[0m total score is -811 at time: 811
[2m[36m(RolloutWorker pid=16588)[0m total score is -811 at time: 811
[2m[36m(RolloutWorker pid=16588)[0m total score is -812 at time: 812
[2m[36m(RolloutWorker pid=16588)[0m total score is -812 at time: 812
[2m[36m(RolloutWorker pid=16588)[0m total score is -813 at time: 813
[2m[36m(RolloutWorker pid=16588)[0m total score is -813 at time: 813
[2m[36m(RolloutWorker pid=16588)[0m total score is -814 at time: 814
[2m[36m(RolloutWorker pid=16588)[0m total score is -815 at time: 815
[2m[36m(RolloutWorker pid=16588)[0m total score is -815 at time: 815
[2m[36m(RolloutWorker pid=16588)[0m total score is -816 at time: 816
[2m[36m(RolloutWorker pid=16588)[0m total score is -816 at ti

[2m[36m(RolloutWorker pid=16588)[0m total score is -871 at time: 871
[2m[36m(RolloutWorker pid=16588)[0m total score is -872 at time: 872
[2m[36m(RolloutWorker pid=16588)[0m total score is -872 at time: 872
[2m[36m(RolloutWorker pid=16588)[0m total score is -873 at time: 873
[2m[36m(RolloutWorker pid=16588)[0m total score is -873 at time: 873
[2m[36m(RolloutWorker pid=16588)[0m total score is -874 at time: 874
[2m[36m(RolloutWorker pid=16588)[0m total score is -874 at time: 874
[2m[36m(RolloutWorker pid=16588)[0m total score is -875 at time: 875
[2m[36m(RolloutWorker pid=16588)[0m total score is -876 at time: 876
[2m[36m(RolloutWorker pid=16588)[0m total score is -876 at time: 876
[2m[36m(RolloutWorker pid=16588)[0m total score is -877 at time: 877
[2m[36m(RolloutWorker pid=16588)[0m total score is -877 at time: 877
[2m[36m(RolloutWorker pid=16588)[0m total score is -878 at time: 878
[2m[36m(RolloutWorker pid=16588)[0m total score is -878 at ti

episode reward mean:  nan
End of episode  13
Starting episode  14
[2m[36m(RolloutWorker pid=16588)[0m total score is -934 at time: 934
[2m[36m(RolloutWorker pid=16588)[0m total score is -934 at time: 934
[2m[36m(RolloutWorker pid=16588)[0m total score is -935 at time: 935
[2m[36m(RolloutWorker pid=16588)[0m total score is -936 at time: 936
[2m[36m(RolloutWorker pid=16588)[0m total score is -936 at time: 936
[2m[36m(RolloutWorker pid=16588)[0m total score is -937 at time: 937
[2m[36m(RolloutWorker pid=16588)[0m total score is -937 at time: 937
[2m[36m(RolloutWorker pid=16588)[0m total score is -938 at time: 938
[2m[36m(RolloutWorker pid=16588)[0m total score is -938 at time: 938
[2m[36m(RolloutWorker pid=16588)[0m total score is -939 at time: 939
[2m[36m(RolloutWorker pid=16588)[0m total score is -939 at time: 939
[2m[36m(RolloutWorker pid=16588)[0m total score is -940 at time: 940
[2m[36m(RolloutWorker pid=16588)[0m total score is -940 at time: 94

2022-09-26 14:39:23,120	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(pid=10580)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=10580)[0m 
[2m[36m(pid=10580)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=10580)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=10580)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=10580)[0m [False]
[2m[36m(RolloutWorker pid=10580)[0m resetting
[2m[36m(RolloutWorker pid=10580)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=10580)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=10580)[0m [False, 0]
[2m[36m(RolloutWorker pid=10580)[0m resetting
[2m[36m(RolloutWorker pid=10580)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=10580)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=10580)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=10580)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=10580)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=10580)[0m total score is -3 at time: 3


2022-09-26 14:39:52,705	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(pid=27664)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=27664)[0m 
[2m[36m(pid=27664)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=27664)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=27664)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=27664)[0m [False]
[2m[36m(RolloutWorker pid=27664)[0m resetting
[2m[36m(RolloutWorker pid=27664)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=27664)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=27664)[0m [False, 0]
[2m[36m(RolloutWorker pid=27664)[0m resetting
[2m[36m(RolloutWorker pid=27664)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=27664)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=27664)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=27664)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=27664)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=27664)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=27664)[0m total score is -4 at time: 4
[2m[36m(RolloutWorker pid=27664)[0m total score 

2022-09-26 14:40:27,494	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(pid=26156)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=26156)[0m 
[2m[36m(pid=26156)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=26156)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=26156)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=26156)[0m [False]
[2m[36m(RolloutWorker pid=26156)[0m resetting
[2m[36m(RolloutWorker pid=26156)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=26156)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=26156)[0m [False, 0]
[2m[36m(RolloutWorker pid=26156)[0m resetting
[2m[36m(RolloutWorker pid=26156)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=26156)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=26156)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=26156)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=26156)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=26156)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=26156)[0m total score is -4 at time: 4
[2m[36m(RolloutWorker pid=26156)[0m total score 

[2m[36m(RolloutWorker pid=26156)[0m total score is -62 at time: 62
[2m[36m(RolloutWorker pid=26156)[0m total score is -62 at time: 62
[2m[36m(RolloutWorker pid=26156)[0m total score is -63 at time: 63
[2m[36m(RolloutWorker pid=26156)[0m total score is -63 at time: 63
[2m[36m(RolloutWorker pid=26156)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=26156)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=26156)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=26156)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=26156)[0m total score is -66 at time: 66
[2m[36m(RolloutWorker pid=26156)[0m total score is -67 at time: 67
episode reward mean:  nan
End of episode  14
Starting episode  15
[2m[36m(RolloutWorker pid=26156)[0m total score is -67 at time: 67
[2m[36m(RolloutWorker pid=26156)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker pid=26156)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker 

[2m[36m(RolloutWorker pid=26156)[0m total score is -125 at time: 125
[2m[36m(RolloutWorker pid=26156)[0m total score is -125 at time: 125
[2m[36m(RolloutWorker pid=26156)[0m total score is -126 at time: 126
[2m[36m(RolloutWorker pid=26156)[0m total score is -126 at time: 126
[2m[36m(RolloutWorker pid=26156)[0m total score is -127 at time: 127
[2m[36m(RolloutWorker pid=26156)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=26156)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=26156)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=26156)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=26156)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=26156)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=26156)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=26156)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=26156)[0m total score is -132 at ti

[2m[36m(RolloutWorker pid=26156)[0m total score is -187 at time: 187
[2m[36m(RolloutWorker pid=26156)[0m total score is -188 at time: 188
[2m[36m(RolloutWorker pid=26156)[0m total score is -188 at time: 188
[2m[36m(RolloutWorker pid=26156)[0m total score is -189 at time: 189
[2m[36m(RolloutWorker pid=26156)[0m total score is -189 at time: 189
[2m[36m(RolloutWorker pid=26156)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=26156)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=26156)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=26156)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=26156)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=26156)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=26156)[0m total score is -193 at time: 193
[2m[36m(RolloutWorker pid=26156)[0m total score is -194 at time: 194
[2m[36m(RolloutWorker pid=26156)[0m total score is -194 at ti

[2m[36m(RolloutWorker pid=26156)[0m total score is -249 at time: 249
[2m[36m(RolloutWorker pid=26156)[0m total score is -250 at time: 250
[2m[36m(RolloutWorker pid=26156)[0m total score is -250 at time: 250
[2m[36m(RolloutWorker pid=26156)[0m total score is -251 at time: 251
[2m[36m(RolloutWorker pid=26156)[0m total score is -251 at time: 251
[2m[36m(RolloutWorker pid=26156)[0m total score is -252 at time: 252
[2m[36m(RolloutWorker pid=26156)[0m total score is -252 at time: 252
[2m[36m(RolloutWorker pid=26156)[0m total score is -253 at time: 253
[2m[36m(RolloutWorker pid=26156)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=26156)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=26156)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=26156)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=26156)[0m total score is -256 at time: 256
[2m[36m(RolloutWorker pid=26156)[0m total score is -256 at ti

[2m[36m(RolloutWorker pid=26156)[0m total score is -311 at time: 311
[2m[36m(RolloutWorker pid=26156)[0m total score is -312 at time: 312
[2m[36m(RolloutWorker pid=26156)[0m total score is -312 at time: 312
[2m[36m(RolloutWorker pid=26156)[0m total score is -313 at time: 313
[2m[36m(RolloutWorker pid=26156)[0m total score is -313 at time: 313
[2m[36m(RolloutWorker pid=26156)[0m total score is -314 at time: 314
[2m[36m(RolloutWorker pid=26156)[0m total score is -315 at time: 315
[2m[36m(RolloutWorker pid=26156)[0m total score is -315 at time: 315
[2m[36m(RolloutWorker pid=26156)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=26156)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=26156)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=26156)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=26156)[0m total score is -318 at time: 318
[2m[36m(RolloutWorker pid=26156)[0m total score is -318 at ti

2022-09-26 15:31:12,762	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(pid=5728)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=5728)[0m 
[2m[36m(pid=5728)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=5728)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=5728)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=5728)[0m [False]
[2m[36m(RolloutWorker pid=5728)[0m resetting
[2m[36m(RolloutWorker pid=5728)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=5728)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=5728)[0m [False, 0]
[2m[36m(RolloutWorker pid=5728)[0m resetting
[2m[36m(RolloutWorker pid=5728)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=5728)[0m total score is -1 at time: 1
[2m[36m(RolloutWorker pid=5728)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=5728)[0m total score is -2 at time: 2
[2m[36m(RolloutWorker pid=5728)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=5728)[0m total score is -3 at time: 3
[2m[36m(RolloutWorker pid=5728)[0m total score is -4 at time: 4
[2m[36m(RolloutWorker pid=5728)[0m total score is -4 at time: 4

[2m[36m(RolloutWorker pid=5728)[0m total score is -63 at time: 63
[2m[36m(RolloutWorker pid=5728)[0m total score is -63 at time: 63
[2m[36m(RolloutWorker pid=5728)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=5728)[0m total score is -64 at time: 64
[2m[36m(RolloutWorker pid=5728)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=5728)[0m total score is -65 at time: 65
[2m[36m(RolloutWorker pid=5728)[0m total score is -66 at time: 66
[2m[36m(RolloutWorker pid=5728)[0m total score is -67 at time: 67
episode reward mean:  nan
End of episode  19
Starting episode  20
[2m[36m(RolloutWorker pid=5728)[0m total score is -67 at time: 67
[2m[36m(RolloutWorker pid=5728)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker pid=5728)[0m total score is -68 at time: 68
[2m[36m(RolloutWorker pid=5728)[0m total score is -69 at time: 69
[2m[36m(RolloutWorker pid=5728)[0m total score is -69 at time: 69
[2m[36m(RolloutWorker pid=5728)[0m

[2m[36m(RolloutWorker pid=5728)[0m total score is -127 at time: 127
[2m[36m(RolloutWorker pid=5728)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=5728)[0m total score is -128 at time: 128
[2m[36m(RolloutWorker pid=5728)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=5728)[0m total score is -129 at time: 129
[2m[36m(RolloutWorker pid=5728)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=5728)[0m total score is -130 at time: 130
[2m[36m(RolloutWorker pid=5728)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=5728)[0m total score is -131 at time: 131
[2m[36m(RolloutWorker pid=5728)[0m total score is -132 at time: 132
[2m[36m(RolloutWorker pid=5728)[0m total score is -133 at time: 133
[2m[36m(RolloutWorker pid=5728)[0m total score is -133 at time: 133
episode reward mean:  nan
checkpoint saved at C:\Users\Tim/ray_results\PPO_LightEnvWrapper_2022-09-26_12-37-12w8fmcxi1\checkpoint_000021
End of epis

[2m[36m(RolloutWorker pid=5728)[0m total score is -189 at time: 189
[2m[36m(RolloutWorker pid=5728)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=5728)[0m total score is -190 at time: 190
[2m[36m(RolloutWorker pid=5728)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=5728)[0m total score is -191 at time: 191
[2m[36m(RolloutWorker pid=5728)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=5728)[0m total score is -192 at time: 192
[2m[36m(RolloutWorker pid=5728)[0m total score is -193 at time: 193
[2m[36m(RolloutWorker pid=5728)[0m total score is -194 at time: 194
[2m[36m(RolloutWorker pid=5728)[0m total score is -194 at time: 194
[2m[36m(RolloutWorker pid=5728)[0m total score is -195 at time: 195
[2m[36m(RolloutWorker pid=5728)[0m total score is -195 at time: 195
[2m[36m(RolloutWorker pid=5728)[0m total score is -196 at time: 196
[2m[36m(RolloutWorker pid=5728)[0m total score is -196 at time: 196
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -252 at time: 252
[2m[36m(RolloutWorker pid=5728)[0m total score is -253 at time: 253
[2m[36m(RolloutWorker pid=5728)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=5728)[0m total score is -254 at time: 254
[2m[36m(RolloutWorker pid=5728)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=5728)[0m total score is -255 at time: 255
[2m[36m(RolloutWorker pid=5728)[0m total score is -256 at time: 256
[2m[36m(RolloutWorker pid=5728)[0m total score is -256 at time: 256
[2m[36m(RolloutWorker pid=5728)[0m total score is -257 at time: 257
[2m[36m(RolloutWorker pid=5728)[0m total score is -257 at time: 257
[2m[36m(RolloutWorker pid=5728)[0m total score is -258 at time: 258
[2m[36m(RolloutWorker pid=5728)[0m total score is -258 at time: 258
[2m[36m(RolloutWorker pid=5728)[0m total score is -259 at time: 259
[2m[36m(RolloutWorker pid=5728)[0m total score is -260 at time: 260
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=5728)[0m total score is -316 at time: 316
[2m[36m(RolloutWorker pid=5728)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=5728)[0m total score is -317 at time: 317
[2m[36m(RolloutWorker pid=5728)[0m total score is -318 at time: 318
[2m[36m(RolloutWorker pid=5728)[0m total score is -318 at time: 318
[2m[36m(RolloutWorker pid=5728)[0m total score is -319 at time: 319
[2m[36m(RolloutWorker pid=5728)[0m total score is -320 at time: 320
[2m[36m(RolloutWorker pid=5728)[0m total score is -320 at time: 320
[2m[36m(RolloutWorker pid=5728)[0m total score is -321 at time: 321
[2m[36m(RolloutWorker pid=5728)[0m total score is -321 at time: 321
[2m[36m(RolloutWorker pid=5728)[0m total score is -322 at time: 322
[2m[36m(RolloutWorker pid=5728)[0m total score is -322 at time: 322
[2m[36m(RolloutWorker pid=5728)[0m total score is -323 at time: 323
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -379 at time: 379
[2m[36m(RolloutWorker pid=5728)[0m total score is -379 at time: 379
[2m[36m(RolloutWorker pid=5728)[0m total score is -380 at time: 380
[2m[36m(RolloutWorker pid=5728)[0m total score is -381 at time: 381
[2m[36m(RolloutWorker pid=5728)[0m total score is -381 at time: 381
[2m[36m(RolloutWorker pid=5728)[0m total score is -382 at time: 382
[2m[36m(RolloutWorker pid=5728)[0m total score is -382 at time: 382
[2m[36m(RolloutWorker pid=5728)[0m total score is -383 at time: 383
[2m[36m(RolloutWorker pid=5728)[0m total score is -383 at time: 383
[2m[36m(RolloutWorker pid=5728)[0m total score is -384 at time: 384
[2m[36m(RolloutWorker pid=5728)[0m total score is -384 at time: 384
[2m[36m(RolloutWorker pid=5728)[0m total score is -385 at time: 385
[2m[36m(RolloutWorker pid=5728)[0m total score is -386 at time: 386
[2m[36m(RolloutWorker pid=5728)[0m total score is -386 at time: 386
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -442 at time: 442
[2m[36m(RolloutWorker pid=5728)[0m total score is -443 at time: 443
[2m[36m(RolloutWorker pid=5728)[0m total score is -443 at time: 443
[2m[36m(RolloutWorker pid=5728)[0m total score is -444 at time: 444
[2m[36m(RolloutWorker pid=5728)[0m total score is -444 at time: 444
[2m[36m(RolloutWorker pid=5728)[0m total score is -445 at time: 445
[2m[36m(RolloutWorker pid=5728)[0m total score is -445 at time: 445
[2m[36m(RolloutWorker pid=5728)[0m total score is -446 at time: 446
[2m[36m(RolloutWorker pid=5728)[0m total score is -447 at time: 447
[2m[36m(RolloutWorker pid=5728)[0m total score is -447 at time: 447
[2m[36m(RolloutWorker pid=5728)[0m total score is -448 at time: 448
[2m[36m(RolloutWorker pid=5728)[0m total score is -448 at time: 448
[2m[36m(RolloutWorker pid=5728)[0m total score is -449 at time: 449
[2m[36m(RolloutWorker pid=5728)[0m total score is -449 at time: 449
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -505 at time: 505
[2m[36m(RolloutWorker pid=5728)[0m total score is -506 at time: 506
[2m[36m(RolloutWorker pid=5728)[0m total score is -507 at time: 507
[2m[36m(RolloutWorker pid=5728)[0m total score is -507 at time: 507
[2m[36m(RolloutWorker pid=5728)[0m total score is -508 at time: 508
[2m[36m(RolloutWorker pid=5728)[0m total score is -508 at time: 508
[2m[36m(RolloutWorker pid=5728)[0m total score is -509 at time: 509
[2m[36m(RolloutWorker pid=5728)[0m total score is -509 at time: 509
[2m[36m(RolloutWorker pid=5728)[0m total score is -510 at time: 510
[2m[36m(RolloutWorker pid=5728)[0m total score is -510 at time: 510
[2m[36m(RolloutWorker pid=5728)[0m total score is -511 at time: 511
[2m[36m(RolloutWorker pid=5728)[0m total score is -511 at time: 511
[2m[36m(RolloutWorker pid=5728)[0m total score is -512 at time: 512
[2m[36m(RolloutWorker pid=5728)[0m total score is -513 at time: 513
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -569 at time: 569
[2m[36m(RolloutWorker pid=5728)[0m total score is -569 at time: 569
[2m[36m(RolloutWorker pid=5728)[0m total score is -570 at time: 570
[2m[36m(RolloutWorker pid=5728)[0m total score is -570 at time: 570
[2m[36m(RolloutWorker pid=5728)[0m total score is -571 at time: 571
[2m[36m(RolloutWorker pid=5728)[0m total score is -571 at time: 571
[2m[36m(RolloutWorker pid=5728)[0m total score is -572 at time: 572
[2m[36m(RolloutWorker pid=5728)[0m total score is -573 at time: 573
[2m[36m(RolloutWorker pid=5728)[0m total score is -573 at time: 573
[2m[36m(RolloutWorker pid=5728)[0m total score is -574 at time: 574
[2m[36m(RolloutWorker pid=5728)[0m total score is -574 at time: 574
[2m[36m(RolloutWorker pid=5728)[0m total score is -575 at time: 575
[2m[36m(RolloutWorker pid=5728)[0m total score is -575 at time: 575
[2m[36m(RolloutWorker pid=5728)[0m total score is -576 at time: 576
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -632 at time: 632
[2m[36m(RolloutWorker pid=5728)[0m total score is -632 at time: 632
[2m[36m(RolloutWorker pid=5728)[0m total score is -633 at time: 633
[2m[36m(RolloutWorker pid=5728)[0m total score is -634 at time: 634
[2m[36m(RolloutWorker pid=5728)[0m total score is -634 at time: 634
[2m[36m(RolloutWorker pid=5728)[0m total score is -635 at time: 635
[2m[36m(RolloutWorker pid=5728)[0m total score is -635 at time: 635
[2m[36m(RolloutWorker pid=5728)[0m total score is -636 at time: 636
[2m[36m(RolloutWorker pid=5728)[0m total score is -636 at time: 636
[2m[36m(RolloutWorker pid=5728)[0m total score is -637 at time: 637
[2m[36m(RolloutWorker pid=5728)[0m total score is -637 at time: 637
[2m[36m(RolloutWorker pid=5728)[0m total score is -638 at time: 638
[2m[36m(RolloutWorker pid=5728)[0m total score is -639 at time: 639
[2m[36m(RolloutWorker pid=5728)[0m total score is -639 at time: 639
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -695 at time: 695
[2m[36m(RolloutWorker pid=5728)[0m total score is -696 at time: 696
[2m[36m(RolloutWorker pid=5728)[0m total score is -696 at time: 696
[2m[36m(RolloutWorker pid=5728)[0m total score is -697 at time: 697
[2m[36m(RolloutWorker pid=5728)[0m total score is -697 at time: 697
[2m[36m(RolloutWorker pid=5728)[0m total score is -698 at time: 698
[2m[36m(RolloutWorker pid=5728)[0m total score is -698 at time: 698
[2m[36m(RolloutWorker pid=5728)[0m total score is -699 at time: 699
[2m[36m(RolloutWorker pid=5728)[0m total score is -700 at time: 700
[2m[36m(RolloutWorker pid=5728)[0m total score is -700 at time: 700
[2m[36m(RolloutWorker pid=5728)[0m total score is -701 at time: 701
[2m[36m(RolloutWorker pid=5728)[0m total score is -701 at time: 701
[2m[36m(RolloutWorker pid=5728)[0m total score is -702 at time: 702
[2m[36m(RolloutWorker pid=5728)[0m total score is -702 at time: 702
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -758 at time: 758
[2m[36m(RolloutWorker pid=5728)[0m total score is -759 at time: 759
[2m[36m(RolloutWorker pid=5728)[0m total score is -760 at time: 760
[2m[36m(RolloutWorker pid=5728)[0m total score is -760 at time: 760
[2m[36m(RolloutWorker pid=5728)[0m total score is -761 at time: 761
[2m[36m(RolloutWorker pid=5728)[0m total score is -761 at time: 761
[2m[36m(RolloutWorker pid=5728)[0m total score is -762 at time: 762
[2m[36m(RolloutWorker pid=5728)[0m total score is -762 at time: 762
[2m[36m(RolloutWorker pid=5728)[0m total score is -763 at time: 763
[2m[36m(RolloutWorker pid=5728)[0m total score is -763 at time: 763
[2m[36m(RolloutWorker pid=5728)[0m total score is -764 at time: 764
[2m[36m(RolloutWorker pid=5728)[0m total score is -764 at time: 764
[2m[36m(RolloutWorker pid=5728)[0m total score is -765 at time: 765
[2m[36m(RolloutWorker pid=5728)[0m total score is -766 at time: 766
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -821 at time: 821
[2m[36m(RolloutWorker pid=5728)[0m total score is -821 at time: 821
[2m[36m(RolloutWorker pid=5728)[0m total score is -822 at time: 822
[2m[36m(RolloutWorker pid=5728)[0m total score is -822 at time: 822
[2m[36m(RolloutWorker pid=5728)[0m total score is -823 at time: 823
[2m[36m(RolloutWorker pid=5728)[0m total score is -823 at time: 823
[2m[36m(RolloutWorker pid=5728)[0m total score is -824 at time: 824
[2m[36m(RolloutWorker pid=5728)[0m total score is -824 at time: 824
[2m[36m(RolloutWorker pid=5728)[0m total score is -825 at time: 825
[2m[36m(RolloutWorker pid=5728)[0m total score is -826 at time: 826
[2m[36m(RolloutWorker pid=5728)[0m total score is -826 at time: 826
[2m[36m(RolloutWorker pid=5728)[0m total score is -827 at time: 827
[2m[36m(RolloutWorker pid=5728)[0m total score is -827 at time: 827
[2m[36m(RolloutWorker pid=5728)[0m total score is -828 at time: 828
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -884 at time: 884
[2m[36m(RolloutWorker pid=5728)[0m total score is -884 at time: 884
[2m[36m(RolloutWorker pid=5728)[0m total score is -885 at time: 885
[2m[36m(RolloutWorker pid=5728)[0m total score is -885 at time: 885
[2m[36m(RolloutWorker pid=5728)[0m total score is -886 at time: 886
[2m[36m(RolloutWorker pid=5728)[0m total score is -887 at time: 887
[2m[36m(RolloutWorker pid=5728)[0m total score is -887 at time: 887
[2m[36m(RolloutWorker pid=5728)[0m total score is -888 at time: 888
[2m[36m(RolloutWorker pid=5728)[0m total score is -888 at time: 888
[2m[36m(RolloutWorker pid=5728)[0m total score is -889 at time: 889
[2m[36m(RolloutWorker pid=5728)[0m total score is -889 at time: 889
[2m[36m(RolloutWorker pid=5728)[0m total score is -890 at time: 890
[2m[36m(RolloutWorker pid=5728)[0m total score is -890 at time: 890
[2m[36m(RolloutWorker pid=5728)[0m total score is -891 at time: 891
[2m[

[2m[36m(RolloutWorker pid=5728)[0m total score is -947 at time: 947
[2m[36m(RolloutWorker pid=5728)[0m total score is -948 at time: 948
[2m[36m(RolloutWorker pid=5728)[0m total score is -948 at time: 948
[2m[36m(RolloutWorker pid=5728)[0m total score is -949 at time: 949
[2m[36m(RolloutWorker pid=5728)[0m total score is -949 at time: 949
[2m[36m(RolloutWorker pid=5728)[0m total score is -950 at time: 950
[2m[36m(RolloutWorker pid=5728)[0m total score is -950 at time: 950
[2m[36m(RolloutWorker pid=5728)[0m total score is -951 at time: 951
[2m[36m(RolloutWorker pid=5728)[0m total score is -951 at time: 951
[2m[36m(RolloutWorker pid=5728)[0m total score is -952 at time: 952
[2m[36m(RolloutWorker pid=5728)[0m total score is -953 at time: 953
[2m[36m(RolloutWorker pid=5728)[0m total score is -953 at time: 953
[2m[36m(RolloutWorker pid=5728)[0m total score is -954 at time: 954
[2m[36m(RolloutWorker pid=5728)[0m total score is -954 at time: 954
[2m[

2022-09-26 17:56:27,369	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(RolloutWorker pid=5728)[0m total score is -987 at time: 987
[2m[36m(RolloutWorker pid=5728)[0m total score is -987 at time: 987
[2m[36m(RolloutWorker pid=5728)[0m total score is -988 at time: 988
[2m[36m(RolloutWorker pid=5728)[0m total score is -988 at time: 988
[2m[36m(RolloutWorker pid=5728)[0m total score is -989 at time: 989
[2m[36m(RolloutWorker pid=5728)[0m total score is -989 at time: 989
[2m[36m(RolloutWorker pid=5728)[0m total score is -990 at time: 990
[2m[36m(RolloutWorker pid=5728)[0m total score is -991 at time: 991
[2m[36m(RolloutWorker pid=5728)[0m total score is -991 at time: 991
[2m[36m(RolloutWorker pid=5728)[0m total score is -992 at time: 992
[2m[36m(RolloutWorker pid=5728)[0m total score is -992 at time: 992
[2m[36m(RolloutWorker pid=5728)[0m total score is -993 at time: 993
[2m[36m(RolloutWorker pid=5728)[0m total score is -993 at time: 993
[2m[36m(RolloutWorker pid=5728)[0m total score is -994 at time: 994
[2m[

2022-09-26 17:58:35,682	ERROR algorithm.py:2173 -- Error in training or evaluation attempt! Trying to recover.
Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2373, in _run_one_training_iteration
    results = self.training_step()
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step
    train_batch = synchronous_parallel_sample(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 100, in synchronous_parallel_sample
    sample_batches = ray.get(
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2275, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36m

[2m[36m(pid=33168)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=33168)[0m 
[2m[36m(pid=33168)[0m   if (distutils.version.LooseVersion(tf.__version__) <


[2m[36m(RolloutWorker pid=33168)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=33168)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=33168)[0m [False]
[2m[36m(RolloutWorker pid=33168)[0m resetting
[2m[36m(RolloutWorker pid=33168)[0m resetting in wrapper
[2m[36m(RolloutWorker pid=33168)[0m Torch was not collected this episode...
[2m[36m(RolloutWorker pid=33168)[0m [False, 0]
[2m[36m(RolloutWorker pid=33168)[0m resetting
[2m[36m(RolloutWorker pid=33168)[0m total score is -2 at time: 1
[2m[36m(RolloutWorker pid=33168)[0m total score is -2 at time: 1
[2m[36m(RolloutWorker pid=33168)[0m total score is -3 at time: 2
[2m[36m(RolloutWorker pid=33168)[0m total score is -3 at time: 2
[2m[36m(RolloutWorker pid=33168)[0m total score is -4 at time: 3
[2m[36m(RolloutWorker pid=33168)[0m total score is -4 at time: 3
[2m[36m(RolloutWorker pid=33168)[0m total score is -5 at time: 4
[2m[36m(RolloutWorker pid=33168)[0m total score 

[2m[36m(RolloutWorker pid=33168)[0m total score is -63 at time: 62
[2m[36m(RolloutWorker pid=33168)[0m total score is -63 at time: 62
[2m[36m(RolloutWorker pid=33168)[0m total score is -64 at time: 63
[2m[36m(RolloutWorker pid=33168)[0m total score is -64 at time: 63
[2m[36m(RolloutWorker pid=33168)[0m total score is -65 at time: 64
[2m[36m(RolloutWorker pid=33168)[0m total score is -65 at time: 64
[2m[36m(RolloutWorker pid=33168)[0m total score is -66 at time: 65
[2m[36m(RolloutWorker pid=33168)[0m total score is -66 at time: 65
[2m[36m(RolloutWorker pid=33168)[0m total score is -67 at time: 66
[2m[36m(RolloutWorker pid=33168)[0m total score is -68 at time: 67


  mo = re.match("state_in_(\d+)", view_col)


KeyboardInterrupt: 