In [1]:
import gym
import random, math
import numpy as np
import arcade
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
from PIL import Image

        
from LightEnvCopy import LightEnv

import gym.spaces
from gym.spaces import Discrete, Box

from ray.rllib.env.env_context import EnvContext
from ray.rllib.models import ModelCatalog

from collections import namedtuple

# Window geometry and caption handed to the underlying game window.
SCREEN_WIDTH = 800
SCREEN_HEIGHT = 600
SCREEN_TITLE = "Game 1: Let There Be Light!"

# Record describing one move: a readable name, its discrete index, and the
# (delta_i, delta_j) offsets associated with it.
Action = namedtuple('Action', 'name index delta_i delta_j')

up = Action('up', 0, -1, 0)
down = Action('down', 1, 1, 0)
left = Action('left', 2, 0, -1)
right = Action('right', 3, 0, 1)

# Lookup tables for converting between the discrete index, the action name
# and the full Action record. Both are filled in a single pass.
index_to_actions = {}
str_to_actions = {}
for action in (up, down, left, right):
    index_to_actions[action.index] = action
    str_to_actions[action.name] = action


class LightEnvWrapper(gym.Env, LightEnv):
    """Wraps the Lights environment (``LightEnv``) so it can be used with RLlib.

    The wrapper exposes a ``Discrete(4)`` action space (up/down/left/right) and
    an 84x84x4 ``uint8`` image observation space. Every observation returned
    by the underlying game is resized to 84x84 and converted to a NumPy array.
    """

    metadata = {"render.modes": ["rgb_array", "state_pixels"]}

    # Debug switch: when True, every frame produced by reset() is opened in an
    # external image viewer. Keep it off during training.
    SHOW_RESET_FRAME = False

    def __init__(self, config: EnvContext):
        """Create the game window and declare the action/observation spaces.

        Args:
            config: RLlib environment context. It is accepted for API
                compatibility and is not read here.
        """
        super().__init__(SCREEN_WIDTH, SCREEN_HEIGHT, SCREEN_TITLE)
        # Number of step() calls made on this instance.
        self.counting = 0
        # Torch flag reported by the most recent step (False until a step runs).
        self.torch_collected = False
        # One entry appended per reset() call (value of the flag at that time).
        self.torch_collected_count = []
        # The parent class itself; its methods are invoked unbound with `self`
        # so the overrides defined in this wrapper are bypassed.
        self.mygame = LightEnv

        # The action space is a choice of 4 actions: U/D/L/R.
        self.action_space = Discrete(4)

        # The observation space is a fixed-size image of the current game screen.
        # NOTE(review): assumes the frames returned by LightEnv have 4 channels
        # (e.g. RGBA) - confirm against LightEnv.
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 4), dtype=np.uint8)

    def reset(self):
        """Start a new episode and return its first observation.

        Records the torch flag of the episode that just ended, refreshes the
        window, resets the underlying game and converts the returned frame.

        Returns:
            The initial observation as produced by ``convert_observations``.
        """
        print("resetting in wrapper")

        if self.torch_collected == 1:
            print("Torch was collected this episode!")
        else:
            print("Torch was not collected this episode...")
        self.torch_collected_count.append(self.torch_collected)
        print(self.torch_collected_count)

        self.render()
        # Reset the state of the underlying game and fetch the initial frame.
        obs_mygame = self.mygame.reset(self)

        # Optional visual check of the reset frame (debug only, see class flag).
        if self.SHOW_RESET_FRAME:
            obs_mygame.show()

        # Convert observation to 84x84 resolution and np array for rllib.
        return self.convert_observations(obs_mygame)

    def step(self, action):
        """Apply one action and return ``(obs, reward, done, info)``.

        Args:
            action: Integer in ``{0, 1, 2, 3}`` (0-up, 1-down, 2-left, 3-right).

        Returns:
            Tuple of the converted observation, the reward and done flag
            reported by the underlying game, and an empty info dict.

        Raises:
            AssertionError: If ``action`` is not one of 0, 1, 2, 3.
        """
        self.counting += 1

        # Making sure an action is chosen, either: 0, 1, 2, 3.
        assert action in [0, 1, 2, 3]  # 0-up,1-down,2-left,3-right.

        # Convert the numeric action to a keyword: up, down, left, right.
        actions_myenv = index_to_actions[action].name

        # Update and redraw the window before stepping the game.
        self.render()

        # Observation extracted from the window, with reward and done flag.
        obs, reward, done, torch_collected = self.mygame.step(self, actions_myenv)
        self.torch_collected = 1 if torch_collected == True else 0

        # Convert observation to 84x84 resolution and np array for rllib.
        obs_mygame = self.convert_observations(obs)

        # Do NOT reset here: under the Gym API the caller (RLlib's sampler)
        # calls reset() after it sees done=True. Resetting inside step() reset
        # the game twice per episode and double-counted the torch statistics.
        if done == True:
            print(f"done is {done}, episode finished; the caller is expected to reset.")

        return obs_mygame, reward, done, {}

    def seed(self, seed=None):
        """Seed Python's global ``random`` module with ``seed``."""
        random.seed(seed)

    def convert_observations(self, obs_mygame):
        """Resize a game frame to 84x84 and return it as a NumPy array.

        Args:
            obs_mygame: Frame object exposing ``resize((width, height))``.
                NOTE(review): presumably a ``PIL.Image`` - confirm in LightEnv.

        Returns:
            ``np.array`` built from the resized frame.
        """
        obs_resized = obs_mygame.resize((84, 84))
        return np.array(obs_resized)

    def render(self, mode='state_pixels'):
        """Advance the game by one 1/60 s tick and redraw the window.

        Args:
            mode: Accepted for Gym API compatibility; not used.
        """
        self.mygame.on_update(self, 1/60)
        self.mygame.on_draw(self)
        # Result is unused; the call is kept in case it has side effects in
        # LightEnv - TODO confirm and drop if it is a pure getter.
        self.mygame.time_taken_reported(self)
        

  if (distutils.version.LooseVersion(tf.__version__) <


### Now run the RLlib script to train the agent

Grid search attempt below:

In [2]:
import ray
import gym
import ray.rllib.agents.ppo.ppo as ppo
from ray.rllib.algorithms.ppo import PPOConfig
from ray import air
from ray import tune

def evaluation_fn(result):
    """Return the ``'episode_reward_mean'`` entry of a training result.

    Args:
        result: Mapping returned by one training iteration.

    Returns:
        The value stored under ``'episode_reward_mean'``.
    """
    mean_reward = result['episode_reward_mean']
    return mean_reward


def objective_fn(config):
    """Tune trainable: train PPO for 100 iterations, reporting the mean reward.

    Args:
        config: RLlib config dict for this trial (grid-search values already
            resolved by Tune).
    """
    trainer = ppo.PPOTrainer(config=config)
    try:
        for i in range(100):
            # Perform one iteration of training the policy with PPO.
            result = trainer.train()
            intermediate_score = evaluation_fn(result)

            # Feed the score back to Tune.
            tune.report(iterations=i, mean_reward=intermediate_score)
    finally:
        # Always release the trainer's workers and environments, including
        # when train() raises, so a finished or failed trial does not keep
        # holding resources.
        trainer.stop()

In [3]:
from ray import tune
import torch

# PPO configuration for the grid search over the learning rate.
# NOTE(review): entropy_coeff=1 and lr values up to 0.1 are far above the PPO
# defaults - confirm these are intended.
config = (
    PPOConfig()
    .training(
        gamma=0.9,
        lr=tune.grid_search([0.001, 0.01, 0.1]),
        kl_coeff=0.2,
        entropy_coeff=1,
        sgd_minibatch_size=128,
        num_sgd_iter=60,
    )
    # Set the env and action post-processing through the builder API rather
    # than by assigning attributes on the config object.
    .environment(env=LightEnvWrapper, normalize_actions=False, clip_actions=False)
    .resources(num_gpus=0)
    .rollouts(num_envs_per_worker=1, num_rollout_workers=1, recreate_failed_workers=True)
)
# `num_cpus` is not an RLlib config field, so the former `config.num_cpus = 1`
# assignment only injected an unknown key into the dict below; CPU allocation
# is controlled through `.resources(...)` instead.
config = config.to_dict()

In [4]:
# Launch the experiment: one sample per grid point, selecting the trial with
# the highest reported "mean_reward".
analysis = tune.run(
    objective_fn,
    config=config,
    num_samples=1,
    metric="mean_reward",
    mode="max",
)

2022-09-16 18:11:57,643	INFO worker.py:1518 -- Started a local Ray instance.


Trial name,status,loc,lr
objective_fn_LightEnvWrapper_ad0f9_00000,ERROR,127.0.0.1:12216,0.001
objective_fn_LightEnvWrapper_ad0f9_00001,ERROR,,0.01
objective_fn_LightEnvWrapper_ad0f9_00002,ERROR,,0.1

Trial name,# failures,error file
objective_fn_LightEnvWrapper_ad0f9_00000,1,C:\Users\Tim\ray_results\objective_fn_2022-09-16_18-12-00\objective_fn_LightEnvWrapper_ad0f9_00000_0_lr=0.0010_2022-09-16_18-12-00\error.txt
objective_fn_LightEnvWrapper_ad0f9_00001,1,C:\Users\Tim\ray_results\objective_fn_2022-09-16_18-12-00\objective_fn_LightEnvWrapper_ad0f9_00001_1_lr=0.0100_2022-09-16_18-12-11\error.txt
objective_fn_LightEnvWrapper_ad0f9_00002,1,C:\Users\Tim\ray_results\objective_fn_2022-09-16_18-12-00\objective_fn_LightEnvWrapper_ad0f9_00002_2_lr=0.1000_2022-09-16_18-12-22\error.txt


  return bundle_reservation_check.options(
  return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
  return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
  return actor_cls._remote(args=args, kwargs=kwargs, **updated_options)
[2m[36m(pid=12216)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=12216)[0m 
[2m[36m(pid=12216)[0m   if (distutils.version.LooseVersion(tf.__version__) <
[2m[36m(objective_fn pid=12216)[0m 2022-09-16 18:12:11,607	ERROR function_trainable.py:298 -- Runner Thread raised error.
[2m[36m(objective_fn pid=12216)[0m Traceback (most recent call last):
[2m[36m(objective_fn pid=12216)[0m   File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\tune\trainable\function_trainable.py", line 289, in run
[2m[36m(objective_fn pid=12216)[0m     self._entrypoint()
[2m[36m(objective_fn pid=12216)[0m   File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\tune\trainable\function_trainable.py", line 362, i

[2m[36m(pid=10128)[0m   if (distutils.version.LooseVersion(tf.__version__) <
[2m[36m(ImplicitFunc pid=10128)[0m 2022-09-16 18:12:22,464	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::ImplicitFunc.__init__()[39m (pid=10128, ip=127.0.0.1, repr=objective_fn)
[2m[36m(ImplicitFunc pid=10128)[0m   File "python\ray\_raylet.pyx", line 620, in ray._raylet.execute_task
[2m[36m(ImplicitFunc pid=10128)[0m   File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\memory_monitor.py", line 162, in raise_if_low_memory
[2m[36m(ImplicitFunc pid=10128)[0m     raise RayOutOfMemoryError(
[2m[36m(ImplicitFunc pid=10128)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-BKAPO4O is used (7.38 / 7.73 GB). The top 10 memory consumers are:
[2m[36m(ImplicitFunc pid=10128)[0m 
[2m[36m(ImplicitFunc pid=10128)[0m PID	MEM	COMMAND
[2m[36m(ImplicitF

Result for objective_fn_LightEnvWrapper_ad0f9_00000:
  date: 2022-09-16_18-12-11
  experiment_id: bb4aad0d16744006b1176d86dcf5ec98
  hostname: DESKTOP-BKAPO4O
  node_ip: 127.0.0.1
  pid: 12216
  timestamp: 1663348331
  trial_id: ad0f9_00000
  


2022-09-16 18:12:34,979	ERROR trial_runner.py:980 -- Trial objective_fn_LightEnvWrapper_ad0f9_00002: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\tune\execution\ray_trial_executor.py", line 989, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2277, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::ImplicitFunc.__init__()[39m (pid=32644, ip=127.0.0.1, repr=objective_fn)
  File "python\ray\_raylet.pyx", line 620, in ray._raylet.execute_task
  File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\memory_monitor.py", li

Result for objective_fn_LightEnvWrapper_ad0f9_00002:
  trial_id: ad0f9_00002
  
Result for objective_fn_LightEnvWrapper_ad0f9_00001:
  trial_id: ad0f9_00001
  


[2m[36m(ImplicitFunc pid=32644)[0m 2022-09-16 18:12:34,935	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::ImplicitFunc.__init__()[39m (pid=32644, ip=127.0.0.1, repr=objective_fn)
[2m[36m(ImplicitFunc pid=32644)[0m   File "python\ray\_raylet.pyx", line 620, in ray._raylet.execute_task
[2m[36m(ImplicitFunc pid=32644)[0m   File "C:\Users\Tim\Anaconda3\envs\rllib\lib\site-packages\ray\_private\memory_monitor.py", line 162, in raise_if_low_memory
[2m[36m(ImplicitFunc pid=32644)[0m     raise RayOutOfMemoryError(
[2m[36m(ImplicitFunc pid=32644)[0m ray._private.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node DESKTOP-BKAPO4O is used (7.5 / 7.73 GB). The top 10 memory consumers are:
[2m[36m(ImplicitFunc pid=32644)[0m 
[2m[36m(ImplicitFunc pid=32644)[0m PID	MEM	COMMAND
[2m[36m(ImplicitFunc pid=32644)[0m 12216	0.53GiB	C:\Users\Tim\Anaconda3\envs\rllib\python.exe C:\

TuneError: ('Trials did not complete', [objective_fn_LightEnvWrapper_ad0f9_00000, objective_fn_LightEnvWrapper_ad0f9_00001, objective_fn_LightEnvWrapper_ad0f9_00002])