In [1]:
#!pip install compiler_gym 'ray[default,rllib]' &>/dev/null || echo "Install failed!"

import compiler_gym
import ray

from ray.rllib.agents.ppo import PPOTrainer
from compiler_gym.wrappers import ConstrainedCommandline, TimeLimit
from ray import tune
from itertools import islice
from compiler_gym.wrappers import CycleOverBenchmarks
from compiler_gym.util.registration import register

import loop_tool_service

from service_py.datasets import loop_tool_dataset
from service_py.rewards import flops_loop_nest_reward, flops_reward, runtime_reward

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def make_env() -> compiler_gym.envs.CompilerEnv:
    """Build the loop_tool environment used throughout this notebook.

    The environment is created through ``loop_tool_service.make`` with the
    ``ir_tensor`` observation space and the ``flops_loop_nest_tensor`` reward
    space, then wrapped in ``TimeLimit`` with ``max_episode_steps=10``.

    Returns:
        The ``TimeLimit``-wrapped environment.
    """
    base_env = loop_tool_service.make(
        "loop_tool_env-v0",
        observation_space="ir_tensor",
        # Alternative reward signal kept for experimentation: "runtime".
        reward_space="flops_loop_nest_tensor",
    )
    return TimeLimit(base_env, max_episode_steps=10)

In [3]:
# Open a throwaway environment just long enough to report its spaces.
with make_env() as env:
    for label, space in (
        ("Action space:", env.action_space),
        ("Observation space:", env.observation_space),
        ("Reward space:", env.reward_space),
    ):
        print(label, space)

Action space: NamedDiscrete([up, down, swap_up, swap_down])
Observation space: Box([[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], [[256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256.
  256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256.
  256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256.
  256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256. 256.
  256. 256. 256. 256.]], (1, 60), float32)
Reward space: flops_loop_nest_tensor


In [4]:
with make_env() as env:
    # Dataset the benchmarks below belong to.
    lt_dataset = env.datasets["loop_tool_simple-v0"]

    # Alternative (disabled): sample benchmarks straight from the dataset, e.g.
    #   list(islice(lt_dataset.benchmarks(), 1))   # training
    #   list(islice(lt_dataset.benchmarks(), 2))   # testing
    # Other URIs that can be appended to the list (disabled):
    #   "benchmark://loop_tool_simple-v0/mm128"
    #   "benchmark://loop_tool_simple-v0/mm"
    bench = ["benchmark://loop_tool_simple-v0/simple"]

    # Training and testing intentionally share the very same list object.
    train_benchmarks = test_benchmarks = bench

for split_label, split_benchmarks in (
    ("training", train_benchmarks),
    ("testing", test_benchmarks),
):
    print(f"Number of benchmarks for {split_label}:", len(split_benchmarks))


Number of benchmarks for training: 1
Number of benchmarks for testing: 1


In [5]:
def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:
    """Create an environment that cycles over ``train_benchmarks``.

    Args:
        *args: Ignored. Ray passes an ``env_config`` argument to environment
            creators, which this notebook does not use.

    Returns:
        A fresh ``make_env()`` environment wrapped in ``CycleOverBenchmarks``.
    """
    # Positional arguments are accepted only for call compatibility with Ray.
    base_env = make_env()
    return CycleOverBenchmarks(base_env, train_benchmarks)


In [6]:
# Reset twice and print which benchmark each reset selected.
with make_training_env() as env:
    for reset_round in range(2):
        env.reset()
        print(env.benchmark)

E0628 12:45:36.566179 140569554388544 example_service.py:250] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124535-535834-ecb5

E0628 12:45:36.673875 140569554388544 example_service.py:250] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124535-535834-ecb5



for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

benchmark://loop_tool_simple-v0/simple
for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

benchmark://loop_tool_simple-v0/simple


In [9]:
# Create a training environment without a `with` block, so it stays open after
# this cell finishes, then reset it. The value returned by reset() is the
# cell's displayed output.
env = make_training_env()
env.reset()

for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  



E0628 12:45:59.961900 139699708487232 example_service.py:250] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124558-926091-2897



array([[  1.,   0.,   0.,   0., 128.,   0.,   0.,   0.,   0.,   2., 128.,
          0.,   0.,   0.,   0.,   1., 128.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.]], dtype=float32)

In [10]:
# Take one manual step in the environment opened in the previous cell.
# Action index 2 is `swap_up` in the action space printed earlier:
# NamedDiscrete([up, down, swap_up, swap_down]).
try:
    step_result = env.step(2)
finally:
    # That environment was created outside a `with` block and is not used
    # again, so release it here instead of leaking it.
    env.close()

# Display the step() result as the cell output.
step_result

(array([[  1.,   0.,   0.,   0., 128.,   0.,   0.,   0.,   0.,   2., 128.,
           0.,   0.,   0.,   0.,   1., 128.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.]], dtype=float32),
 -2134068176.24735,
 False,
 {'action_had_no_effect': True, 'new_action_space': False})

In [11]:
# Options for the local Ray runtime used by this notebook.
ray_init_options = {
    "include_dashboard": False,
    "ignore_reinit_error": True,
}

# Restart Ray if a runtime is already active, so re-running this cell works.
if ray.is_initialized():
    ray.shutdown()
ray.init(**ray_init_options)

# Expose the training environment factory to RLlib under the name "compiler_gym".
tune.register_env("compiler_gym", make_training_env)

In [12]:
import time
from ray import tune
from ray.tune import Stopper

class TimeStopper(Stopper):
    """Stopper that ends the whole experiment after a wall-clock budget.

    Individual trials are never stopped by this class; only ``stop_all``
    can become true, once more than ``deadline`` seconds have elapsed since
    the stopper was constructed.

    Args:
        deadline: Time budget in seconds, measured from construction.
            Defaults to 1, the value that used to be hard-coded.
    """

    def __init__(self, deadline=1):
        # The clock starts when the stopper is created, not when tuning starts.
        self._start = time.time()
        self._deadline = deadline

    def __call__(self, trial_id, result):
        """Never request that a single trial be stopped."""
        return False

    def stop_all(self):
        """Return True once the elapsed time exceeds the deadline."""
        return time.time() - self._start > self._deadline


In [14]:
# Trainer configuration for this short demonstration run.
ppo_config = {
    "seed": 0xCC,
    "num_workers": 1,
    # "compiler_gym" is the name given to tune.register_env().
    "env": "compiler_gym",
    # Small batch and trajectory sizes to suit the short training run.
    "rollout_fragment_length": 5,
    "train_batch_size": 5,
    "sgd_minibatch_size": 5,
}

# Stopping criterion handed to Tune.
stop_criteria = {"episodes_total": 5}

analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    stop=stop_criteria,
    checkpoint_at_end=True,
)

[2m[36m(PPOTrainer pid=2264213)[0m 2022-06-28 12:48:54,326	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=2264213)[0m 2022-06-28 12:48:54,543	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=2264213)[0m 2022-06-28 12:48:54,543	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:48:59.955425 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler

[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 


Trial name,status,loc
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213


[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:02.697907 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213,1,3.73721,5,,,,


[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  3
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %

[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:09.867813 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  1
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213,3,10.4483,15,-2132170000.0,-2132170000.0,-2132170000.0,10


[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  4
[2m[36m(RolloutWorker pid=2264258)[0m Action = up
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)

[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:16.290218 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


[2m[36m(RolloutWorker pid=2264258)[0m Action = swap_down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  1
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213,5,17.4125,25,-2126680000.0,-2121180000.0,-2132170000.0,10


[2m[36m(RolloutWorker pid=2264258)[0m Action = swap_down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  6
[2m[36m(RolloutWorker pid=2264258)[0m Action = up
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0

[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:22.892536 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


[2m[36m(RolloutWorker pid=2264258)[0m Action = up
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  8
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- mult

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213,7,23.5898,35,-2121240000.0,-2110370000.0,-2132170000.0,10


[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  3
[2m[36m(RolloutWorker pid=2264258)[0m Action = swap_up
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0

[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:30.131244 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


[2m[36m(RolloutWorker pid=2264258)[0m Action = swap_down
[2m[36m(RolloutWorker pid=2264258)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=2264258)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  1
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,RUNNING,100.37.253.28:2264213,9,30.7767,45,-2119950000.0,-2110370000.0,-2132170000.0,10


[2m[36m(RolloutWorker pid=2264258)[0m Action = swap_up
[2m[36m(RolloutWorker pid=2264258)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=2264258)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=2264258)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=2264258)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m >>> AGENT ITERATION =  3
[2m[36m(RolloutWorker pid=2264258)[0m Action = down
[2m[36m(RolloutWorker pid=2264258)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=2264258)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=2264258)[0m   for m_5586 in 128 : L2  
[2m[36m(RolloutWorker pid=2264258)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0

[2m[36m(RolloutWorker pid=2264258)[0m E0628 12:49:37.508636 140386616428096 example_service.py:250] CRITICAL - 
[2m[36m(RolloutWorker pid=2264258)[0m 
[2m[36m(RolloutWorker pid=2264258)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0628T124858-923072-6f35
[2m[36m(RolloutWorker pid=2264258)[0m 


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_2f650_00000,TERMINATED,100.37.253.28:2264213,10,34.6903,50,-2120560000.0,-2110370000.0,-2132170000.0,10


2022-06-28 12:49:38,808	INFO tune.py:747 -- Total run time: 48.95 seconds (47.83 seconds for the tuning loop).


In [15]:
# Pick the checkpoint of the first trial that maximises episode_reward_mean.
first_trial = analysis.trials[0]
checkpoint = analysis.get_best_checkpoint(
    trial=first_trial,
    mode="max",
    metric="episode_reward_mean",
)

In [19]:
# Display the experiment results as a table; the captured output below shows
# metric columns followed by config/* columns and the log directory.
analysis.dataframe()

Unnamed: 0,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,episodes_this_iter,num_healthy_workers,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_trained,...,info/learner/default_policy/learner_stats/kl,info/learner/default_policy/learner_stats/entropy,info/learner/default_policy/learner_stats/entropy_coeff,config/env,config/num_workers,config/rollout_fragment_length,config/seed,config/sgd_minibatch_size,config/train_batch_size,logdir
0,-2110369000.0,-2132173000.0,-2120558000.0,10.0,1,1,50,50,50,50,...,0.004308,1.294609,0.0,compiler_gym,1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-28...


In [20]:
# Select the trial with the highest episode_reward_mean, then display the
# trial together with its Python type.
trial = analysis.get_best_trial(metric="episode_reward_mean", mode="max")
trial, type(trial)

(PPOTrainer_compiler_gym_2f650_00000, ray.tune.trial.Trial)

In [21]:
# Display the stopping criterion recorded on the selected trial.
trial.stopping_criterion

{'episodes_total': 5}

In [22]:
# Display the per-metric summary for the trial; the captured output shows
# max / min / avg / last / last-5-avg / last-10-avg entries for each metric.
trial.metric_analysis

{'episode_reward_max': {'max': -2110369216.9891403,
  'min': -2132172938.4211705,
  'avg': nan,
  'last': -2110369216.9891403,
  'last-5-avg': -2110369216.9891403,
  'last-10-avg': nan},
 'episode_reward_min': {'max': -2132172938.4211705,
  'min': -2132172938.4211705,
  'avg': nan,
  'last': -2132172938.4211705,
  'last-5-avg': -2132172938.4211705,
  'last-10-avg': nan},
 'episode_reward_mean': {'max': -2119953738.3542862,
  'min': -2132172938.4211705,
  'avg': nan,
  'last': -2120558380.904883,
  'last-5-avg': -2120589127.5848541,
  'last-10-avg': nan},
 'episode_len_mean': {'max': 10.0,
  'min': 10.0,
  'avg': nan,
  'last': 10.0,
  'last-5-avg': 10.0,
  'last-10-avg': nan},
 'episodes_this_iter': {'max': 1,
  'min': 0,
  'avg': 0.5,
  'last': 1,
  'last-5-avg': 0.6,
  'last-10-avg': 0.5},
 'num_healthy_workers': {'max': 1,
  'min': 1,
  'avg': 1.0,
  'last': 1,
  'last-5-avg': 1.0,
  'last-10-avg': 1.0},
 'num_agent_steps_sampled': {'max': 50,
  'min': 5,
  'avg': 27.5,
  'last': 50

In [23]:
# Shut down the Ray runtime that was started earlier in this notebook.
ray.shutdown()

In [16]:
# agent.restore(checkpoint)

In [17]:
# def run_agent_on_benchmarks(benchmarks):
#     """Run agent on a list of benchmarks and return a list of cumulative rewards."""
#     with make_env() as env:
#         rewards = []
#         for i, benchmark in enumerate(benchmarks, start=1):
#             observation, done = env.reset(benchmark=benchmark), False
#             while not done:
#                 action = agent.compute_single_action(observation)
#                 observation, _, done, _ = env.step(int(action))
#             rewards.append(env.episode_reward)
            
#             print(f"[{i}/{len(benchmarks)}] ")

#     return rewards

In [24]:
# train_rewards = run_agent_on_benchmarks(train_benchmarks)
# test_rewards = run_agent_on_benchmarks(test_benchmarks)


In [25]:
# from matplotlib import pyplot as plt
# import numpy as np

In [26]:
# fig, axs = plt.subplots(1, 2)

# axs[0].title.set_text('Train rewards')
# axs[0].plot(train_rewards, color="red")
# axs[0].plot(np.zeros_like(train_rewards), color="blue")

# axs[1].title.set_text('Test rewards')
# axs[1].plot(test_rewards, color="green")
# axs[1].plot(np.zeros_like(test_rewards), color="blue")

# plt.tight_layout()
# plt.show()
