In [1]:
#!pip install compiler_gym 'ray[default,rllib]' &>/dev/null || echo "Install failed!"

import compiler_gym
import ray

from ray.rllib.agents.ppo import PPOTrainer
from compiler_gym.wrappers import ConstrainedCommandline, TimeLimit
from ray import tune
from itertools import islice
from compiler_gym.wrappers import CycleOverBenchmarks
from compiler_gym.util.registration import register

import loop_tool_service

from service_py.datasets import loop_tool_dataset
from service_py.rewards import flops_loop_nest_reward, flops_reward, runtime_reward
import wandb
# Start a Weights & Biases run for this notebook session.
# NOTE(review): project and entity are hard-coded to one account; anyone else
# running this notebook presumably needs to change them — confirm.
wandb.init(project="loop_tool", entity="dejang", sync_tensorboard=True)

  from .autonotebook import tqdm as notebook_tqdm
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdejang[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
def make_env() -> compiler_gym.envs.CompilerEnv:
    """Build the base loop_tool environment used by the rest of the notebook.

    The environment is created as "loop_tool_env-v0" with the "stride_tensor"
    observation space and the "flops_loop_nest_tensor" reward space, and is
    then wrapped in ``TimeLimit`` with ``max_episode_steps=10``.

    Returns:
        The ``TimeLimit``-wrapped environment.
    """
    base_env = loop_tool_service.make(
        "loop_tool_env-v0",
        observation_space="stride_tensor",
        reward_space="flops_loop_nest_tensor",
    )
    # Hand back the step-limited wrapper rather than the raw environment.
    return TimeLimit(base_env, max_episode_steps=10)

In [3]:
# Open a temporary environment only to report its action, observation and
# reward spaces. The `with` statement delegates clean-up to the environment's
# context-manager exit.
with make_env() as env:
    print("Action space:", env.action_space)
    print("Observation space:", env.observation_space)
    print("Reward space:", env.reward_space)

Action space: NamedDiscrete([up, down, swap_up, swap_down])
Observation space: Box([[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]], [[inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
  inf inf inf inf inf inf inf inf inf inf inf inf inf inf]], (1, 32), float32)
Reward space: flops_loop_nest_tensor


In [4]:
with make_env() as env:
    # Look up the dataset that the benchmark URIs below belong to.
    lt_dataset = env.datasets["loop_tool_simple-v0"]
    # Alternative: draw the benchmarks from the dataset iterator instead.
    # train_benchmarks = list(islice(lt_dataset.benchmarks(), 1))
    # test_benchmarks = list(islice(lt_dataset.benchmarks(), 2))

    # Further benchmarks that are currently disabled:
    #  "benchmark://loop_tool_simple-v0/mm128",
    #  "benchmark://loop_tool_simple-v0/mm"
    bench = ["benchmark://loop_tool_simple-v0/simple"]

    # Training and testing currently share the very same list object.
    train_benchmarks = test_benchmarks = bench

print(f"Number of benchmarks for training: {len(train_benchmarks)}")
print(f"Number of benchmarks for testing: {len(test_benchmarks)}")


Number of benchmarks for training: 1
Number of benchmarks for testing: 1


In [5]:
def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:
    """Environment factory that cycles over the training benchmarks.

    A fresh environment from ``make_env()`` is wrapped in
    ``CycleOverBenchmarks`` together with the module-level ``train_benchmarks``.

    Args:
        *args: Accepted and ignored; ray passes an env_config argument that
            this factory has no use for.

    Returns:
        The ``CycleOverBenchmarks``-wrapped environment.
    """
    base_env = make_env()
    return CycleOverBenchmarks(base_env, train_benchmarks)


In [6]:
# Smoke test: build one training environment and reset it. Because reset() is
# the last expression in the cell, its return value is what the notebook displays.
# NOTE(review): this environment is not closed in the cells visible here —
# confirm it is released later, or close it explicitly once inspected.
env = make_training_env()
env.reset()

for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

observation_space.name stride_tensor
observation_space.name flops_loop_nest_tensor
<<<<<<<<<<<<<<< Reward = 2.145350517399148 GFLOPS >>>>>>>>>>>>>>>


E0630 14:09:05.359035 139782905841216 example_service.py:249] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140904-570442-9cca



array([[2113536.,       0.,       0.,       0.,       0.,       0.,
              0., 2097280.,       0.,       0.,       0.,       0.,
              0.,       0.,       0.,       0.,       0.,       0.,
              0.,       0.,       0.,       0.,       0.,       0.,
              0.,       0.,       0.,       0.,       0.,       0.,
              0.,       0.]], dtype=float32)

In [7]:
# Shut down any Ray runtime that is already initialized so that re-running
# this cell starts from a fresh one.
if ray.is_initialized():
    ray.shutdown()
ray.init(include_dashboard=False, ignore_reinit_error=True)

# Register the training-environment factory under the name "compiler_gym",
# which is the value PPO_CONFIG uses for its "env" key.
tune.register_env("compiler_gym", make_training_env)

In [8]:
import time
from ray import tune
from ray.tune import Stopper

class TimeStopper(Stopper):
    """Stopper that ends the whole experiment after a wall-clock budget.

    The clock starts when the stopper object is constructed. Individual trials
    are never stopped by this class; only ``stop_all`` reacts to the deadline.

    Args:
        deadline_s: Number of seconds after construction beyond which
            ``stop_all`` returns True. Defaults to 30, the value that was
            previously hard-coded, so ``TimeStopper()`` behaves as before.
    """

    def __init__(self, deadline_s: float = 30):
        # Wall-clock reference point for the elapsed-time check in stop_all().
        self._start = time.time()
        self._deadline = deadline_s

    def __call__(self, trial_id, result):
        """Per-trial check: always False, so no single trial is stopped here."""
        return False

    def stop_all(self):
        """Return True once more than ``deadline_s`` seconds have elapsed."""
        return time.time() - self._start > self._deadline


In [9]:
# Configuration dict passed to tune.run(PPOTrainer, config=PPO_CONFIG).
# "gamma" and "lr" are tune.grid_search entries with three values each, so the
# experiment is expanded into 3 x 3 = 9 trials.
# Trailing "def ..." notes are the author's record of each key's default value.
PPO_CONFIG = {
    "log_level": "ERROR",
    "seed": 0xCC,
    "num_workers": 1,
    # Environment to use: "compiler_gym" is the name that was passed to
    # tune.register_env().
    "env": "compiler_gym",
    # Keep batch and trajectory sizes tiny to match this short training run.
    "rollout_fragment_length": 5,
    "train_batch_size": 5,
    "sgd_minibatch_size": 5,
    "gamma": tune.grid_search([0.5, 0.8, 0.9]), # def 0.99
    "lr": tune.grid_search([0.01, 0.001, 0.0001]), # def 1e-4
    # NOTE(review): make_env() already wraps the env in TimeLimit with
    # max_episode_steps=10; presumably this smaller horizon is what actually
    # bounds episode length during training — confirm.
    "horizon": 3, # def None

    "evaluation_interval": 5, # def None
    "evaluation_num_episodes": 1, # def 10
    "model": {'fcnet_hiddens': [5, 5]}
    # Disabled alternative: also grid-search the hidden-layer sizes.
    # "model": {                            # The NN model we'll optimize.
    #     'fcnet_hiddens': [                # "Fully-connected network with N hidden layers".
    #         tune.grid_search([20, 40]),   # Try these two values for layer one.
    #         tune.grid_search([20, 40])    # Try these two values for layer two.
    #     ]
    # },
}

In [10]:
# Run the Tune experiment with PPOTrainer over the grid defined in PPO_CONFIG
# and keep the returned object in `analysis`.
# NOTE(review): fail_fast=True is expected to abort the whole experiment on the
# first trial error, and checkpoint_at_end=True to save a checkpoint when a
# trial finishes — confirm against the installed Ray version.
analysis = tune.run(
    PPOTrainer,
    fail_fast=True,
    checkpoint_at_end=True,
    # Disabled alternative: stop on a wall-clock deadline instead.
    # stop=TimeStopper(),
    # Stopping criterion keyed on the "episodes_total" result field.
    stop={
        "episodes_total": 100,
        # "episode_reward_mean": 30
    },
    config=PPO_CONFIG
)

[2m[36m(PPOTrainer pid=3479764)[0m 2022-06-30 14:09:12,331	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3479764)[0m 2022-06-30 14:09:12,537	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3479764)[0m 2022-06-30 14:09:12,537	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:09:17.680739 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compile

[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.1075548909416884 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:09:21.080853 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.128394705887726 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3479764)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3479764)[0m  

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,PENDING,,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,PENDING,,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,PENDING,,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,PENDING,,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,PENDING,,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


[2m[36m(PPOTrainer pid=3479764)[0m 2022-06-30 14:09:22,582	INFO trainable.py:159 -- Trainable.setup took 10.251 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:09:22.594779 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.14015193263041 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_s

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:09:25.916023 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.08762823996926 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.110815307

[2m[36m(PPOTrainer pid=3480016)[0m 2022-06-30 14:09:27,110	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.


[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.1394652744657465 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3480016)[0m 2022-06-30 14:09:27,311	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3480016)[0m 2022-06-30 14:09:27,311	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:09:32.562998 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.1463462561919187 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:09:36.484875 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.1370289604010844 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,PENDING,,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,PENDING,,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,PENDING,,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,PENDING,,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


[2m[36m(PPOTrainer pid=3480016)[0m 2022-06-30 14:09:37,992	INFO trainable.py:159 -- Trainable.setup took 10.882 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:09:38.006237 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.15404573292077 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_s

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:09:41.309847 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.130433634234004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.13437007

[2m[36m(PPOTrainer pid=3480438)[0m 2022-06-30 14:09:42,328	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3480438)[0m 2022-06-30 14:09:42,531	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3480438)[0m 2022-06-30 14:09:42,531	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3480226)[0m Action = up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.1396180696371228 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:09:47.722572 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.147422077114205 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:09:51.225703 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.138398499456772 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTraine

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,PENDING,,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,PENDING,,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,PENDING,,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


[2m[36m(PPOTrainer pid=3480438)[0m 2022-06-30 14:09:53,302	INFO trainable.py:159 -- Trainable.setup took 10.975 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:09:53.317152 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.1396180696371228 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:09:56.636016 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.106741889079088 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.12493014

[2m[36m(PPOTrainer pid=3480856)[0m 2022-06-30 14:09:57,697	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3480856)[0m 2022-06-30 14:09:57,909	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3480856)[0m 2022-06-30 14:09:57,909	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.122452697379108 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:03.107848 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.145196902425165 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:10:06.600629 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480856)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2.130659821685741 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOT

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,PENDING,,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,PENDING,,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


[2m[36m(PPOTrainer pid=3480856)[0m 2022-06-30 14:10:08,650	INFO trainable.py:159 -- Trainable.setup took 10.953 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:08.664380 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.132930848518473 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:11.964687 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.1317947302641573 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.1175877

[2m[36m(PPOTrainer pid=3481284)[0m 2022-06-30 14:10:13,003	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3481284)[0m 2022-06-30 14:10:13,213	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3481284)[0m 2022-06-30 14:10:13,213	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.1330816269239063 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:18.404669 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.1459640994050675 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:10:21.961224 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.143895350323094 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481284)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTraine

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,PENDING,,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


[2m[36m(PPOTrainer pid=3481284)[0m 2022-06-30 14:10:24,067	INFO trainable.py:159 -- Trainable.setup took 11.065 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:24.081827 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.1510376440203536 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:27.359859 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.1175877808793766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.1271889

[2m[36m(PPOTrainer pid=3481705)[0m 2022-06-30 14:10:28,398	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3481705)[0m 2022-06-30 14:10:28,619	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3481705)[0m 2022-06-30 14:10:28,619	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.1217032896157892 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:33.843253 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.1390844729227037 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:10:37.397319 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481705)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481705)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481705)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481705)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 2.142365553830823 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481705)[0m   for 

Trial name,status,loc,gamma,lr
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001


Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 5
  counters:
    num_agent_steps_sampled: 5
    num_agent_steps_trained: 5
    num_env_steps_sampled: 5
    num_env_steps_trained: 5
  custom_metrics: {}
  date: 2022-06-30_14-09-43
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: -0.028362816820877157
  episode_reward_mean: -0.028362816820877157
  episode_reward_min: -0.028362816820877157
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 0.009999999776482582
          entropy: 1.3349884748458862
          entropy_coeff: 0.0
          kl: 0.058248527348041534
          model: {}
          policy_loss: -0.1564324051141739
          total_loss: -0.14356572926044464
          vf_explained_var: 3.973643192267673e-09
       

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,1.0,5.18564,5.0,-0.0283628,-0.0283628,-0.0283628,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 5
  counters:
    num_agent_steps_sampled: 5
    num_agent_steps_trained: 5
    num_env_steps_sampled: 5
    num_env_steps_trained: 5
  custom_metrics: {}
  date: 2022-06-30_14-09-58
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: -0.02872839966494345
  episode_reward_mean: -0.02872839966494345
  episode_reward_min: -0.02872839966494345
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 3d62e2a8608640d6929689d0a46136af
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 0.009999999776482582
          entropy: 1.3217337131500244
          entropy_coeff: 0.0
          kl: 0.07213372737169266
          model: {}
          policy_loss: -0.16510730981826782
          total_loss: -0.14989472925662994
          vf_explained_var: 3.973643103449831e-08
          

[2m[36m(PPOTrainer pid=3481705)[0m 2022-06-30 14:10:38,970	INFO trainable.py:159 -- Trainable.setup took 10.573 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:38.984082 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'up', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.023818921383663 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[3

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:39.576707 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:39.654863 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:39.690027 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:39.780905 1399146413

[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.064305798290501 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0337567514230175 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9941814440459646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5

[2m[36m(RolloutWorker pid=3481073)[0m cc1: fatal error: /tmp/fn_120.c: No such file or directory
[2m[36m(RolloutWorker pid=3481073)[0m compilation terminated.
[2m[36m(RolloutWorker pid=3481497)[0m cc1: fatal error: /tmp/fn_122.c: No such file or directory
[2m[36m(RolloutWorker pid=3481497)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 35.493213282334224 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:41.482982 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0488823800093594 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 35.47220107914278 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:41.686115 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:41.792774 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.021842413771827 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 23.00076224945847 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:42.047219 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:41.984369 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.022115343821473 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< curs

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:42.224766 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.038935299851004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = swap_up)




[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0515421156841107 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481073)[0m cc1: fatal error: /tmp/fn_164.c: No such file or directory
[2m[36m(RolloutWorker pid=3481073)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0318985926464554 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2



[2m[36m(RolloutWorker pid=3481922)[0m Action = up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 12

[2m[36m(RolloutWorker pid=3481073)[0m cc1: fatal error: /tmp/fn_174.c: No such file or directory
[2m[36m(RolloutWorker pid=3481073)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.051332413544855 GFLOPS >>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:44.036552 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,2.0,8.32463,10.0,9.06534,27.288,-0.0522174,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,2.0,8.56702,10.0,11.0976,33.4276,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,2.0,8.64613,10.0,6.94525,20.9456,-0.0810796,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,2.0,9.24468,10.0,9.20414,27.7553,-0.131902,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,2.0,9.24686,10.0,8.00994,24.2307,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,1.0,5.19082,5.0,-0.0542532,-0.0542532,-0.0542532,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.040112494759032 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.n

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:44.290272 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:44.377593 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.036445557918451 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.052664600121565 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.051262187962008 GFLOPS >>>>>>>>>>>>>>>
[

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:44.657947 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0403199090921915 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.028193423597679 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:44.819974 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:44.792454 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.112374829206678 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:46.961176 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0343762353186547 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_sp

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:47.308670 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00000:
  agent_timesteps_total: 15
  counters:
    num_agent_steps_sampled: 15
    num_agent_steps_trained: 15
    num_env_steps_sampled: 15
    num_env_steps_trained: 15
  custom_metrics: {}
  date: 2022-06-30_14-10-47
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.28804382905825
  episode_reward_mean: 5.435323092602788
  episode_reward_min: -0.05221737542879801
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 3bc2c681e0ae435185cfcf75e4ae07c3
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 0.009999999776482582
          entropy: 1.3254896402359009
          entropy_coeff: 0.0
          kl: 0.013525964692234993
          model: {}
          policy_loss: -0.03354928269982338
          total_loss: -0.025612277910113335
          vf_explained_var: -3.774960788405224e-08
        

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:47.523041 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:47.636177 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 



Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 15
  counters:
    num_agent_steps_sampled: 15
    num_agent_steps_trained: 15
    num_env_steps_sampled: 15
    num_env_steps_trained: 15
  custom_metrics: {}
  date: 2022-06-30_14-10-47
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.42764328386841
  episode_reward_mean: 6.659688988947887
  episode_reward_min: -0.10661471285034096
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 0.009999999776482582
          entropy: 1.3344969749450684
          entropy_coeff: 0.0
          kl: 0.03233673423528671
          model: {}
          policy_loss: -0.10550098866224289
          total_loss: -0.09064392745494843
          vf_explained_var: 1.2715658215256553e-07
         

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:47.900326 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.023886303857217 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 15
  counters:
    num_agent_steps_sampled: 15
    num_agent_steps_trained: 15
    num_env_steps_sampled: 15
    num_env_steps_trained: 15
  custom_metrics: {}
  date: 2022-06-30_14-10-48
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 24.230708511936424
  episode_reward_mean: 4.82550750513735
  episode_reward_min: -0.15859748918192285
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 7b53c32298d04fc1955d389baf905d12
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:48.073247 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 15
  counters:
    num_agent_steps_sampled: 15
    num_agent_steps_trained: 15
    num_env_steps_sampled: 15
    num_env_steps_trained: 15
  custom_metrics: {}
  date: 2022-06-30_14-10-48
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.75533415766789
  episode_reward_mean: 5.514901194300647
  episode_reward_min: -0.13190153608354893
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 1ef703bcf6274298b5c0bc6beb0ab2f4
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 0.0010000000474974513
          entropy: 1.3653206825256348
          entropy_coeff: 0.0
          kl: 0.0019189101876690984
          model: {}
          policy_loss: -0.05797595530748367
          total_loss: -0.057733021676540375
          vf_explained_var: 0.0
          vf_loss: 0.0001



[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:49.634985 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:49.897521 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9792621954816259 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_ne

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:50.120061 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:50.187393 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.028056127728297 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.028056127728297 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_up)


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:50.468102 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 29.922765764673148 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:50.704831 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 22.87793862567772 GFLOPS >>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,3.0,13.5429,15.0,5.43532,27.288,-0.0522174,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,4.0,17.7573,20.0,11.1321,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,3.0,13.8527,15.0,4.1671,20.9456,-0.0810796,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,3.0,14.1728,15.0,5.5149,27.7553,-0.131902,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,3.0,14.0544,15.0,4.82551,24.2307,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,2.0,9.15939,10.0,11.0684,33.3091,-0.0542532,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0205499216455354 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_down)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0232058494099165 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:52.591255 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:52.688402 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(Rollout

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:52.875453 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:52.939563 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9994937271265487 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.936189767927961 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0387965458659516 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:53.380597 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.958926246214605 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:53.544175 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.018852812519253 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0126952806340346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:10:54.996657 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.014383016245489 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_down
[2m[

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:10:55.339849 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:10:55.468279 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0191920062352597 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = swa

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:55.507404 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0393506835754223 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.041221510014838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0202783870927346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=34

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:10:55.731287 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:10:56.101505 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.045045269066812 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0116836101396087 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0115485403182176 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0082526786731747 GFLOPS >>>>>>>>>>>>>>>
[

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:10:56.590845 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:10:56.714002 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:10:56.664090 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0164797046941536 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.013639329277221 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0059044112829443 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,4.0,18.2578,20.0,4.53013,27.288,-0.0522174,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,4.0,17.7573,20.0,11.1321,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,4.0,17.8026,20.0,8.00882,27.2174,-0.0810796,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,4.0,18.8101,20.0,8.07094,27.7553,-0.131902,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,4.0,18.7059,20.0,8.67107,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,4.0,17.9782,20.0,11.118,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0314173654707326 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3480016)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480856)[0m   

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:10:57.376936 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 3, actions = ['down', 'up', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:10:57.745393 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0225239018090972 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0685011900170736 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0240230357083395 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:10:57.923621 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0300448087569225 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.026823259711278 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3480438)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:10:59.069022 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0311429023867835 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3480438)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3480438)[0m   for m_5586 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m  for m_5586 in 128 : L5  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:10:59.273634 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:10:59.430253 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 0.6906184456514115 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.032998693236237 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 25
  counters:
    num_agent_steps_sampled: 25
    num_agent_steps_t

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:11:00.134689 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2.028878219050724 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.01053898576283 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:00.369436 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9981630047449357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3481284)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3481284)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for m_5586 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m  for m_5586 in 128 : L5  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=34

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:11:00.609541 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 0.7277869713363986 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 25
  counters:
    num_agent_steps_sampled: 25
    num_agent_steps_trained: 25
    num_env_steps_sampled: 25
    num_env_steps_trained: 25
  custom_metrics: {}
  date: 2022

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:00.831521 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:00.907866 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 25.275570527229227 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:01.076098 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:11:01.124460 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0186487680589553 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0026939390191902 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0303867911467224 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:01.799460 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0081853716645184 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m Actions = ['down', 'dumm

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:02.292958 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9468602307655887 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:03.228081 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9425151664055373 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:03.501701 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,5.0,25.1377,25.0,3.39549,27.288,-0.0522174,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,6.0,29.3464,30.0,6.6616,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,5.0,25.1847,25.0,5.98676,27.2174,-0.0845977,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,5.0,26.0993,25.0,6.04973,27.7553,-0.131902,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,5.0,26.6441,25.0,6.49388,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,4.0,17.9782,20.0,11.118,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:03.572444 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0213659375850908 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9464816668546179 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.022319114641368 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:11:03.796646 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9461664598763342 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:04.429450 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.93581444851056 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.021570531744406 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:04.898061 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.034997084061509 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 35.264032285185806 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_562

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:05.456005 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0361005432606025 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.028123792655088 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:05.986921 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.017156611672594 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions 

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:06.366190 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:06.387085 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0252524984270908 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0178339732620296 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m obs

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:07.908686 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:08.079988 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : 

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:08.251337 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0225229265338283 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0206862030473824 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:08.767782 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.013842362237767 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:09.013264 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:09.107218 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.036169734535528 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[3

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,7.0,33.9706,35.0,4.58135,27.288,-0.0591934,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,7.0,33.6024,35.0,6.05655,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,7.0,33.8948,35.0,4.34402,27.2174,-0.0919976,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,7.0,35.289,35.0,4.37926,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,6.0,30.9306,30.0,5.18162,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,6.0,29.0043,30.0,9.97331,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9620620647779718 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:10.671232 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:10.774039 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.02702993977828 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.024705994800059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:11.066624 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:11.200628 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0173603545756467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9365017512204985 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0412225034066576 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m cc1: fatal error: /tmp/fn_577.c: No such file or directory
[2m[36m(RolloutWorker pid=3480646)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0067095984001075 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_l

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:11.606660 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.051682609997241 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:11.995755 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0297697341358907 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions =

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:12.798604 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9371260195268842 GFLOPS >

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:13.461886 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0110094065650816 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:13.730508 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:13.979172 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.031211757122649 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0932313776297 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0189878932083842 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:14.142378 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 25.275570527229227 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.001159385992995 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:14.797353 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,8.0,38.1042,40.0,3.87041,27.288,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,8.0,38.5815,40.0,5.12489,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,8.0,38.1819,40.0,3.66902,27.2174,-0.0919976,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,8.0,39.6963,40.0,3.69773,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,8.0,40.6932,40.0,3.97849,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,7.0,33.0767,35.0,9.06713,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9481868172957095 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0370676705240576 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[3

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:15.582607 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0252515205185504 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = swap_down)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.035066200359434 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_558

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:16.157065 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:16.329099 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9801770038514814 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9624467138886148 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0318985926464554 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:16.676336 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0044319873453715 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:17.051299 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.021094183679763 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0525942832981636 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollout

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:17.401260 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.980568789083768 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:18.241022 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0306620330517853 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', '

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:18.780833 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9589253313094754 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:19.141730 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.943143318438834 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0230009241270475 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:19.353031 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.020210271595995 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.017834944020444 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:19.690923 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:19.748785 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9688804247666642 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.03045559462767 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9413211783530142 GFLOPS >>>>>>>>>>>>>>>
R

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:20.918936 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:21.016646 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,9.0,42.6076,45.0,3.35013,27.288,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,9.0,42.2549,45.0,5.98939,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,9.0,43.1705,45.0,3.17119,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,9.0,44.5745,45.0,3.20035,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,9.0,45.5883,45.0,3.44154,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,9.0,41.6469,45.0,6.63888,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.983249986760378 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0173

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:21.893299 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.016953879830192 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3480016)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for m_5586 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_55

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:11:22.998000 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0110094065650816 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = swap_down
[2m[36m(PPOTrai

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:11:23.347759 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0310730687803016 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:23.547956 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:11:23.668961 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3480438)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 35.03779196043706 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=34804

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:23.988272 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:24.076842 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0197345642837226 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0327238027111685 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:24.410368 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.055544692114563 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name f

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:11:25.234921 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 1.9358153419554103 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480856)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=348192

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:26.004515 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:11:26.057975 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,10.0,49.1812,50.0,3.14173,27.288,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,10.0,48.8138,50.0,5.60951,33.4941,-0.106615,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,10.0,49.318,50.0,2.97031,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,10.0,50.9138,50.0,2.9956,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,10.0,51.8582,50.0,3.22141,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,9.0,41.6469,45.0,6.63888,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:26.242186 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.028123792655088 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0181058221068096 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:26.655818 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:26.831702 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.92003281306077 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9537005595637518 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:27.068775 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9293470411003013 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0160057524578203 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 i

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:11:27.346661 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 29.366529903518966 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:28.181399 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:28.152421 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.013639329277221 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0251156004196718 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0303877740207623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:28.451877 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 1

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:28.992148 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0394200959245676 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:29.393037 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0289469203263106 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (acti

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:29.783263 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9378142540533374 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.017766993187042 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:30.654779 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.012761924414127 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:30.910776 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.960268416728281 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.025798318718923 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:31.130377 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.020822502834198 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWork

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:31.679375 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:31.824817 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0284661090804272 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:31.903204 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,12.0,56.4592,60.0,2.5144,27.288,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,12.0,57.5852,60.0,4.4786,33.4941,-0.112691,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,11.0,53.7469,55.0,2.64251,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,11.0,54.8847,55.0,2.66302,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,11.0,55.7671,55.0,2.86526,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,10.0,47.9964,50.0,8.28878,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.7953062607302865 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 60
  counters:
    num_agent_steps_sampled: 60
    num_agent_steps_trained: 60
    num_env_steps_sampled: 60
    num_env_steps_trained: 60
  custom_metrics: {}
  date: 2022-06-30_14-11-32
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.217420858992803
  episode_reward_mean: 2.374095985615754
  episode_reward_min: -0.10350326456706238
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: 3d62e2a8608640d6929689d0a46136af
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.417187452316284
          cur_lr: 0.009999999776482582
          entropy: 1.2370153665542603
          entropy_coeff: 0.0
          kl: 0.012160620652139187
          model: {}
          policy_loss: -0.1238555088639

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:32.781216 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m cc1: fatal error: /tmp/fn_897.c: No such file or directory
[2m[36m(RolloutWorker pid=3480226)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.042610221856001 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.052664600121565 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9712715283766855 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:33.001326 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.029838495802207 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9281700675362263 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 60
  counters:
    num_agent_steps_

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:33.871989 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  f

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:34.110325 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:34.076224 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0106739200362025 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<<

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:34.256167 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.8810621624906434 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:34.980571 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:35.081064 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.021910639376869 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.025798318718923 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_562

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:36.281265 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9986943191446174 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[3

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:36.424341 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0090597525021616 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (acti

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:36.752617 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:36.901261 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.011346934117098 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[3

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,13.0,59.9415,65.0,2.39609,27.288,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,13.0,61.9047,65.0,4.26073,33.4941,-0.112691,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,13.0,61.2559,65.0,2.26789,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,13.0,62.7354,65.0,2.27939,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,12.0,59.9626,60.0,2.57199,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,12.0,56.945,60.0,6.62937,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:37.428773 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.044627562545762 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.998562892512365 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=34802

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:38.450840 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:38.427909 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:38.584457 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9557365980669734 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0022264442768356 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:39.028126 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.028056127728297 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:39.282016 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 1

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:39.824160 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9242777714312715 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.025798318718923 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9486294146451604 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:40.337040 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0128305045052657 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_ne

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:40.940743 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9980306961779597 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:41.186911 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:41.231972 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 23.027154041263604 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:41.344719 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0670770622823236 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.020075027296007 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 70
  counters:
    num_agent_steps_sampled: 70
    num_agent_steps_trained: 70
    num_env_steps_sampled: 70
    num_env_steps_trained: 70
  custom_metrics: {}
  date: 2022-06-30_14-11-41
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.75533415766789
  episode_reward_mean: 3.2716414965754836
  episode_reward_min: -0.17761185556224857
  episodes_this_iter: 2
  episodes_total: 23
  experiment_id: 1ef703bcf6274298b5c0bc6beb0ab2f4
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.441406286379788e-05
          cur_lr: 0.0010000000474974513
          entropy: 1.3389416933059692
          e

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:42.018996 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']Result for PPOTrainer_compiler_gym_bc2ca_00004:

[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 29.223711713720355 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(R

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:43.039894 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:43.008177 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9378142540533374 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:43.352396 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:43.452984 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9493249429512889 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.018241773999289 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3479764)[0m for n_5625 in

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:43.783168 140251398047296 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141017-377415-6f35
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.027439506295773 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3480016)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0183087855993476 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Rew

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,14.0,63.7006,70.0,3.37799,27.3879,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,14.0,65.5371,70.0,5.34555,33.4941,-0.112691,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,14.0,65.1451,70.0,3.25036,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,14.0,66.8548,70.0,3.27164,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,14.0,67.6165,70.0,3.42655,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,14.0,64.5169,70.0,7.18745,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:11:44.697889 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00000:
  agent_timesteps_total: 75
  counters:
    num_agent_steps_sampled: 75
    num_agent_steps_trained: 75
    num_env_steps_sampled: 75
    num_env_steps_trained: 75
  custom_metrics: {}
  date: 2022-06-30_14-11-44
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.387861055243647
  episode_reward_mean: 4.195308693287299
  episode_reward_min: -0.09045392768706462
  episodes_this_iter: 2
  episodes_total: 25
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -1.316186106988189
    episode_reward_mean: -1.316186106988189
    episode_reward_min: -1.316186106988189
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -1.316186106988189
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.15788078308

[2m[36m(RolloutWorker pid=3481497)[0m Fatal Python error: Illegal instruction
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Current thread 0x00007f8ed2c28640 (most recent call first):
[2m[36m(RolloutWorker pid=3481497)[0m   File "/home/dejang/loop_tool_env/loop_tool_service/service_py/env/loop_tool_env.py", line 74 in get_available_actions
[2m[36m(RolloutWorker pid=3481497)[0m   File "./example_service.py", line 300 in apply_action
[2m[36m(RolloutWorker pid=3481497)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/compiler_gym/service/runtime/compiler_gym_service.py", line 201 in Step
[2m[36m(RolloutWorker pid=3481497)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 443 in _call_behavior
[2m[36m(RolloutWorker pid=3481497)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 560 in _unary_response_i

[2m[36m(PPOTrainer pid=3480016)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3480438)[0m Action = down
[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.019260053082374 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=347982

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:45.692247 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:45.735417 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0053692571539496 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9976310213327149 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.014179874259744 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m Action = down
[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  


[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:11:46.002051 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 75
  counters:
    num_agent_steps_sampled: 75
    num_agent_steps_trained: 75
    num_env_steps_sampled: 75
    num_env_steps_trained: 75
  custom_metrics: {}
  date: 2022-06-30_14-11-46
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.217420858992803
  episode_reward_mean: 2.988307474848489
  episode_reward_min: -0.10350326456706238
  episodes_this_iter: 2
  episodes_total: 25
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 0.03434095713063012
    episode_reward_mean: 0.03434095713063012
    episode_reward_min: 0.03434095713063012
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 0.03434095713063012
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.1374006

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:11:46.308355 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9299675923305684 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0117511

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:47.440619 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.934692932424884 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.024363969126102 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:47.725403 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 1

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:11:47.919208 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:47.953773 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


  agent_timesteps_total: 75
  counters:
    num_agent_steps_sampled: 75
    num_agent_steps_trained: 75
    num_env_steps_sampled: 75
    num_env_steps_trained: 75
  custom_metrics: {}
  date: 2022-06-30_14-11-48
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.75533415766789
  episode_reward_mean: 3.0032104743025543
  episode_reward_min: -0.17761185556224857
  episodes_this_iter: 2
  episodes_total: 25
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.0014153871413098429
    episode_reward_mean: -0.0014153871413098429
    episode_reward_min: -0.0014153871413098429
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.0014153871413098429
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.15130043029785156
      mean_env_render_ms:

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:48.120752 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.012223105497807 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:48.447190 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9377515856857013 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.027781591746357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.011885283842816 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:49.612462 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 0.6676538015464122 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.021094183679763 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observat

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,16.0,74.4585,80.0,4.03159,27.3879,-0.0904539,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,15.0,71.8818,75.0,6.03518,33.4941,-0.117752,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,16.0,75.171,80.0,2.87336,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,15.0,73.305,75.0,3.00321,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,14.0,67.6165,70.0,3.42655,27.8989,-0.158597,3.0
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,14.0,64.5169,70.0,7.18745,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0131676774173424 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:50.085208 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.02388728044779 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:50.626048 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:11:50.732589 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0168850184771556 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481705)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name str

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:50.851108 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:11:51.011560 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 0.737764878675145 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 75
  counters:
    num_agent_steps_sampled: 75
    num_agent_steps_trained: 75
    num_env_steps_sampled: 75
    num_env_steps_trained: 75
  custom_metrics: {}
  date: 2022-

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:52.280776 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 0.7577412636632852 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481922)[0m for n_562

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:52.739792 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:52.863254 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0146529682755987 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cur

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:52.983913 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:52.953359 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.010875391334781 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0141121693690818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m obse

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:53.482022 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.010875391334781 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'sw

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:54.866092 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:54.949035 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:55.191324 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0199388283181254 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0205499216455354 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:55.481246 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,17.0,78.4021,85.0,3.70173,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,17.0,78.9367,85.0,5.38632,33.4941,-0.117752,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,17.0,79.4159,85.0,2.66551,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,17.0,80.7604,85.0,2.68191,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,15.0,76.3795,75.0,3.14988,27.8989,-0.158597,2.96
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,16.0,74.7002,80.0,7.19348,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0421249023804564 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cur

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:55.800449 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0933765953103576 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.933944490502042 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.008588319521498 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=34806

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:56.204274 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0002260463585793 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:57.406103 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0053692571539496 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:57.596868 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:11:57.522983 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9126335474591951 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:11:58.105521 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0270289601524847 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0091261

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:11:58.635716 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:11:58.577319 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.031141918781519 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_spa

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:11:59.811341 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.010942878786426 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['do

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:11:59.995954 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:00.074460 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 85
  counters:
    num_agent_steps_sampled: 85
    num_agent_steps_trained: 85
    num_env_steps_sampled: 85
    num_env_steps_trained: 85
  custom_metrics: {}
  date: 2022-06-30_14-12-00
  done: false
  episode_len_mean: 2.9642857142857144
  episode_media: {}
  episode_reward_max: 27.898879460815934
  episode_reward_mean: 2.8109593405066904
  episode_reward_min: -0.15859748918192285
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 7b53c32298d04fc1955d389baf905d12
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.051757857974735e-06
          cur_lr: 0.0010000000474974513
          entropy: 1.2616552114486694
          entropy_coeff: 0.0
          kl: 0.0028188182041049004
          model: {}
          policy_loss: -0.03677087277173996
          total_loss: -0.010984305292367935
          vf_explained_var: 3.5762788

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:00.341239 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:00.792992 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0237505669371885 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:01.222070 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,19.0,86.6537,95.0,3.34414,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,18.0,83.5489,90.0,5.02303,33.4941,-0.117752,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,18.0,83.905,90.0,2.4882,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,18.0,85.3264,90.0,2.50024,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,17.0,85.2907,85.0,2.81096,27.8989,-0.158597,2.96429
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,17.0,78.2346,85.0,6.67898,33.581,-0.0970849,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:02.363309 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:02.370030 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9385647987879533 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.016953879830192 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m 

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:02.608240 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.014179874259744 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:02.904541 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9682355254371546 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:03.548039 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.024911265685408 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.02702993977828 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.008589281404321 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:04.083766 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.020210271595995 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  <<<<<< curso

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:04.547786 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0106729561557413 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:04.806472 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:04.876627 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:04.859187 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0220461295857324 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:06.229463 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0065080010524556 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_t

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,19.0,86.6537,95.0,3.34414,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,19.0,87.1478,95.0,4.85846,33.4941,-0.117752,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,19.0,88.549,95.0,2.40719,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,19.0,88.9482,95.0,2.4171,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,18.0,89.795,90.0,2.63609,27.8989,-0.158597,2.96667
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,19.0,86.3948,95.0,6.02387,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(PPOTrainer pid=3479764)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name s

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:06.587247 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.013639329277221 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up'

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:07.031043 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.039557943025864 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9998931941806084 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m  

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:07.467800 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 1.9941160249888272 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0397662365830898 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(P

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:12:08.018079 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for n_5625 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 23.62509012256669 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:12:08.458113 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 24.323407117879363 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00000:
  agent_timesteps_total: 100
  counters:
    num_agent_steps_sampled: 100
    num_agent_steps_trained: 100
    num_env_steps_sampled: 100
    num_env_steps_trained: 100
  custom_metrics: {}
  date:

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:08.774158 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.029426977616472 GFLOPS >>>>>>>>>>>>>>>
  agent_timesteps_total: 95
  counters:
    num_agent_steps_sampled: 95
    num_agent_steps_trained: 95
    num_env_steps_sampled: 95
    num_env_steps_trained: 95
  custom_metrics: {}
  date: 2022-06-30_14-12-08
  done: false
  episode_len_mean: 2.96875
  episode_media: {}
  episode_reward_max: 27.898879460815934
  episode_reward_mean: 2.469071184118851
  episode_reward_min: -0.15859748918192285
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: 7b53c32298d04fc1955d389baf905d12
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.629394644936838e-07
          cur_lr: 0.0010000000474974513
          entropy: 1.207869052886963
          entropy_coeff: 0.0
          kl: 0.001165997819043696
          model: {}
          policy_loss: -0.009367992170155048
          total_loss: -0.0057072411

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:09.797900 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:09.917416 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.021978869586513 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.023341513266784 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.008118069167328 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:10.147351 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9464184366390196 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.013910048990069 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:12:10.493584 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.000426383301418 GFLOPS >>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:11.357475 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:12:11.460000 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,20.0,93.789,100.0,3.14388,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,20.0,93.3185,100.0,5.56937,33.4941,-0.117752,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,20.0,95.6336,100.0,2.25907,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,20.0,96.5391,100.0,2.26692,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,19.0,93.8956,95.0,2.46907,27.8989,-0.158597,2.96875
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,19.0,86.3948,95.0,6.02387,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:12.235826 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:12.277222 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:12.293660 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9271795790745707 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.011481012209006 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.020142160692251 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:13.088163 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9457881656963945 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0160057524578203 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker p

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:12:13.457274 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0127628902984567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 1.9980297443812463 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 1.996237218035272 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=348

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:12:14.871635 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3481284)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481284)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481284)[0m   for n_5625 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 28.77560905330031 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:15.026459 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:15.103564 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:15.102517 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0173603545756467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cur

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:15.199917 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0351353213524273 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_sp

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:15.542098 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:15.673149 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : 

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,21.0,97.5549,105.0,2.95759,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,22.0,101.651,110.0,5.09977,33.4941,-0.201143,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,21.0,100.652,105.0,2.13063,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,21.0,100.255,105.0,2.1361,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,20.0,99.8713,100.0,2.3933,27.8989,-0.158597,2.9697
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,20.0,93.4527,100.0,5.65774,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


Result for PPOTrainer_compiler_gym_bc2ca_00000:
  agent_timesteps_total: 110
  counters:
    num_agent_steps_sampled: 110
    num_agent_steps_trained: 110
    num_env_steps_sampled: 110
    num_env_steps_trained: 110
  custom_metrics: {}
  date: 2022-06-30_14-12-16
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.387861055243647
  episode_reward_mean: 2.87802357860507
  episode_reward_min: -1.263352920016478
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 3bc2c681e0ae435185cfcf75e4ae07c3
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.1624388694763184
          cur_lr: 0.009999999776482582
          entropy: 1.0635335445404053
          entropy_coeff: 0.0
          kl: 0.024999244138598442
          model: {}
          policy_loss: -0.15942765772342682
          total_loss: -0.10314415395259857
          vf_explained_var: 1.5894572769070692e-08
      

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:17.624946 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:17.642134 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:17.781377 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:17.703265 1405849115

[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0195313139483932 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0104715254173917 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.021094183679763 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:18.492123 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.010875391334781 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0277139496511705 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Act

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:18.858058 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorke

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:20.366760 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:20.344718 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:20.440445 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:20.417783 1400370647

[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.02443334213388 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.033136659150003 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_spac

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:21.154892 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9981630047449357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0127628902984567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker p

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,23.0,106.228,115.0,2.72767,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,23.0,106.59,115.0,4.83057,33.4941,-0.201143,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,22.0,106.02,110.0,2.07086,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,22.0,105.153,110.0,2.07567,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,21.0,103.213,105.0,2.25781,27.8989,-0.158597,2.97143
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,22.0,101.774,110.0,5.18534,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0110094065650816 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:22.242174 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:22.845328 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:22.929692 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:22.994225 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:23.088492 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0247744138041566 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0128305045052657 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.006843058926766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625]

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:23.853879 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0386578107646356 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[3

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:24.890013 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:25.003034 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.014383016245489 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.014179874259744 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:25.266169 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 0.7210756830467693 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.009529481263717 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 i

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:25.481823 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:25.631443 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.932326483300915 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.8325028540670618 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.012222140131471 GFLOPS >>>>>>>>>>>>>>>
R

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:26.521460 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0215023290896426 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:27.550070 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:27.576437 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.015465195621462 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0210269869943764 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:27.925725 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:27.999887 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.011346934117098 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:28.207976 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9967019198178058 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.952810954689596 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0118177331229887 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:29.155943 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0341700276052848 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAI

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,24.0,110.789,120.0,2.58711,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,24.0,110.384,120.0,4.55462,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,23.0,110.497,115.0,1.96395,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,24.0,114.01,120.0,1.86479,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,23.0,111.23,115.0,2.07823,27.8989,-0.158597,2.97368
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,23.0,105.99,115.0,4.91251,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9219992475693786 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loo

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:29.940021 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9934535433991483 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.016072615652607 GFLOPS >>>>>>>>>>>>>>>
[2m

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:30.200274 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9387520777441478 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 120
  counters:
    num_agent_steps_sampled: 120
    num_agent_steps_trained: 120
    num_env_steps_sampled: 120
    num_env_steps_trained: 12

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:30.652036 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


  agent_timesteps_total: 120
  counters:
    num_agent_steps_sampled: 120
    num_agent_steps_trained: 120
    num_env_steps_sampled: 120
    num_env_steps_trained: 120
  custom_metrics: {}
  date: 2022-06-30_14-12-30
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.58101458260745
  episode_reward_mean: 4.6668999740647354
  episode_reward_min: -0.12707442999548824
  episodes_this_iter: 2
  episodes_total: 40
  experiment_id: 95ebd9396e454a1fb9b29e46202e9f6f
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.3841858265427618e-08
          cur_lr: 0.0010000000474974513
          entropy: 0.7534444332122803
          entropy_coeff: 0.0
          kl: 0.003737854538485408
          model: {}
          policy_loss: -0.06507334113121033
          total_loss: -0.06299923360347748
          vf_explained_var: 3.5762788286319847e-08
          vf_loss: 0.002074106130748987
      

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:12:31.451820 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for n_5625 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 28.611117553565215 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:31.739757 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:12:31.872725 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.032448001643673 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0003595998616923 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00000:
  agent_timesteps_total: 125
  counters:
    num_agent_steps_sampled: 125
    num_agent_st

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:32.231299 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:32.169551 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0044319873453715 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_n

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:32.639480 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.000227000248937 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['d

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:32.829257 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0365147726445096 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:33.383208 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.00483343020588 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:34.392863 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9818752359626164 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions =

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:34.814853 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.010942878786426 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_ne

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:12:35.369798 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:35.382180 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,25.0,116.914,125.0,2.5234,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,25.0,116.47,125.0,4.4439,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,24.0,115.142,120.0,1.8645,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,25.0,120.141,125.0,1.81914,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,24.0,115.305,120.0,2.66218,27.8989,-0.158597,2.975
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,24.0,110.536,120.0,4.6669,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0141121693690818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.013571660720964 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481705)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481705)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481705)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481705)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name flops_loop_nest_tensor
[2m

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:36.114579 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9863983723511873 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3481284)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3481284)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m >>> AGENT ITERATION = 3, actions = ['down', 'up', 'swap_down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:12:36.514501 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0141131365497587 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481705)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:12:37.260512 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:12:37.427539 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_total: 125
  counters:
    num_agent_steps_sampled: 125
    num_agent_steps_trained: 125
    num_env_steps_sampled: 125
    num_env_steps_trained: 125
  custom_metrics: {}
  date: 2022-06-30_14-12-37
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.58101458260745
  episode_reward_mean: 4.552897041707646
  episode_reward_min: -0.12707442999548824
  episodes_this_iter: 1
  episodes_total: 41
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 0.02295478819998764
    episode_reward_mean: 0.02295478819998764
    episode_reward_min: 0.02295478819998764
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 0.02295478819998764
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.163

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:37.533292 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:37.628938 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9784154201446016 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0015595189351747 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:38.029466 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9863983723511873 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = up)

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:38.702595 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.000426383301418 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_d

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:38.900217 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9965023281318872 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy'

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:39.892487 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:40.165291 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:40.090272 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.009933931925012 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0065089609438425 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0086556535439857 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 135
  counters:
    num_agent_steps_sampled: 135
    num_agent_steps_trained: 135
    num_env_steps_sampled: 135
    num_env_steps_trained: 135
  custom_metrics: {}
  date: 2022-06-30_14-12-40
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.49408995423195
  episode_reward_mean: 4.047878130947913
  episode_reward_min: -1.2917548214584964
  episodes_this_iter: 2
  episodes_total: 45
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:40.808272 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9980973242013789 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:41.360116 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,27.0,125.15,135.0,2.29652,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,27.0,125.067,135.0,4.04788,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,25.0,122.115,125.0,1.81889,27.2174,-0.103503,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,26.0,124.744,130.0,1.73292,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,26.0,126.049,130.0,3.10448,27.8989,-0.158597,2.97674
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,25.0,117.116,125.0,4.5529,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:41.788613 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9819407908570823 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0122906834431764 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.000893037740385 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for m_5586 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L5  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:42.202270 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:42.817281 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:42.900565 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0227960403526763 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9904801768428166 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:43.041637 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9770473613232906 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.983315632633722 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:44.024339 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0209578472432357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_sp

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:44.359675 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : 

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:45.386996 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:45.432074 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:45.509450 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:45.599793 1398522869

[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0036295837348024 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.007312712250516 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:46.494853 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,28.0,129.149,140.0,2.24688,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,28.0,129.017,140.0,3.96029,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,27.0,130.37,135.0,1.65647,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,27.0,129.07,135.0,1.65644,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,27.0,131.096,135.0,2.93848,27.8989,-1.26218,2.97778
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,26.0,122.065,130.0,4.33954,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:46.969508 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.002561029759318 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(R

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:47.479740 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:47.561705 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9464184366390196 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0044990429855982 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0122906834431764 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:47.795336 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0216387390014483 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.012829538556041 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:48.200877 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9182546154370346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:49.157573 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0319665142237033 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9863983

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:49.693328 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:50.155315 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:50.223952 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.7919850157502957 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9485026791131377 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:50.540673 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9998264463361581 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_t

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:50.913044 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:51.742038 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:51.750779 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,29.0,133.009,145.0,2.15175,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,29.0,133.309,145.0,3.79403,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,28.0,134.148,140.0,1.62026,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,29.0,136.931,145.0,1.55489,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,28.0,135.48,140.0,2.81051,27.8989,-1.26218,2.97872
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,28.0,130.122,140.0,4.05998,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0115485403182176 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:52.711518 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:52.697737 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9809597937759675 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.010875391334781 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_sp

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:53.119298 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.001425805164774 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:53.721951 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9998931941806084 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9805023897463307 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:54.276196 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:54.334892 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.887269682269635 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.018784793116029 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.012694314814593 GFLOPS >>>>>>>>>>>>>>>
[

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:12:54.835229 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:12:54.894203 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 150
  counters:
    num_agent_steps_sampled: 150
    num_agent_steps_trained: 150
    num_env_steps_sampled: 150
    num_env_steps_trained: 150
  custom_metrics: {}
  date: 2022-06-30_14-12-54
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.49408995423195
  episode_reward_mean: 3.6386188107838886
  episode_reward_min: -1.2917548214584964
  episodes_this_iter: 2
  episodes_total: 50
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 0.01353407539142637
    episode_reward_mean: 0.01353407539142637
    episode_reward_min: 0.01353407539142637
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 0.01353407539142637
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.138

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:55.790418 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9902827714458304 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.020142160692251 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9944469567392349 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:56.438212 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:56.810765 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:56.860506 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:12:56.856139 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 34.91558101009765 GFLOPS >>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:57.470255 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0127628902984567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,30.0,139.543,150.0,2.06136,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,31.0,142.94,155.0,4.21246,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,29.0,138.362,145.0,1.55263,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,29.0,136.931,145.0,1.55489,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,29.0,139.679,145.0,3.17174,27.8989,-1.26218,2.97917
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,29.0,134.293,145.0,3.89142,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:12:58.586150 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:12:58.608567 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9837743371935461 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPO

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:12:58.820276 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.003897618477712 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9941823921797963 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION =

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:12:59.487490 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:12:59.487851 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = up
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.994315139815418 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:12:59.650864 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9894916538952836 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:13:01.237097 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0269603886997176 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 150
  counters:
    num_agent_steps_sampled: 150
    num_agent_steps_trained: 150
    num_env_steps_sampled: 150
    num_env_steps_trained: 150
  custom_metrics: {}
  date:

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:01.432702 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:13:01.520846 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 35.09936567976033 GFLOPS >>>

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:13:01.864276 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.0104040695988448 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:02.146994 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:02.348074 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0185807624041794 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00000:
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:03.574007 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9990277250864084 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loo

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,32.0,147.301,160.0,1.94533,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,32.0,146.579,160.0,4.67808,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,30.0,145.783,150.0,1.49009,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,31.0,148.311,155.0,1.45896,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,30.0,146.093,150.0,3.04389,27.8989,-1.26218,2.98
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,30.0,140.881,150.0,4.39175,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:04.026159 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:04.026159 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9273655502634197 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.987714407847159 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:04.250025 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.94207534187587 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9260680430427097 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:04.573740 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0091261557386235 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.018647796517339 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:04.919930 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.990942201830072 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0229326249241577 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:06.291516 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:06.286675 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9234773803714966 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.925388125338548 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:06.655666 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0093956632010217 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:06.855535 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:07.110411 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.996700969286643 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0534363926371477 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:07.398811 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9931229412066596 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.022387372422595 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:08.221974 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:08.976309 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,33.0,151.75,165.0,1.87415,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,33.0,151.292,165.0,4.50717,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,31.0,150.163,155.0,1.45913,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,32.0,152.108,160.0,1.40268,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,31.0,150.518,155.0,2.92371,27.8989,-1.26218,2.98077
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,32.0,148.084,160.0,4.54083,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9921317922424582 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9997606563926913 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0337567514230175 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:09.396192 139852286993984 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140946-687409-6f35
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0114819768643586 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:09.677095 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:09.733838 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9963027763446222 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9936525259419846 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0334116613572646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:09.960897 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9916682692878427 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:10.291438 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3480646)[0m Fatal Python error: Segmentation fault
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Current thread 0x00007f31e5e32640 (most recent call first):
[2m[36m(RolloutWorker pid=3480646)[0m   File "/home/dejang/loop_tool_env/loop_tool_service/service_py/env/loop_tool_env.py", line 74 in get_available_actions
[2m[36m(RolloutWorker pid=3480646)[0m   File "./example_service.py", line 300 in apply_action
[2m[36m(RolloutWorker pid=3480646)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/compiler_gym/service/runtime/compiler_gym_service.py", line 201 in Step
[2m[36m(RolloutWorker pid=3480646)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 443 in _call_behavior
[2m[36m(RolloutWorker pid=3480646)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 560 in _unary_response_in

[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9115988358049236 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorke

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:11.727887 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:11.817648 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:11.996027 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.951478973337769 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9970337104758586 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:12.579262 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.000893037740385 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.008186333161448 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:12.800818 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9567557530934234 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9524936946565878 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_s

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:13.402643 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:14.092243 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWork

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:14.617944 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0173603545756467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:15.047233 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:15.069013 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_d

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:15.450173 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.028672143136291 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWork

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:15.669981 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,34.0,155.735,170.0,1.8414,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,34.0,155.58,170.0,4.4269,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,32.0,154.94,160.0,1.41039,27.2174,-0.123941,3.0
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,33.0,156.203,165.0,1.35067,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,33.0,159.811,165.0,2.7664,27.8989,-1.26218,2.98182
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,33.0,152.007,165.0,4.97308,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 170
  counters:
    num_agent_steps_sampled: 170
    num_agent_steps_trained: 170
    num_env_steps_sampled: 170
    num_env_steps_trained: 170
  custom_metrics: {}
  date: 2022-06-30_14-13-15
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.75533415766789
  episode_reward_mean: 1.3269132633127507
  episode_reward_min: -0.17761185556224857
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 1ef703bcf6274298b5c0bc6beb0ab2f4
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.8626451769865326e-10
          cur_lr: 0.0010000000474974513
          entropy: 1.0296859741210938
          entropy_coeff: 0.0
          kl: 0.0012783310376107693
          model: {}
          policy_loss: -0.017051611095666885
          total_loss: -0.015774378553032875
          vf_explained_var: 0.0
          vf_l

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:16.859210 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0039636809103087 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTr

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:17.851006 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:13:18.004873 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0145852315764547 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(R

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:13:18.134146 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:18.066534 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:18.164396 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00000:[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0061730550404433 GFLOPS >>>>>>>>>>>>>>>

  agent_timesteps_total: 175
  counters:
    num_agent_steps_sampled: 175
    num_agent_steps_trained: 175
    num_env_steps_sampled: 175
    num_env_steps_trained: 175
  custom_metrics: {}
  date: 2022-06-30_14-13-18
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.387861055243647
  episode_reward_mean: 1.7785301959238764
  episode_reward_min: -1.263352920016478
  episodes_this_iter: 2
  episodes_total: 58
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.04007716085310298
    episode_reward_mean: -0.04007716085310298
    episode_reward_min: -0.04007716085310298
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.04007716085310298
    off_policy_estimator: {}
    policy_reward_max: {}


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:19.454420 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.002894770646013 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m   

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:19.982892 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0341700276052848 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0224556348116827 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:20.212768 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0108079084127564 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.009529481263717 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0006926073231295 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:20.447090 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 i

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:20.759502 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:20.856270 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.98777940754818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0007594130004636 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.021229564484067 GFLOPS >>>>>>>>>>>>>>>
[

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,35.0,162.352,175.0,1.77853,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,35.0,162.282,175.0,4.27363,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,34.0,165.161,170.0,1.30951,27.2174,-0.123941,2.98246
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,34.0,160.016,170.0,1.32691,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,34.0,164.515,170.0,2.66869,27.8989,-1.26218,2.98246
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,34.0,156.268,170.0,4.88545,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,



[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.943332383513915 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480856)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2.0025600736415066 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:22.574758 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:22.679187 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:13:22.626497 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0224556348116827 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.014179874259744 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9698494916757114 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:23.158958 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.007514471340189 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:23.513557 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481705)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481705)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481705)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481705)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(R

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:13:24.151141 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:24.207977 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.028467090095811 GFLOPS >>>>>>>>>>>>>>>Result for PPOTrainer_compiler_gym_bc2ca_00005:

[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
  agent_timesteps_total: 175
  counters:
    num_agent_steps_sampled: 175
    num_agent_steps_trained: 175
    num_env_steps_sampled: 175
    num_env_steps_trained: 175

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:25.471460 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:25.451994 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.008991429116098 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.009933931925012 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:26.096498 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m Action = down
[2m[36m(PPOTrainer pid=3480438)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480438)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480438)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480438)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480438)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480438)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 1.9980963723411855 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3481284

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:26.640748 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,36.0,166.872,180.0,1.71956,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,36.0,166.843,180.0,4.13102,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,34.0,165.161,170.0,1.30951,27.2174,-0.123941,2.98246
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,36.0,170.756,180.0,1.23917,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,34.0,164.515,170.0,2.66869,27.8989,-1.26218,2.98246
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,35.0,162.937,175.0,4.71564,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 i

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:13:27.711869 139783527458368 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140950-192436-659b
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0116170377465 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m Action = up
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(PPOTrainer pid=3

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:28.007463 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:13:27.938150 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9949772453996133 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.00423084734019 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0173603545756467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:28.134837 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.021910639376869 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0168859883227204 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_sp

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:28.481788 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9769169024513598 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:28.689574 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:28.664778 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0025600736415066 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.na

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:29.194274 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.7993404612481299 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:30.020309 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9945797396011413 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:30.614761 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9891623643991356 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tens

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:31.059060 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:31.273935 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:31.285221 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,38.0,175.131,190.0,1.6376,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,38.0,174.945,190.0,4.45844,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,35.0,171.926,175.0,1.286,27.2174,-0.123941,2.98276
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,37.0,174.956,185.0,1.21869,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,36.0,175.8,180.0,2.53491,27.8989,-1.26218,2.98333
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,36.0,167.427,180.0,4.55869,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:32.202414 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9863325222510895 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0112794195429062 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m obser

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:32.863609 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.013707002381802 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.033068659323785 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.004364936191361 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:33.229774 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0126952806340346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9988943493627258 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:33.992231 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:33.966526 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:34.415959 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:34.771969 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0090587901686834 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.024227191209487 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_sp

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:35.365421 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9972999832856106 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.010270135033967 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observatio

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:35.767118 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.007648038951485 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
Result for PPO

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:35.939448 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:36.103191 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9322650595416064 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.01060548681984 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:36.512363 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,39.0,179.621,195.0,1.58737,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,39.0,179.066,195.0,4.32007,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,37.0,180.652,185.0,1.20249,27.2174,-0.123941,2.98387
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,38.0,179.109,190.0,1.18385,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,37.0,179.984,185.0,2.45184,27.8989,-1.26218,2.98387
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,38.0,175.663,190.0,4.85844,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(Rollout

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:37.453386 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:38.140228 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0053692571539496 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.018784793116029 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:38.379062 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0517518607023097 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.022387372422595 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0012930641660427 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:38.694419 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9966353848219107 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:39.470470 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.021026013162219 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:39.922134 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0272337225269084 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nes

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:40.738188 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.002761834729212 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 195
  counters:
    num_agent_steps_sampled: 195
    num_agent_steps_trained: 195
    num_env_steps_sampled: 195
    num_env_steps_trained: 195

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:41.167680 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.001358954978891 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.001

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:41.577922 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m Action = up
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (li

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:13:41.949036 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0040977340142505 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:42.123812 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:13:42.193567 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 1.9346304657767275 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.031141918781519 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timest

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,40.0,185.66,200.0,1.56349,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,40.0,185.624,200.0,4.25476,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,38.0,185.499,190.0,1.18376,27.2174,-0.123941,2.98413
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,39.0,184.465,195.0,1.14724,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,38.0,184.681,190.0,2.41333,27.8989,-1.26218,2.98413
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,39.0,179.762,195.0,4.70896,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.028123792655088 GFLOPS >>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:43.131337 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9977642285135646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.954209399280247 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:43.383619 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:43.352808 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.013301998371811 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:43.572173 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0361005432606025 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:44.887323 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.988503169569478 GFLOPS >>>>>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:45.388696 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:45.598048 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9932555478251612 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 1.9904801768428166 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.000959856803868 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:45.875152 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9963692891745988 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 195
  counters:
    num_agent_steps_sampled: 195
    num_agent_steps_trained: 195
    num_env_steps_sampled: 195
    num_env_steps_trained: 195
  custom_metrics: {}
  date: 2022-06-30_14-13-46
  done: false
  episode_len_mean: 2.9846153846153847
  episode_media: {}
  episode_reward_max: 27.898879460815934
  episode_reward_mean: 2.340277160940511
  episode_reward_min: -1.2621811876728306
  episodes_this_iter: 2
  episodes_total: 65
  experiment_id: 7b53c32298d04fc1955d389baf905d12
  hostname:

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:13:47.112164 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 1.9011302610442495 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  <<<<<< c

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:13:47.442039 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,41.0,189.894,205.0,1.51703,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,41.0,189.858,205.0,4.12945,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,39.0,190.5,195.0,1.14782,27.2174,-0.123941,2.98462
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,40.0,190.69,200.0,1.13003,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,39.0,189.71,195.0,2.34028,27.8989,-1.26218,2.98462
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,40.0,185.955,200.0,4.63653,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 3, actions = ['down', 'up', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:47.837536 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:47.800662 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0277139496511705 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9850201942471715 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:48.303727 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:48.324182 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:48.460709 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.018784793116029 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< curs

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:48.572525 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.026687128570303 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9932555478251612 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(Roll

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:50.389632 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


  agent_timesteps_total: 210
  counters:
    num_agent_steps_sampled: 210
    num_agent_steps_trained: 210
    num_env_steps_sampled: 210
    num_env_steps_trained: 210
  custom_metrics: {}
  date: 2022-06-30_14-13-50
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.387861055243647
  episode_reward_mean: 1.4739732977902626
  episode_reward_min: -1.263352920016478
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: 3bc2c681e0ae435185cfcf75e4ae07c3
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.001336346147581935
          cur_lr: 0.009999999776482582
          entropy: 0.2601809799671173
          entropy_coeff: 0.0
          kl: 0.020389754325151443
          model: {}
          policy_loss: -0.05936839431524277
          total_loss: -0.0586826428771019
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 0.0006585001247003675
        nu

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:50.907202 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:51.021986 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0112794195429062 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:51.144039 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:51.118905 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9939833038583679 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0062402272234814 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:51.570181 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0032286227917844 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9959712284992992 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481284)[0m Action = down
[2m[36m(PPOTrainer pid=3481284)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481284)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481284)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481284)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481284)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481284)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:52.997691 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:53.079971 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.015532022964059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.013301998371811 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0082526786731747 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(PPOTrainer pid=3480438)[0m Fatal Python error: Illegal instruction
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Current thread 0x00007f21e37fe640 (most recent call first):
[2m[36m(PPOTrainer pid=3480438)[0m   File "/home/dejang/loop_tool_env/loop_tool_service/service_py/env/loop_tool_env.py", line 74 in get_available_actions
[2m[36m(PPOTrainer pid=3480438)[0m   File "./example_service.py", line 300 in apply_action
[2m[36m(PPOTrainer pid=3480438)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/compiler_gym/service/runtime/compiler_gym_service.py", line 201 in Step
[2m[36m(PPOTrainer pid=3480438)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 443 in _call_behavior
[2m[36m(PPOTrainer pid=3480438)[0m   File "/home/dejang/anaconda3/envs/compiler_gym/lib/python3.8/site-packages/grpc/_server.py", line 560 in _unary_response_in_pool
[2m[36m(PPOTrai

[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] 

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:53.593654 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9918668955675387 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m Actions = ['down', 'dummy', 'swap_dow

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:13:53.749252 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,42.0,194.055,210.0,1.47397,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,42.0,194.228,210.0,4.01528,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,39.0,190.5,195.0,1.14782,27.2174,-0.123941,2.98462
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,41.0,195.761,205.0,1.09632,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,40.0,197.423,200.0,2.27092,27.8989,-1.26218,2.98507
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,41.0,190.066,205.0,4.50042,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9935208140047758 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:54.572075 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0168859883227204 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480226)[0m cc1: fatal error: /tmp/fn_3028.c: No such file or directory
[2m[36m(RolloutWorker pid=3480226)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:55.594746 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:55.778711 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.023545531298999 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0122906834431764 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0012930641660427 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:13:56.000349 140002956658240 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141354-670083-5508
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:56.136024 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.011818698101334 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.99670191981

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:57.088975 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:57.185915 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0054354166547133 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.005436375520149 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:13:58.042788 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9904140558414467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.997964072597913 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] 

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:13:58.229285 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:13:58.371979 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9955068667294042 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9225533912227175 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,43.0,198.041,215.0,1.45392,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,43.0,197.787,215.0,3.95789,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,40.0,199.943,200.0,1.11472,27.2174,-0.123941,2.98507
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,42.0,200.667,210.0,1.0652,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,41.0,202.095,205.0,2.23685,27.8989,-1.26218,2.98529
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,42.0,194.601,210.0,4.37045,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 220
  counters:
    num_agent_steps_sampled: 220
    num_agent_steps_trained: 220
    num_env_steps_sampled: 220
    num_env_steps_trained: 220
  custom_metrics: {}
  date: 2022-06-30_14-13-58
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.49408995423195
  episode_reward_mean: 3.8493939846340957
  episode_reward_min: -1.2917548214584964
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.024054229259490967
          cur_lr: 0.009999999776482582
          entropy: 0.246401846408844
          entropy_coeff: 0.0
          kl: 0.0008560208370909095
          model: {}
          policy_loss: 6.298224519696305e-08
          total_loss: 0.0006377618992701173
          vf_explained_var: 1.9868215517249155e-08


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:13:59.267198 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:13:59.364569 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9976319727495881 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9877134658537567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9939833038583679 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:13:59.990876 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:00.875175 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:00.909977 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9845609134185676 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.01959938366718 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9381894537603879 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:01.026728 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0072464288295118 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:01.663930 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0141121693690818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.986399313098535 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.00149265981672 GFLOPS >>>>>>>>>>>>>>>
[

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:01.992734 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:02.058496 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.984101845071515 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9957062594211064 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 i

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:03.319869 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:03.424029 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0006926073231295 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.00852195183153 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=34810

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:03.494210 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:03.761363 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0091271181366386 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0022274000760927 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9935208140047758 GFLOPS >>>>>>>>>>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,44.0,202.14,220.0,1.41322,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,44.0,202.024,220.0,3.84939,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,41.0,204.624,205.0,1.09843,27.2174,-0.123941,2.98529
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,43.0,204.321,215.0,1.04995,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,42.0,205.923,210.0,2.17275,27.8989,-1.26218,2.98571
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,44.0,202.405,220.0,4.18978,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9873188491632925 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:04.684980 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observa

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:05.265386 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.026276887044942 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.005771075568189 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(PPOTrainer pid=3480016)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:14:05.840849 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.002561029759318 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
Result for PPOTrainer_compiler_gym_bc2ca_00001:
[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3479764)[0m observati

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:14:06.021394 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:06.176917 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 1.9489454200081875 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.922984416886009 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.n

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:06.732793 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9976976227025642 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0030286807759956 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:07.103810 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.7639312713851703 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0055706257318917 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:08.177165 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9972990321849458 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0479740003300737 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9882401794500835 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=348022

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:08.475335 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:08.582148 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:08.582278 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:09.176860 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:09.251022 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9284182920712072 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9981639566686153 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9895577136259646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9980963723411855 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_spa

[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3233.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 i

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,45.0,209.209,225.0,1.37568,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,46.0,213.06,230.0,3.69703,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,42.0,210.168,210.0,1.0674,27.2174,-0.123941,2.98571
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,44.0,208.189,220.0,1.02157,27.7553,-0.177612,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,43.0,209.956,215.0,2.11186,27.8989,-1.26218,2.98611
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,44.0,202.405,220.0,4.18978,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9988276681748125 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 1.9895567698846388 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3241.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0239546674734874 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 1.9984952811961134 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9966353848219107 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(PPOTrainer pid=3481705)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTr

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:11.164449 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:14:11.166969 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.015532022964059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 2.0141131365497587 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_total: 225
  counters:
    num_agent_steps_sampled: 225
    num_agent_steps_trained: 225
    num_env_steps_sampled: 225
    num_env_steps_trained: 225
  custom_metrics: {}
  date: 2022-06-30_14-14-11
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.58101458260745
  episode_reward_mean: 4.080703705082621
  episode_reward_min: -0.12707442999548824
  episodes_this_iter: 2
  episodes_total: 75
  evaluation:
    custom_metrics: {}
    episode_len

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:11.379436 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:11.432423 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0036295837348024 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0057039347968715 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.010606450635615 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:14:11.822506 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 225
  counters:
    num_agent_steps_sampled: 225
    num_agent_steps_trained: 225
    num_env_steps_sampled: 2

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:12.617207 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0053021432802627 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9994270059439794 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m obser

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:13.627577 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:13.677525 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:13.711890 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0007594130004636 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:14.049631 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.00469927866646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:14.262872 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.011414453224096 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.032861731316258 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_spa

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:15.154738 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,47.0,217.766,235.0,1.32282,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,47.0,217.24,235.0,3.60139,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,43.0,215.255,215.0,1.03752,27.2174,-0.123941,2.98611
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,45.0,215.108,225.0,0.98959,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,44.0,214.549,220.0,2.08293,27.8989,-1.26218,2.9863
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,46.0,213.357,230.0,4.02719,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0096642800638405 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:16.208050 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:16.335276 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.011076903012609 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:16.679879 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9941160249888272 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0007594130004636 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:16.936482 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:17.039834 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0173603545756467 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.000426383301418 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:18.775116 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:18.791723 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9437718768551737 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.005905370596917 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=34812

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:19.125647 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:19.381229 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0224556348116827 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0201431336728253 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9619344937252114 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:14:19.497371 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:19.838493 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9944469567392349 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,48.0,222.184,240.0,1.28973,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,48.0,221.783,240.0,3.51156,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,44.0,219.499,220.0,1.02306,27.2174,-0.123941,2.9863
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,47.0,223.368,235.0,0.95098,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,45.0,222.586,225.0,2.02729,27.8989,-1.26218,2.98667
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,47.0,217.55,235.0,3.92407,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9815484626868491 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.990810847756252 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:21.180905 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0062402272234814 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0007594130004636 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:21.533947 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:21.504817 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:21.678092 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:22.136189 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.013640296003833 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.99949372

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:23.975352 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:23.975242 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9918016282771644 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0132353188231535 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9805678538509193 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=348022

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:24.134633 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:24.275011 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:14:24.269960 140002956658240 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141354-670083-5508
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.003429561675269 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0219778948368092 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0081853716645184 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:24.666977 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:25.861730 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.000626760378632 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9902827714458304 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:26.084095 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9945133459602147 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:26.552664 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:26.623113 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:26.732219 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0215023290896426 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0160057524578203 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:27.440267 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0143152976974883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_t

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,49.0,226.114,245.0,1.27386,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,49.0,225.86,245.0,3.46891,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,45.0,227.895,225.0,0.996395,27.2174,-0.123941,2.98667
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,48.0,227.525,240.0,0.926813,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,46.0,227.016,230.0,1.97442,27.8989,-1.26218,2.98701
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,49.0,225.512,245.0,4.18237,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0141131365497587 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:28.648892 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:28.630351 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0081190305998113 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0152618353217027 GFLOPS >>>>>>>>

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:29.220721 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3480016)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 1.9859393939393937 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:14:29.760837 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:14:29.834629 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.021910639376869 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0181058221068096 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:30.197942 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0323111139106795 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor


[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:31.113873 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.022319114641368 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0149907520837838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0003586458448592 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:31.798489 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0032286227917844 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:32.009331 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:32.021188 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:32.130358 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9957727325068495 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pi

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,50.0,232.741,250.0,1.24292,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,50.0,232.539,250.0,3.38445,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,46.0,232.268,230.0,0.970804,27.2174,-0.123941,2.98701
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,49.0,231.587,245.0,0.916962,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,48.0,235.552,240.0,1.90215,27.8989,-1.26218,2.9875
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,49.0,225.512,245.0,4.18237,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9450942564982492 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9322027495730716 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.993454490840945 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:32.792875 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0003595998616923 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3481705)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3481705)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481705)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481705)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(PP

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:34.341955 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:14:34.296752 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9981630047449357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:34.598246 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:34.607761 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9955078161239996 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9969006006931038 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_spa

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:35.412312 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.007581733383368 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9827268507182019 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Ro

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:14:36.066629 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:36.191519 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9957062594211064 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = dow

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:36.953104 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9954404113525384 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nes

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:37.199495 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:37.278007 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0073136729119874 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9945133459602147 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutW

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:38.054117 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0166154375771983 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00001:
  agent_timesteps_total: 260
  counters:
    num_agent_steps_sampled: 260
    num_agent_steps_trained: 260
    num_env_steps_sampled: 260
    num_env_steps_trained: 260
  custom_metrics: {}
  date: 2022-06-30_14-14-38
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.49408995423195
  episode_reward_mean: 3.2661264350222976
  episode_reward_min: -1.2917548214584964
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learne

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,51.0,237.125,255.0,1.21325,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,52.0,241.105,260.0,3.26613,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,47.0,237.663,235.0,0.95809,27.2174,-0.123941,2.98718
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,50.0,238.558,250.0,0.892993,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,49.0,240.197,245.0,1.85552,27.8989,-1.26218,2.9878
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,50.0,232.195,250.0,4.08172,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.003161652120838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=347

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:38.717927 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:38.647604 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:39.568181 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:39.756607 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9863983723511873 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9935208140047758 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 240
  counters:
    num_agent_steps_sampled: 240
    num_agent_steps_trained: 240
    num_env_steps_sampled: 240
    num_env_steps_trained: 240
  custom_metrics: {}
  date: 2022-06-30_14-14-39
  done: false
  episode_len_mean: 2.9875
  episode_media: {}
  episode_reward_max: 27.217420858992803
  episode_reward_mean: 0.9338233697772752
  episode_reward_min: -0.12394140336676607
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 3d62e2a8608640d6929689

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:40.224550 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0141121693690818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9890963

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:40.736958 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.005167929008623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9823342113136102 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:41.366790 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.012761924414127 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:42.048598 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:42.121966 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.004566103112492 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.n

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:42.234781 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.011481012209006 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tenso

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:42.917468 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Action = up
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 12

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:43.969234 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3481284)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9918668955675387 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_t

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:44.333276 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:44.412731 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.006844019138756 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0533660229271025 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00003:
  agent_timesteps_total: 260
  counters:
    num_agent_steps_sampled: 260
    num_agent_steps_trained: 260
    num_env_steps_sampled: 260
    num_env_steps_trained: 260
  custom_metrics: {}
  date: 2022-06-30_14-14-44
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.75533415766789
  episode_reward_mean: 0.8617911431501208
  episode_reward_min: -0.2295895426196055
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 1ef703bcf6274298b5c0bc6beb0a

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,53.0,245.223,265.0,1.17224,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,53.0,245.352,265.0,3.1906,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,48.0,243.211,240.0,0.933823,27.2174,-0.123941,2.9875
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,52.0,246.77,260.0,0.861791,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,49.0,240.197,245.0,1.85552,27.8989,-1.26218,2.9878
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,52.0,240.563,260.0,3.93825,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9994937271265487 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481284)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:44.618680 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.964179962789121 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.0123582659277357 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0108079084127564 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:14:45.319624 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9818752359626164 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9872529380215334 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9868585041470415 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:45.585815 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3793.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:46.047601 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.023273191064862 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop

[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3797.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:46.339520 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9931892423103352 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9845609134185676 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3801.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:46.895551 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:47.044872 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.027097536244912 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9783500982499898 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0251146226433314 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:47.928088 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:48.019566 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.009933931925012 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:48.297684 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.026823259711278 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.015532022964059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:48.988570 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0174273076560487 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.004231805054083 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.005503498379313 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9854796877048828 GFLOPS >>>>>>>>>>>>>>>
[

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:49.536081 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9302775734938804 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:49.669897 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.998961034999776 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nes

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,54.0,249.412,270.0,1.14558,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,54.0,249.523,270.0,3.11934,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,49.0,248.027,245.0,0.910684,27.2174,-0.123941,2.9878
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,53.0,251.178,265.0,0.842502,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,51.0,252.845,255.0,1.79005,27.8989,-1.26218,2.98824
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,53.0,244.358,265.0,4.11081,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9927924486527293 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorke

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:50.237402 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0079844390091406 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 1

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:50.618170 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00005:
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9993602892141045 GFLOPS >>>>>>>>>>>>>>>  agent_timesteps_total: 270

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:51.715297 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3479764)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3479764)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3479764)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3479764)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.005436375520149 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:52.398109 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9284182920712072 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3481922)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:14:53.425752 140002956658240 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141354-670083-5508
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:53.437954 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:14:53.560790 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3479764)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.040250435479689 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480016)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480016)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PP

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:14:53.670029 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.015532022964059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dumm

[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_3881.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9372512793984515 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.899686261970123 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:54.576804 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:54.668270 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:54.747530 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0240240124308726 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:54.926204 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:55.002616 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9309609494446243 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9619335760079555 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:57.311617 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.015532022964059 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0072454682324024 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:14:57.483516 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0032286227917844 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:14:57.701596 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 1.9914053868501753 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.011413488633508 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:14:57.892806 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,55.0,255.924,275.0,1.13306,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,56.0,260.337,280.0,3.01937,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,50.0,256.744,250.0,0.900023,27.2174,-0.123941,2.98795
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,54.0,254.676,270.0,1.12177,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,52.0,257.759,260.0,1.74819,27.8989,-1.26218,2.98851
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,54.0,248.309,270.0,4.26958,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,



[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0086556535439857 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.025798318718923 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_spac

[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:14:58.734477 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:14:58.809467 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 1.9819407908570823 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 1.99318

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:14:59.558081 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:14:59.760986 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9890963309191485 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = swap_down)
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_s

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:14:59.980839 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:00.124518 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9936525259419846 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9778286109849712 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:00.347512 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9988943493627258 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9886351621539993 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_s

[2m[36m(RolloutWorker pid=3479826)[0m cc1: fatal error: /tmp/fn_4000.c: No such file or directory
[2m[36m(RolloutWorker pid=3479826)[0m compilation terminated.
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:01.264256 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0126952806340346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:01.879026 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9967019198178058 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tens

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:02.157042 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.9336333305826159 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_sp

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:02.509687 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:02.607461 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0145861992115153 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.020074054381037 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0417083878731117 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:02.862549 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


  agent_timesteps_total: 285
  counters:
    num_agent_steps_sampled: 285
    num_agent_steps_trained: 285
    num_env_steps_sampled: 285
    num_env_steps_trained: 285
  custom_metrics: {}
  date: 2022-06-30_14-15-02
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.49408995423195
  episode_reward_mean: 2.9559517073499975
  episode_reward_min: -1.2917548214584964
  episodes_this_iter: 2
  episodes_total: 95
  experiment_id: 6fef992d5aa944f986f45bb8186c6f64
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.808921847958118e-06
          cur_lr: 0.009999999776482582
          entropy: 0.06684708595275879
          entropy_coeff: 0.0
          kl: -3.195022202362452e-08
          model: {}
          policy_loss: -1.589457454542753e-08
          total_loss: 0.0002578075509518385
          vf_explained_var: 0.0
          vf_loss: 0.00025782702141441405
        num_agent_st

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,57.0,264.823,285.0,1.08534,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,57.0,264.903,285.0,2.95595,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,51.0,262.235,255.0,0.878937,27.2174,-0.123941,2.98824
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,56.0,264.604,280.0,1.08518,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,53.0,261.774,265.0,1.72638,27.8989,-1.26218,2.98864
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,56.0,260.287,280.0,4.13032,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:04.235363 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9967675086536985 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:04.484761 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:04.510890 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9984952811961134 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:04.993344 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 29.23797175401174 GFLOPS >>>

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:05.402761 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.005502539449689 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down'

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:05.666323 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0192590809522666 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0059725248552307 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_s

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:06.837219 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:06.997836 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.001359909950103 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:07.142462 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.005771075568189 GFLOPS >>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:07.882620 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:08.003343 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0686440168044182 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.008520990013193 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.0050346911829804 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:08.215421 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9935198664999345 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.8586483810901329 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=348

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:15:09.213425 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,58.0,268.879,290.0,1.07356,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,58.0,268.81,290.0,2.92477,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,52.0,267.711,260.0,0.858434,27.2174,-0.123941,2.98851
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,57.0,269.109,285.0,1.06234,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,55.0,271.652,275.0,2.24228,27.8989,-1.26218,2.98913
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,57.0,264.322,285.0,4.28093,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:09.508053 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:09.579440 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9921308460575387 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9957727325068495 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 265
  counters:
    num_agent_steps_sampled: 265
    num_agent_steps_trained: 265
    num_env_steps_sampled: 265
    num_env_steps_trained: 265
  custom_metrics: {}
  date: 2022-06-30_14-15-10
  done: false
  episode_len_mean: 2.9886363636363638
  episode_media: {}
  episode_reward_max: 27.217420858992803
  e

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:10.448097 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.024227191209487 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0206180600485704 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=34

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:10.747859 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0114819768643586 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:11.273045 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0260723179070266 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:11.848968 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:11.931300 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m Action = down
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 2.0192590809522666 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:12.134588 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.003964638368889 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:13.014632 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9912730262829939 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3479826)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3479826)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3479826)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3479826)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3479826)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3479826)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9877134658537567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 2.010740430019895 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:13.375793 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 2.0071119541795293 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0229316492546707 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Action = down

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:14.074849 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:14.148620 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0043658940334055 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.985544541331743 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Action = up
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:14.428277 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3479826)[0m <<<<<<<<<<<<<<< Reward = 1.9958392100209181 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0022264442768356 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=348

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:14.620988 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9921980274204234 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m Action = down
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,RUNNING,100.37.253.28:3479764,0.5,0.01,59.0,273.035,295.0,1.05249,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,RUNNING,100.37.253.28:3480016,0.8,0.01,59.0,273.018,295.0,2.86647,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,53.0,273.153,265.0,0.848582,27.2174,-0.123941,2.98864
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,59.0,276.917,295.0,1.02965,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,56.0,275.828,280.0,2.21843,27.8989,-1.26218,2.98925
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,58.0,268.4,290.0,4.23625,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,PENDING,,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9943805719377614 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = down
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481922)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 27

[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:15.584725 139877823804992 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3479826)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140916-651932-6f35
[2m[36m(RolloutWorker pid=3479826)[0m 


[2m[36m(RolloutWorker pid=3480226)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480226)[0m <<<<<<<<<<<<<<< Reward = 1.9948444096088027 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480226)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480226)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480226)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480226)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480226)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480226)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m observation_space.na

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:15.956504 140037064746560 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480226)[0m 
[2m[36m(RolloutWorker pid=3480226)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140931-527493-6f35
[2m[36m(RolloutWorker pid=3480226)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pid=3479764)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3479764)[0m  for n_5625 in 128

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:16.281987 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.030524402771847 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0062402272234814 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m Action = up
[2m[36m(RolloutWorker pid=3481922)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481922)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481922)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481922)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481922)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481922)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:16.680372 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.9895577136259646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3479764)[0m Action = down
[2m[36m(PPOTrainer pi

[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:17.130358 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 1.999959946480869 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481073)[0m Action = down
[2m[36m(RolloutWorker pid=3481073)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481073)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481073)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481073)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481073)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481073)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480016)[0m Action = down
[2m[36m(PPOTrainer pid

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:17.638040 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0071119541795293 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.010

[2m[36m(PPOTrainer pid=3479764)[0m E0630 14:15:18.095349 139746248087104 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3479764)[0m 
[2m[36m(PPOTrainer pid=3479764)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140920-045457-659b
[2m[36m(PPOTrainer pid=3479764)[0m 


[2m[36m(PPOTrainer pid=3479764)[0m <<<<<<<<<<<<<<< Reward = 2.0094630513645537 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481073)[0m <<<<<<<<<<<<<<< Reward = 1.991669215033401 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480016)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480016)[0m <<<<<<<<<<<<<<< Reward = 2.0046331677265026 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480016)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3480016)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480016)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3480016)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0

[2m[36m(PPOTrainer pid=3480016)[0m E0630 14:15:18.541978 140042165020224 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480016)[0m 
[2m[36m(PPOTrainer pid=3480016)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T140935-442280-659b
[2m[36m(PPOTrainer pid=3480016)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9967684592482144 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:18.979035 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3479826)[0m E0630 14:15:19.057290435 3479859 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:19.223944 139914641393216 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481073)[0m 
[2m[36m(RolloutWorker pid=3481073)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141002-073910-6f35
[2m[36m(RolloutWorker pid=3481073)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.008925034784837 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481073)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481073)[0m observation_space.n

[2m[36m(RolloutWorker pid=3480226)[0m E0630 14:15:19.689180584 3480264 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:19.680730 139625302967872 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481922)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141032-812581-6f35
[2m[36m(RolloutWorker pid=3481922)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0191920062352597 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481922)[0m <<<<<<<<<<<<<<< Reward = 2.0394200959245676 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m ACTION_NOT_AVAILABLE (action = up)




[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480856)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3481705)[0m Action = down
[2m[36m(PPOTrainer pid=3481705)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3481705)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3481705)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3481705)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3481705)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3481705)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3481705)[0m observation_space.name 

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:20.882751 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.000093464450605 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2.0119538042070486 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = up
[2m[36m(RolloutWorker pid=3481497)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'down', 'up']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m 

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:21.142814 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0091261557386235 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3481705)[0m <<<<<<<<<<<<<<< Reward = 2.0164797046941536 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480856)[0m Action = down
[2m[36m(PPOTrainer pid=3480856)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3480856)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3480856)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3480856)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3480856)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3480856)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(PPOTrainer pid=3480856)[0m observation_space.name stride_tensor
[2m[

[2m[36m(PPOTrainer pid=3481705)[0m E0630 14:15:21.746423 140681823794752 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3481705)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141036-358326-659b
[2m[36m(PPOTrainer pid=3481705)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m E0630 14:15:21.875652 140564898985536 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480856)[0m 
[2m[36m(PPOTrainer pid=3480856)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141005-567598-659b
[2m[36m(PPOTrainer pid=3480856)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3480438)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3480856)[0m <<<<<<<<<<<<<<< Reward = 2.0072454682324024 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0551921690799784 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0657261087082217 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3480438)[0m Actions = 

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:15:24.184742 140002956658240 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141354-670083-5508
[2m[36m(PPOTrainer pid=3480438)[0m 


[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.0431664348745278 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:24.526248 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m 2022-06-30 14:15:24,671	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.


[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.054489498322582 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0547713142577764 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3533185)[0m 2022-06-30 14:15:24,890	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3533185)[0m 2022-06-30 14:15:24,890	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.0624634276110108 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 2.051472879136703 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:30.330974 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.098276676298496 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:15:34.018275 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.1330067774251273 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrain

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,54.0,278.263,270.0,0.829723,27.2174,-0.123941,2.98889
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,59.0,276.917,295.0,1.02965,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,58.0,283.459,290.0,2.12542,27.8989,-1.26218,2.98969
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,59.0,272.594,295.0,4.14978,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,PENDING,,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60.0,279.961,300.0,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60.0,280.127,300.0,2.80905,33.4941,-1.29175,3.0


[2m[36m(PPOTrainer pid=3533185)[0m 2022-06-30 14:15:36,065	INFO trainable.py:159 -- Trainable.setup took 11.395 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:36.078902 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0767273861571445 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:39.345960 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.102243634081746 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.06522872

[2m[36m(PPOTrainer pid=3533918)[0m 2022-06-30 14:15:40,323	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3533918)[0m 2022-06-30 14:15:40,561	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3533918)[0m 2022-06-30 14:15:40,561	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0710689575486327 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:15:46.330659 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.086974867060778 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:15:49.864240 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.089734487436843 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Action = down
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533918)[0m   for 

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,54.0,278.263,270.0,0.829723,27.2174,-0.123941,2.98889
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,59.0,276.917,295.0,1.02965,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,58.0,283.459,290.0,2.12542,27.8989,-1.26218,2.98969
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,59.0,272.594,295.0,4.14978,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60.0,279.961,300.0,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60.0,280.127,300.0,2.80905,33.4941,-1.29175,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00005:
  agent_timesteps_total: 300
  counters:
    num_agent_steps_sampled: 300
    num_agent_steps_trained: 300
    num_env_steps_sampled: 300
    num_env_steps_trained: 300
  custom_metrics: {}
  date: 2022-06-30_14-15-21
  done: true
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.58101458260745
  episode_reward_mean: 4.066795683299172
  episode_reward_min: -0.12707442999548824
  episodes_this_iter: 2
  episodes_total: 100
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 27.131292575303355
    episode_reward_mean: 27.131292575303355
    episode_reward_min: 27.131292575303355
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 27.131292575303355
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.1416464

[2m[36m(PPOTrainer pid=3533918)[0m 2022-06-30 14:15:51,422	INFO trainable.py:159 -- Trainable.setup took 11.099 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:15:51.435674 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,54.0,278.263,270.0,0.829723,27.2174,-0.123941,2.98889
PPOTrainer_compiler_gym_bc2ca_00003,RUNNING,100.37.253.28:3480856,0.5,0.001,59.0,276.917,295.0,1.02965,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,58.0,283.459,290.0,2.12542,27.8989,-1.26218,2.98969
PPOTrainer_compiler_gym_bc2ca_00005,RUNNING,100.37.253.28:3481705,0.9,0.001,60.0,278.765,300.0,4.0668,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00008,PENDING,,0.9,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60.0,279.961,300.0,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60.0,280.127,300.0,2.80905,33.4941,-1.29175,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 295
  counters:
    num_agent_steps_sampled: 295
    num_agent_steps_trained: 295
    num_env_steps_sampled: 295
    num_env_steps_trained: 295
  custom_metrics: {}
  date: 2022-06-30_14-15-26
  done: false
  episode_len_mean: 2.989795918367347
  episode_media: {}
  episode_reward_max: 27.898879460815934
  episode_reward_mean: 2.1041968722569253
  episode_reward_min: -1.2621811876728306
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 7b53c32298d04fc1955d389baf905d12
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.55271373174006e-16
          cur_lr: 0.0010000000474974513
          entropy: 0.9847444891929626
          entropy_coeff: 0.0
          kl: 0.00020337870228104293
          model: {}
          policy_loss: 4.053115176816391e-08
          total_loss: 0.0007881218916736543
          vf_explained_var: 0.0
 

[2m[36m(RolloutWorker pid=3481073)[0m E0630 14:15:52.498110834 3481110 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:52.436917 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0266195594717065 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 1.9986952715745996 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m <<

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:52.678974 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481922)[0m E0630 14:15:52.678280748 3481958 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(RolloutWorker pid=3481922)[0m 2022-06-30 14:15:52,686	ERROR worker.py:451 -- SystemExit was raised from the worker.
[2m[36m(RolloutWorker pid=3481922)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=3481922)[0m   File "python/ray/_raylet.pyx", line 799, in ray._raylet.task_execution_handler
[2m[36m(RolloutWorker pid=3481922)[0m   File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=3481922)[0m   File "python/ray

[2m[36m(RolloutWorker pid=3481497)[0m <<<<<<<<<<<<<<< Reward = 1.9972334084265335 GFLOPS >>>>>>>>>>>>>>>




[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9704306492976913 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:54.736828 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:15:54.892915 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:15:54.872923 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0237505669371885 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.026550036551597 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3481497)[0m Action = down
[2m[36m(RolloutWorker pid=3481497)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3481497)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3481497)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3481497)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3481497)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3481497)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3481497)[0m observ

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:15:55.356957 140584911554112 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3481497)[0m 
[2m[36m(RolloutWorker pid=3481497)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141146-393331-f40e
[2m[36m(RolloutWorker pid=3481497)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.996700969286643 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0097990969497253 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.011346934117098 GFLOPS >>>>>>>>>>>>>>>




[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3481497)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3481497)[0m observation_space.name stride_tensor
[2m[36m(RolloutWo

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:15:57.452623 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:15:57.525079 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 



[2m[36m(PPOTrainer pid=3481284)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9945133459602147 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9949108252929129 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer p

[2m[36m(PPOTrainer pid=3534592)[0m 2022-06-30 14:15:58,570	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=3534592)[0m 2022-06-30 14:15:58,803	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=3534592)[0m 2022-06-30 14:15:58,803	INFO trainer.py:903 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 35.43055051063937 GFLOPS >>>>>>>

[2m[36m(PPOTrainer pid=3481284)[0m E0630 14:15:59.269454 140638611818048 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3481284)[0m 
[2m[36m(PPOTrainer pid=3481284)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141020-925002-659b
[2m[36m(PPOTrainer pid=3481284)[0m 


[2m[36m(PPOTrainer pid=3481284)[0m <<<<<<<<<<<<<<< Reward = 2.034997084061509 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.05589632837843 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 36.33099171048187 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:15:59.733176 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.064305798290501 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0455319777748624 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:00.778035 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.045532975366623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0589987531049654 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:04.403942 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.1280934141956984 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_spac

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:16:08.449079 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.139693383857862 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3534592)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3534592)[0m  

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,55.0,287.155,275.0,0.812902,27.2174,-0.123941,2.98913
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,59.0,288.905,295.0,2.1042,27.8989,-1.26218,2.9898
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,2.0,9.43441,10.0,9.17429,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,1.0,5.46705,5.0,-0.112995,-0.112995,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60.0,279.961,300.0,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60.0,280.127,300.0,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60.0,283.663,300.0,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60.0,278.765,300.0,4.0668,33.581,-0.127074,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 280
  counters:
    num_agent_steps_sampled: 280
    num_agent_steps_trained: 280
    num_env_steps_sampled: 280
    num_env_steps_trained: 280
  custom_metrics: {}
  date: 2022-06-30_14-15-57
  done: false
  episode_len_mean: 2.989247311827957
  episode_media: {}
  episode_reward_max: 27.217420858992803
  episode_reward_mean: 0.8043819869951566
  episode_reward_min: -0.12394140336676607
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 3d62e2a8608640d6929689d0a46136af
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0721626877784729
          cur_lr: 0.009999999776482582
          entropy: 0.1688702553510666
          entropy_coeff: 0.0
          kl: 4.7479210479650646e-05
          model: {}
          policy_loss: 4.371007555903361e-09
          total_loss: 0.000362607795977965
          vf_explained_var: -9.53674

[2m[36m(PPOTrainer pid=3534592)[0m 2022-06-30 14:16:10,006	INFO trainable.py:159 -- Trainable.setup took 11.437 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:10.020547 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,56.0,292.689,280.0,0.804382,27.2174,-0.123941,2.98925
PPOTrainer_compiler_gym_bc2ca_00004,RUNNING,100.37.253.28:3481284,0.8,0.001,59.0,288.905,295.0,2.1042,27.8989,-1.26218,2.9898
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,2.0,9.43441,10.0,9.17429,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,1.0,5.46705,5.0,-0.112995,-0.112995,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,,,,,,,
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60.0,279.961,300.0,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60.0,280.127,300.0,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60.0,283.663,300.0,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60.0,278.765,300.0,4.0668,33.581,-0.127074,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00004:
  agent_timesteps_total: 300
  counters:
    num_agent_steps_sampled: 300
    num_agent_steps_trained: 300
    num_env_steps_sampled: 300
    num_env_steps_trained: 300
  custom_metrics: {}
  date: 2022-06-30_14-15-59
  done: true
  episode_len_mean: 2.99
  episode_media: {}
  episode_reward_max: 27.898879460815934
  episode_reward_mean: 2.0618387245328087
  episode_reward_min: -1.2621811876728306
  episodes_this_iter: 2
  episodes_total: 100
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 0.04229746177395266
    episode_reward_mean: 0.04229746177395266
    episode_reward_min: 0.04229746177395266
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 0.04229746177395266
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.1

[2m[36m(RolloutWorker pid=3481497)[0m E0630 14:16:10.716919932 3481532 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(RolloutWorker pid=3481497)[0m 2022-06-30 14:16:10,720	ERROR worker.py:451 -- SystemExit was raised from the worker.
[2m[36m(RolloutWorker pid=3481497)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=3481497)[0m   File "python/ray/_raylet.pyx", line 799, in ray._raylet.task_execution_handler
[2m[36m(RolloutWorker pid=3481497)[0m   File "python/ray/_raylet.pyx", line 618, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=3481497)[0m   File "python/ray/_raylet.pyx", line 658, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=3481497)[0m   File "python/ray/_raylet.pyx", line 665, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=3481497)[0m   File "python/ray/_raylet.pyx", line 669, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=3481

[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0222508614675347 GFLOPS >>>>>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:11.100405 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.022387372422595 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0273709070643005 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0451140724270926 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- m

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:11.717251 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.042123908110111 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0242965548429255 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0694993496894494 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:12.720730 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0552636711189254 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.039488521293216 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:13.315611 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.041638819673663 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.044139291611351 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0394200959245676 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- mu

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:14.325387 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.037966398828424 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0563186523607464 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:14.939047 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:14.913523 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9863334629360654 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 12

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,56,292.689,280,0.804382,27.2174,-0.123941,2.98925
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,4,18.3087,20,8.05425,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,3,14.1904,15,6.83427,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,1,5.28865,5,-0.0748521,-0.0748521,-0.0748521,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.019667457974797 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['d

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:15.927394 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.047904004499789 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0385894411248078 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:17.487071 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:17.462633 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0534363926371477 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.044627562545762 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:17.733107 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.045045269066812 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.028673124350971 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=353

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:18.168220 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Action = down
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.050001955034213 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.01040406

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:16:18.725967 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.0133029647745517 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 2

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:19.621910 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0234781715142516 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.026276887044942 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:20.282598 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.022387372422595 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533185)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 1.940256100793486 GFLOPS >>>>>>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,58,302.979,290,0.771074,27.2174,-0.123941,2.98969
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,4,18.3087,20,8.05425,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,4,18.082,20,11.2865,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,2,9.32356,10,11.1974,33.6778,-0.0748521,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0181058221068096 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:20.835665 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0276453318469523 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_te

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:16:21.362762 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 0.7258868235130826 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 25
  counters:
    num_agent_steps_sampled: 25
    num_agent_steps_trained: 25
    num_env_

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:22.187666 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0211623587963343 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.051471875742834 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:23.018816 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.041638819673663 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.055544692114563 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multip

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:16:23.437711 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:23.628196 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.041221510014838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:24.089523 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 15
  counters:
    num_agent_steps_sampled: 15
    num_agent_steps_trained: 15
    num_env_steps_sampled: 15
    num_env_steps_trained: 15
  custom_metrics: {}
  date: 2022-06-30_14-16-24
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.67779295384028
  episode_reward_mean: 6.71222782887545
  episode_reward_min: -0.07485214438904952
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: f704b34bf2e248bba8b9e3f96107a68b
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 1.3861595392227173
          entropy_coeff: 0.0
          kl: 1.629168946237769e-05
          model: {}
          policy_loss: -0.004469314124435186
          total_loss: -0.004426528234034777
          vf_explained_var: -1.5894572769070692e-08
     

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:25.661515 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,58,302.979,290,0.771074,27.2174,-0.123941,2.98969
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,6,29.7211,30,4.81046,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,4,18.082,20,11.2865,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,3,14.1498,15,6.71223,33.6778,-0.0748521,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00002:
  agent_timesteps_total: 295
  counters:
    num_agent_steps_sampled: 295
    num_agent_steps_trained: 295
    num_env_steps_sampled: 295
    num_env_steps_trained: 295
  custom_metrics: {}
  date: 2022-06-30_14-16-25
  done: false
  episode_len_mean: 2.989795918367347
  episode_media: {}
  episode_reward_max: 27.217420858992803
  episode_reward_mean: 0.7633982680752198
  episode_reward_min: -0.12394140336676607
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 3d62e2a8608640d6929689d0a46136af
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.018040671944618225
          cur_lr: 0.009999999776482582
          entropy: 0.06326017528772354
          entropy_coeff: 0.0
          kl: 1.9147009879816324e-05
          model: {}
          policy_loss: -6.745258929186093e-08
          total_loss: 0.00016247878374997526
          vf_explained_var: 1.

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:16:26.142210 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0313494806001784 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:26.611083 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 35.59864880922069 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:27.007590 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0176990469299905 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0141808415054454 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:27.800213 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.029838495802207 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.005905370596917 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:28.818665 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:29.011910 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.022999948391677 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0202783870927346 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:30.214518 140413595407936 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141312-078891-7a4f
[2m[36m(RolloutWorker pid=3480646)[0m 


[2m[36m(RolloutWorker pid=3480646)[0m <<<<<<<<<<<<<<< Reward = 2.0091261557386235 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3480646)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3480646)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3480646)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3480646)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3480646)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3480646)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3480646)[0m 
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3480646)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:30.415544 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.921753553199483 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:31.395452 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.036238930513164 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:31.711272 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0490915815016693 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.017834944020444 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.013774680035164 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:16:32.599758 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0247734363572554 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0348588655489497 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3480438)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3533624)[0m observa

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:33.761301 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:33.803749 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9728255220153532 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.038935299851004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0497214457454502 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(PPOTrainer pid=3480438)[0m E0630 14:16:34.630404 140002956658240 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3480438)[0m 
[2m[36m(PPOTrainer pid=3480438)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141354-670083-5508
[2m[36m(PPOTrainer pid=3480438)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00002,RUNNING,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,7,34.4879,35,4.37252,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,6,29.5215,30,6.74534,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,4,18.0579,20,11.1868,33.6778,-0.0748521,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(PPOTrainer pid=3480438)[0m <<<<<<<<<<<<<<< Reward = 2.011885283842816 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0397652446081933 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3480646)[0m E0630 14:16:35.306474971 3480679 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:16:35.278052 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0436532453434006 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:36.529459 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.047904004499789 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:36.926654 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0704984731550744 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.039974572689376 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:37.158612 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0558257899700227 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.068857273498991 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m obse

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:38.615910 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.979719026770054 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:39.558351 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0478350132923864 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.03845073420704 GFLOPS >>>>>>>>>>>>>>>


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,8,38.8237,40,3.69868,27.6449,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,8,38.6704,40,5.18682,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,6,29.4909,30,6.69424,33.6778,-0.0903224,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:39.800044 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9650145936500054 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 45
  counters:
    num_agent_steps_sampled: 45
    num_agent_steps_trained: 45
    num_env_steps_sampled: 45
    num_env_steps_trained: 45
  c

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:41.957394 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.066081237312673 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.05589632837843 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_558

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:42.382866 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0568117511251818 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward =

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:42.905187 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0682166154416635 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:44.611589 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0488123220730095 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWork

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:45.632908 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.037551578769416 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.042610221856001 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.081837999674395 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- mult

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:47.194842 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:16:47.289800 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 1.983707722751299 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.069642314357559 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 50
  counters:
    num_agent_steps_sampled: 50
    num_agent_steps

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,10,51.1401,50,4.73474,27.7685,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,9,43.575,45,4.49266,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,7,34.2783,35,6.08848,33.6778,-0.0903224,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.970495449255903 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 12

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:47.716340 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0681442093200766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_t

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:48.024839 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0445577952743625 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0341009721657772 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:50.658540 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0657973281532893 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0754356787751362 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- w

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:51.044006 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.084295026131309 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.027508110169725 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Rollout

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:16:52.106235 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0260028325273063 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:52.863858 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0945453383264527 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.050562201500801 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_down)
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:53.377266 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0462295318698858 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward =

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:53.681168 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,11,55.555,55,4.20576,27.7685,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,10,50.9883,50,6.32161,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,9,43.4905,45,4.45148,33.6778,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:55.504102 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.071640779619377 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:55.936537 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00006:[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.058010694599414 GFLOPS >>>>>>>>>>>>>>>

  agent_timesteps_total: 60
  counters:
    num_agent_steps_sampled: 60
    num_agent_steps_trained: 60
    num_env_steps_sampled: 60
    num_env_steps_trained: 60
  custom_metrics: {}
  date: 2022-06-30_14-16-56
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.76852883942572
  episode_reward_mean: 3.7821565869301366
  episode_reward_min: -0.12526150434662986
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: 22076fbffbd2490c9550a6b4ccaff3ea
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 9.999999747378752e-05
          entropy: 1.3861041069030762
          entropy_coeff: 0.0
          kl: 2.2749571144231595e-05
          model: {}
          policy_loss: -0.0053646

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:16:56.597288 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.023682217107898 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0412225034066576 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.043097762497601 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:16:57.918667 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0554741778331658 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0675050118821168 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:16:58.245379 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0594213416005367 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward =

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,13,63.1694,65,3.60193,27.7685,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,11,55.4289,55,5.61738,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,9,43.4905,45,4.45148,33.6778,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:00.409075 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0323800478742475 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:00.825448 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.063312641030775 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 60
  counters:
    num_agent_steps_sampled: 60
    num_agent_steps_trained: 60
    num_env_steps_sampled: 60
    num_env_steps_trained: 60
  custom_metrics: {}
  date: 2022-06-30_14-17-00
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 5.051851473273695
  episode_reward_min: -0.11299523638807041
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: 096d5d6c456c48bb8af81583f76a8187
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
     

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:17:01.575961 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0356173239153628 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0474151670543326 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 50
  counters:
    num_agent_steps_sampled: 50
    num_agent_steps_trained: 50
    num_env_steps_sampled: 50
    num_env_steps_trained: 50
  custom_metrics: {}
  date: 2022-06-30_14-17-01
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.67779295384028
  episode_reward_mean: 4.169735936060112
  episode_reward_min: -0.1167695097284771
  episodes_this_iter: 1
  episodes_total: 16
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.06125438573092845
    episode_reward_mean: -0.06125438573092845
    episode_reward_min: -0.06125438573092845
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
    

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:02.243737 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:02.318669 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.878003433513083 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:02.807705 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0393506835754223 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward =

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:04.483513 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'swap_up', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:05.032995 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0557562639901623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0469974909748787 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:05.575815 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,14,66.7999,70,4.49466,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,13,63.0391,65,4.8103,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,11,55.2639,55,3.70564,33.6778,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0582924673624308 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(Rollo

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:06.840661 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.744727324303238 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:07.210516 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.974707311113444 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0349279724581737 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=35

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:07.741281 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.583602533538347 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:09.499336 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.96353813851912 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0783800711969493 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:09.786438 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.042679856272774 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 60
  counters:
    num_agent_steps_sampled: 60
    num_agent_steps_trained: 60
    num_env_steps_sampled: 60
    num_env_steps_trained: 60
  custom_metrics: {}
  date: 2022-06-30_14-17-09
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 33.67779295384028
  episode_reward_mean: 3.3316239267099084
  episode_reward_min: -0.1167695097284771
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: f704b34bf2e248bba8b9e3f96107a68b
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 9.999999747378752e-05
          entropy: 1.3851360082626343
          entropy_coeff: 0.0
          kl: 2.

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:17:10.478288 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.0680024711676563 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.060128402415981 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observatio

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:11.708917 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:11.788590 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0611893979787657 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0804687213237054 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.077158312661477 GFLOPS >>>>>>>>>>>>>>>


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,15,73.9014,75,5.23636,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,14,66.6387,70,5.86178,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,13,62.8914,65,3.17477,33.6778,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0632415928015106 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_sp

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:13.787139 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0469964919541397 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'swap_up', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:14.054500 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9730853779755844 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0686440168044182 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=353391

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:17:14.466616 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.05589632837843 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.053154942732

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:15.866244 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 36.06886469566414 GFLOPS >>>>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:16.452464 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:17.075259 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[3

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:18.619017 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0314173654707326 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0730701

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,16,78.8635,80,5.03596,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,16,77.3586,80,6.47958,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,14,66.4987,70,4.37245,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:19.227961 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0200069255112623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.045

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:19.731575 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0521022003947347 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.080324257084444 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observatio

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:21.269378 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:21.380009 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.069213479598462 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:21.889147 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.037828766770543 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3534592)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 1.9945133459602147 GFLOPS >>>>>>>>>>

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:17:23.593865 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.049231740367379 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 75
  counters:
    num_agent_steps_sampled: 75
    num_agent_steps_trained: 75
    num_env_steps_sampled: 75
    num_env_steps_trained: 75
  custom_metrics: {}
  date: 2022-

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:23.826524 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,18,87.1819,90,4.36245,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,17,81.0871,85,6.01667,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,15,73.095,75,5.36871,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.035066200359434 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:24.551829 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0497915658705606 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.044070553823046 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:26.218559 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.073500615232352 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0677180337890135 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m o

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:26.532276 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9998931941806084 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0604816088080433 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=353

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:27.129950 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.03886592049777 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.051612360435415 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0369994074966247 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_562

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:28.865911 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0662949838585485 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0720695975219967 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:29.208017 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:29.392867 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.072999483023296 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0374822935510637 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=35

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:30.999639 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,19,91.2649,95,4.22,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,19,89.3027,95,6.51991,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,16,77.3384,80,5.15913,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.066579029832656 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:31.300320 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.1289229821237234 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057375726393766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:32.052378 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.042610221856001 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.043097762497601 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:33.689161 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.085235919338938 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0943257074577857 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Action = down
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:34.148075 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0582217644365643 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:17:34.887411 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00006:
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.043236107220713 GFLOPS >>>>>>>>>>>>>>>
  agent_timesteps_total: 100
  counters:
    num_agent_steps_sampled: 100
    num_agent_steps_trained: 100
    num_env_steps_sampled: 100
    num_env_steps_trained: 100
  custom_metrics: {}
  date: 2022-06-30_14-17-34
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.845623385638838
  episode_reward_mean: 3.967392224814192
  episode_reward_min: -0.12526150434662986
  episodes_this_iter: 2
  episodes_total: 33
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: 33.57299615224119
    episode_reward_mean: 33.57299615224119
    episode_reward_min: 33.57299615224119
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - 33.57299615224119
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_r

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:36.289919 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,20,98.1023,100,3.96739,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,19,89.3027,95,6.51991,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,18,85.693,90,4.46793,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9944479051255593 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:36.588623 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0561775224219367 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Action = down
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 i

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:17:37.741891 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3533918)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 23.151333837466673 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:38.542845 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:38.687858 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0374822935510637 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0725702

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:39.385881 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0589987531049654 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.031691902739679 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:41.186435 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.065583684549037 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0661524812476015 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 i

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:41.460079 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:41.525074 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0737148744885308 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0819103344415533 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0611184958955064 GFLOPS >>>>>>>>>>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,21,101.88,105,3.73846,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,21,99.8653,105,5.77355,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,19,89.3095,95,5.40675,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0513314102883884 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:43.281158 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0656548941736403 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.075005466654398 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:44.193160 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0454631462931716 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0443485

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:44.313860 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0436532453434006 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0465080876099604 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.068572600945339 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m obse

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:17:46.858192 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:46.986591 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,22,106.304,110,3.63562,27.8456,-0.125262,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,22,104.31,110,5.61213,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,20,96.1204,100,5.08018,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:47.043377 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0827053145853784 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0976911066143997 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0992312885913496 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:48.524591 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0744317858653103 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_t

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:49.696745 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:49.807988 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0618257092112295 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0594921269662945 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.080827905084611 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:50.625956 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.035134333876611 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0374822935510637 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:51.870846 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


  agent_timesteps_total: 120
  counters:
    num_agent_steps_sampled: 120
    num_agent_steps_trained: 120
    num_env_steps_sampled: 120
    num_env_steps_trained: 120
  custom_metrics: {}
  date: 2022-06-30_14-17-52
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.845623385638838
  episode_reward_mean: 3.236441487619678
  episode_reward_min: -1.358377435662473
  episodes_this_iter: 2
  episodes_total: 40
  experiment_id: 22076fbffbd2490c9550a6b4ccaff3ea
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.3841858265427618e-08
          cur_lr: 9.999999747378752e-05
          entropy: 1.384562373161316
          entropy_coeff: 0.0
          kl: 1.658540713833645e-05
          model: {}
          policy_loss: -0.002140290103852749
          total_loss: 0.42410093545913696
          vf_explained_var: -4.7683716530855236e-08
          vf_loss: 0.4262412190437317
        n

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,24,114.946,120,3.23644,27.8456,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,23,109.169,115,5.31646,34.3304,-0.112995,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,21,99.8818,105,4.78737,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(Ro

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:52.462274 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.074502580822579 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0535771465278256 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[3

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:53.508263 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0828490753992335 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.063312641030775 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m o

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:54.528155 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:54.648892 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0385894411248078 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 0.7084439246637838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.040112494759032 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:56.199924 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0469275618779808 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.056812759749415 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 i

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:17:57.294299 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m Action = up
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0615419550396235 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.0445567986335513 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:17:58.137684 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,25,121.114,125,3.15659,27.8456,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,24,112.921,120,5.01382,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,22,104.343,110,4.65472,34.0002,-0.11677,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.078452165863724 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0669344517821706 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for m_5586 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L5  
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:17:58.879704 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:17:58.977835 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.038935299851004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.050351697332536 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0657261087082217 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:18:01.582756 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:01.645417 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:01.658577 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


  agent_timesteps_total: 125
  counters:
    num_agent_steps_sampled: 125
    num_agent_steps_trained: 125
    num_env_steps_sampled: 125
    num_env_steps_trained: 125
  custom_metrics: {}
  date: 2022-06-30_14-18-01
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 4.891700511738648
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 1
  episodes_total: 41
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.08106737927997498
    episode_reward_mean: -0.08106737927997498
    episode_reward_min: -0.08106737927997498
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.08106737927997498
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.17017126083374023
      mean_env_render_ms: 0.0

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:02.792549 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0471363643462914 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:03.816033 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,26,125.203,130,2.97758,27.8456,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,25,119.745,125,4.8917,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,24,112.918,120,4.15111,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9694619354172243 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_sp

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:04.336163 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.063880187497355 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.044627562545762 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Re

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:05.572920 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.970560253476302 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0451140724270926 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0397662365830898 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:06.452828 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:07.600534 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057657325298226 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:08.298016 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0521734875858377 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0323800478742475 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.038935299851004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actio

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:10.254667 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0672910149347747 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0563892245954563 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3534592)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=353

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:18:10.758650 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,27,130.549,135,2.84312,27.8456,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,26,124.297,130,4.66205,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,25,119.776,125,4.04963,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.039557943025864 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:11.525163 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.032861731316258 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 135
  counters:
    num_agent_steps_sampled: 135
    num_agent_steps_trained: 135
    num_env_steps_sampled: 135
    num_env_steps_trained: 135

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:11.980056 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0786674497035116 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0464391904221486 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0502113852294785 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:13.598745 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.076296638415436 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'd

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:14.156760 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0636679555866295 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0665077564791705 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m obs

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:14.761051 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0469275618779808 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_ne

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:15.410464 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.68599112457445 GFLOPS >>>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,29,138.852,145,3.23882,27.8456,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,27,129.644,135,4.45385,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,26,124.348,130,3.85902,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(Roll

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:17.454477 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:17.531362 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.070426929197913 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0722118937527387 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=35

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:18.147787 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0681442093200766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0327238027111685 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:19.308112 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0912630033824753 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0713543

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:20.652198 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:20.701909 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.98244359934807 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:22.049873 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.085380029732162 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533185)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:18:23.432609 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:23.414532 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,30,146.211,150,3.66629,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,29,137.923,145,4.87569,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,27,129.668,135,3.68664,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.064376919832283 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:24.534992 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.044139291611351 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.031279632788308 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:25.963427 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.641933865256995 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:26.731115 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0361005432606025 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.055122685949644 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Action = up
[2m[36m(PPOTrainer pid=3533918)[0m for n_5625 in 128 : 

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:18:27.204169 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0579390012997343 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 150
  counters:
    num_agent_steps_sampled: 150
    num_agent_steps_trained: 150
    num_env_steps_sampled: 150
    num_env_steps_trained: 150
  custom_metrics: {}
  date: 2022-06-30_14-18-27
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 5.348194278695393
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 2
  episodes_total: 50
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.06865349818818078
    episode_reward_mean: -0.06865349818818078
    episode_reward_min: -0.06865349818818078
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.06865349818818078
    off_policy_estimator: {}
    policy_reward_max: {}


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:27.904226 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 24.15711932544665 GFLOPS >>>>>>>>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:28.621988 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 35.91813246099303 GFLOPS >>>>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,31,150.082,155,4.1351,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,30,145.162,150,5.34819,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,29,138.046,145,4.15997,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:29.726950 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 35.96093796887727 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:30.672489 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.045045269066812 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:31.350991 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:31.655391 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 35.83221419174056 GFLOPS >>>>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:32.881934 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0574453619595428 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:33.830152 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.050841938193649 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[3

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:34.438278 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0764395158296 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,32,153.968,160,4.3949,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,32,152.945,160,6.32106,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,29,138.046,145,4.15997,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9932555478251612 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_sp

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:35.517115 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.091846032538466 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0267556815353904 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m obs

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:18:36.500917 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:36.676352 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.047276255854864 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0676466818500003 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0449754732868235 GFLOPS >>>>>>>>>>>>>>>
[2

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:38.282717 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0299760331122494 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:39.090260 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 35.72569695834007 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:39.305192 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0595629171982153 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(Rollo

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,34,162.566,170,4.13443,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,33,157.17,165,6.08951,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,31,149.129,155,5.23187,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 35.57751162080548 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:41.006051 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:41.024598 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0345824848436047 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0488823800093594 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m obs

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:42.076738 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.046787718016624 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0454631462931716 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m obser

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:43.652223 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:43.804530 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0321732600111435 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9611006428988895 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9593728219090027 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:44.782672 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0535761410744704 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533185)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:45.997747 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.039835677223534 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.0300448087569225 GFLOPS >

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:18:46.922326 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,35,169.477,175,3.99003,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,34,161.504,170,5.95683,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,32,153.072,160,5.66615,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9740585896152465 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_sp

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:47.412822 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0497915658705606 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (act

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:48.613666 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0405283583207776 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533918)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:49.185168 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0292217719618066 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9576489565329813 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m obs

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:18:50.682824 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.028809522427507 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.015058516055883 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = down)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['dummy', 'swap_up', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 0.6429711514818537 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m  

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:51.371301 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.060551454784392 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.045532975366623 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:51.837285 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0696412931113177 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0476950454155998 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:52.927898 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0445577952743625 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,36,174.385,180,3.85743,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,35,168.419,175,5.74881,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,34,161.609,170,5.3365,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0507015589873765 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[3

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:53.875211 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.922765764673148 GFLOPS >>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:54.116240 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9854796877048828 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0481130062352073 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for m_55

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:55.552744 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0714259257155754 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:18:56.739015 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.082344448625822 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy',

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:57.210966 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:18:57.573759 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 35.76834977784979 GFLOPS >>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:18:59.287779 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.070283856115335 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.073500615232352 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 185
  counters:
    num_agent_steps_sampled: 185
    num_agent_steps_trained: 185
    num_env_steps_sampled: 185
    num_env_steps_trained: 185
  custom_metrics: {}
  date: 2022-06-30_14-18-59
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 6.018907383295493
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 096d5d6c456c48bb8af81583f76a8187

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,37,178.461,185,4.2508,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,37,177.383,185,6.01891,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,34,161.609,170,5.3365,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9886351621539993 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0699987908608826 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0454631462931716 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_g

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:18:59.987485 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(Rollout

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:00.928453 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.068288006722189 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0431664348745278 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Roll

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:02.007234 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0563892245954563 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0696412931113177 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:02.250672 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0604816088080433 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Rewa

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:03.021751 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.958926246214605 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:04.770882 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:04.859879 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,39,187.196,195,3.98679,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,38,181.273,190,5.82734,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,35,168.493,175,5.14986,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0466478932619223 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:05.770694 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9720482190554507 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_sp

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:07.164188 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:07.492360 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.005235033898637 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.071497538731579 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=35352

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:08.777704 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0415692562149204 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0745036068719753 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 185
  counters:
    num_agent_steps_sampled: 185
    num_agent_steps_trained: 185
    num_env_steps_sampled: 185
    num_env_steps_trained: 185
  custom_metrics: {}
  date: 2022-06-30_14-19-09
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.00022067885972
  episode_reward_mean: 5.353071652328288
  episode_reward_min: -1.4296559309239536
  episodes_thi

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:10.503971 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.071426948723799 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:11.512749 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9734093657223786 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0687154375948213 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m for n_56

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:19:12.240456 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


  agent_timesteps_total: 200
  counters:
    num_agent_steps_sampled: 200
    num_agent_steps_trained: 200
    num_env_steps_sampled: 200
    num_env_steps_trained: 200
  custom_metrics: {}
  date: 2022-06-30_14-19-12
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.914299390027992
  episode_reward_mean: 3.9260428316931235
  episode_reward_min: -1.358377435662473
  episodes_this_iter: 1
  episodes_total: 66
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.06574221245880629
    episode_reward_mean: -0.06574221245880629
    episode_reward_min: -0.06574221245880629
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.06574221245880629
    off_policy_estimator: {}
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.139312744140625
      mean_env_render_ms: 0.0
 

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,40,194.559,200,3.92604,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,39,186.407,195,5.64705,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,37,177.724,185,5.35307,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['down', 'swap_up', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_t

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:12.608579 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0502815388804243 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:13.455194 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.042193509372521 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.051472879136703 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Roll

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:15.291244 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:15.395357 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.539848437896158 GFLOPS >>

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:19:16.270436 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 1.972695619700881 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.047905004406534 GFLOPS >>>>>>>>>>>>>>>
  agent_timesteps_total: 200
  counters:
    num_agent_steps_sampled: 200
    num_agent_steps_

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:17.469614 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:17.556803 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0699987908608826 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0715691566993417 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0677883712342875 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:18.061440 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,41,198.278,205,4.21436,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,40,193.806,200,5.56046,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,39,186.447,195,5.02325,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057868322654683 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:19.431186 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['down', 'down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 35.7470106449166 GFLOPS >>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:20.363790 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 210
  counters:
    num_agent_steps_sampled: 210
    num_agent_steps_trained: 210
    num_env_steps_sampled: 210
    num_env_steps_trained: 210
  custom_metrics: {}
  date: 2022-06-30_14-19-20
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.914299390027992
  episode_reward_mean: 4.094127449461514
  episode_reward_min: -1.358377435662473
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: 22076fbffbd2490c9550a6b4ccaff3ea
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.094947153254554e-14
          cur_lr: 9.999999747378752e-05
          entropy: 1.3832221031188965
          entropy_coeff: 0.0
          kl: 2.5598232241463847e-05
          model: {}
          policy_loss: -0.005509329028427601
          total_loss: -0.001211812486872077
          vf_explained_var: 1.5894572769070692e-

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:20.956366 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0740019897899646 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:21.601715 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0737148744885308 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dumm

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:23.062911 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.056671561981303 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0626754118289847 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0307986987174553 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:24.363630 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,42,202.643,210,4.09413,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,42,201.868,210,5.72321,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,39,186.447,195,5.02325,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3534592)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.016411845104258 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pi

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:19:25.712434 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9778286109849712 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dumm

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:26.366333 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0737866458610834 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.075936962214647 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:26.918515 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.070140802805000

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:27.086324 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.066650308102712 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0363081311939895 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Rol

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:28.870659 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.037551578769416 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0696412931113177 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:29.590514 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,43,207.375,215,4.03642,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,43,206.628,215,5.64285,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,41,197.726,205,5.29311,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0393506835754223 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_t

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:30.425148 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.086105030809349 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[3

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:31.055875 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0677180337890135 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.055333163783087 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.07056899989485 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:32.273268 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0657271260942904 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0290146447087145 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:33.733264 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:33.813833 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0447661145288287 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward =

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:34.894906 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.046926562925497 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = up)

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:36.369539 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.037966398828424 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m Action = down
[2m

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:36.524849 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.055052200618137 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0513314102883884 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3533185)[0m Action = up
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:19:37.545624 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.036445557918451 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 225
  counters:
    num_agent_steps_sampled: 225
    num_agent_steps_trained: 225
    num_env_steps_sampled: 225
    num_env_steps_trained: 225
  custom_metrics: {}
  date: 2022-06-30_14-19-37
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.914299390027992
  episode_reward_mean: 3.8201928660937523
  episode_reward_min: -1.358377435662473
  episodes_this_iter: 2
  episodes_total: 75
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.0038417745209606835
    episode_reward_mean:

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,45,219.67,225,3.82019,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,44,211.722,220,5.48767,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,42,202.037,210,5.14184,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.002627960275019 GFLOPS >

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:38.995203 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.058857255055959 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0587865133242036 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:39.866440 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0706415759037045 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0511919671872194 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=353

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:40.846458 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0696412931113177 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.076871351254995 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0453235024479683 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3533918)[0m 

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:19:42.303015 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.037966398828424 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 225
  counters:
    num_agent_steps_sampled: 225
    num_agent_steps_trained: 225
    num_env_steps_sampled: 225
    num_env_steps_trained: 225
  custom_metrics: {}
  date: 2022-06-30_14-19-42
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 5.340757519634555
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 2
  episodes_total: 75
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.010974844581753462
    episode_reward_mean: -0.010974844581753462
    episode_reward_min: -0.010974844581753462
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.010974844581753462
    off_policy_estimator: {}
    policy_reward_max: 

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:43.121432 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0694993496894494 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:43.615566 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.018240802849392 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.038935299851004 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,46,224.634,230,3.77032,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,45,219.623,225,5.34076,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,44,211.849,220,4.93083,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0483940758134134 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:45.602752 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0701408028050006 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0712121391690466 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:45.756903 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0465090861540323 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0521734875858377 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.034652559988552 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:46.888803 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0398346851810962 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward =

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:48.256074 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:48.345941 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.03045559462767 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0427494954375085 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Rewar

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:50.254769 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.065158563533735 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0877737204477484 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0814061623815836 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3534592)[0m 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:51.503486 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0637390331832464 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0686429965431405 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.na

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:19:51.651209 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,47,230.003,235,3.67304,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,46,224.51,230,5.27015,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,45,219.717,225,4.79847,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.075937989683425 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.065157546707632 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_t

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:53.557092 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0525942832981636 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 240
  counters:
    num_agent_steps_sampled: 240
    num_agent_steps_trained: 240
    num_env_steps_sampled: 240
    num_env_steps_trained: 240
  custom_metrics: {}
  date: 2022-06-30_14-19-53
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.914299390027992
  episode_reward_mean: 3.5812613091466092
  episode_reward_min: -1.358377435662473
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 22076fbffbd2490c9550a6b4ccaff3ea
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.421085492696024e-15
          cur_lr: 9.999999747378752e-05
          entropy: 1.3814617395401
          entropy_coeff: 0.0
          kl: 1.2132210258641862e-06
          model: {}
          policy_loss: 0.000169

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:54.859951 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:54.943315 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0568823572105326 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.063880187497355 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0636679555866295 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:56.422126 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.067575334835183 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_lo

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:19:57.641074 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.017766993187042 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0739302035112845 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 i

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:19:58.124089 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,49,240.039,245,3.5361,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,47,229.84,235,5.13413,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,46,224.611,230,4.73577,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 240
  counters:
    num_agent_steps_sampled: 240
    num_agent_steps_trained: 240
    num_env_steps_sampled: 240
    num_env_steps_trained: 240
  custom_metrics: {}
  date: 2022-06-30_14-19-58
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 5.005701743020019
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 096d5d6c456c48bb8af81583f76a8187
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.421085492696024e-15
          cur_lr: 9.999999747378752e-05
          entropy: 1.3818988800048828
          entropy_coeff: 0.0
          kl: 2.8337562980595976e-06
          model: {}
          policy_loss: 0.00030887097818776965
          total_loss: 0.002223087241873145
          vf_explained_var: -3.1789145538141383

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:19:59.076112 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 1.9863983723511873 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0459510519494395 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:00.884329 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.072927766125063 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )


[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:01.047884 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9959712284992992 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0670770622823236 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=353

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:01.627873 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0286034604673477 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0435835445467947 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pi

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:03.691172 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0744317858653103 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.048813322866939 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:04.223920 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.040875868367695 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.0458113414798915 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3533185)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0778055525006662 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m 

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:20:05.498875 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,50,247.405,250,3.4497,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,49,239.824,245,4.94313,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,47,229.967,235,4.61374,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.048324051265923 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:06.296766 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0755065422706593 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0749336108945413 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m 

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:07.151387 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.050841938193649 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0378980755551583 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:07.476275 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0306610499122 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.052593278807018 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 's

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:09.729766 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0616838223619958 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.079172357042475 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.nam

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:20:10.146614 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0660099982907645 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:10.342843 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.080901202062493 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:11.814443 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'up', 'down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.074502580822579 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< 

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:11.976706 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,51,251.632,255,3.35262,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,50,247.209,250,4.82356,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,49,239.939,245,4.44176,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0690715948504868 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:13.005977 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0488823800093594 GFLOPS >>>>>>>>>>>>>>>

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:14.379654 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:15.305256 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0619676155913567 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0627464210697664 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=353

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:16.251104 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.055966871627557 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.028467090095811 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(Roll

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:16.616726 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.069784277392182 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(R

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:17.766070 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.07895593815133 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0680738476649076 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,52,255.912,260,3.31293,27.9143,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,52,255.724,260,4.63866,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,49,239.939,245,4.44176,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.047904004499789 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0584328775728546 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(Rollou

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:20.008889 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:20:20.157025 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.039280284447798 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.038105039838324 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 250
  counters:
    num_agent_steps_sampled: 250
    num_agent_ste

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:20.481989 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.040320901606649 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0694983285842925 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:21.828581 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.066579029832656 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:22.483486 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.050841938193649 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = 

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:23.111375 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0680738476649076 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 270
  counters:
    num_agent_steps_sampled: 270
    num_agent_steps_trained: 270
    num_env_steps_sampled: 270
    num_env_steps_trained: 27

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:24.434576 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,54,264.898,270,3.475,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,53,260.471,265,4.91452,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,51,252.305,255,4.21554,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for m_5586 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 0.6929692837275543 GFLOPS >

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:25.171228 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0404598631619097 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:25.420720 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0312107734506966 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = up
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'up']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_ten

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:26.712310 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9872529380215334 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:27.788019 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00007:[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0427494954375085 GFLOPS >>>>>>>>>>>>>>>

  agent_timesteps_total: 270
  counters:
    num_agent_steps_sampled: 270
    num_agent_steps_trained: 270
    num_env_steps_sampled: 270
    num_env_steps_trained: 270
  custom_metrics: {}
  date: 2022-06-30_14-20-27
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 4.804251404790347
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 2
  episodes_total: 90
  experiment_id: 096d5d6c456c48bb8af81583f76a8187
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.2204460823375376e-17
          cur_lr: 9.999999747378752e-05
          entropy: 1.3760967254638672
          entropy_coeff: 0.0
          kl: 8.300196896016132e-06
          model: {}
          policy_loss: -0.00

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:20:29.752513 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,55,271.429,275,3.43565,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,54,264.705,270,4.80425,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,52,256.62,260,4.16584,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_up
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'swap_down', 'swap_up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name strid

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:30.051440 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:30.047596 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.090826205315817 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0886439090365396 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0488123220730095 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:30.978154 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0488123220730095 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0317617789076365 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0244323650162515 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:32.523017 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 35.64099862340884 GFLOPS >>>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:34.222188 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.054489498322582 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533918)[0m for n_5625 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'swap_up', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.9873188

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:20:34.345686 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0569529681435705 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00006:
[2m[36m(RolloutWorker pid=35

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,56,276.453,280,3.36185,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,55,271.173,275,4.75076,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,53,261.334,265,4.4527,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 1.97802448630032 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0603399068837085 GFLOPS >>>>>>>>>>>>>>>


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:35.231843 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.068145229089407 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_spa

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:35.541481 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.047904004499789 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nes

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:37.147131 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0383813878261954 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 1, actions

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:37.845343 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m Action = up
[2m[36m(RolloutWorker pid=3533624)[0

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:38.783701 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0745036068719753 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Action = down
[2m[36m(RolloutWorker pid=3533624)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:39.327496 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0441402878451673 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0563186523607464 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 0.7265981836015523 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00007:
  agent_timesteps_total: 280
  counters:
    num_agent_steps_sampled: 280
    num_agent_steps_trained: 280
    num_env_steps_sampled: 280
    num_env_steps_trained: 280
  custom_metrics: {}
  date: 2022-06-30_14-20-39
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.330365904374915
  episode_reward_mean: 4.648718571700422
  episode_reward_min: -1.3660586561587955
  episodes_this_iter: 2
  episodes_total: 93
  experiment_id: 096d5d6c456c48bb8af81583f76a8187
  hostname: codah
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:40.112721 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.069570829554527 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = up
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'up']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0411

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:41.448071 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0640223768295094 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)


[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:41.911640 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3533624)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 29.833587026104276 GFLOPS >>>>>>

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,58,284.613,290,3.54527,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,56,276.193,280,4.64872,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,54,265.563,270,4.35277,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'up', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 1.9859393939393937 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 1

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:43.566354 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0373437372493095 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_up
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m >>> AGENT ITERATION = 2, actions = ['swap_down', 'swap_up']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0460199162724355 GFLOPS 

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:44.049997 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0464391904221486 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.054279193237141 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3534592

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:20:44.447818 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


Result for PPOTrainer_compiler_gym_bc2ca_00008:[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0570235839247952 GFLOPS >>>>>>>>>>>>>>>

  agent_timesteps_total: 275
  counters:
    num_agent_steps_sampled: 275
    num_agent_steps_trained: 275
    num_env_steps_sampled: 275
    num_env_steps_trained: 275
  custom_metrics: {}
  date: 2022-06-30_14-20-44
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.00022067885972
  episode_reward_mean: 4.304309944618052
  episode_reward_min: -1.4296559309239536
  episodes_this_iter: 1
  episodes_total: 91
  evaluation:
    custom_metrics: {}
    episode_len_mean: 3.0
    episode_media: {}
    episode_reward_max: -0.08830326906313757
    episode_reward_mean: -0.08830326906313757
    episode_reward_min: -0.08830326906313757
    episodes_this_iter: 1
    hist_stats:
      episode_lengths:
      - 3
      episode_reward:
      - -0.08830326906313757
    off_policy_estimator: {}
    policy_reward_max: {}
 

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:45.657850 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.063454752169693 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[3

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:46.152770 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker p

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:47.280617 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057375726393766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWorker pid=3534131)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3534131)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3534131)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3534131)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3534131)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3534131)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:48.255054 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.044836892931247 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.061259291949287 GFLOPS >>>>>>>>>>>>>>>
Result for PPOTrainer_compiler_gym_bc2ca_00006:
  agent_timesteps_total: 295
  counters:
    num_agent_steps_sampled: 295
    num_agent_steps_trained: 295
    num_env_steps_sampled: 295
    num_env_steps_trained: 295
  custom_metrics: {}
  date: 2022-06-30_14-20-48
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 27.934707484610307
  episode_reward_mean: 3.4727377357832494
  episode_reward_min: -1.358377435662473
  episodes

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,59,289.591,295,3.47274,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,58,284.284,290,4.85807,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,55,272.081,275,4.30431,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0387965458659516 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m observation_spac

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:48.867978 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.040250435479689 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.057940011029838 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:50.626987 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.0435835445467947 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3533624)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3533624)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3533624)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3533624)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3533624)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.042679856272774 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:51.514665 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:51.529109 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3533624)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0399051225921356 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0323800478742475 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057375726393766 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625]

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:53.184330 139624798209600 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3533624)[0m 
[2m[36m(RolloutWorker pid=3533624)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141529-289655-6f35
[2m[36m(RolloutWorker pid=3533624)[0m 


[2m[36m(RolloutWorker pid=3533624)[0m <<<<<<<<<<<<<<< Reward = 2.057305086433414 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for n_5625 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for m_5586 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 3, actions = ['swap_down', 'up', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flop

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:53.683377 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00006,RUNNING,100.37.253.28:3533185,0.5,0.0001,59,289.591,295,3.47274,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,59,289.27,295,4.75871,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,57,281.259,285,4.12197,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0


[2m[36m(PPOTrainer pid=3533185)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3534131)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.055966871627557 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3534131)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m Action = down
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533185)[0m   for k_5587 in 128

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:54.881459 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 2.044070553823046 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0474861289695845 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0417083878731117 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=

[2m[36m(PPOTrainer pid=3533185)[0m E0630 14:20:55.692075 139868898326080 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141532-974352-659b
[2m[36m(PPOTrainer pid=3533185)[0m 


[2m[36m(PPOTrainer pid=3533185)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3533185)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533185)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3533185)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3533185)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533185)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533185)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3533185)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3533185)[0m 
[2m[36m(PPOTrainer pid=3533185)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533185)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3533185)[0m <<<<<<<<<<<<<<< Reward = 35.493213282334224 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:56.251977 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m Action = swap_down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for k_5587 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L5  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 36.003845625600874 GFLOPS >>>>>>

[2m[36m(RolloutWorker pid=3533624)[0m E0630 14:20:56.810240394 3533689 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"


[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 1, actions = ['down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0753637932169124 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3534131)[0m Action = down
[2m[36m(RolloutWor

[2m[36m(RolloutWorker pid=3534131)[0m E0630 14:20:57.444013 139843212535360 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3534131)[0m 
[2m[36m(RolloutWorker pid=3534131)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141545-298717-6f35
[2m[36m(RolloutWorker pid=3534131)[0m 


[2m[36m(RolloutWorker pid=3534131)[0m <<<<<<<<<<<<<<< Reward = 2.0764395158296 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3535289)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m >>> AGENT ITERATION = 2, actions = ['down', 'down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.06

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:20:58.399421 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(PPOTrainer pid=3533918)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3533918)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.068286986812051 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3533918)[0m <<<<<<<<<<<<<<< Reward = 2.0556152112341963 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3533918)[0m Action = down
[2m[36m(PPOTrainer pid=3533918)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3533918)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3533918)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3533918)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3533918)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3533918)[0m   %4[m

[2m[36m(PPOTrainer pid=3533918)[0m E0630 14:20:59.977487 140280020608576 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3533918)[0m 
[2m[36m(PPOTrainer pid=3533918)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141548-833774-659b
[2m[36m(PPOTrainer pid=3533918)[0m 


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00007,RUNNING,100.37.253.28:3533918,0.8,0.0001,60,296.565,300,4.66392,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,58,285.192,290,4.43279,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,TERMINATED,100.37.253.28:3533185,0.5,0.0001,60,296.862,300,3.40319,27.9347,-1.35838,3.0


[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.083282546447029 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.099818769837694 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:21:01.713280 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.1445837589977534 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
Result for PPOTrainer_compiler_gym_bc2ca_00008:
  agent_timesteps_total: 295
  counters:
    num_agent_steps_sampled: 295
    num_agent_steps_trained: 295
    num_env_steps_sampled: 295
    num_env_steps_trained: 295
  custom_metrics: {}
  date: 2022-06-30_14-21-02
  done: false
  episode_len_mean: 3.0
  episode_media: {}
  episode_reward_max: 34.00022067885972
  episode_reward_mean: 4.342500225019916
  episode_reward_min: -1.4296559309239536
  episodes_this_iter: 2
  episodes_total: 98
  experiment_id: f704b34bf2e248bba8b9e3f96107a68b
  hostname: codah
  info:
 

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:21:05.162385 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.1032008055166656 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m ACTION_NOT_AVAILABLE (action = swap_up)
[2m[36m(RolloutWorker pid=3535289)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3535289)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.0825605395794367 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3535289)[0m Action = down
[2m[36m(RolloutWorker pid=3535289)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3535289)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(RolloutWorker pid=3535289)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3535289)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3535289)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36

[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:21:07.800545 140445066905152 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3535289)[0m 
[2m[36m(RolloutWorker pid=3535289)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141603-370538-6f35
[2m[36m(RolloutWorker pid=3535289)[0m 


[2m[36m(RolloutWorker pid=3535289)[0m <<<<<<<<<<<<<<< Reward = 2.125382391616648 GFLOPS >>>>>>>>>>>>>>>


Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00008,RUNNING,100.37.253.28:3534592,0.9,0.0001,59,290.295,295,4.3425,34.0002,-1.42966,3.0
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,TERMINATED,100.37.253.28:3533185,0.5,0.0001,60,296.862,300,3.40319,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,TERMINATED,100.37.253.28:3533918,0.8,0.0001,60,296.565,300,4.66392,34.3304,-1.36606,3.0


[2m[36m(PPOTrainer pid=3534592)[0m ACTION_NOT_AVAILABLE (action = up)
[2m[36m(PPOTrainer pid=3534592)[0m Actions = ['down', 'dummy', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 2.0746462376143597 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=3534592)[0m Action = down
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m[36m(PPOTrainer pid=3534592)[0m   for k_5587 in 128 : L2  
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m >>>

[2m[36m(PPOTrainer pid=3534592)[0m E0630 14:21:10.413963 140171274081856 example_service.py:249] CRITICAL - 
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T141607-414368-659b
[2m[36m(PPOTrainer pid=3534592)[0m 


[2m[36m(PPOTrainer pid=3534592)[0m Action = swap_down
[2m[36m(PPOTrainer pid=3534592)[0m for m_5586 in 128 : L0  
[2m[36m(PPOTrainer pid=3534592)[0m  for k_5587 in 128 : L1  
[2m[36m(PPOTrainer pid=3534592)[0m   for n_5625 in 128 : L2  <<<<<< cursor (line 2 )
[2m[36m(PPOTrainer pid=3534592)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(PPOTrainer pid=3534592)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(PPOTrainer pid=3534592)[0m  for n_5625 in 128 : L5  
[2m[36m(PPOTrainer pid=3534592)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(PPOTrainer pid=3534592)[0m 
[2m[36m(PPOTrainer pid=3534592)[0m >>> AGENT ITERATION = 2, actions = ['down', 'swap_down']
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name stride_tensor
[2m[36m(PPOTrainer pid=3534592)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(PPOTrainer pid=3534592)[0m <<<<<<<<<<<<<<< Reward = 32.35721504339441 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(PPOTrainer pid=35345

Trial name,status,loc,gamma,lr,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_compiler_gym_bc2ca_00000,TERMINATED,100.37.253.28:3479764,0.5,0.01,60,279.961,300,1.03133,27.3879,-1.26335,3.0
PPOTrainer_compiler_gym_bc2ca_00001,TERMINATED,100.37.253.28:3480016,0.8,0.01,60,280.127,300,2.80905,33.4941,-1.29175,3.0
PPOTrainer_compiler_gym_bc2ca_00002,TERMINATED,100.37.253.28:3480438,0.9,0.01,60,317.185,300,0.747815,27.2174,-0.123941,2.99
PPOTrainer_compiler_gym_bc2ca_00003,TERMINATED,100.37.253.28:3480856,0.5,0.001,60,283.663,300,1.00945,27.7553,-0.22959,3.0
PPOTrainer_compiler_gym_bc2ca_00004,TERMINATED,100.37.253.28:3481284,0.8,0.001,60,296.69,300,2.06184,27.8989,-1.26218,2.99
PPOTrainer_compiler_gym_bc2ca_00005,TERMINATED,100.37.253.28:3481705,0.9,0.001,60,278.765,300,4.0668,33.581,-0.127074,3.0
PPOTrainer_compiler_gym_bc2ca_00006,TERMINATED,100.37.253.28:3533185,0.5,0.0001,60,296.862,300,3.40319,27.9347,-1.35838,3.0
PPOTrainer_compiler_gym_bc2ca_00007,TERMINATED,100.37.253.28:3533918,0.8,0.0001,60,296.565,300,4.66392,34.3304,-1.36606,3.0
PPOTrainer_compiler_gym_bc2ca_00008,TERMINATED,100.37.253.28:3534592,0.9,0.0001,60,297.806,300,4.25577,34.0002,-1.42966,3.0


[2m[36m(RolloutWorker pid=3535289)[0m E0630 14:21:11.041180664 3535402 chttp2_transport.cc:1103]   Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings"
2022-06-30 14:21:11,146	INFO tune.py:747 -- Total run time: 722.97 seconds (722.39 seconds for the tuning loop).


In [11]:
# Best checkpoint, ranked by episode_reward_mean, of the FIRST trial only
# (analysis.trials[0]) -- not necessarily the best trial of the sweep.
# NOTE(review): `checkpoint` is not referenced by the later cells visible in this
# notebook, which use `trial.checkpoint.value` instead -- confirm this cell is still needed.
checkpoint = analysis.get_best_checkpoint(
    metric="episode_reward_mean",
    mode="max",
    trial=analysis.trials[0]
)

In [12]:
# Display the tuning results as a table (the rendered output shows one row per trial).
analysis.dataframe()

Unnamed: 0,episode_reward_max,episode_reward_min,episode_reward_mean,episode_len_mean,episodes_this_iter,num_healthy_workers,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_trained,...,config/horizon,config/log_level,config/lr,config/model,config/num_workers,config/rollout_fragment_length,config/seed,config/sgd_minibatch_size,config/train_batch_size,logdir
0,27.387861,-1.263353,1.031329,3.0,2,1,300,300,300,300,...,3,ERROR,0.01,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
1,33.49409,-1.291755,2.809052,3.0,2,1,300,300,300,300,...,3,ERROR,0.01,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
2,27.217421,-0.123941,0.747815,2.99,2,1,300,300,300,300,...,3,ERROR,0.01,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
3,27.755334,-0.22959,1.009447,3.0,2,1,300,300,300,300,...,3,ERROR,0.001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
4,27.898879,-1.262181,2.061839,2.99,2,1,300,300,300,300,...,3,ERROR,0.001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
5,33.581015,-0.127074,4.066796,3.0,2,1,300,300,300,300,...,3,ERROR,0.001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
6,27.934707,-1.358377,3.403192,3.0,2,1,300,300,300,300,...,3,ERROR,0.0001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
7,34.330366,-1.366059,4.663916,3.0,2,1,300,300,300,300,...,3,ERROR,0.0001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...
8,34.000221,-1.429656,4.255774,3.0,2,1,300,300,300,300,...,3,ERROR,0.0001,"{'fcnet_hiddens': [5, 5]}",1,5,204,5,5,/home/dejang/ray_results/PPOTrainer_2022-06-30...


In [13]:
# Select the trial with the highest episode_reward_mean, and the log directory
# chosen by the same metric/mode, then print that directory.
trial = analysis.get_best_trial(metric="episode_reward_mean", mode="max")
log_dir = analysis.get_best_logdir(metric="episode_reward_mean", mode="max")
print(log_dir)

/home/dejang/ray_results/PPOTrainer_2022-06-30_14-09-08/PPOTrainer_compiler_gym_bc2ca_00007_7_gamma=0.8000,lr=0.0001_2022-06-30_14-15-36


In [14]:
# Show the stopping criterion attached to the selected trial.
trial.stopping_criterion

{'episodes_total': 100}

In [15]:
# Show per-metric summary statistics for the selected trial
# (the rendered output lists max / min / avg / last / last-5-avg / last-10-avg).
trial.metric_analysis

{'episode_reward_max': {'max': 34.330365904374915,
  'min': -0.11299523638807041,
  'avg': 33.75630988536219,
  'last': 34.330365904374915,
  'last-5-avg': 34.330365904374915,
  'last-10-avg': 34.33036590437491},
 'episode_reward_min': {'max': -0.11299523638807041,
  'min': -1.3660586561587955,
  'avg': -0.8857176785800177,
  'last': -1.3660586561587955,
  'last-5-avg': -1.3660586561587955,
  'last-10-avg': -1.3660586561587955},
 'episode_reward_mean': {'max': 11.395243818909387,
  'min': -0.11299523638807041,
  'avg': 5.584340245943804,
  'last': 4.663915948312102,
  'last-5-avg': 4.695850718270553,
  'last-10-avg': 4.7281389568376415},
 'episode_len_mean': {'max': 3.0,
  'min': 3.0,
  'avg': 2.999999999999998,
  'last': 3.0,
  'last-5-avg': 3.0,
  'last-10-avg': 3.0},
 'episodes_this_iter': {'max': 2,
  'min': 1,
  'avg': 1.666666666666666,
  'last': 2,
  'last-5-avg': 1.8,
  'last-10-avg': 1.7},
 'num_healthy_workers': {'max': 1,
  'min': 1,
  'avg': 0.9999999999999994,
  'last': 1,

In [16]:
# Show the checkpoint path recorded on the selected trial.
trial.checkpoint.value

'/home/dejang/ray_results/PPOTrainer_2022-06-30_14-09-08/PPOTrainer_compiler_gym_bc2ca_00007_7_gamma=0.8000,lr=0.0001_2022-06-30_14-15-36/checkpoint_000060/checkpoint-60'

In [17]:
# Display RLlib's default PPO configuration dictionary.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import ray.rllib.agents.ppo as ppo
ppo.DEFAULT_CONFIG



{'num_workers': 2,
 'num_envs_per_worker': 1,
 'create_env_on_driver': False,
 'rollout_fragment_length': 200,
 'batch_mode': 'truncate_episodes',
 'gamma': 0.99,
 'lr': 5e-05,
 'train_batch_size': 4000,
 'model': {'_use_default_native_models': False,
  '_disable_preprocessor_api': False,
  '_disable_action_flattening': False,
  'fcnet_hiddens': [256, 256],
  'fcnet_activation': 'tanh',
  'conv_filters': None,
  'conv_activation': 'relu',
  'post_fcnet_hiddens': [],
  'post_fcnet_activation': 'relu',
  'free_log_std': False,
  'no_final_linear': False,
  'vf_share_layers': False,
  'use_lstm': False,
  'max_seq_len': 20,
  'lstm_cell_size': 256,
  'lstm_use_prev_action': False,
  'lstm_use_prev_reward': False,
  '_time_major': False,
  'use_attention': False,
  'attention_num_transformer_units': 1,
  'attention_dim': 64,
  'attention_num_heads': 1,
  'attention_head_dim': 32,
  'attention_memory_inference': 50,
  'attention_memory_training': 50,
  'attention_position_wise_mlp_dim': 32,

In [21]:
# Show the checkpoint path recorded on the selected trial.
# NOTE(review): same expression as cell In [16]; one of the two cells can probably be removed.
trial.checkpoint.value

'/home/dejang/ray_results/PPOTrainer_2022-06-30_14-09-08/PPOTrainer_compiler_gym_bc2ca_00007_7_gamma=0.8000,lr=0.0001_2022-06-30_14-15-36/checkpoint_000060/checkpoint-60'

In [24]:
# Show the configuration dictionary of the selected trial.
trial.config

{'log_level': 'ERROR',
 'seed': 204,
 'num_workers': 1,
 'env': 'compiler_gym',
 'rollout_fragment_length': 5,
 'train_batch_size': 5,
 'sgd_minibatch_size': 5,
 'gamma': 0.8,
 'lr': 0.0001,
 'horizon': 3,
 'evaluation_interval': 5,
 'evaluation_num_episodes': 1,
 'model': {'fcnet_hiddens': [5, 5]}}

In [25]:
# Build a new PPOTrainer from the selected trial's config on the "compiler_gym" env,
# then restore it from that trial's checkpoint. Order matters: the trainer must exist
# before it can be restored.
trainer = PPOTrainer(config=trial.config, env="compiler_gym")
trainer.restore(trial.checkpoint.value)

[2m[36m(RolloutWorker pid=3606840)[0m E0630 16:05:23.323801 140144605840960 example_service.py:249] CRITICAL - 
[2m[36m(RolloutWorker pid=3606840)[0m 
[2m[36m(RolloutWorker pid=3606840)[0m Working_dir = /dev/shm/compiler_gym_dejang/s/0630T160522-537137-6f35
[2m[36m(RolloutWorker pid=3606840)[0m 


[2m[36m(RolloutWorker pid=3606840)[0m for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
[2m[36m(RolloutWorker pid=3606840)[0m  for n_5625 in 128 : L1  
[2m[36m(RolloutWorker pid=3606840)[0m   for k_5587 in 128 : L2  
[2m[36m(RolloutWorker pid=3606840)[0m    %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
[2m[36m(RolloutWorker pid=3606840)[0m    %3[m_5586, n_5625] <- add(%2)  
[2m[36m(RolloutWorker pid=3606840)[0m   %4[m_5586, n_5625] <- write(%3)  
[2m[36m(RolloutWorker pid=3606840)[0m 
[2m[36m(RolloutWorker pid=3606840)[0m observation_space.name stride_tensor
[2m[36m(RolloutWorker pid=3606840)[0m observation_space.name flops_loop_nest_tensor
[2m[36m(RolloutWorker pid=3606840)[0m <<<<<<<<<<<<<<< Reward = 2.1525787502399276 GFLOPS >>>>>>>>>>>>>>>
[2m[36m(RolloutWorker pid=3606840)[0m Action = down
[2m[36m(RolloutWorker pid=3606840)[0m for m_5586 in 128 : L0  
[2m[36m(RolloutWorker pid=3606840)[0m  for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
[2m

E0630 16:05:26.554905 140517126641216 example_service.py:249] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0630T160525-512497-659b



for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

observation_space.name stride_tensor
observation_space.name flops_loop_nest_tensor
<<<<<<<<<<<<<<< Reward = 2.1461167261658365 GFLOPS >>>>>>>>>>>>>>>
Action = down
for m_5586 in 128 : L0  
 for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

>>> AGENT ITERATION = 1, actions = ['down']
observation_space.name stride_tensor
observation_space.name flops_loop_nest_tensor
<<<<<<<<<<<<<<< Reward = 2.1491121592297784 GFLOPS >>>>>>>>>>>>>>>


2022-06-30 16:05:28,091	INFO trainable.py:588 -- Restored on 100.37.253.28 from checkpoint: /home/dejang/ray_results/PPOTrainer_2022-06-30_14-09-08/PPOTrainer_compiler_gym_bc2ca_00007_7_gamma=0.8000,lr=0.0001_2022-06-30_14-15-36/checkpoint_000060/checkpoint-60
2022-06-30 16:05:28,093	INFO trainable.py:597 -- Current state after restoring: {'_iteration': 60, '_timesteps_total': None, '_time_total': 296.5652365684509, '_episodes_total': 100}


In [26]:
# Fetch the trainer's policy once and take the model from that same object.
policy = trainer.get_policy()
model = policy.model

In [27]:
# Show the model configuration dictionary of the policy's model.
model.model_config

{'_use_default_native_models': False,
 '_disable_preprocessor_api': False,
 '_disable_action_flattening': False,
 'fcnet_hiddens': [5, 5],
 'fcnet_activation': 'tanh',
 'conv_filters': None,
 'conv_activation': 'relu',
 'post_fcnet_hiddens': [],
 'post_fcnet_activation': 'relu',
 'free_log_std': False,
 'no_final_linear': False,
 'vf_share_layers': False,
 'use_lstm': False,
 'max_seq_len': 20,
 'lstm_cell_size': 256,
 'lstm_use_prev_action': False,
 'lstm_use_prev_reward': False,
 '_time_major': False,
 'use_attention': False,
 'attention_num_transformer_units': 1,
 'attention_dim': 64,
 'attention_num_heads': 1,
 'attention_head_dim': 32,
 'attention_memory_inference': 50,
 'attention_memory_training': 50,
 'attention_position_wise_mlp_dim': 32,
 'attention_init_gru_gate_bias': 2.0,
 'attention_use_n_prev_actions': 0,
 'attention_use_n_prev_rewards': 0,
 'framestack': True,
 'dim': 84,
 'grayscale': False,
 'zero_mean': True,
 'custom_model': None,
 'custom_model_config': {},
 'custo

In [28]:
def run_rollout(agent, env, n_iter=1, max_steps=5, verbose=False):
    """Roll out `agent` in `env` and yield the cumulative reward of each episode.

    Emulates deployment: actions are computed with exploration disabled
    (`explore=False`) and applied to the environment one step at a time.

    Args:
        agent: Object exposing `compute_single_action(state, explore=False)`;
            the result is converted to `int` before being passed to `env.step`.
        env: Gym-style environment exposing `reset()`, `step(action)` returning
            `(state, reward, done, info)`, `render()`, and
            `action_space.to_string(action)`.
        n_iter: Number of episodes to run.
        max_steps: Upper bound on the number of steps taken per episode.
        verbose: If True, additionally print the running reward sum and render
            the environment after every step.

    Yields:
        The sum of the rewards collected during each episode.

    Notes:
        An episode ends when `max_steps` is reached, when the environment
        reports `done`, or when computing/applying an action raises. In the
        last case the exception is printed and the partial sum is still
        yielded (best-effort behaviour, kept so one failing step does not
        abort the whole rollout).
    """
    for _ in range(n_iter):
        state = env.reset()
        sum_reward = 0

        for _ in range(max_steps):
            try:
                action = int(agent.compute_single_action(state, explore=False))
                print(f"Compute action = {env.action_space.to_string(action)}")

                state, reward, done, _info = env.step(action)
                sum_reward += reward
                print(f"Compute reward = {reward}")

                if verbose:
                    print("reward {:6.3f}  sum {:6.3f}".format(reward, sum_reward))
                    env.render()
            except Exception as e:
                print(f'----------------------------> Exception = {e}')
                break

            # Do not keep stepping an environment that has already terminated:
            # rewards returned after `done` are not part of the episode.
            if done:
                break

        # report at the end of each episode
        print("CUMULATIVE REWARD:", round(sum_reward, 3), "\n")
        yield sum_reward


In [29]:
# Drain the rollout generator, keeping every episode's cumulative reward,
# then report the mean over all collected episodes.
history = list(run_rollout(trainer, env, verbose=False))

mean_reward = sum(history) / len(history)
print("average reward:", round(mean_reward, 3))

E0630 16:06:08.752074 140659196266048 example_service.py:249] CRITICAL - 

Working_dir = /dev/shm/compiler_gym_dejang/s/0630T160607-966701-15e0



for m_5586 in 128 : L0  <<<<<< cursor (line 0 )
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

observation_space.name stride_tensor
observation_space.name flops_loop_nest_tensor
<<<<<<<<<<<<<<< Reward = 2.1408390317003945 GFLOPS >>>>>>>>>>>>>>>
Compute action = down
Action = down
for m_5586 in 128 : L0  
 for n_5625 in 128 : L1  <<<<<< cursor (line 1 )
  for k_5587 in 128 : L2  
   %2[m_5586, k_5587, n_5625] <- multiply(%0, %1)  
   %3[m_5586, n_5625] <- add(%2)  
  %4[m_5586, n_5625] <- write(%3)  

>>> AGENT ITERATION = 1, actions = ['down']
observation_space.name stride_tensor
observation_space.name flops_loop_nest_tensor
<<<<<<<<<<<<<<< Reward = 2.1471912177336745 GFLOPS >>>>>>>>>>>>>>>
Compute reward = 0.006352186033280027
Compute action = down
Action = down
for m_5586 in 128 : L0  
 for n_5625 in 128 : L1  
  for k_5587 in 128 : L2  <<<<<< cursor (line

In [30]:
# Teardown for the notebook session.
# If running in a notebook, finish the wandb run to upload the tensorboard logs to W&B,
# then shut down Ray.
wandb.finish()
ray.shutdown()

0,1
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/global_step",▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███▁
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/agent_timesteps_total",▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_agent_steps_sampled",▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_agent_steps_trained",▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_env_steps_sampled",▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_env_steps_trained",▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/done",▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_len_mean",▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_reward_max",▁███████████████████████████████████████
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_reward_mean",▁██▆▅▄▅▄▄▄▅▄▄▅▄▄▄▄▄▃▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃

0,1
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/global_step",0.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/agent_timesteps_total",300.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_agent_steps_sampled",300.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_agent_steps_trained",300.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_env_steps_sampled",300.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/counters/num_env_steps_trained",300.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/done",1.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_len_mean",3.0
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_reward_max",33.49409
"PPOTrainer_compiler_gym_bc2ca_00001_1_gamma=0.8000,lr=0.0100_2022-06-30_14-09-22/ray/tune/episode_reward_mean",2.80905
