# Manipulating CompilerGym Environment

## TODO
- edit environment to modify done, reward, etc
- figure out specific hyperparameters to modify for ppo alg paired with compiler env
- play with llvm opt and running code after pass
- compute some reward for runtime diff - implement into wrapper class

In [1]:
import gym
import compiler_gym

In [2]:
# create initial environment
# compiler=llvm, observation=autophase, reward=ir instruction count relative to -Oz optimization
env = gym.make("llvm-autophase-ic-v0")

In [4]:
help(env)

Help on LlvmEnv in module compiler_gym.envs.llvm.llvm_env object:

class LlvmEnv(compiler_gym.envs.compiler_env.CompilerEnv)
 |  LlvmEnv(*args, benchmark: Union[str, compiler_gym.datasets.benchmark.Benchmark, NoneType] = None, datasets_site_path: Union[pathlib.Path, NoneType] = None, **kwargs)
 |  
 |  A specialized CompilerEnv for LLVM.
 |  
 |  This extends the default :class:`CompilerEnv
 |  <compiler_gym.envs.CompilerEnv>` environment, adding extra LLVM
 |  functionality. Specifically, the actions use the :class:`CommandlineFlag
 |  <compiler_gym.spaces.CommandlineFlag>` space, which is a type of
 |  :code:`Discrete` space that provides additional documentation about each
 |  action, and the :meth:`LlvmEnv.commandline()
 |  <compiler_gym.envs.LlvmEnv.commandline>` method can be used to produce an
 |  equivalent LLVM opt invocation for the current environment state.
 |  
 |  :ivar actions: The list of actions that have been performed since the
 |      previous call to :func:`reset`.

In [5]:
env.reset()

array([  0,   4,  54,  39,  12,  46,  23,   6,  12,  31,   2,   4,   0,
        81,   4,  77,  13,  15, 108, 106,  75,  51,  71,  46,  15,   0,
         9,  46,   0,  13,  72,  51,  77,  81,  39,  31,   0, 163,   2,
         0,   4,   6,  13,   1,   0,  73,   8,   1,   0,  15,  85, 638,
       402,  16,  10, 298])

In [7]:
env.benchmark
# can call env.reset(benchmark="benchmark")

benchmark://cbench-v1/qsort

In [9]:
env.reset()

array([  0,   4,  54,  39,  12,  46,  23,   6,  12,  31,   2,   4,   0,
        81,   4,  77,  13,  15, 108, 106,  75,  51,  71,  46,  15,   0,
         9,  46,   0,  13,  72,  51,  77,  81,  39,  31,   0, 163,   2,
         0,   4,   6,  13,   1,   0,  73,   8,   1,   0,  15,  85, 638,
       402,  16,  10, 298])

In [10]:
# env.render() will print current IR of program
env.render()

; ModuleID = '-'
source_filename = "-"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, %struct._IO_codecvt*, %struct._IO_wide_data*, %struct._IO_FILE*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type opaque
%struct._IO_codecvt = type opaque
%struct._IO_wide_data = type opaque
%struct.my3DVertexStruct = type { i32, i32, i32, double }

@stderr = external dso_local global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [27 x i8] c"Usage: qsort_large <file>\0A\00", align 1
@.str.1 = private unnamed_addr constant [2 x i8] c"r\00", align 1
@.str.2 = private unnamed_addr constant [3 x i8] c"%d\00", align 1
@.str.3 = private unnamed_addr constant [57 x i8] c"\0ASorting %d vectors based on distance from the origin.\0A\

In [11]:
# can also use env.make_benchmark to create own benchmark

In [14]:
# wrapper for env
class envWrapper(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        self.patience = 10
        self.reward_counter = 0

    def step(self, action):
        next_state, reward, done, info = self.env.step(action)
        if reward <= 0:
            self.reward_counter += 1
        else:
            self.reward_counter = 0

        if self.reward_counter > self.patience:
            done = True
        return next_state, reward, done, info

In [15]:
new_env = envWrapper(env)

In [16]:
help(new_env)

Help on envWrapper in module __main__ object:

class envWrapper(gym.core.Wrapper)
 |  envWrapper(env)
 |  
 |  Wraps the environment to allow a modular transformation.
 |  
 |  This class is the base class for all wrappers. The subclass could override
 |  some methods to change the behavior of the original environment without touching the
 |  original code.
 |  
 |  .. note::
 |  
 |      Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
 |  
 |  Method resolution order:
 |      envWrapper
 |      gym.core.Wrapper
 |      gym.core.Env
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, env)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  step(self, action)
 |      Run one timestep of the environment's dynamics. When end of
 |      episode is reached, you are responsible for calling `reset()`
 |      to reset this environment's state.
 |      
 |      Accepts an action and returns a tuple