Optuna framework: https://optuna.readthedocs.io/en/stable/tutorial/index.html

In [5]:
!pip install requests==2.23 # Avoid error
!apt install swig cmake libopenmpi-dev zlib1g-dev
!pip install stable-baselines3
!pip install compiler_gym
!pip install optuna
!pip install psycopg2-binary

Defaulting to user installation because normal site-packages is not writeable
Collecting requests==2.23
  Using cached requests-2.23.0-py2.py3-none-any.whl (58 kB)
Installing collected packages: requests
  Attempting uninstall: requests
    Found existing installation: requests 2.26.0
    Uninstalling requests-2.26.0:
      Successfully uninstalled requests-2.26.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
compiler-gym 0.2.1 requires requests>=2.24.0, but you have requests 2.23.0 which is incompatible.[0m
Successfully installed requests-2.23.0
[1;31mE: [0mCould not open lock file /var/lib/dpkg/lock-frontend - open (13: Permission denied)[0m
[1;31mE: [0mUnable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?[0m
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installat

In [6]:
import os

import gym

import compiler_gym
from compiler_gym.leaderboard.llvm_instcount import eval_llvm_instcount_policy # Evaluation method used by FB for leaderboard
from compiler_gym.envs import LlvmEnv
from compiler_gym.wrappers import TimeLimit

import numpy as np
import pandas as pd

from stable_baselines3 import DQN, SAC
from stable_baselines3.common.evaluation import evaluate_policy

import optuna

from itertools import islice
from compiler_gym.wrappers import CycleOverBenchmarks

from typing import Any, Dict

import calendar
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
os.chdir('/') # Navigate to root
os.chdir('/content/drive/Shareddrives/csc461_A-Team/experiments/dqn_w_optuna_hyperparam') # Navigate to this directory

In [None]:
pwd

'/content/drive/Shareddrives/csc461_A-Team/experiments/dqn_w_optuna_hyperparam'

Create the CompilerGym environment

In [7]:

def make_env(env_config=None) -> compiler_gym.envs.CompilerEnv:
    """Make the reinforcement learning environment for this experiment.
    
      From FB example.
    """
    env = compiler_gym.make(
        "llvm-ic-v0",
        observation_space="Autophase",
        reward_space="IrInstructionCountOz",
    )

    # Finally, we impose a time limit on the environment so that every episode
    # for 5 steps or fewer. This is because the environment's task is continuous
    # and no action is guaranteed to result in a terminal state. Adding a time
    # limit means we don't have to worry about learning when an agent should 
    # stop, though again this limits the potential improvements that the agent
    # can achieve compared to using an unbounded maximum episode length.
    env = TimeLimit(env, max_episode_steps=300)

    dataset = env.datasets["linux-v0"] # Small dataset

    # Each dataset has a `benchmarks()` method that returns an iterator over the
    # benchmarks within the dataset. Here we will use iterator sliceing to grab a 
    # handful of benchmarks for training and validation.

    N_benchmarks = 250

    train_benchmarks = list(islice(dataset.benchmarks(), N_benchmarks)) # N_bechmarks total benchmarks the dataset
    len(train_benchmarks) # , val_benchmarks = train_benchmarks[:50], train_benchmarks[50:]

    env = CycleOverBenchmarks(env, train_benchmarks)
    return env

In [8]:
training_env = make_env()

Hyperparameter optimization with Optuna

In [9]:
# from scipy import stats # Trying to use stats.gmean for geometric mean of rewards column.

# round(pd.read_csv('dqn_policy_results.csv')['reward'].mean(), 5)

In [10]:
# from subprocess import Popen, PIPE

# ts = calendar.timegm(time.gmtime())
# filename = f"dqn_policy_results_{ ts }.csv"

# try:
#   # os.system(f"python3 eval_llvm_codesize_model.py --leaderboard_results={filename}")
#   p = Popen(['python3', "eval_llvm_codesize_model.py",  f"--leaderboard_results={filename}"], stdout=PIPE, stderr=PIPE)
#   out, err = p.communicate()
#   print(out)
#   print(err)
# except Exception as e:
#   print(e)

In [29]:
def make_test_env(env_config=None) -> compiler_gym.envs.CompilerEnv:
    """
    Make the testing environment for evaluating approximate performance on the test set.
    
    Defining this function because the eval method by Facebook did not function as desired
    in tests.
    """
    env = compiler_gym.make(
        "llvm-ic-v0",
        observation_space="Autophase",
        reward_space="IrInstructionCountOz",
    )

    # Finally, we impose a time limit on the environment so that every episode
    # for 5 steps or fewer. This is because the environment's task is continuous
    # and no action is guaranteed to result in a terminal state. Adding a time
    # limit means we don't have to worry about learning when an agent should 
    # stop, though again this limits the potential improvements that the agent
    # can achieve compared to using an unbounded maximum episode length.
    env = TimeLimit(env, max_episode_steps=300)

    dataset = env.datasets["cbench-v1"] # Small dataset

    # Each dataset has a `benchmarks()` method that returns an iterator over the
    # benchmarks within the dataset. Here we will use iterator sliceing to grab a 
    # handful of benchmarks for training and validation.

    train_benchmarks = list(dataset) # N_bechmarks total benchmarks the dataset

    env = CycleOverBenchmarks(env, train_benchmarks)

    return env


def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    NOTE: Comes from: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py
    Sampler for DQN hyperparams.
    :param trial:
    :return:
    """
    policy = 'MlpPolicy' # trial.suggest_categorical("policy", ["MlpPolicy", "CnnPolicy"])
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(5e4), int(1e5), int(1e6)])
    exploration_final_eps = trial.suggest_uniform("exploration_final_eps", 0, 0.2)
    exploration_fraction = trial.suggest_uniform("exploration_fraction", 0, 0.5)
    target_update_interval = trial.suggest_categorical("target_update_interval", [1, 1000, 5000, 10000, 15000, 20000])
    learning_starts = trial.suggest_categorical("learning_starts", [0, 1000, 5000, 10000, 20000])

    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 128, 256, 1000])
    subsample_steps = trial.suggest_categorical("subsample_steps", [1, 2, 4, 8])
    gradient_steps = max(train_freq // subsample_steps, 1)

    net_arch = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"])

    net_arch = {"tiny": [64], "small": [64, 64], "medium": [256, 256]}[net_arch]

    hyperparams = {
        "env": make_env(),
        "policy": policy,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "exploration_fraction": exploration_fraction,
        "exploration_final_eps": exploration_final_eps,
        "target_update_interval": target_update_interval,
        "learning_starts": learning_starts,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    # if trial.using_her_replay_buffer:
    #     hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams

def eval_model_on_compilergym_benchmark(model):
    # Define the test environment.
    test_env = make_test_env()

    # Run the evaluation helper method from SB3.
    mean_reward_per_episode, std_dev_of_reward_per_episode = evaluate_policy(model, make_test_env())

    return mean_reward_per_episode

def objective(trial):
    """
    Calls helper functions to choose hyperparameters with Optuna and trains a model iteratively.
    Set prune=True to 
    """
    ts = calendar.timegm(time.gmtime()) # Timestamp for uniqueness of the model when saving.

    model = DQN(**(sample_dqn_params(trial))) # Instantiate the model with sampled hyperparameters.


    # Iteratively train the model on the training environment.
    total_steps = 200000
    step_size = 1000
    for steps in range(1000, total_steps, step_size): # Steps goes up by step_size until it reaches total_steps.

      model.learn(total_timesteps=step_size) # Train

      score = eval_model_on_compilergym_benchmark(model) # Evaluate

      # Pruning. Example here: https://github.com/optuna/optuna-examples/blob/6a6b20ad634627eebb3e7e104f73b70b45c6e624/simple_pruning.py
      prune = True
      if prune == True:
        trial.report(score, steps)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.TrialPruned()
          
      
      # model.save(f"dqn_llvm_model_{ ts }")

      print("Model saved. . .")

    return score

In [26]:
def sample_sac_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    NOTE: Comes from: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py
    Sampler for SAC hyperparams.
    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128, 256, 512, 1024, 2048])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
    learning_starts = trial.suggest_categorical("learning_starts", [0, 1000, 10000, 20000])
    # train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])
    # gradient_steps takes too much time
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    # ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    ent_coef = "auto"
    # You can comment that out when not using gSDE
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    # NOTE: Add "verybig" to net_arch when tuning HER
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
        # Uncomment for tuning HER
        # "large": [256, 256, 256],
        # "verybig": [512, 512, 512],
    }[net_arch]

    target_entropy = "auto"
    # if ent_coef == 'auto':
    #     # target_entropy = trial.suggest_categorical('target_entropy', ['auto', 5, 1, 0, -1, -5, -10, -20, -50])
    #     target_entropy = trial.suggest_uniform('target_entropy', -10, 10)

    hyperparams = {
        'env': training_env,
        'policy': 'MlpPolicy',
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "learning_starts": learning_starts,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "ent_coef": ent_coef,
        "tau": tau,
        "target_entropy": target_entropy,
        "policy_kwargs": dict(log_std_init=log_std_init, net_arch=net_arch),
    }

    # if trial.using_her_replay_buffer:
    #     hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams


def multi_model_objective(trial):
    """
    UNDER DEVELOMENT!
    Objective function for running trials with multiple models at once.
    Inspired by this example: https://github.com/optuna/optuna-examples/blob/main/kubernetes/simple/sklearn_distributed.py
    """
    ts = calendar.timegm(time.gmtime()) # Timestamp for uniqueness of the model when saving.

    model_type = trial.suggest_categorical("model_type", ["DQN", "SAC"]) # Use Optuna helper function to choose which model should be used in this trial.

    if model_type == 'DQN':
      model = DQN(**(sample_dqn_params(trial))) # Instantiate the model with sampled hyperparameters.
    elif model_type == 'SAC':
      model = SAC(**(sample_sac_params(trial)))

    # Iteratively train the model on the training environment.
    total_steps = 3000
    step_size = 500
    for steps in range(1000, total_steps, step_size): # Steps goes up by step_size until it reaches total_steps.

      model.learn(total_timesteps=steps) # Train

      score = eval_model_on_compilergym_benchmark(model) # Evaluate

      # Pruning. Example here: https://github.com/optuna/optuna-examples/blob/6a6b20ad634627eebb3e7e104f73b70b45c6e624/simple_pruning.py
      prune = True
      if prune == True:
        trial.report(score, steps)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.TrialPruned()
          
      
      model.save(f"dqn_llvm_model_{ ts }")

      print("Model saved. . .")

    return score

In [42]:
# score = eval_model_on_compilergym_benchmark(DQN.load("dqn_llvm_model_1638728511"))
# score
import stable_baselines3
stable_baselines3.common.utils.get_device(device='auto')

device(type='cuda')

In [38]:
database_url = "postgresql://yzvxgwluxjnkap:8cd45bfa27d5df1577be2e2b20a35c90cf154d272c8b5975bb28266852c7dbd9@ec2-3-231-112-124.compute-1.amazonaws.com:5432/d1mqml0sjdqj22"

ts = calendar.timegm(time.gmtime()) # Timestamp for uniqueness of the study name.

study = optuna.create_study(study_name=f"dqn_test_{ ts }", direction="maximize", storage=database_url, load_if_exists=True)
study.optimize(objective, n_trials=2, show_progress_bar=True, n_jobs = 20)

[32m[I 2021-12-05 18:38:05,278][0m A new study created in RDB with name: dqn_test_1638747484[0m


Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .
Model saved. . .


localhost:43321 Socket closed (4 attempts remaining)


KeyboardInterrupt: 

localhost:38059 Socket closed (4 attempts remaining)
localhost:43321 failed to connect to all addresses (3 attempts remaining)
localhost:38059 failed to connect to all addresses (3 attempts remaining)
localhost:43321 failed to connect to all addresses (2 attempts remaining)
localhost:38059 failed to connect to all addresses (2 attempts remaining)
localhost:43321 failed to connect to all addresses (1 attempt remaining)
localhost:38059 failed to connect to all addresses (1 attempt remaining)
localhost:43321 failed to connect to all addresses (0 attempts remaining)
localhost:38059 failed to connect to all addresses (0 attempts remaining)
localhost:43321 failed to connect to all addresses (4 attempts remaining)
localhost:38059 failed to connect to all addresses (4 attempts remaining)
localhost:43321 failed to connect to all addresses (3 attempts remaining)
localhost:38059 failed to connect to all addresses (3 attempts remaining)
localhost:43321 failed to connect to all addresses (2 attempt

Model saved. . .


localhost:45283 failed to connect to all addresses (2 attempts remaining)
localhost:45283 failed to connect to all addresses (1 attempt remaining)
localhost:45283 failed to connect to all addresses (0 attempts remaining)
Failed to stop session 27 with ServiceTransportError: localhost:45283 failed to connect to all addresses (5 retries)
localhost:45283 failed to connect to all addresses (4 attempts remaining)
localhost:45283 failed to connect to all addresses (3 attempts remaining)
localhost:45283 failed to connect to all addresses (2 attempts remaining)
localhost:45283 failed to connect to all addresses (1 attempt remaining)
localhost:45283 failed to connect to all addresses (0 attempts remaining)
ServiceTransportError during reset(): localhost:45283 failed to connect to all addresses (5 retries)
[33m[W 2021-12-05 18:49:24,355][0m Trial 1 failed because of the following error: ServiceError('Service exited with returncode -2')[0m
Traceback (most recent call last):
  File "/home/atimp

Model saved. . .


localhost:46839 failed to connect to all addresses (2 attempts remaining)
localhost:46839 failed to connect to all addresses (1 attempt remaining)
localhost:46839 failed to connect to all addresses (0 attempts remaining)
Failed to stop session 23 with ServiceTransportError: localhost:46839 failed to connect to all addresses (5 retries)
localhost:46839 failed to connect to all addresses (4 attempts remaining)
localhost:46839 failed to connect to all addresses (3 attempts remaining)
localhost:46839 failed to connect to all addresses (2 attempts remaining)
localhost:46839 failed to connect to all addresses (1 attempt remaining)
localhost:46839 failed to connect to all addresses (0 attempts remaining)
ServiceTransportError during reset(): localhost:46839 failed to connect to all addresses (5 retries)
[33m[W 2021-12-05 18:49:41,666][0m Trial 0 failed because of the following error: ServiceError('Service exited with returncode -2')[0m
Traceback (most recent call last):
  File "/home/atimp

In [None]:
best_trial = study.best_trial
print("Best params: ", study.best_params)
print("Best score: ", best_trial.value)


Best params:  {'batch_size': 100, 'buffer_size': 100000, 'exploration_final_eps': 0.19371554815712957, 'exploration_fraction': 0.20676473505097892, 'gamma': 0.98, 'learning_rate': 0.31978860689353317, 'learning_starts': 10000, 'net_arch': 'tiny', 'subsample_steps': 1, 'target_update_interval': 1, 'train_freq': 16}
Best score:  0.00037471111863851547


In [None]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_buffer_size,params_exploration_final_eps,params_exploration_fraction,params_gamma,params_learning_rate,params_learning_starts,params_net_arch,params_subsample_steps,params_target_update_interval,params_train_freq,state
0,0,0.000375,2021-12-05 20:01:47.024646,2021-12-05 20:20:04.428868,0 days 00:18:17.404222,100,100000,0.193716,0.206765,0.98,0.319789,10000,tiny,1,1,16,COMPLETE


In [None]:
optuna.importance.get_param_importances(study)

OrderedDict([('exploration_final_eps', 0.23809523809523808),
             ('batch_size', 0.23809523809523808),
             ('train_freq', 0.14285714285714285),
             ('net_arch', 0.14285714285714285),
             ('subsample_steps', 0.09523809523809523),
             ('exploration_fraction', 0.09523809523809523),
             ('learning_rate', 0.04761904761904761),
             ('target_update_interval', 0.0),
             ('learning_starts', 0.0),
             ('gamma', 0.0),
             ('buffer_size', 0.0)])

In [None]:
!pip freeze > requirements.txt

In [None]:
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               79
Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Stepping:            0
CPU MHz:             2199.998
BogoMIPS:            4399.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            56320K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_sin