# Hyperparameter Optimization with ASHA

- Dataset used: synthetic binomial options


In [None]:
!pip install -U ray[tune]

Collecting ray[tune]
  Downloading ray-1.11.0-cp37-cp37m-manylinux2014_x86_64.whl (52.7 MB)
[K     |████████████████████████████████| 52.7 MB 118 kB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 22.8 MB/s 
[?25hCollecting redis>=3.5.0
  Downloading redis-4.2.2-py3-none-any.whl (226 kB)
[K     |████████████████████████████████| 226 kB 33.0 MB/s 
Collecting tensorboardX>=1.9
  Downloading tensorboardX-2.5-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 35.8 MB/s 
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Collecting async-timeout>=4.0.2
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Installing collected packages: deprecated, async-timeout, redis, grpcio, tensorboardX, ray
  Attempting uninstall: grpcio
    Found existing installation: grpcio 1.44.0
    Uninst

In [None]:
#@title **Imports**
import os
from functools import partial

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [None]:
# Resolve every location to an absolute path once, while the notebook's own
# working directory is still the reference point. `load_data` is called from
# inside Tune trials, which may execute from a different working directory, so
# relative paths are not reliable there.
synthetic_calls_path = os.path.abspath('../data/binom_synthetic_calls.csv')
synthetic_puts_path = os.path.abspath('../data/binom_synthetic_puts.csv')
checkpoint_dir = os.path.abspath('../models/opt_checkpoints')

In [None]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` in place to smaller dtypes and return it.

    * Signed-integer NumPy columns are cast to the first of int8 / int16 /
      int32 / int64 whose range strictly contains the column's min and max.
    * Float NumPy columns are cast to float16 when min and max lie strictly
      inside its range, otherwise float32, otherwise float64.
    * Object columns are converted to ``category``.
    * Every other dtype (bool, unsigned int, datetime, category, pandas
      extension dtypes, ...) is left untouched. Previously such columns fell
      into the float branch and were either cast to float16 or raised, which
      also made a second call on the function's own output fail.

    NOTE: for floats only the value *range* is checked, not the precision, so
    the float16 / float32 casts can round the stored values.

    Args:
        df: DataFrame to shrink; it is modified in place.

    Returns:
        The same DataFrame object, with the downcast columns.
    """
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy dtypes carry a reliable `kind`; extension dtypes
        # (category, nullable ints, ...) are deliberately skipped.
        kind = col_type.kind if isinstance(col_type, np.dtype) else None

        if kind == 'i':
            c_min = df[col].min()
            c_max = df[col].max()
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif kind == 'f':
            c_min = df[col].min()
            c_max = df[col].max()
            # Fall back to float64 when neither narrower type fits (this also
            # covers NaN / inf extremes, for which the comparisons are False).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    return df

In [None]:
class OptDataset(Dataset):
  """Map-style dataset pairing a feature container with a target container.

  ``X`` and ``y`` are stored exactly as given. Item ``idx`` is the tuple
  ``(X[idx], y[idx])`` and the dataset length is ``len(X)``.
  """

  def __init__(self, X, y):
    self.X, self.y = X, y

  def __len__(self):
    # The length is taken from the features only; `y` is not checked here.
    return len(self.X)

  def __getitem__(self, idx):
    features = self.X[idx]
    target = self.y[idx]
    return features, target

In [None]:
def preprocessing(df:pd.DataFrame):
  """One-hot encode ``df`` and standardize features and target separately.

  The frame is passed through ``pd.get_dummies`` (dummy columns are named by
  the category value alone, with no prefix). The ``'Option Price'`` column is
  the target; every other column is a feature.

  NOTE(review): both scalers are fitted on all rows passed in and are not
  returned, so any later split shares scaling statistics and the scaled
  target cannot be mapped back to price units by the caller.

  Returns:
    ``(input_data, output_data)``: the standardized feature matrix and the
    standardized target as a single-column 2-D array.
  """
  encoded = pd.get_dummies(df, prefix='', prefix_sep='')

  features = encoded.drop('Option Price', axis=1)
  # StandardScaler expects a 2-D input, hence the column-vector reshape.
  target = encoded['Option Price'].values.reshape(-1, 1)

  feature_scaler = StandardScaler()
  target_scaler = StandardScaler()

  scaled_features = feature_scaler.fit_transform(features)
  scaled_target = target_scaler.fit_transform(target)
  return scaled_features, scaled_target

In [None]:
def load_data(train_size=0.8, val_size=0.1):
  """Load the synthetic call/put options and return train/val/test datasets.

  Both CSV files are read (first column as index), shrunk with
  ``reduce_mem_usage``, concatenated, shuffled with a fixed seed
  (``random_state=0``) and passed through ``preprocessing``. The rows are then
  cut sequentially: the first ``train_size`` fraction for training, the next
  ``val_size`` fraction for validation and whatever remains for testing.

  Args:
    train_size: fraction of rows used for training (default 0.8).
    val_size: fraction of rows used for validation (default 0.1).

  Returns:
    ``(train, val, test)`` as three ``OptDataset`` objects holding float32
    tensors.

  Raises:
    ValueError: if a fraction is negative or the two fractions exceed 1.
  """
  if train_size < 0 or val_size < 0 or train_size + val_size > 1:
    raise ValueError(
        'train_size and val_size must be non-negative and sum to at most 1')

  synthetic_calls = reduce_mem_usage(pd.read_csv(synthetic_calls_path, index_col=0))
  synthetic_puts = reduce_mem_usage(pd.read_csv(synthetic_puts_path, index_col=0))

  synthetic_options = pd.concat([synthetic_calls, synthetic_puts], ignore_index=True)
  synthetic_options = shuffle(synthetic_options, random_state=0)

  input_data, output_data = preprocessing(synthetic_options)

  n_rows = len(input_data)
  last_train_idx = int(np.round(n_rows * train_size))
  last_val_idx = last_train_idx + int(np.round(n_rows * val_size))

  def make_split(start, stop):
    # Plain float32 tensors; the deprecated `Variable` wrapper is not needed.
    X = torch.tensor(input_data[start:stop], dtype=torch.float32)
    y = torch.tensor(output_data[start:stop], dtype=torch.float32)
    return OptDataset(X, y)

  return (make_split(0, last_train_idx),
          make_split(last_train_idx, last_val_idx),
          make_split(last_val_idx, None))

# Configurable Model

FCN model with residual blocks, with the following configurable parameters:

- Number of layers
- Activation function: $[ReLU, LeakyReLU, ELU]$
- Hidden layer size


In [None]:
# Use the first CUDA device when PyTorch can see one; otherwise stay on the CPU.
CUDA = torch.cuda.is_available()
if CUDA:
  device = 'cuda:0'
else:
  device = 'cpu'

In [None]:
class ResBlock(nn.Module):
  """Residual wrapper: returns ``module(x) + x`` for the wrapped ``module``.

  The wrapped module must produce an output that can be added to its input.
  """

  def __init__(self, module):
    super().__init__()
    self.module = module

  def forward(self, x):
    transformed = self.module(x)
    # Skip connection: add the untouched input back onto the branch output.
    return transformed + x

In [None]:
class HiddenLayer(nn.Module):
  """Square fully connected layer (``layer_size -> layer_size``) plus activation.

  Args:
    layer_size: number of input and output features of the linear layer.
    act_fn: activation name, one of ``'ReLU'``, ``'LeakyReLU'`` or ``'ELU'``.

  Raises:
    ValueError: if ``act_fn`` is not a supported name. Previously an unknown
      name left ``self.layer`` undefined and only failed on the first forward
      pass.
  """

  # Supported activation names mapped to the module class that implements them.
  _ACTIVATIONS = {
      'ReLU': nn.ReLU,
      'LeakyReLU': nn.LeakyReLU,
      'ELU': nn.ELU,
  }

  def __init__(self, layer_size, act_fn):
    super(HiddenLayer, self).__init__()

    if act_fn not in self._ACTIVATIONS:
      raise ValueError(
          'Unsupported act_fn {!r}; expected one of {}'.format(
              act_fn, sorted(self._ACTIVATIONS)))

    # Attribute name and layer order are kept so state-dict keys stay the same.
    self.layer = nn.Sequential(
        nn.Linear(layer_size, layer_size),
        self._ACTIVATIONS[act_fn]())

  def forward(self, x):
    return self.layer(x)

In [None]:
class Net(nn.Module):
  """Fully connected network with residual blocks of two hidden layers each.

  Layout: ``Linear(input_size, hidden_size)`` + activation, followed by
  ``num_layers // 2`` residual blocks (each wrapping two ``HiddenLayer``
  modules), followed by ``Linear(hidden_size, output_size)``.

  Args:
    input_size: number of input features.
    output_size: number of outputs.
    hidden_size: width of every hidden layer.
    num_layers: number of hidden layers inside the residual blocks. They are
      created in pairs, so an odd value is rounded down to the next even one.
    act_fn: activation name, one of ``'ReLU'``, ``'LeakyReLU'`` or ``'ELU'``.

  Raises:
    ValueError: if ``act_fn`` is not a supported name. Previously this
      surfaced as an ``AttributeError`` about a missing ``initial_layer``.
  """

  # Supported activation names mapped to the module class that implements them.
  _ACTIVATIONS = {
      'ReLU': nn.ReLU,
      'LeakyReLU': nn.LeakyReLU,
      'ELU': nn.ELU,
  }

  def __init__(self, input_size, output_size, hidden_size, num_layers, act_fn):
    super(Net, self).__init__()

    if act_fn not in self._ACTIVATIONS:
      raise ValueError(
          'Unsupported act_fn {!r}; expected one of {}'.format(
              act_fn, sorted(self._ACTIVATIONS)))

    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size

    # Attribute names and creation order are unchanged, so state-dict keys
    # (and therefore existing checkpoints) stay compatible.
    self.initial_layer = nn.Sequential(
        nn.Linear(self.input_size, self.hidden_size),
        self._ACTIVATIONS[act_fn]())

    self.hidden_layers_list = [
        ResBlock(
            nn.Sequential(
                HiddenLayer(self.hidden_size, act_fn),
                HiddenLayer(self.hidden_size, act_fn)
            )
        )
        for _ in range(num_layers // 2)
    ]

    self.hidden_layers = nn.Sequential(*self.hidden_layers_list)

    self.net = nn.Sequential(
        self.initial_layer,
        self.hidden_layers,
        nn.Linear(self.hidden_size, self.output_size)
    )

  def forward(self, x):
    return self.net(x)

In [None]:
@torch.no_grad()
def init_xuniform(m):
  """Initialise ``nn.Linear`` modules; meant to be passed to ``Module.apply``.

  Weights get Xavier-uniform values and the bias, when the layer has one, is
  filled with 0.01. Modules of any other type are left untouched.

  Args:
    m: the module visited by ``apply``.
  """
  if isinstance(m, nn.Linear):
    torch.nn.init.xavier_uniform_(m.weight)
    # Layers built with bias=False expose `bias` as None.
    if m.bias is not None:
      m.bias.data.fill_(0.01)

In [None]:
# Widths handed to `Net` as its input/output sizes.
# NOTE(review): 7 presumably equals the number of feature columns produced by
# `preprocessing` - confirm against the data.
input_size, output_size = 7, 1
# Passed to `tune.run` as `num_samples`.
number_of_samples = 20

In [None]:
def train_cifar(config, checkpoint_dir=None):
    """Ray Tune trainable: train one ``Net`` configuration and report its loss.

    The name is kept from the tutorial this notebook is based on; the function
    trains on the datasets returned by ``load_data``, not on CIFAR.

    Args:
        config: dict with the keys ``hidden_size``, ``num_layers``, ``act_fn``,
            ``lr`` and ``batch_size``. An optional ``epochs`` key overrides the
            default of 25 training epochs.
        checkpoint_dir: directory containing a ``"checkpoint"`` file holding a
            ``(model_state, optimizer_state)`` tuple. When given, both states
            are restored before training starts.

    After every epoch the model and optimizer state are saved through
    ``tune.checkpoint_dir`` and the mean validation batch loss is sent to Tune
    under the ``loss`` metric.
    """
    net = Net(input_size, output_size, config['hidden_size'], config['num_layers'], config['act_fn'])
    net = net.to(device)
    net.apply(init_xuniform)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), lr=config["lr"])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainset, valset, testset = load_data()

    batch_size = int(config["batch_size"])
    num_epochs = int(config.get("epochs", 25))

    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=8)
    # Validation only aggregates losses, so a fixed order is sufficient.
    valloader = torch.utils.data.DataLoader(
        valset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        running_steps = 0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            running_steps += 1
            if i % 50 == 49:  # print every 50 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / running_steps))
                # Reset the sum AND the step count together; resetting only the
                # sum made every later printout divide by too many steps.
                running_loss = 0.0
                running_steps = 0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)

                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        # Separate name so the `checkpoint_dir` argument is not shadowed.
        with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps))
    print("Finished Training")

In [None]:
def test_accuracy(net, loss_fn, batch_size=4, num_workers=2):
    """Return the mean per-batch loss of ``net`` on the test split.

    Despite its name, this function does not compute an accuracy: it averages
    ``loss_fn(net(X), y)`` over the batches of the test dataset returned by
    ``load_data``.

    Args:
        net: model to evaluate; it is expected to already live on ``device``.
        loss_fn: callable applied to ``(predictions, targets)`` that returns a
            scalar tensor.
        batch_size: evaluation batch size. The default of 4 preserves the
            previous behaviour; larger values make evaluation faster.
        num_workers: number of ``DataLoader`` worker processes (default 2).

    Returns:
        The unweighted mean of the per-batch losses.
    """
    trainset, valset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    losses = []
    with torch.no_grad():
        for X, y in testloader:
            X, y = X.to(device), y.to(device)
            loss = loss_fn(net(X), y)
            losses.append(loss.cpu().item())

    return np.array(losses).mean()

Apart from the network's architectural parameters, we would like to find the optimal **learning rate** and **batch size**. 

In [None]:
# Search space sampled by Tune for each trial: three architecture choices, a
# log-uniform learning rate and the mini-batch size.
config = dict(
    hidden_size=tune.choice([400, 600, 800]),
    num_layers=tune.choice([4, 6, 8]),
    act_fn=tune.choice(["ReLU", "LeakyReLU", "ELU"]),
    lr=tune.loguniform(5e-5, 1e-1),
    batch_size=tune.choice([512, 1024, 2048]),
)

In [None]:
# ASHA minimises the reported `loss`. `max_t` is set to the 25 epochs that
# `train_cifar` runs by default, so the scheduler's time budget matches the
# training loop instead of naming an iteration that is never reached.
scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=25,
    grace_period=1,
    reduction_factor=2)

In [None]:
# Console progress table limited to the validation loss and iteration count.
reporter = CLIReporter(metric_columns=["loss", "training_iteration"])

In [None]:
# `train_cifar` is passed directly: its `checkpoint_dir` argument is the
# restore location of a trial and is meant to be supplied by Tune, not bound
# to the notebook's output folder.
# A GPU is requested only when CUDA is available, matching how `device` is
# chosen, so the search can still be scheduled on CPU-only machines.
result = tune.run(
    train_cifar,
    resources_per_trial={"cpu": 2, "gpu": 1 if CUDA else 0},
    config=config,
    num_samples=number_of_samples,
    scheduler=scheduler,
    progress_reporter=reporter)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Results table taken from the object returned by `tune.run`; the bare name on
# the last line displays it as the cell output.
res_df = result.results_df
res_df

Unnamed: 0_level_0,loss,time_this_iter_s,should_checkpoint,done,timesteps_total,episodes_total,training_iteration,experiment_id,date,timestamp,...,node_ip,time_since_restore,timesteps_since_restore,iterations_since_restore,experiment_tag,config.hidden_size,config.num_layers,config.act_fn,config.lr,config.batch_size
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
c7fdc_00000,0.0006884386,10.521141,True,True,,,25,d04ad7094264464ba6b93b8579b7fefa,2022-04-08_07-21-20,1649402480,...,172.28.0.2,299.223834,0,25,"0_act_fn=ELU,batch_size=1024,hidden_size=600,l...",600,8,ELU,0.000369,1024
c7fdc_00001,2788754000.0,10.325325,True,True,,,1,64146759d2a946e3862d5412e0599611,2022-04-08_07-21-34,1649402494,...,172.28.0.2,10.325325,0,1,"1_act_fn=LeakyReLU,batch_size=2048,hidden_size...",400,6,LeakyReLU,0.047322,2048
c7fdc_00002,3.494919e-05,8.502062,True,True,,,25,f61e09dd54bf4e4eb32530fd3a1cdb21,2022-04-08_07-25-15,1649402715,...,172.28.0.2,214.607754,0,25,"2_act_fn=ReLU,batch_size=2048,hidden_size=800,...",800,4,ReLU,0.008508,2048
c7fdc_00003,6.612644e-06,7.93035,True,True,,,25,ca4f13e3959b4d4785eff85d17877f9e,2022-04-08_07-28-51,1649402931,...,172.28.0.2,210.358592,0,25,"3_act_fn=ReLU,batch_size=1024,hidden_size=600,...",600,4,ReLU,7.9e-05,1024
c7fdc_00004,419.6531,12.348624,True,True,,,16,122f3c001e6445778255bb175b0da8e3,2022-04-08_07-32-16,1649403136,...,172.28.0.2,198.655214,0,16,"4_act_fn=LeakyReLU,batch_size=512,hidden_size=...",400,6,LeakyReLU,0.004833,512
c7fdc_00005,0.0004823192,6.508762,True,True,,,25,c52214159fb5489a9346712968ea8a77,2022-04-08_07-35-08,1649403308,...,172.28.0.2,166.277605,0,25,"5_act_fn=ReLU,batch_size=2048,hidden_size=400,...",400,4,ReLU,0.003327,2048
c7fdc_00006,30287.13,15.007996,True,True,,,1,696771f82fa0438eba0be8e2f0555f97,2022-04-08_07-35-29,1649403329,...,172.28.0.2,15.007996,0,1,"6_act_fn=ELU,batch_size=1024,hidden_size=600,l...",600,8,ELU,0.026657,1024
c7fdc_00007,8.205818e-05,13.195815,True,True,,,8,31ab646f5ec74413a22fe87e40813c51,2022-04-08_07-37-23,1649403443,...,172.28.0.2,108.169195,0,8,"7_act_fn=LeakyReLU,batch_size=512,hidden_size=...",800,4,LeakyReLU,0.001968,512
c7fdc_00008,0.0008995109,13.663177,True,True,,,1,20d38a907cb343a19ea30487129925ee,2022-04-08_07-37-43,1649403463,...,172.28.0.2,13.663177,0,1,"8_act_fn=LeakyReLU,batch_size=1024,hidden_size...",600,6,LeakyReLU,0.009591,1024
c7fdc_00009,0.004855514,15.613279,True,True,,,1,bdb9232e0e6b4965ab84d80f8605b02b,2022-04-08_07-38-04,1649403484,...,172.28.0.2,15.613279,0,1,"9_act_fn=LeakyReLU,batch_size=512,hidden_size=...",400,6,LeakyReLU,0.000244,512


In [None]:
# Create the output folder first: `to_csv` does not create missing parent
# directories and would fail on a fresh checkout.
results_path = '../results/asha_binom_results.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
res_df.to_csv(results_path)

In [None]:
# Select the trial whose last reported validation loss is the smallest.
best_trial = result.get_best_trial(metric="loss", mode="min", scope="last")
print("Best trial config: {}".format(best_trial.config))
print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))

Best trial config: {'hidden_size': 600, 'num_layers': 4, 'act_fn': 'ReLU', 'lr': 7.939242104814728e-05, 'batch_size': 1024}
Best trial final validation loss: 6.612644329387398e-06


In [None]:
# Rebuild the architecture of the winning trial; its weights are loaded later.
best_trained_model = Net(
    input_size=input_size,
    output_size=output_size,
    hidden_size=best_trial.config['hidden_size'],
    num_layers=best_trial.config['num_layers'],
    act_fn=best_trial.config['act_fn'],
)

In [None]:
best_trained_model = best_trained_model.to(device)
best_checkpoint_dir = best_trial.checkpoint.value
# `map_location` keeps the restore working when the checkpoint was written on a
# different device than the one selected in `device` (e.g. GPU trial, CPU notebook).
model_state, optimizer_state = torch.load(
    os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device)
best_trained_model.load_state_dict(model_state)

loss_fn = nn.MSELoss()
# NOTE: despite the names, `test_accuracy` returns the mean test-set loss, so
# the value printed below is an MSE, not an accuracy.
test_acc = test_accuracy(best_trained_model, loss_fn)
print("Best trial test set accuracy: {}".format(test_acc))

Best trial test set accuracy: 6.612037077677277e-06


- [Pytorch Tutorial](https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html)
- [ASHA Paper](https://openreview.net/pdf?id=S1MAriC5F7)