**Hyperparameter Tuning Code for RL-ANN (Delta Loss) with a GBM Path Simulator Using GP-Based Bayesian Optimization**

In [None]:
!pip install GpyOpt

Collecting GpyOpt
  Downloading GPyOpt-1.2.6.tar.gz (56 kB)
[?25l[K     |█████▊                          | 10 kB 27.0 MB/s eta 0:00:01[K     |███████████▌                    | 20 kB 33.3 MB/s eta 0:00:01[K     |█████████████████▎              | 30 kB 23.3 MB/s eta 0:00:01[K     |███████████████████████         | 40 kB 18.7 MB/s eta 0:00:01[K     |████████████████████████████▉   | 51 kB 20.0 MB/s eta 0:00:01[K     |████████████████████████████████| 56 kB 4.0 MB/s 
Collecting GPy>=1.8
  Downloading GPy-1.10.0.tar.gz (959 kB)
[K     |████████████████████████████████| 959 kB 28.0 MB/s 
Collecting paramz>=0.9.0
  Downloading paramz-0.9.5.tar.gz (71 kB)
[K     |████████████████████████████████| 71 kB 10.4 MB/s 
Building wheels for collected packages: GpyOpt, GPy, paramz
  Building wheel for GpyOpt (setup.py) ... [?25l[?25hdone
  Created wheel for GpyOpt: filename=GPyOpt-1.2.6-py3-none-any.whl size=83609 sha256=84f093ce35b7036bf41626bb97372d34775764967ed1418961694041865e62fd


In [None]:
# Colab only: mount Google Drive under /content/gdrive so that files stored
# on Drive can be read and written from this notebook.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Move into the project folder using its absolute path on the mounted Drive.
# A relative `cd RL_ANN/` only works if an earlier (now missing) cell already
# changed into the parent folder, so it fails after Restart & Run All.
import os
os.chdir('/content/gdrive/MyDrive/Research Projects/Year 2 Summer Research/RL-Option Pricing/RL_ANN')

/content/gdrive/MyDrive/Research Projects/Year 2 Summer Research/RL-Option Pricing/RL_ANN


In [None]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
# Select the non-interactive Agg backend (figures are rendered off-screen).
matplotlib.use('Agg')
# Seed NumPy's global RNG (used by the path simulator and batch sampler below).
np.random.seed(496)
import tensorflow as tf
# Seed TensorFlow's global RNG.
tf.random.set_seed(496)
# Keras layers default to float32 weights and computations.
tf.keras.backend.set_floatx('float32')
# NOTE(review): `directory` is not referenced anywhere else in the visible code.
directory = '.'
import os
import pickle
import csv
import os.path
import GPyOpt
import math
# Enumerate CUDA devices in PCI bus order so the index below is stable.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Expose only the GPU with index 1 to this process.
# NOTE(review): on a single-GPU machine (device index 0) this hides the GPU;
# these variables are also set after TensorFlow is imported — confirm both are intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

GP=True # Switch for the Gaussian-process (Bayesian optimisation) search; the loop at the bottom does nothing when False
# Output folder for all tuning artefacts. Note that '__file__' is a string
# literal here, so realpath resolves it against the current working directory:
# out_dir == <cwd>/Hyperparameters_Tuning/GBM_Delta/
out_dir = os.path.split(os.path.realpath('__file__'))[0]+'/Hyperparameters_Tuning/GBM_Delta/'

# execute_RL_ANN: the training function called once per hyperparameter-tuning trial
# x = [neurons in hidden layer 1, neurons in hidden layer 2, activation_function, learning_rate_scheduler, number of batches, batch_size]
def execute_RL_ANN(x):
    """Train one RL-ANN pricing network for a single hyperparameter combination.

    A dense network with two hidden layers is built from ``x`` and trained for
    200 epochs with the delta-hedging loss on options sampled from a simulated
    geometric-Brownian-motion stock path. After every epoch the loss is
    evaluated on one fixed batch sampled from a second simulated path.

    Parameters
    ----------
    x : array-like, shape (1, 6) or (6,)
        ``[hidden units layer 1, hidden units layer 2, activation code,
        learning-rate-schedule code, batches per epoch, batch size]``.
        Activation codes: 0 = tanh, 1 = ReLU, anything else = sigmoid.
        Schedule codes: 0 = cosine decay with restarts, 1 = exponential decay,
        anything else = piecewise-constant decay.

    Returns
    -------
    tuple
        ``(hidden_layers, activation_x, learning_scheduler, n_batches,
        batch_size, losses)``. ``hidden_layers`` is a list of two ints, the
        next four entries are the matching length-1 column slices of ``x``,
        and ``losses`` is a list with one validation-loss tensor per epoch.

    Raises
    ------
    ValueError
        If ``x`` holds more than one hyperparameter combination.
    """
    # The optimiser passes a 2-D array with one row per candidate. Pull plain
    # scalars out of that row once: calling int() or `==` on a 1-element array
    # relies on array-to-scalar conversion, which NumPy has deprecated.
    x = np.atleast_2d(x)
    if x.shape[0] != 1:
        raise ValueError("execute_RL_ANN expects exactly one hyperparameter combination, got %d" % x.shape[0])
    row = x[0]
    hidden_layers = [int(row[0]), int(row[1])]  # hn1, hn2
    activation_code = row[2]
    scheduler_code = row[3]
    n_batches_int = int(row[4])
    batch_size_int = int(row[5])

    # Values handed back to the caller keep the original array form.
    activation_x = x[:, 2]
    learning_scheduler = x[:, 3]
    n_batches = x[:, 4]
    batch_size = x[:, 5]

    # network for synthetic data
    if activation_code == 0:
        activation = tf.tanh
    elif activation_code == 1:
        activation = tf.nn.relu
    else:
        activation = tf.sigmoid
    n_outputs = 1

    # NOTE(review): Keras schedules are functions of the optimizer step (one
    # step per batch), not of the epoch — confirm that first_decay_steps=100 and
    # the boundaries [50, 100, 150] are really meant as step counts.
    if scheduler_code == 0:
        learning_rate = tf.keras.optimizers.schedules.CosineDecayRestarts(initial_learning_rate = 5e-4, first_decay_steps = 100, t_mul=2.0, m_mul=1.0, alpha=0.0,name=None)
    elif scheduler_code == 1:
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate = 5e-4, decay_steps = 100000, decay_rate=0.96, staircase=False, name=None)
    else:
        learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries = [50,100,150], values = [5e-4, 1e-4, 5e-5, 1e-5], name=None)

    # Two inputs (moneyness, time to maturity) -> hidden layers -> one
    # softplus output, so the network output is always positive.
    dense_layers = [tf.keras.layers.Dense(hidden_layers[0], activation = activation, input_shape=(2,))]
    dense_layers += [tf.keras.layers.Dense(units, activation = activation) for units in hidden_layers[1:]]
    dense_layers.append(tf.keras.layers.Dense(n_outputs, activation = tf.keras.activations.softplus))
    ann = tf.keras.Sequential(layers=dense_layers, name="ann")
    # define optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    # define loss function
    hedging_mse = tf.keras.losses.MeanSquaredError()

    # input processing
    def process_input(X_input, X_input_):
        """Split the batch matrices into float32 column tensors.

        ``X_input`` carries columns (S, K, T); ``X_input_`` carries (S_, T_).
        ``r`` is a column of zeros (interest rate, if applicable).
        """
        r = tf.fill([tf.shape(input=X_input)[0],1], 0., name = 'r') # interest rate, if applicable
        S = tf.cast(tf.slice(X_input, (0,0), (-1,1)), tf.float32)
        K = tf.cast(tf.slice(X_input, (0,1), (-1,1)), tf.float32)
        T = tf.cast(tf.slice(X_input, (0,2), (-1,1)), tf.float32)
        S_ = tf.cast(tf.slice(X_input_, (0,0), (-1,1)), tf.float32)
        T_ = tf.cast(tf.slice(X_input_, (0,1), (-1,1)), tf.float32)
        return S, K, T, S_, T_, r

    # Simulating geometric Brownian motion -- the stock simulator
    def stock_sim_path(S, alpha, delta, sigma, T, N, n):
        """Simulates geometric Brownian motion; returns an (n, N) array of prices."""
        h = T/n
        mean = (alpha - delta - .5*sigma**2)*h
        vol = sigma * h**.5
        return S*np.exp((mean + vol*np.random.randn(n,N)).cumsum(axis = 0))

    def get_batch2(stock_path, n, moneyness_range = (.5,2)):
        """Constructs ``n`` theoretical options based on the time series stock_path.

        Returns ``X`` with columns (S, K, T/250) and ``X_`` with columns
        (next-step S, (T-1)/250), i.e. the same options one time step later.
        """
        picks = np.random.randint(0, len(stock_path)-1, n)
        T = np.random.randint(1, 500, (n,1))
        S = stock_path[picks]
        S_ = stock_path[picks+1]
        K = np.random.uniform(*moneyness_range, (n,1))*S
        X = np.hstack([S, K, T/250])
        X_ = np.hstack([S_, (T-1)/250])
        return X, X_

    @tf.function
    def loss(X_input, X_input_):
        ''' Loss Function with only Delta term expanded using delta-hedging principle'''
        S, K, T, S_, T_, r = process_input(X_input, X_input_)
        with tf.GradientTape() as tape2:
            tape2.watch(S)
            X = tf.concat([S/(K*tf.exp(-r*T)), T], 1) #input matrix for ANN
            X_ = tf.concat([S_/(K*tf.exp(-r*T_)), T_], 1) #input matrix for ANN_

            out = ann(X)
            # For (near-)expired options use the call payoff instead of the network output.
            out = K*tf.where(tf.greater(T, 1e-3), out, tf.maximum(S/K - 1, 0))
            out_ = ann(X_)
            out_ = K*tf.where(tf.greater(T_, 1e-3), out_, tf.maximum(S_/K - 1, 0))
        delta = tape2.gradient(out, S)
        # Clip the hedge ratio to [0, 1].
        delta = tf.maximum(delta,0)
        delta = tf.minimum(delta, 1)
        # The hedge P&L delta*(S_-S) should match the change in model price.
        return hedging_mse(delta*(S_-S), out_-out)

    @tf.function
    def grad(X_train, X_train_):
        """Return the loss and its gradients w.r.t. the network weights."""
        with tf.GradientTape() as tape:
            tape.watch(ann.trainable_variables)
            loss_value = loss(X_train, X_train_)
        return loss_value, tape.gradient(loss_value, ann.trainable_variables)

    @tf.function
    def training_op(X_train, X_train_):
        """Apply one optimizer step on the given batch."""
        loss_value, grads = grad(X_train, X_train_)
        optimizer.apply_gradients(zip(grads, ann.trainable_variables))

    # model training
    n_epochs = 200 #number of training epochs
    T=2
    days = int(250*T)
    stock_path = stock_sim_path(100, .06386, 0, .07425, T, 1, days) #simulate stock path
    stock_path_test = stock_sim_path(100, .06386, 0, .07425, T, 1, days) #simulate stock path for cross-validation
    losses = []
    X_test, X_test_ = get_batch2(stock_path_test, batch_size_int) #get test-set
    print("START TRAINING")
    print("Hyperparameters_Set:", "Number of Hidden Units:", hidden_layers[0], hidden_layers[1], "Activation Function:", int(activation_code), "Learning_Scheduler:", int(scheduler_code),
          "Number of Batches:", n_batches_int, "Batch Size:", batch_size_int)
    for epoch in range(int(n_epochs)):
        for batch in range(n_batches_int):
            X_train, X_train_ = get_batch2(stock_path, batch_size_int) # get batch of theoretical options
            training_op(X_train, X_train_)
        epoch_loss = loss(X_test, X_test_)
        losses.append(epoch_loss)
        print('Epoch:', epoch, 'Loss:', epoch_loss.numpy())

    # Draw the learning curve, then release the figure. This function runs
    # once per tuning trial, so figures left open would pile up in memory.
    fig, ax = plt.subplots()
    ax.semilogy(np.arange(n_epochs), losses)
    ax.set_xlim([0, n_epochs])
    plt.close(fig)
    return hidden_layers, activation_x, learning_scheduler, n_batches, batch_size, losses

# myfunc(x): takes one hyperparameter combination x, trains with it, appends the combination to a CSV file, and returns the resulting loss
def myfunc(x):
    """Objective function for the optimiser: train once and score the result.

    Runs ``execute_RL_ANN(x)``, averages the validation loss over the last 50
    epochs, appends one ``;``-separated row to
    ``Hyperparameters_tuning_results_RL_ANN.csv`` and returns the average.

    Parameters
    ----------
    x : array-like
        One hyperparameter combination, forwarded unchanged to
        ``execute_RL_ANN``.

    Returns
    -------
    float
        Mean validation loss over the final 50 epochs (lower is better).
    """
    hidden_layers, activation, learning_rate, n_batches, batch_size, losses = execute_RL_ANN(x)
    out_dir = os.path.split(os.path.realpath('__file__'))[0]+'/Hyperparameters_Tuning/GBM_Delta/'
    # Appending fails if the folder is missing, so create it on first use.
    os.makedirs(out_dir, exist_ok=True)

    # Last 50 epochs *including* the final one; the slice [-51:-1] would drop
    # the final epoch and average epochs n-51 .. n-2 instead.
    avg_loss = float(np.mean(losses[-50:]))

    def _as_int(value):
        # Hyperparameters come back as length-1 arrays; unwrap them so the CSV
        # holds plain numbers rather than array reprs such as "[2.]".
        return int(np.ravel(value)[0])

    # Row layout: hn1; hn2; activation; scheduler; n_batches; batch_size; avg_loss
    myCsvRow = [int(units) for units in hidden_layers] + [
        _as_int(activation),
        _as_int(learning_rate),
        _as_int(n_batches),
        _as_int(batch_size),
        avg_loss,
    ]
    print("New row : ", myCsvRow)
    # newline="" lets the csv module control line endings itself.
    with open(out_dir+"Hyperparameters_tuning_results_RL_ANN.csv", "a", newline="") as file:
        writer = csv.writer(file, delimiter=";")
        writer.writerow(myCsvRow)
    return avg_loss

# Search space (hypercube) for the optimiser: six discrete axes, each offering
# three candidate values. The axis order matches the layout of the vector x
# consumed by execute_RL_ANN.
bounds = [
    {'name': axis_name, 'type': 'discrete', 'domain': candidates, 'dimensionality': 1}
    for axis_name, candidates in (
        ('nh', (32, 64, 128)),                      # units in hidden layer 1
        ('nh2', (32, 64, 128)),                     # units in hidden layer 2
        ('activation', (0, 1, 2)),                  # 0 tanh, 1 relu, 2 sigmoid
        ('learning_scheduler', (0, 1, 2)),          # 0 cosine, 1 exponential, 2 piecewise
        ('nbatches', (1000, 1500, 2000)),           # batches per epoch
        ('batch_size', (10000, 15000, 20000)),      # options per batch
    )
]

# Sequential Bayesian optimisation. Every pass performs a single acquisition
# step (max_iter=1) and writes the evaluations back to disk, so an interrupted
# run resumes from the saved file instead of starting over.
for i in range(103): # 103 passes; each one updates the GP's belief about the good region
    if not GP:
        continue

    # The report/model/evaluation files below are written into out_dir.
    os.makedirs(out_dir, exist_ok=True)
    ev_path = out_dir + "ev_file_RL_ANN"

    # Warm-start only from a non-empty evaluations file: a zero-byte
    # placeholder would make pd.read_csv raise.
    existing_bayesian_optimizer = os.path.isfile(ev_path) and os.path.getsize(ev_path) > 0
    if existing_bayesian_optimizer:
        evals = pd.read_csv(ev_path, index_col=0, delimiter="\t")
        # GPyOpt expects Y as a column vector of shape (n_evaluations, 1).
        Y = np.array([[y] for y in evals["Y"]], dtype=np.float32)
        # Select exactly the var_1 ... var_k input columns. The unanchored
        # pattern "var*" means "va" followed by any number of "r" and can
        # match unrelated column names.
        X = np.array(evals.filter(regex=r"^var_\d+$"), dtype=np.float32)
    else:
        # Cold start: with no prior data the optimiser is asked for
        # initial_design_numdata=3 starting points.
        X, Y = None, None

    myBopt = GPyOpt.methods.BayesianOptimization(f=myfunc, domain=bounds, constraints=None, Y=Y, X=X, initial_design_numdata=3, maximize=False)
    myBopt.run_optimization(max_iter=1, verbosity=True, report_file=out_dir+"bayesian_optimizer_RL_ANN_log.txt", models_file=out_dir+"bayes_opt_RL_ANN_model", eps=0)
    myBopt.save_evaluations(ev_path)
    myBopt.save_models(out_dir+"mod_file_RL_ANN")



START TRAINING
Hyperparameters_Set: Number of Hidden Units: 64 64 Activation Function: 2 Learning_Scheduler: 1 Number of Batches: 1000 Batch Size: 15000


KeyboardInterrupt: ignored