# DATA

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))



Found GPU at: /device:GPU:0


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and result paths used
# later in this notebook live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import sklearn
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, ParameterGrid
from sklearn.preprocessing import StandardScaler
from datetime import datetime

# Lower TensorFlow's C++ log verbosity.
# NOTE(review): TensorFlow is already imported at this point, so this setting
# may come too late to affect logging -- confirm, or set it before the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

# Seed NumPy as well as TensorFlow: scikit-learn falls back to NumPy's global
# RNG whenever no explicit random_state is supplied, so seeding only
# TensorFlow leaves the hyperparameter sampling non-reproducible.
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# Load the prepared dataset from the mounted Drive folder.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was written to the CSV -- confirm whether it is needed.
data = pd.read_csv('/content/drive/MyDrive/THESIS/data/final_data.csv')
# Preview the first rows as a quick check of the columns and values.
data.head()

Unnamed: 0,r,sigma,T,s0,k,t,asset,call,asset_greater_call,scaled_call,scaled_asset,tau,moneyness,d1,d2,delta,gamma,theta,vega,rho
0,0.05,0.05,0.1,10.0,10.0,0.0,10.0,0.090978,True,0.009098,1.0,0.1,1.0,0.324133,0.308322,0.627082,2.39401,-0.608243,1.197005,0.617984
1,0.05,0.05,0.1,10.0,10.0,1.0,9.977024,0.07124,True,0.007124,1.0,0.09,0.997702,0.154153,0.139153,0.561255,2.634254,-0.604191,1.179973,0.497558
2,0.05,0.05,0.1,10.0,10.0,2.0,9.979236,0.066344,True,0.006634,1.0,0.08,0.997924,0.142939,0.128797,0.556831,2.798086,-0.62283,1.114591,0.439232
3,0.05,0.05,0.1,10.0,10.0,3.0,10.085137,0.133263,True,0.013326,1.0,0.07,1.008514,0.91204,0.898811,0.819126,1.972797,-0.657203,0.702286,0.568942
4,0.05,0.05,0.1,10.0,10.0,4.0,9.964273,0.045868,True,0.004587,1.0,0.06,0.996427,-0.04116,-0.053407,0.483584,3.266261,-0.644005,0.972889,0.286362


In [None]:
# Model inputs and regression target.
X = data[['r','sigma','tau','moneyness']]
y = data['scaled_call']

# Split BEFORE scaling and fit the scaler on the training rows only, so that
# the mean/variance of the held-out test rows never influences training
# (fitting the scaler on the full dataset leaks test-set statistics).
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1234)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X as a scaled array (as before), now using the training statistics.
X = scaler.transform(X)

# NEURAL NET MODEL

In [None]:
pip install -q -U tensorflow_addons

[?25l[K     |▎                               | 10 kB 28.8 MB/s eta 0:00:01[K     |▋                               | 20 kB 32.4 MB/s eta 0:00:01[K     |▉                               | 30 kB 12.3 MB/s eta 0:00:01[K     |█▏                              | 40 kB 9.2 MB/s eta 0:00:01[K     |█▌                              | 51 kB 4.7 MB/s eta 0:00:01[K     |█▊                              | 61 kB 5.6 MB/s eta 0:00:01[K     |██                              | 71 kB 5.6 MB/s eta 0:00:01[K     |██▍                             | 81 kB 5.6 MB/s eta 0:00:01[K     |██▋                             | 92 kB 6.3 MB/s eta 0:00:01[K     |███                             | 102 kB 5.2 MB/s eta 0:00:01[K     |███▏                            | 112 kB 5.2 MB/s eta 0:00:01[K     |███▌                            | 122 kB 5.2 MB/s eta 0:00:01[K     |███▉                            | 133 kB 5.2 MB/s eta 0:00:01[K     |████                            | 143 kB 5.2 MB/s eta 0:00:01[K  

In [None]:
from keras.models import Sequential, Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from keras.layers import Embedding, LSTM, Dense, BatchNormalization, Input, Flatten, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow_addons.optimizers import CyclicalLearningRate
from tensorflow.keras.initializers import RandomUniform, GlorotUniform, HeUniform
from keras import backend as K
import tensorflow_addons as tfa

In [None]:
def create_model(activation, lr_0, batch_norm, dropout_rate, layer_number, neuron_number,
                 neuron_decrease, data_length, initializer):
    """Build and compile a fully connected Keras regression network.

    Architecture: a Dense input layer on 4 features, then ``layer_number``
    hidden Dense layers (each optionally preceded by BatchNormalization), an
    optional final BatchNormalization, a Dropout layer and a 1-unit output.

    Parameters
    ----------
    activation
        Activation used by every Dense layer, including the output layer.
    lr_0
        Learning rate passed to the Adam optimizer.
    batch_norm
        When truthy, BatchNormalization layers are inserted.
    dropout_rate
        Rate of the single Dropout layer placed before the output layer.
    layer_number
        Number of hidden Dense layers added after the input layer.
    neuron_number
        Width of the input Dense layer.
    neuron_decrease
        Divisor applied to the layer width before each hidden layer.
    data_length
        Not used inside this function; kept so existing callers and
        hyperparameter grids that supply it keep working.
    initializer
        Kernel and bias initializer of the first Dense layer.
        NOTE(review): hidden and output layers keep the Keras defaults --
        confirm that limiting the initializer to the first layer is intended.

    Returns
    -------
    The compiled ``Sequential`` model (MSE loss, R-squared metric).
    """
    opt = Adam(learning_rate=lr_0)

    model = Sequential()
    model.add(Dense(neuron_number, input_shape=(4,), activation=activation,
                    kernel_initializer=initializer, bias_initializer=initializer))

    for _ in range(layer_number):
        if batch_norm:
            model.add(BatchNormalization())
        # Shrink the width for the next layer, but never below one unit:
        # repeated division could otherwise truncate to an invalid width of 0.
        neuron_number = max(1, int(neuron_number / neuron_decrease))
        model.add(Dense(neuron_number, activation=activation))

    if batch_norm:
        model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, name='Final_1D_output', activation=activation))

    # NOTE(review): run_eagerly=True disables graph compilation of the train
    # step -- confirm it is still required here.
    model.compile(optimizer=opt, loss='mean_squared_error',
                  metrics=[tfa.metrics.RSquare(dtype=tf.float32, y_shape=(1,))],
                  run_eagerly=True)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only adds a stray "None" line to the output.
    model.summary()
    return model

def callback_list(patience):
    """Create the early-stopping callback used during training.

    Despite the name, a single ``EarlyStopping`` instance is returned (not a
    list). It monitors ``val_loss``, stops after ``patience`` epochs without
    improvement, and restores the best weights seen.
    """
    return EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True,
    )

In [None]:
# Early-stopping callback with a patience of 2 epochs; it is handed to the
# hyperparameter search fit further down.
callbacks = callback_list(patience=2)

# HYPERPARAMETER TUNING: SKLEARN

In [None]:
# Wrap the Keras builder so scikit-learn's search utilities can drive it.
sklearn_model = KerasRegressor(build_fn=create_model)

# Candidate values for every create_model argument plus the training batch size.
params = {
    'activation': ['relu'],
    'lr_0': [0.001, 0.0001, 0.00001],
    'batch_norm': [True, False],
    'dropout_rate': [0.0, 0.1],
    'layer_number': [3],
    'neuron_decrease': [1, 2],
    'neuron_number': [512, 256, 128, 64],
    'data_length': [X.shape[0]],
    'batch_size': [512, 1024, 2048, 4096],
    'initializer': [RandomUniform(), GlorotUniform()],
}

  """Entry point for launching an IPython kernel.


In [None]:
# Number of distinct hyperparameter combinations in the full grid.
len(ParameterGrid(params))

768

In [None]:
# Sample 100 of the grid's combinations and score each with 3-fold CV.
# random_state pins which combinations are drawn, so a re-run evaluates the
# same candidates instead of a different random subset each time.
random_search = RandomizedSearchCV(sklearn_model, param_distributions=params, n_iter=100,
                                   cv=3, verbose=3, random_state=1234)

We fit and save the model in the same cell so that it can be left running in the background with the Colab Pro+ feature. We save the model, its weights and the optimal parameters so that they can be used in another notebook with the already-trained NN. Finally, we print the total running time in seconds.

In [None]:
import pickle

# Every artefact of this search is written into this Drive folder. Create it
# up front so the long-running search cannot finish and then fail at save time
# because the directory is missing.
results_dir = "/content/drive/MyDrive/THESIS/results_search"
os.makedirs(results_dir, exist_ok=True)

start = datetime.now()

random_search_results = random_search.fit(X_train, y_train, callbacks=[callbacks],
                                          validation_split=0.2, epochs=50)

end = datetime.now()

# Underlying Keras model of the best estimator found by the search.
best_model = random_search_results.best_estimator_.model

# serialize model to JSON
model_json = best_model.to_json()
with open(os.path.join(results_dir, "model_r.json"), "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
best_model.save_weights(os.path.join(results_dir, "model_r.h5"))
print("Saved model to disk")

# Persist the winning hyperparameters; the context manager closes the file
# even if pickling raises.
search_params = random_search.best_params_
with open(os.path.join(results_dir, "search_params_r.pkl"), "wb") as a_file:
    pickle.dump(search_params, a_file)
print("Saved params to disk")

# Training history recorded on the best model.
model_hist = best_model.history
train_hist = pd.DataFrame(model_hist.history)
train_hist.to_csv(os.path.join(results_dir, 'train_hist_r.csv'), index=False)
print("Saved history to disk")

# Wall-clock duration of the search fit only (saving is not included).
total_time = (end-start).total_seconds()
print(total_time)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 dense_945 (Dense)           (None, 64)                4160      
                                                                 
 dense_946 (Dense)           (None, 64)                4160      
                                                                 
 dense_947 (Dense)           (None, 64)                4160      
                                                                 
 dropout_236 (Dropout)       (None, 64)                0         
                                                                 
 Final_1D_output (Dense)     (None, 1)                 65        
                                                                 
Total params: 12,865
Trainable params: 12,865
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
[CV 3/3] END activation