In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, concatenate
from keras.optimizers import RMSprop, SGD, Adam
from keras.constraints import non_neg
from keras.layers import Multiply
from keras.layers import dot
from keras import layers

#### Data

### Cut Test Data row
euro : 68,023
s&p : 162,908

In [2]:
# S&P 500
call_option = pd.read_csv("snp_revised_data.csv")
call_option.head()

Unnamed: 0.1,Unnamed: 0,underlying,underlying_last,expiration,quotedate,strike,price,impliedvol,delta,gamma,theta,vega,interest rate,moneyness,tau,moneyness_class,tau_class
0,0,SPX,2726.62,2018-07-20,2018-07-02,800,1924.25,0.0996,1.0,0.0,-18.6218,0.0,0.02,3.408275,0.075397,itm,shortterm
1,1,SPX,2726.62,2018-07-20,2018-07-02,900,1824.4,0.0996,1.0,0.0,-20.9495,0.0,0.02,3.029578,0.075397,itm,shortterm
2,2,SPX,2726.62,2018-07-20,2018-07-02,1000,1724.55,0.0996,1.0,0.0,-23.2772,0.0,0.02,2.72662,0.075397,itm,shortterm
3,3,SPX,2726.62,2018-07-20,2018-07-02,1050,1674.6,0.0996,1.0,0.0,-24.4411,0.0,0.02,2.596781,0.075397,itm,shortterm
4,4,SPX,2726.62,2018-07-20,2018-07-02,1100,1624.65,0.0996,1.0,0.0,-25.6049,0.0,0.02,2.478745,0.075397,itm,shortterm


In [3]:
# inverse moneyness
call_option['inverse_moneyness']=1/call_option['moneyness']

# transform market price: y=c*exp(rt)/St
call_option['transform']=call_option['price']*np.exp(call_option['interest rate']*call_option['tau'])/call_option['underlying_last']

# # create moneyness_class for call option
# call_option['moneyness_class'] = np.where(call_option['moneyness']>1.05,'itm','atm')
# call_option['moneyness_class'] = np.where(call_option['moneyness']<0.97,'otm',call_option['moneyness_class'])

# # create tau_class for call option
# call_option['tau_class'] = np.where(call_option['tau']>0.2,'longterm','midterm')
# call_option['tau_class'] = np.where(call_option['tau']<0.1,'shortterm',call_option['tau_class'])

#call_option

In [5]:
# train, validation, test
data_train = call_option.iloc[:int(len(call_option)*0.7),:]
data_validation = call_option.iloc[int(len(call_option)*0.7):162908,:]
data_test= call_option.iloc[162908:,:]        

### model

In [6]:
from keras import backend as K
def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

In [7]:
# 9 - model
# left multi model 
# model1
model_1_left_input = keras.layers.Input(shape=(1, ))
model_1_right_input = keras.layers.Input(shape=(1, ))
model_1_left_dense = Dense(5, activation="softplus")(model_1_left_input)
model_1_right_dense = Dense(5, activation="sigmoid")(model_1_right_input)

model_1_multi = layers.multiply([model_1_left_dense, model_1_right_dense])
model_1_out = Dense(1, activation="linear", use_bias=False)(model_1_multi)
model_1 = keras.models.Model(inputs=[model_1_left_input, model_1_right_input], outputs=model_1_out)

# model 2
model_2_left_input = keras.layers.Input(shape=(1, ))
model_2_right_input = keras.layers.Input(shape=(1, ))
model_2_left_dense = Dense(5, activation="softplus")(model_2_left_input)
model_2_right_dense = Dense(5, activation="sigmoid")(model_2_right_input)

model_2_multi = layers.multiply([model_2_left_dense, model_2_right_dense])
model_2_out = Dense(1, activation="linear", use_bias=False)(model_2_multi)
model_2 = keras.models.Model(inputs=[model_2_left_input,model_2_right_input], outputs = model_2_out)

# model3
model_3_left_input = keras.layers.Input(shape=(1, ))
model_3_right_input = keras.layers.Input(shape=(1, ))
model_3_left_dense = Dense(5, activation="softplus")(model_3_left_input)
model_3_right_dense = Dense(5, activation="sigmoid")(model_3_right_input)

model_3_multi = layers.multiply([model_3_left_dense, model_3_right_dense])
model_3_out = Dense(1, activation="linear", use_bias=False)(model_3_multi)
model_3 = keras.models.Model(inputs=[model_3_left_input,model_3_right_input], outputs = model_3_out)

# model4
model_4_left_input = keras.layers.Input(shape=(1, ))
model_4_right_input = keras.layers.Input(shape=(1, ))
model_4_left_dense = Dense(5, activation="softplus")(model_4_left_input)
model_4_right_dense = Dense(5, activation="sigmoid")(model_4_right_input)

model_4_multi = layers.multiply([model_4_left_dense, model_4_right_dense])
model_4_out = Dense(1, activation="linear", use_bias=False)(model_4_multi)
model_4 = keras.models.Model(inputs=[model_4_left_input,model_4_right_input], outputs = model_4_out)

# model_5
model_5_left_input = keras.layers.Input(shape=(1, ))
model_5_right_input = keras.layers.Input(shape=(1, ))
model_5_left_dense = Dense(5, activation="softplus")(model_5_left_input)
model_5_right_dense = Dense(5, activation="sigmoid")(model_5_right_input)

model_5_multi = layers.multiply([model_5_left_dense, model_5_right_dense])
model_5_out = Dense(1, activation="linear", use_bias=False)(model_5_multi)
model_5 = keras.models.Model(inputs=[model_5_left_input,model_5_right_input], outputs = model_5_out)

# model_6
model_6_left_input = keras.layers.Input(shape=(1, ))
model_6_right_input = keras.layers.Input(shape=(1, ))
model_6_left_dense = Dense(5, activation="softplus")(model_6_left_input)
model_6_right_dense = Dense(5, activation="sigmoid")(model_6_right_input)

model_6_multi = layers.multiply([model_6_left_dense, model_6_right_dense])
model_6_out = Dense(1, activation="linear", use_bias=False)(model_6_multi)
model_6 = keras.models.Model(inputs=[model_6_left_input,model_6_right_input], outputs = model_6_out)

# model_7
model_7_left_input = keras.layers.Input(shape=(1, ))
model_7_right_input = keras.layers.Input(shape=(1, ))
model_7_left_dense = Dense(5, activation="softplus")(model_7_left_input)
model_7_right_dense = Dense(5, activation="sigmoid")(model_7_right_input)

model_7_multi = layers.multiply([model_7_left_dense, model_7_right_dense])
model_7_out = Dense(1, activation="linear", use_bias=False)(model_7_multi)
model_7 = keras.models.Model(inputs=[model_7_left_input,model_7_right_input], outputs = model_7_out)

# model_8
model_8_left_input = keras.layers.Input(shape=(1, ))
model_8_right_input = keras.layers.Input(shape=(1, ))
model_8_left_dense = Dense(5, activation="softplus")(model_8_left_input)
model_8_right_dense = Dense(5, activation="sigmoid")(model_8_right_input)

model_8_multi = layers.multiply([model_8_left_dense, model_8_right_dense])
model_8_out = Dense(1, activation="linear", use_bias=False)(model_8_multi)
model_8 = keras.models.Model(inputs=[model_8_left_input,model_8_right_input], outputs = model_8_out)

# model_9
model_9_left_input = keras.layers.Input(shape=(1, ))
model_9_right_input = keras.layers.Input(shape=(1, ))
model_9_left_dense = Dense(5, activation="softplus")(model_9_left_input)
model_9_right_dense = Dense(5, activation="sigmoid")(model_9_right_input)

model_9_multi = layers.multiply([model_9_left_dense, model_9_right_dense])
model_9_out = Dense(1, activation="linear", use_bias=False)(model_9_multi)
model_9 = keras.models.Model(inputs=[model_9_left_input,model_9_right_input], outputs = model_9_out)


model_concat = concatenate([model_1_out, model_2_out, model_3_out,
                           model_4_out, model_5_out, model_6_out,
                           model_7_out, model_8_out, model_9_out,])

# right model
model_right_input = keras.layers.Input(shape=(2, ))
model_right_1 = Dense(5, activation="sigmoid")(model_right_input)
model_right_2 = Dense(9, activation="softplus")(model_right_1)

# final model
model_final = dot([model_concat,model_right_2],axes = 1,normalize=False)
model = keras.models.Model(inputs=[model_1_left_input, model_1_right_input,
                                   model_2_left_input, model_2_right_input,
                                   model_3_left_input, model_3_right_input,
                                   model_4_left_input, model_4_right_input,
                                   model_5_left_input, model_5_right_input,
                                   model_6_left_input, model_6_right_input,
                                   model_7_left_input, model_7_right_input,
                                   model_8_left_input, model_8_right_input,
                                   model_9_left_input, model_9_right_input, model_right_input],
                           outputs = model_final)
# keras.utils.plot_model(model, show_shapes=True)

# model compile
model.compile(loss='mean_squared_error',
                      optimizer = Adam(), #RMSprop(),SGD(),Adam()
                      metrics=['mape', root_mean_squared_error])              

In [9]:
#keras.utils.plot_model(model, show_shapes=True)

### fit data

In [8]:
# normalize
sc1 = StandardScaler() # fit training feature1
sc2 = StandardScaler() # fit training feature2
sc3 = StandardScaler() # fit validation feature1 and/or target (shared without reuse conflict)
sc4 = StandardScaler() # fit validation feature2,can reuse
sc5 = StandardScaler() # fit training target, inverse transform for test prediction
sc7 = StandardScaler()
sc8 = StandardScaler()  # can reuse

# early stopping
es = keras.callbacks.EarlyStopping(monitor='val_loss',min_delta=0,patience=30, verbose=2, mode='auto') 

# inverse_moneyness - fit
X1_train = sc1.fit_transform(-data_train[['inverse_moneyness']])
X2_train = sc2.fit_transform(data_train[['tau']])
X1_validation = sc3.fit_transform(-data_validation[['inverse_moneyness']])
X2_validation = sc4.fit_transform(data_validation[['tau']])
right_train = sc7.fit_transform(data_train[['inverse_moneyness','tau']])
right_validation = sc8.fit_transform(data_validation[['inverse_moneyness','tau']])

In [10]:
# no kernerl constraint + inverse_moneyness
history = model.fit([X1_train, X2_train, X1_train, X2_train, X1_train, X2_train,
                     X1_train, X2_train, X1_train, X2_train, X1_train, X2_train,
                     X1_train, X2_train, X1_train, X2_train, X1_train, X2_train,
                     right_train], sc5.fit_transform(data_train[['transform']]),
                    batch_size=50, epochs=100, verbose=2,
                    validation_data=([X1_validation, X2_validation, X1_validation, X2_validation, X1_validation, X2_validation,
                                      X1_validation, X2_validation, X1_validation, X2_validation, X1_validation, X2_validation,
                                      X1_validation, X2_validation, X1_validation, X2_validation, X1_validation, X2_validation,
                                      right_validation],
                                     sc3.fit_transform(data_validation[['transform']])), callbacks=[es]) 

Train on 143586 samples, validate on 19322 samples
Epoch 1/100
 - 6s - loss: 0.1011 - mape: 79.6556 - root_mean_squared_error: 0.1052 - val_loss: 0.0044 - val_mape: 11.4144 - val_root_mean_squared_error: 0.0472
Epoch 2/100
 - 5s - loss: 0.0019 - mape: 36.6905 - root_mean_squared_error: 0.0312 - val_loss: 0.0023 - val_mape: 18.4159 - val_root_mean_squared_error: 0.0358
Epoch 3/100
 - 5s - loss: 9.9885e-04 - mape: 25.6794 - root_mean_squared_error: 0.0232 - val_loss: 0.0019 - val_mape: 18.6571 - val_root_mean_squared_error: 0.0330
Epoch 4/100
 - 5s - loss: 7.1865e-04 - mape: 18.7504 - root_mean_squared_error: 0.0197 - val_loss: 0.0019 - val_mape: 23.9972 - val_root_mean_squared_error: 0.0346
Epoch 5/100
 - 5s - loss: 5.4008e-04 - mape: 16.6114 - root_mean_squared_error: 0.0173 - val_loss: 0.0015 - val_mape: 31.6051 - val_root_mean_squared_error: 0.0329
Epoch 6/100
 - 5s - loss: 4.0704e-04 - mape: 13.2649 - root_mean_squared_error: 0.0150 - val_loss: 0.0016 - val_mape: 32.0152 - val_root_

Epoch 00050: early stopping


In [36]:
performance = []
years_data = pd.DataFrame()

# inverse_moneyness predict
X1_test = sc1.transform(-data_test[['inverse_moneyness']])
X2_test = sc2.transform(data_test[['tau']])
right_test = sc7.transform(data_test[['inverse_moneyness','tau']])

prediction = model.predict([X1_test, X2_test, X1_test, X2_test, X1_test, X2_test,
                            X1_test, X2_test, X1_test, X2_test, X1_test, X2_test,
                            X1_test, X2_test, X1_test, X2_test, X1_test, X2_test,
                            right_test])

# percentage mean-squared-error
data_test_copy = data_test.copy()
data_test_copy['temp'] = sc5.inverse_transform(prediction)  # inverse transform
data_test_copy['GNN_price'] = data_test_copy['temp']*data_test['underlying_last']*np.exp(-data_test['interest rate']*data_test['tau'])
data_test_copy['GNN_square_error'] = ((data_test['price'] - data_test_copy['GNN_price'])/data_test['price'])**2
data_test_copy = data_test_copy.drop(['temp'],1)

performance.append(data_test_copy['GNN_square_error'].mean())     
years_data = years_data.append(data_test_copy) 

# GNN Price Clipping
years_data['GNN_price_clip'] = np.clip(years_data['GNN_price'],0,max(years_data['GNN_price']))

In [39]:
years_data

Unnamed: 0.1,Unnamed: 0,underlying,underlying_last,expiration,quotedate,strike,price,impliedvol,delta,gamma,...,interest rate,moneyness,tau,moneyness_class,tau_class,inverse_moneyness,transform,GNN_price,GNN_square_error,GNN_price_clip
162908,162908,SPX,2641.88,2018-12-21,2018-11-20,100,2534.40,0.1735,0.9980,0.0000,...,0.0225,26.418800,0.126984,itm,midterm,0.037852,0.962062,2545.553923,0.000019,2545.553923
162909,162909,SPX,2641.88,2018-12-21,2018-11-20,200,2434.75,0.1735,0.9980,0.0000,...,0.0225,13.209400,0.126984,itm,midterm,0.075704,0.924234,2444.038908,0.000015,2444.038908
162910,162910,SPX,2641.88,2018-12-21,2018-11-20,250,2384.70,0.1735,0.9980,0.0000,...,0.0225,10.567520,0.126984,itm,midterm,0.094630,0.905235,2393.410941,0.000013,2393.410941
162911,162911,SPX,2641.88,2018-12-21,2018-11-20,300,2334.85,0.1735,0.9980,0.0000,...,0.0225,8.806267,0.126984,itm,midterm,0.113555,0.886312,2342.869178,0.000012,2342.869178
162912,162912,SPX,2641.88,2018-12-21,2018-11-20,400,2235.10,0.1735,0.9980,0.0000,...,0.0225,6.604700,0.126984,itm,midterm,0.151407,0.848447,2242.041435,0.000010,2242.041435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205118,205118,SPX,2506.85,2021-12-17,2018-12-31,3200,71.55,0.1432,0.2231,0.0005,...,0.0250,0.783391,4.297619,otm,longterm,1.276502,0.031779,52.648984,0.069783,52.648984
205119,205119,SPX,2506.85,2021-12-17,2018-12-31,3300,54.80,0.1407,0.1830,0.0004,...,0.0250,0.759652,4.297619,otm,longterm,1.316393,0.024340,37.071626,0.104659,37.071626
205120,205120,SPX,2506.85,2021-12-17,2018-12-31,3400,42.10,0.1392,0.1494,0.0004,...,0.0250,0.737309,4.297619,otm,longterm,1.356284,0.018699,25.659992,0.152489,25.659992
205121,205121,SPX,2506.85,2021-12-17,2018-12-31,3500,32.20,0.1380,0.1209,0.0003,...,0.0250,0.716243,4.297619,otm,longterm,1.396174,0.014302,17.856886,0.198415,17.856886


In [40]:
years_data.to_csv('./GNN_snp_predict.csv')

In [42]:
y_true = years_data['price']
y_pred = years_data['GNN_price']

# MAE
def MAE(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred)))

print("MAE == ", MAE(y_true, y_pred))

# MAPE
def MAPE(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred) / y_true))

print("MAPE == ", MAPE(y_true, y_pred))

# MSE
def MSE(y_true, y_pred):
    return np.mean(np.square((y_true - y_pred)))

print("MSE == ", MSE(y_true, y_pred))

#RMSE
print("RMSE == ", np.sqrt(MSE(y_true, y_pred)))

MAE ==  13.0899211008666
MAPE ==  0.4620056523845975
MSE ==  353.9745836546551
RMSE ==  18.814212278345725


In [41]:
# Clip GNN_price
y_true = years_data['price']
y_pred = years_data['GNN_price_clip']

# MAE
def MAE(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred)))

print("MAE == ", MAE(y_true, y_pred))

# MAPE
def MAPE(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred) / y_true))

print("MAPE == ", MAPE(y_true, y_pred))

# MSE
def MSE(y_true, y_pred):
    return np.mean(np.square((y_true - y_pred)))

print("MSE == ", MSE(y_true, y_pred))

#RMSE
print("RMSE == ", np.sqrt(MSE(y_true, y_pred)))


MAE ==  12.950215485210194
MAPE ==  0.41924975119876695
MSE ==  351.2461148538932
RMSE ==  18.741561163731618
