In [77]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, LeakyReLU, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [74]:
# Load the option data and derive T, the time to expiry in years,
# from the day count stored in column "t".
df = pd.read_excel('ASIANPAINT_Dataset.xlsx').assign(
    T=lambda frame: frame["t"] / 365.0
)
df.head()

Unnamed: 0,Date,Expiry,t,strike_price,underlying_value,sigma,r,close,T
0,2020-01-01,2020-01-30,29,1980,1793.2,0.008151,0.0494,3.8,0.079452
1,2020-01-01,2020-01-30,29,1440,1793.2,0.008151,0.0494,398.5,0.079452
2,2020-01-01,2020-01-30,29,2020,1793.2,0.008151,0.0494,1.2,0.079452
3,2020-01-01,2020-01-30,29,1920,1793.2,0.008151,0.0494,6.5,0.079452
4,2020-01-01,2020-01-30,29,1940,1793.2,0.008151,0.0494,5.0,0.079452


In [75]:
# Print column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35591 entries, 0 to 35590
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Date              35591 non-null  datetime64[ns]
 1   Expiry            35591 non-null  datetime64[ns]
 2   t                 35591 non-null  int64         
 3   strike_price      35591 non-null  int64         
 4   underlying_value  35591 non-null  float64       
 5   sigma             35591 non-null  float64       
 6   r                 35591 non-null  float64       
 7   close             35591 non-null  float64       
 8   T                 35591 non-null  float64       
dtypes: datetime64[ns](2), float64(5), int64(2)
memory usage: 2.4 MB


In [78]:
# Input columns, in the order they are fed to the network.
features = ["underlying_value", "strike_price", "T", "r", "sigma"]

# NumPy design matrix: one row per DataFrame row, one column per feature.
X = df.loc[:, features].to_numpy()

In [79]:
# Relative half-spread applied symmetrically around the close price.
spread = 0.005

# Synthetic quotes: bid sits `spread` below the close, ask `spread` above it.
close_prices = df["close"].to_numpy()
bid = close_prices * (1 - spread)
ask = close_prices * (1 + spread)

# Two-column target matrix: column 0 is bid, column 1 is ask.
y = np.stack([bid, ask], axis=1)

In [80]:
# Same seed for both cuts so the partition is reproducible.
SPLIT_SEED = 42

# First cut: 98 % of the rows are kept for training, 2 % are held out.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.02,
    random_state=SPLIT_SEED,
)

# Second cut: the held-out rows are halved into validation and test sets
# (about 1 % of the data each).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [81]:
# Standardise the features using statistics from the training rows only,
# so nothing from the validation/test rows leaks into the scaling.
scaler = StandardScaler().fit(X_train)

# NOTE: the scaled arrays replace the unscaled ones under the same names.
# Re-running this cell without re-running the split cell first would
# therefore scale already-scaled data.
X_train, X_val, X_test = (
    scaler.transform(part) for part in (X_train, X_val, X_test)
)

In [82]:
def build_mlp2(input_dim, hidden_units=400, n_hidden=3, negative_slope=0.01):
    """Build the fully connected network that predicts a two-value target.

    The network stacks ``n_hidden`` identical blocks of
    Dense -> LeakyReLU -> BatchNormalization and ends with a 2-unit Dense
    layer with ReLU activation, so both outputs are non-negative.

    Args:
        input_dim: Number of input features per sample.
        hidden_units: Width of every hidden Dense layer (default 400).
        n_hidden: Number of hidden blocks (default 3).
        negative_slope: LeakyReLU slope for negative inputs (default 0.01).

    Returns:
        An uncompiled Keras ``Model`` mapping inputs of shape
        ``(input_dim,)`` to 2 outputs.
    """
    inputs = Input(shape=(input_dim,))

    x = inputs
    for _ in range(n_hidden):
        x = Dense(hidden_units, kernel_initializer="glorot_uniform")(x)
        # Keras 3 renamed LeakyReLU's `alpha` argument to `negative_slope`;
        # the old keyword is deprecated there and triggers a warning.
        x = LeakyReLU(negative_slope=negative_slope)(x)
        x = BatchNormalization()(x)

    outputs = Dense(2, activation="relu")(x)
    return Model(inputs, outputs)

In [83]:
def lr_schedule(epoch):
    """Return the step-wise learning rate for a given epoch index.

    The rate is 1e-3 for epochs below 30, 1e-4 below 40, 1e-5 below 50
    and 1e-6 from epoch 50 onwards.
    """
    # (exclusive upper epoch bound, learning rate), in ascending order.
    stages = ((30, 1e-3), (40, 1e-4), (50, 1e-5))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 1e-6


In [84]:
# Seed the Python, NumPy and TensorFlow RNGs before the model is created so
# that weight initialisation and batch shuffling are repeatable across runs.
tf.keras.utils.set_random_seed(42)

model = build_mlp2(X_train.shape[1])

# Adam's default learning rate is only a starting value: the scheduler
# callback below sets the rate at the beginning of every epoch.
model.compile(
    optimizer=Adam(),
    loss="mse",
    metrics=["mse"]
)

lr_callback = LearningRateScheduler(lr_schedule)

# NOTE(review): with batch_size=4096 the training log shows only 9 optimizer
# steps per epoch; consider a smaller batch if the model under-fits.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=60,          # runs 10 epochs into the final 1e-6 stage of lr_schedule
    batch_size=4096,
    callbacks=[lr_callback],
    verbose=1
)




Epoch 1/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 278ms/step - loss: 70603.3594 - mse: 70603.3594 - val_loss: 60697.0898 - val_mse: 60697.0898 - learning_rate: 0.0010
Epoch 2/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 226ms/step - loss: 68125.0547 - mse: 68125.0547 - val_loss: 60332.9648 - val_mse: 60332.9648 - learning_rate: 0.0010
Epoch 3/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 203ms/step - loss: 66177.8828 - mse: 66177.8828 - val_loss: 60043.7656 - val_mse: 60043.7656 - learning_rate: 0.0010
Epoch 4/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 205ms/step - loss: 64232.9141 - mse: 64232.9141 - val_loss: 59858.7188 - val_mse: 59858.7188 - learning_rate: 0.0010
Epoch 5/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 201ms/step - loss: 62885.8945 - mse: 62885.8945 - val_loss: 59782.5859 - val_mse: 59782.5859 - learning_rate: 0.0010
Epoch 6/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [85]:
# Score the held-out test rows; the result is [loss, mse], matching the
# loss and metric passed to compile().
model.evaluate(x=X_test, y=y_test, verbose=1)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 29953.6074 - mse: 29953.6074


[29594.044921875, 29594.044921875]

In [86]:
# Predicted (bid, ask) pairs for the test rows.
pred_bidask = model.predict(X_test)

# Mid price = average of the two columns, for predictions and targets alike.
pred_mid = np.mean(pred_bidask, axis=1)
true_mid = np.mean(y_test, axis=1)

# Mean squared error between predicted and true mid prices.
mse_mid = np.mean(np.square(pred_mid - true_mid))
print("Equilibrium Price MSE:", mse_mid)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Equilibrium Price MSE: 29593.708216773648
