In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from google.colab import drive
import yfinance as yf
import seaborn as sns
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, Flatten, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
drive.mount('/content/drive')
path = '/content/drive/MyDrive/Volatility/New_data_2000_2024/Just_normal/'

Mounted at /content/drive


In [None]:
def set_index(df):
  df.index = pd.to_datetime(df['Date'])
  df.drop(columns=['Date'], inplace=True)

In [None]:
filename = 'GKYZV_data.csv'
GKYZV_data = pd.read_csv(path + filename)

In [None]:
set_index(GKYZV_data)

In [None]:
data = pd.DataFrame(columns=['garch', 'return'], index=GKYZV_data.index)

In [None]:
data['garch'] = GKYZV_data["('GARCH', 'normal', 0)"].values

In [None]:
data['return'] = GKYZV_data["log_returns"].values

In [None]:
data['GKYZV'] = GKYZV_data["GKYZV"].values

In [None]:
data

Unnamed: 0_level_0,garch,return,GKYZV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-02-02,0.800313,-0.027564,0.854942
2000-02-03,0.655577,0.893688,0.819442
2000-02-04,0.491617,-0.052641,0.714186
2000-02-07,0.408321,-0.025746,0.693115
2000-02-08,0.339040,0.976294,0.645003
...,...,...,...
2024-08-26,0.093281,-0.275848,1.044257
2024-08-27,-0.162765,0.111656,1.038189
2024-08-28,-0.126100,-0.507014,1.017680
2024-08-29,-0.402733,-0.021515,0.965867


# Split train, validation and test data

In [None]:
def split_data(data, target, train_end =datetime(2022, 5, 30), test_start=datetime(2022, 5, 31), test_size=0.1):
  test_data = data.loc[test_start:]

  train_val = data.loc[:train_end]
  train_data, val_data = train_test_split(train_val, test_size=test_size, shuffle=False)

  return train_data, val_data, test_data

In [None]:
def sliding_window_transform(X, y, timesteps):
  X_windows = []
  y_windows = []
  for i in range(len(X) - timesteps + 1):
    X_window = X[i:i + timesteps]
    y_window = y[i + timesteps - 1]
    X_windows.append(X_window)
    y_windows.append(y_window)

  return np.array(X_windows), np.array(y_windows)

In [None]:
# Define the rolling window function
def rolling_window(df, in_sample_window_size, out_of_sample_size):
  X, y = [], []
  for i in range(in_sample_window_size, len(df) - out_of_sample_size):
    X.append(df.iloc[i - in_sample_window_size:i, :-1].values)  # All features except the target column
    y.append(df.iloc[i:i + out_of_sample_size, -1].values)  # Target column

  return np.array(X), np.array(y)

# Model

In [None]:
def calculate_mape(actual, predicted):
  actual, predicted = np.array(actual), np.array(predicted)
  return np.mean(np.abs((actual - predicted) / actual)) * 100

## GKYZV

In [None]:
daily_train, daily_val, daily_test = split_data(data, 'GKYZV')

In [None]:
in_sam_win_sz = 25
out_sam_win_sz = 5

In [None]:
GKYZV_X_train, GKYZV_y_train = rolling_window(daily_train, in_sam_win_sz, out_sam_win_sz)
GKYZV_X_val, GKYZV_y_val = rolling_window(daily_val, in_sam_win_sz, out_sam_win_sz)
GKYZV_X_test, GKYZV_y_test = rolling_window(daily_test, in_sam_win_sz, out_sam_win_sz)

In [None]:
# GKYZV_X_train = GKYZV_X_train.reshape(GKYZV_X_train.shape[0], win_sz, -1)
# GKYZV_X_val = GKYZV_X_val.reshape(GKYZV_X_val.shape[0], win_sz, -1)
# GKYZV_X_test = GKYZV_X_test.reshape(GKYZV_X_test.shape[0], win_sz, -1)

### LSTM

In [None]:
lstm_garch_model = Sequential()
lstm_garch_model.add(LSTM(48, input_shape=(GKYZV_X_train.shape[1], GKYZV_X_train.shape[2]), return_sequences=True))
lstm_garch_model.add(Dropout(0.1))
lstm_garch_model.add(LSTM(16))
lstm_garch_model.add(Dropout(0.1))
lstm_garch_model.add(Dense(5))

lstm_garch_model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

  super().__init__(**kwargs)


In [None]:
lstm_garch_model.summary()

In [None]:
lstm_history = lstm_garch_model.fit(GKYZV_X_train, GKYZV_y_train, epochs=80, batch_size=32, validation_data=(GKYZV_X_val, GKYZV_y_val))

Epoch 1/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - loss: 0.2149 - val_loss: 0.1513
Epoch 2/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - loss: 0.1002 - val_loss: 0.1450
Epoch 3/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - loss: 0.0798 - val_loss: 0.1323
Epoch 4/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0702 - val_loss: 0.1684
Epoch 5/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - loss: 0.0658 - val_loss: 0.1511
Epoch 6/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0596 - val_loss: 0.0977
Epoch 7/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0530 - val_loss: 0.1896
Epoch 8/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - loss: 0.0522 - val_loss: 0.1954
Epoch 9/80
[1m158/158[0m [32m

In [None]:
# Predictions
GKYZV_test_predictions = lstm_garch_model.predict(GKYZV_X_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step


In [None]:
mse = mean_squared_error(GKYZV_y_test, GKYZV_test_predictions)
print(f'MSE: {mse}')

rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

mae = mean_absolute_error(GKYZV_y_test, GKYZV_test_predictions)
print(f'MAE: {mae}')

mape = calculate_mape(GKYZV_y_test, GKYZV_test_predictions)
print(f'MAPE: {mape}%')

MSE: 0.14330446774497857
RMSE: 0.3785557657003504
MAE: 0.2913541276562839
MAPE: 198.0407697836941%


### GRU

In [None]:
gru_model = Sequential()
gru_model.add(GRU(48, input_shape=(GKYZV_X_train.shape[1], GKYZV_X_train.shape[2]), return_sequences=True))
gru_model.add(Dropout(0.1))
gru_model.add(GRU(16))
gru_model.add(Dropout(0.1))
gru_model.add(Dense(5))

gru_model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

  super().__init__(**kwargs)


In [None]:
gru_model.summary()

In [None]:
gru_history = gru_model.fit(GKYZV_X_train, GKYZV_y_train, epochs=80, batch_size=32, validation_data=(GKYZV_X_val, GKYZV_y_val))

Epoch 1/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - loss: 0.2012 - val_loss: 0.2932
Epoch 2/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - loss: 0.0909 - val_loss: 0.1140
Epoch 3/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - loss: 0.0760 - val_loss: 0.1250
Epoch 4/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - loss: 0.0722 - val_loss: 0.1032
Epoch 5/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - loss: 0.0761 - val_loss: 0.1190
Epoch 6/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - loss: 0.0695 - val_loss: 0.1211
Epoch 7/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 0.0500 - val_loss: 0.1505
Epoch 8/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0458 - val_loss: 0.1647
Epoch 9/80
[1m158/158[0m [32m

In [34]:
# Predictions
GKYZV_gru_predictions = gru_model.predict(GKYZV_X_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [35]:
mse = mean_squared_error(GKYZV_y_test, GKYZV_gru_predictions)
print(f'MSE: {mse}')

rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

mae = mean_absolute_error(GKYZV_y_test, GKYZV_gru_predictions)
print(f'MAE: {mae}')

mape = calculate_mape(GKYZV_y_test, GKYZV_gru_predictions)
print(f'MAPE: {mape}%')

MSE: 0.11359816837000063
RMSE: 0.3370432737349918
MAE: 0.23872064813077518
MAPE: 169.34836806164688%


### LSTM_GRU

In [36]:
lstm_gru_model = Sequential()
lstm_gru_model.add(LSTM(48, input_shape=(GKYZV_X_train.shape[1], GKYZV_X_train.shape[2]), return_sequences=True))
lstm_gru_model.add(Dropout(0.1))
lstm_gru_model.add(GRU(16))
lstm_gru_model.add(Dropout(0.1))
lstm_gru_model.add(Dense(5))

lstm_gru_model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

  super().__init__(**kwargs)


In [37]:
lstm_gru_model.summary()

In [38]:
lstm_gru_history = lstm_garch_model.fit(GKYZV_X_train, GKYZV_y_train, epochs=80, batch_size=32, validation_data=(GKYZV_X_val, GKYZV_y_val))

Epoch 1/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0123 - val_loss: 0.1079
Epoch 2/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - loss: 0.0168 - val_loss: 0.1122
Epoch 3/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - loss: 0.0157 - val_loss: 0.1145
Epoch 4/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0151 - val_loss: 0.1209
Epoch 5/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 31ms/step - loss: 0.0170 - val_loss: 0.1261
Epoch 6/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0157 - val_loss: 0.1174
Epoch 7/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.0167 - val_loss: 0.1220
Epoch 8/80
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - loss: 0.0143 - val_loss: 0.1327
Epoch 9/80
[1m158/158[0m [32m

In [39]:
# Predictions
GKYZV_lstm_gru_predictions = lstm_garch_model.predict(GKYZV_X_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


In [40]:
mse = mean_squared_error(GKYZV_y_test, GKYZV_lstm_gru_predictions)
print(f'MSE: {mse}')

rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

mae = mean_absolute_error(GKYZV_y_test, GKYZV_lstm_gru_predictions)
print(f'MAE: {mae}')

mape = calculate_mape(GKYZV_y_test, GKYZV_lstm_gru_predictions)
print(f'MAPE: {mape}%')

MSE: 0.1272274541354434
RMSE: 0.35668957671264157
MAE: 0.27336542993858937
MAPE: 181.5227237730966%
