In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the raw CSV (it must sit next to the notebook, otherwise give the full path),
# parse the 'Date' column to datetimes and use it as the index.
df = pd.read_csv('VIX_data.csv')
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

# Input columns fed to the final model.
selected_features = ['Unemployment Rate', 'BBB Rate',
                     '10-Year Treasury Constant Maturity Rate',
                     'Six Month T Bill Rate', 'SP500 Index']

# Column to predict; assumes the dataset has a 'VIX' column.
target = 'VIX'

In [None]:
# Run only once to obtain the feature set; a full run takes about 45 minutes.
# Result of that run: ['Unemployment Rate', 'BBB Rate', '10-Year Treasury Constant Maturity Rate', 'Six Month T Bill Rate', 'SP500 Index']
# Search for the feature combination that gives the best (lowest) RMSE.

# Search target: the VIX value of the following row. shift(-1) leaves NaN in
# the final row because there is no following observation.
df['Next Month VIX'] = df['VIX'].shift(-1)

# Every candidate input column considered by the search.
all_feature_names = [
    'Unemployment Rate',
    'CPI',
    'Three Month T Bill Rate',
    'Six Month T Bill Rate',
    '10-Year Treasury Constant Maturity Rate',
    'Fed Rate',
    'BBB Rate',
    'SP500 Index',
    'GDP',
    'Initial Claims',
    'JPY/USD Spot Rate',
    'Total Nonfarm Employees',
    'Spot Crude Oil Price',
]

# Search state: nothing chosen yet, no score yet, every candidate still available.
remaining_features = list(all_feature_names)
best_rmse = np.inf
best_features = []

def create_sequences(df, features, time_steps):
    """Build sliding-window samples for the LSTM feature search.

    For every row index ``i`` from ``time_steps`` to ``len(df) - 1`` the sample
    is the ``time_steps`` rows of ``features`` that precede row ``i`` and the
    label is ``df['Next Month VIX']`` at row ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``features`` columns and a 'Next Month VIX' column.
    features : list of str
        Columns used as model inputs.
    time_steps : int
        Number of consecutive rows in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape (n_samples, time_steps, len(features)) and ``y`` with
        shape (n_samples,). Both are empty when no sample can be built.

    Notes
    -----
    Samples whose label is missing are skipped. 'Next Month VIX' is produced
    with ``shift(-1)``, so its last row is always NaN; passing that NaN on
    would make the RMSE computation on the hold-out split fail.

    NOTE(review): the window ends at row ``i - 1`` while the label is taken at
    row ``i`` of the already shifted column, i.e. two rows after the end of
    the window. Confirm that this horizon is intended.
    """
    # Extract the arrays once instead of re-slicing the DataFrame per sample.
    feature_values = df[features].values
    target_values = df['Next Month VIX'].values

    X, y = [], []
    for i in range(time_steps, len(df)):
        label = target_values[i]
        if pd.isna(label):
            continue
        X.append(feature_values[i - time_steps:i])
        y.append(label)
    return np.array(X), np.array(y)

# Greedy forward selection.
# Each round scores every remaining feature added to the current best set,
# keeps only the single candidate with the lowest hold-out RMSE, and removes
# that winner from the pool. The search stops as soon as no candidate beats
# the best RMSE found so far, so a feature is never selected twice.
# NOTE(review): candidates are ranked on the last 10% of the sequences, so the
# winning RMSE is an optimistic estimate of out-of-sample error.
while remaining_features:
    round_best_feature = None
    round_best_rmse = best_rmse

    for feature in remaining_features:
        current_features = best_features + [feature]

        # Create sequences for the current feature set
        X_seq, y_seq = create_sequences(df, current_features, time_steps=20)

        # Chronological split: the final 10% of the samples is held out
        X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.1, shuffle=False)

        # Many models are built in this loop; drop the state of the previous
        # one so memory use does not grow with every candidate.
        tf.keras.backend.clear_session()

        # Build and train LSTM model
        model = Sequential()
        model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')

        model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

        # Predictions and RMSE calculation
        y_pred = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred.flatten()))

        # Remember the best candidate of this round; the selected set itself
        # is only updated after every candidate has been scored on equal terms.
        if rmse < round_best_rmse:
            round_best_rmse = rmse
            round_best_feature = feature

    # No candidate improved on the current best set: the search is finished.
    if round_best_feature is None:
        break

    best_rmse = round_best_rmse
    best_features = best_features + [round_best_feature]
    remaining_features.remove(round_best_feature)

print("Best Features:", set(best_features))

In [None]:
# Date boundaries of the two windows. The test window starts 12 months before
# the training window ends.
# NOTE(review): presumably the overlap only supplies look-back history for the
# first test sequences - confirm.
train_start_date = '2014-10-1'
train_end_date = '2023-10-01'
test_start_date = '2022-10-01'
test_end_date = '2024-09-01'

# Boolean row selectors: training is [start, end), testing is [start, end].
train_mask = (df.index >= train_start_date) & (df.index < train_end_date)
test_mask = (df.index >= test_start_date) & (df.index <= test_end_date)

# Slice the rows once per window, then pick inputs and target from each slice.
# The target is selected with a list so it stays a one-column DataFrame.
train_rows, test_rows = df.loc[train_mask], df.loc[test_mask]
X_train, y_train = train_rows[selected_features], train_rows[[target]]
X_test, y_test = test_rows[selected_features], test_rows[[target]]

In [None]:
# One min-max scaler for the inputs and one for the target, both fitted on the
# training window only so that no test statistics reach the model.
scaler_X = MinMaxScaler().fit(X_train)
scaler_y = MinMaxScaler().fit(y_train)

# Apply the training-fitted scalers to both windows.
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

def create_sequences(X, y, time_steps=12):
    """Turn aligned input/target arrays into sliding-window samples.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    paired with ``y[k + time_steps]``, the target of the row right after the
    window.

    Parameters
    ----------
    X : array-like
        Input rows, sliced along the first axis.
    y : array-like
        Targets, indexed along the first axis in step with ``X``.
    time_steps : int, default 12
        Number of rows per window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Stacked windows and their targets; both are empty arrays when ``X``
        has no more than ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window the scaled training and test data with the default look-back length.
X_train_seq, y_train_seq = create_sequences(X=X_train_scaled, y=y_train_scaled)
X_test_seq, y_test_seq = create_sequences(X=X_test_scaled, y=y_test_scaled)

In [None]:
# Seed NumPy and TensorFlow before the network is built so that weight
# initialisation and training start from the same random state on every run;
# without this the reported RMSE / R^2 change from run to run.
np.random.seed(42)
tf.random.set_seed(42)

# A single 50-unit LSTM layer (ReLU activation) reads each window of shape
# (time steps, features); a one-unit dense layer outputs the scaled target.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(50, activation='relu', input_shape=(X_train_seq.shape[1], X_train_seq.shape[2])),
    tf.keras.layers.Dense(1)
])

# Regression set-up: Adam optimiser minimising mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train for a fixed number of epochs. The test sequences are passed as
# validation data, so their loss is reported after every epoch.
epochs = 100
history = model.fit(
    x=X_train_seq,
    y=y_train_seq,
    validation_data=(X_test_seq, y_test_seq),
    batch_size=32,
    epochs=epochs,
)

In [None]:
# Bring the true test targets back to the original VIX scale.
y_test_actual = scaler_y.inverse_transform(y_test_seq)

# Predict on the test windows, then undo the target scaling with the same scaler.
y_pred_scaled = model.predict(X_test_seq)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

In [None]:
# Pair every prediction with its date. create_sequences() uses the leading
# rows of the test window only as look-back context, so the predictions
# belong to the trailing len(y_pred) dates. Deriving the offset from the
# prediction count keeps the dates aligned if the window length is changed.
test_dates = df.index[test_mask]
prediction_dates = test_dates[len(test_dates) - len(y_pred):]

# Create a DataFrame for comparison
results_df = pd.DataFrame({
    'Date': prediction_dates,
    'Actual VIX': y_test_actual.flatten(),
    'Predicted VIX': y_pred.flatten()
})

print(results_df)

# Calculate RMSE and R^2 on the original (unscaled) VIX values
rmse = np.sqrt(mean_squared_error(results_df['Actual VIX'], results_df['Predicted VIX']))
r2 = r2_score(results_df['Actual VIX'], results_df['Predicted VIX'])

print(f'RMSE: {rmse}')
print(f'R^2: {r2}')

In [None]:
# Draw the actual and the predicted VIX series against the prediction dates.
fig, ax = plt.subplots(figsize=(12, 6))
for column, colour in (('Actual VIX', 'blue'), ('Predicted VIX', 'orange')):
    ax.plot(results_df['Date'], results_df[column], label=column, color=colour)
ax.set_title('Actual vs Predicted VIX')
ax.set_xlabel('Date')
ax.set_ylabel('VIX Value')
ax.legend()
plt.show()