## S&P500 forecasts

### Simple/Naive model
Use model of the form: (2*p - 1) * sigma * k where k is a calibration constant to scale returns

In [10]:
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

In [11]:
results = pd.read_csv('LSTM_Sign_Forecast_Selected_Features.csv')

In [12]:
# estimate k
results['Signal'] = (2 * results['Pos_probability'] - 1)
results['expected_signed_volatility'] = results['Signal'] * results['RiskMetrics_vol_forecast']

training_size = int(0.8 * len(results))

X = results[['expected_signed_volatility']]
y = results['Daily_Return_S&P500']

X_train = X[:training_size]
y_train = y[:training_size]
X_test = X[training_size:]
y_test = y[training_size:]

model = LinearRegression()
model.fit(X_train, y_train)
k = model.coef_[0]
print("Estimated k:", k)

Estimated k: -0.0011997787976893754


k should not be negative 😅

In [13]:
full_results = results.copy()
results = results.iloc[training_size:]
return_hat = results['expected_signed_volatility'] * k
results['Return_Hat'] = return_hat
results["predicted Close"] = np.exp(np.log(results['Close_S&P500'].shift(1)) + results['Return_Hat'])
results

Unnamed: 0,Date,Actual_Sign,Predicted_Sign,Pos_probability,RiskMetrics_vol_forecast,Daily_Return_S&P500,Close_S&P500,Signal,expected_signed_volatility,Return_Hat,predicted Close
6568,2016-06-23,1,1,0.824151,0.085342,0.013276,2113.320068,0.648303,0.055328,-0.000066,
6569,2016-06-24,0,1,0.825569,0.082992,-0.036581,2037.410034,0.651137,0.054039,-0.000065,2113.183056
6570,2016-06-27,0,1,0.826385,0.095599,-0.018262,2000.540039,0.652769,0.062404,-0.000075,2037.257497
6571,2016-06-28,1,1,0.826545,0.169775,0.017614,2036.089966,0.653091,0.110879,-0.000133,2000.273925
6572,2016-06-29,1,1,0.829530,0.179268,0.016889,2070.770020,0.659060,0.118148,-0.000142,2035.801367
...,...,...,...,...,...,...,...,...,...,...,...
8205,2022-12-23,1,0,0.277615,0.220183,0.005851,3844.820068,-0.444770,-0.097931,0.000117,3822.839031
8206,2022-12-27,0,0,0.265720,0.220852,-0.004058,3829.250000,-0.468560,-0.103482,0.000124,3845.297456
8207,2022-12-28,0,0,0.217642,0.215329,-0.012093,3783.219971,-0.564716,-0.121600,0.000146,3829.808702
8208,2022-12-29,1,0,0.271934,0.209365,0.017311,3849.280029,-0.456132,-0.095498,0.000115,3783.653464


In [14]:
# evaluate predicted return vs actual return
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(results['Daily_Return_S&P500'], results['Return_Hat'])
print("Mean Squared Error of Return Prediction:", mse)

Mean Squared Error of Return Prediction: 0.00015376826489361728


In [15]:
# evaluated predicted price vs actual price
results = results.dropna()
mse_price = mean_squared_error(results['Close_S&P500'], results['predicted Close'])
print("Mean Squared Error of Price Prediction:", mse_price)

Mean Squared Error of Price Prediction: 1584.351753525584


### Sophisticated Approach: Another NN for Return Prediction

In [16]:
y = full_results["Daily_Return_S&P500"]
X = full_results[["Pos_probability", "RiskMetrics_vol_forecast"]]

In [17]:
# Training and test set split
training_size = int(0.8 * len(full_results))
X_train = X[:training_size]
y_train = y[:training_size]
X_test = X[training_size:]
y_test = y[training_size:]

# transform to tensors
X_train_t = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
y_train_t = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)
X_test_t = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)        

In [18]:
class ReturnGRU(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=16, num_layers=1, dropout=0.0):
        super(ReturnGRU, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, 1)
    def forward(self, x):
        out, _ = self.gru(x)
        out = out[:, -1, :]
        out = self.fc(out)
        return out

hidden_dim = 16
num_layers = 1
dropout = 0.0

model = ReturnGRU(input_dim=X.shape[1], hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout)
optimizer = optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()
batch_size = 32
num_epochs = 30
train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


In [19]:
model.train()
for epoch in range(num_epochs+1):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: loss = {loss.item():.6f}")


Epoch 0: loss = 0.000064
Epoch 10: loss = 0.000160
Epoch 20: loss = 0.000429
Epoch 30: loss = 0.000108


In [20]:
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_t).squeeze().numpy()
return_results = pd.DataFrame({
    'Actual_Return': y_test,
    'Predicted_Return': y_pred_test
})

mse = mean_squared_error(return_results['Actual_Return'], return_results['Predicted_Return'])
print("Mean Squared Error of Return Prediction (GRU):", mse)


Mean Squared Error of Return Prediction (GRU): 0.0001536884375869661


In [21]:
return_results['Close_S&P500'] = full_results['Close_S&P500'].iloc[training_size:].values
return_results["Predicted_Close"] = np.exp(np.log(return_results['Close_S&P500'].shift(1)) + return_results['Predicted_Return'])
return_results 

Unnamed: 0,Actual_Return,Predicted_Return,Close_S&P500,Predicted_Close
6568,0.013276,0.000205,2113.320068,
6569,-0.036581,0.000209,2037.410034,2113.761765
6570,-0.018262,0.000202,2000.540039,2037.821826
6571,0.017614,0.000146,2036.089966,2000.832059
6572,0.016889,0.000143,2070.770020,2036.381299
...,...,...,...,...
8205,0.005851,-0.000173,3844.820068,3821.730044
8206,-0.004058,-0.000161,3829.250000,3844.201859
8207,-0.012093,-0.000079,3783.219971,3828.948345
8208,0.017311,-0.000135,3849.280029,3782.708859


In [22]:
return_results.dropna(inplace=True)
mse_price = mean_squared_error(return_results['Close_S&P500'], return_results['Predicted_Close'])
print("Mean Squared Error of Price Prediction (GRU):", mse_price)

Mean Squared Error of Price Prediction (GRU): 1584.0294332444937
