In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pandas.plotting import autocorrelation_plot
from IPython import display
import duckdb

In [2]:
import datetime as dt
from pandas.tseries.offsets import Day
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from arch import arch_model
from sklearn.metrics import mean_squared_error, r2_score
from Utils import *

  from pandas import Int64Index as NumericIndex


In [3]:
import torch
import torch.nn as nn
from torchvision import models
from torch import optim
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

# Data Preprocessing

In [4]:
# Load the model inputs ("surfaces") and the prediction targets from two CSV files.
# NOTE(review): `load_data` is not defined in this notebook; presumably it is pulled in by
# `from Utils import *` — confirm what each CSV contributes and how the rows are aligned.
surfaces, targets = load_data("processed_data/SPX.csv", "data/VIX.csv")

In [5]:
# Group the per-day data into stacked samples using a window argument of 20.
# NOTE(review): the trailing axis of length 20 in the shape printed further down presumably
# comes from this argument — TODO confirm against `stack_days`.
stacked_surfaces, stacked_targets, dates = stack_days(surfaces, targets, 20)

In [6]:
# Produce lagged targets using a lag argument of 10.
# NOTE(review): `stacked_surfaces` and `dates` are both inputs to and outputs of this call,
# so they are rebound here. Re-running this cell on its own may apply the adjustment a
# second time — verify, and consider distinct output names.
stacked_surfaces, stacked_targets_lagged, dates = lag_targets(stacked_surfaces, stacked_targets, 10, dates)

In [7]:
# Split inputs, targets and dates into train and test partitions.
# NOTE(review): '2019' and '2023' presumably delimit the test period by date — TODO confirm.
# The call shape (two strings, then dates, then arrays; six return values) indicates this is
# a project helper rather than scikit-learn's function of the same name.
surfaces_train, surfaces_test, targets_train, targets_test, train_dates, test_dates = train_test_split(
    '2019', '2023', dates, stacked_surfaces, stacked_targets_lagged)

In [10]:
# Sanity check: display the shape of the training inputs (leading axis indexes samples).
surfaces_train.shape

(5754, 34, 11, 20)

In [8]:
# Shape of a single training sample, i.e. the training shape without its leading axis.
input_shape = surfaces_train[0].shape

In [11]:
# Display the per-sample shape computed above.
input_shape

(34, 11, 20)

# Utility functions

In [9]:
def print_results(targets, targets_pred, lags):
    """
    Print MSE, MAE, R2 and the standard deviation of the absolute error,
    one line per lag under a banner for each metric.

    targets: (N,) ground-truth values
    targets_pred: (L, N) predictions, row i belonging to lags[i]
    lags: (L,) labels used in the printed lines
    """
    def _abs_error(pred):
        # Element-wise absolute error against the shared ground truth.
        return np.abs(targets - pred)

    # (banner, label used in each line, metric computed from one prediction row)
    report = (
        ("-------------MSE-------------", "MSE",
         lambda pred: mean_squared_error(targets, pred)),
        ("-------------MAE-------------", "MAE",
         lambda pred: np.mean(_abs_error(pred))),
        ("-------------R2--------------", "R2",
         lambda pred: r2_score(targets, pred)),
        ("-----------AE STD------------", "AE std",
         lambda pred: np.std(_abs_error(pred))),
    )

    for banner, label, metric in report:
        print(banner)
        for row, lag in enumerate(lags):
            print(f'Lag-{lag} {label}: {metric(targets_pred[row])}')

# 3D CNN

In [None]:
# Optimiser step size, plus the input channel count and spatial side length the
# CNN below is sized for.
# NOTE(review): the following cell assigns the same three values again, so this
# cell looks redundant.
learning_rate = 1e-3
in_channel = 3
in_dim = 32

In [None]:
# --- Hyperparameters ---------------------------------------------------------
learning_rate = 1e-3
in_channel = 3
in_dim = 32
num_classes = 10
# channel_1, channel_2, channel_3 = 64, 128, 256
channel_1 = 128
channel_2 = 256
channel_3 = 512
kernel_size_1 = 5
kernel_size_2 = 3
kernel_size_3 = 3
dropout_rate = 0.2

# NOTE(review): in_channel=3 / in_dim=32 do not match the per-sample shape
# printed earlier in this notebook, (34, 11, 20) — confirm what input this
# network is meant to consume before training it on `surfaces_train`.

# Two 2x max-pools shrink each spatial side by a factor of 4, which fixes the
# number of features entering the linear head (assumes a square in_dim x in_dim input).
_flat_features = channel_3 * (in_dim // 4) ** 2

_layers = [
    ## 1. conv-batchnorm-relu-pool
    nn.Conv2d(in_channel, channel_1, kernel_size_1, padding="same"),
    nn.BatchNorm2d(channel_1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    ## 2. conv-batchnorm-relu-pool
    nn.Conv2d(channel_1, channel_2, kernel_size_2, padding="same"),
    nn.BatchNorm2d(channel_2),
    nn.ReLU(),
    nn.MaxPool2d(2),
    ## 3. conv-batchnorm-relu-dropout (no pooling in this stage)
    nn.Conv2d(channel_2, channel_3, kernel_size_3, padding="same"),
    nn.BatchNorm2d(channel_3),
    nn.ReLU(),
    nn.Dropout(dropout_rate),
    ## 4. Fully-connected
    nn.Flatten(),
    nn.Linear(_flat_features, num_classes),
]
model = nn.Sequential(*_layers)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
def train_part34(model, optimizer, epochs=1):
    """
    Train `model` with `optimizer` for `epochs` passes over `loader_train`,
    minimising the cross-entropy between the model scores and integer labels.

    model: torch.nn.Module producing one score per class
    optimizer: torch optimizer built over `model.parameters()`
    epochs: number of full passes over `loader_train`

    Returns nothing; the model is updated in place and progress is printed
    every `print_every` iterations.

    Relies on names that must already exist in the notebook namespace:
    `device`, `dtype`, `loader_train`, `loader_val`, `print_every` and
    `check_accuracy_part34`.
    NOTE(review): none of these is defined in the cells shown here — confirm
    they are provided (e.g. by `from Utils import *`) before running.
    NOTE(review): this is a classification loop (labels cast to torch.long,
    cross-entropy loss) — confirm that matches the targets prepared earlier.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for _ in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            # Re-enter training mode on every iteration, as the original loop did
            # (the periodic accuracy check below may change the mode — TODO confirm).
            model.train()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            # `nn.functional` is reachable through the notebook's existing
            # `import torch.nn as nn`; the bare alias `F` is never imported here.
            loss = nn.functional.cross_entropy(scores, y)

            # Clear gradients accumulated by the previous step, backpropagate
            # the new loss, then apply the parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()


# The call must come after the definition: on a fresh kernel, calling first
# raises NameError.
train_part34(model, optimizer, epochs=10)

In [None]:
class LSTM(nn.Module):
    """
    Batch-first LSTM followed by a linear head on the final hidden state.

    `forward` takes `x` of shape (batch, seq_len, input_size) and returns a
    tensor of shape (batch, num_classes).
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length=None):
        """
        num_classes: number of outputs produced by the linear head
        input_size: number of features per time step
        hidden_size: width of the LSTM hidden and cell states
        num_layers: number of stacked LSTM layers
        seq_length: optional; stored on the instance for reference only and not
            used by `forward`. It used to be read from an undefined name, which
            raised NameError unless a global happened to exist.
        """
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run the LSTM over `x` and map the last layer's final hidden state to the outputs."""
        batch_size = x.size(0)

        # Zero initial states allocated from `x` so they share its device and
        # dtype (plain torch.zeros would always land on CPU in float32).
        h_0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)
        c_0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)

        # Propagate input through LSTM; only the final hidden state is needed.
        _, (h_n, _) = self.lstm(x, (h_0, c_0))

        # h_n is (num_layers, batch, hidden). Use the top layer only: flattening
        # every layer into the batch axis would yield num_layers * batch rows.
        # For num_layers == 1 this is identical to the flattened form.
        return self.fc(h_n[-1])