# Neural Network Analysis

**Author**: Maleakhi Agung Wijaya  
**Email**: *maw219@cam.ac.uk*  
**Description**: This file contains code implementation of various neural networks, including 2D-CNNPred, 3D-CNNPred, shallow feedforward network, and RNN-based network.

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from os.path import join
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report, mean_absolute_error as mae
import os
from pathlib2 import Path
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import layers, models, backend as K, callbacks

In [2]:
%run Utilities.ipynb

## Load data and preprocessing

In this step, we will load all dataframes,fill missing values, and scale so that each column all features are on the same scale. Afterwards, we will generate sequential datasets using **full features, PCA features**, and only **technical indicator features**.

In [3]:
market_orders, n_markets, aggregated_datasets = load_aggregated_datasets([DATASET_DJI, 
                                                                          DATASET_NASDAQ, 
                                                                          DATASET_NYSE,
                                                                          DATASET_RUSSELL, 
                                                                          DATASET_SP])

# Load datasets
## DJI
dji_df = aggregated_datasets["DJI"]

## NASDAQ
nasdaq_df = aggregated_datasets["NASDAQ"]

## NYSE
nyse_df = aggregated_datasets["NYA"]

## Russell
russell_df = aggregated_datasets["RUT"]

## SP
sp_df = aggregated_datasets["S&P"]

In [4]:
## DO NOT RUN BELOW FOR 3D CNN PRED
# Fill missing values, do some scaling (run prev cell first)
list_df = []

for df in [dji_df, nasdaq_df, nyse_df, russell_df, sp_df]:
    columns = df.columns
    df.fillna(0, inplace=True) # fill na with 0
    y = df["MOVEMENT"].copy()
    X = df.drop(columns=["MOVEMENT"]).copy()
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X))
    X["MOVEMENT"] = np.array(y)
    X.columns = columns
    list_df.append(X)
    
### Clean dataframe (full features)
dji_df_full = list_df[0]
nasdaq_df_full = list_df[1]
nyse_df_full = list_df[2]
russell_df_full = list_df[3]
sp_df_full = list_df[4]

In [5]:
# PCA dataframe (30 features - explained 90% variance) - pca.explained_variance_ratio_.cumsum()
list_df_pca = []

for df in [dji_df_full, nasdaq_df_full, nyse_df_full, russell_df_full, sp_df_full]:
    pca = PCA(n_components=30)
    y = df["MOVEMENT"].copy()
    X = df.drop(columns=["MOVEMENT"]).copy()
    reduced_X = pd.DataFrame(pca.fit_transform(X))
    reduced_X["MOVEMENT"] = y
    list_df_pca.append(reduced_X)

### Clean dataframe (pca features)
dji_df_pca = list_df_pca[0]
nasdaq_df_pca = list_df_pca[1]
nyse_df_pca = list_df_pca[2]
russell_df_pca = list_df_pca[3]
sp_df_pca = list_df_pca[4]

In [6]:
# Technical indicator dataframe
ti_columns = ["Volume", "mom", "mom1", "mom2", "mom3", 
              "ROC_5", "ROC_10", "ROC_15", "ROC_20",
              "EMA_10", "EMA_20", "EMA_50", "EMA_200"]
list_df_ti = []

for df in [dji_df_full, nasdaq_df_full, nyse_df_full, russell_df_full, sp_df_full]:
    y = df["MOVEMENT"].copy()
    X = df[ti_columns].copy()
    X["MOVEMENT"] = np.array(y)
    list_df_ti.append(X)

### Clean dataframe (ti features)
dji_df_ti = list_df_ti[0]
nasdaq_df_ti = list_df_ti[1]
nyse_df_ti = list_df_ti[2]
russell_df_ti = list_df_ti[3]
sp_df_ti = list_df_ti[4]

In [7]:
# Build sequential dataset
sequence_length = 60

### Sequential dataset (full features)
dji_X_seq, dji_y_seq = generate_sequential_data(dji_df_full, sequence_length)
nasdaq_X_seq, nasdaq_y_seq = generate_sequential_data(nasdaq_df_full, sequence_length)
nyse_X_seq, nyse_y_seq = generate_sequential_data(nyse_df_full, sequence_length)
russell_X_seq, russell_y_seq = generate_sequential_data(russell_df_full, sequence_length)
sp_X_seq, sp_y_seq = generate_sequential_data(sp_df_full, sequence_length)

In [8]:
### Sequential dataset (PCA features)
dji_X_pca_seq, dji_y_pca_seq = generate_sequential_data(dji_df_pca, sequence_length)
nasdaq_X_pca_seq, nasdaq_y_pca_seq = generate_sequential_data(nasdaq_df_pca, sequence_length)
nyse_X_pca_seq, nyse_y_pca_seq = generate_sequential_data(nyse_df_pca, sequence_length)
russell_X_pca_seq, russell_y_pca_seq = generate_sequential_data(russell_df_pca, sequence_length)
sp_X_pca_seq, sp_y_pca_seq = generate_sequential_data(sp_df_pca, sequence_length)

In [9]:
### Sequential dataset (TI features)
dji_X_ti_seq, dji_y_ti_seq = generate_sequential_data(dji_df_ti, sequence_length)
nasdaq_X_ti_seq, nasdaq_y_ti_seq = generate_sequential_data(nasdaq_df_ti, sequence_length)
nyse_X_ti_seq, nyse_y_ti_seq = generate_sequential_data(nyse_df_ti, sequence_length)
russell_X_ti_seq, russell_y_ti_seq = generate_sequential_data(russell_df_ti, sequence_length)
sp_X_ti_seq, sp_y_ti_seq = generate_sequential_data(sp_df_ti, sequence_length)

## 2D CNNpred

In this section, we will implement a 2D CNNpred from the paper.

### Load data

In [10]:
# Sequential flatten (full features)
dji_X_seq_flatten = sequential_reshape(dji_X_seq, (len(dji_X_seq), sequence_length, -1, 1))
nasdaq_X_seq_flatten = sequential_reshape(nasdaq_X_seq, (len(dji_X_seq), sequence_length, -1, 1))
nyse_X_seq_flatten = sequential_reshape(nyse_X_seq, (len(dji_X_seq), sequence_length, -1, 1))
russell_X_seq_flatten = sequential_reshape(russell_X_seq, (len(dji_X_seq), sequence_length, -1, 1))
sp_X_seq_flatten = sequential_reshape(sp_X_seq, (len(dji_X_seq), sequence_length, -1, 1))

In [11]:
# Sequential flatten (pca)
dji_X_pca_seq_flatten = sequential_reshape(dji_X_pca_seq, (len(dji_X_seq), sequence_length, -1, 1))
nasdaq_X_pca_seq_flatten = sequential_reshape(nasdaq_X_pca_seq, (len(dji_X_seq), sequence_length, -1, 1))
nyse_X_pca_seq_flatten = sequential_reshape(nyse_X_pca_seq, (len(dji_X_seq), sequence_length, -1, 1))
russell_X_pca_seq_flatten = sequential_reshape(russell_X_pca_seq, (len(dji_X_seq), sequence_length, -1, 1))
sp_X_pca_seq_flatten = sequential_reshape(sp_X_pca_seq, (len(dji_X_seq), sequence_length, -1, 1))

In [12]:
# Sequential flatten (technical indicator)
dji_X_ti_seq_flatten = sequential_reshape(dji_X_ti_seq, (len(dji_X_seq), sequence_length, -1, 1))
nasdaq_X_ti_seq_flatten = sequential_reshape(nasdaq_X_ti_seq, (len(dji_X_seq), sequence_length, -1, 1))
nyse_X_ti_seq_flatten = sequential_reshape(nyse_X_ti_seq, (len(dji_X_seq), sequence_length, -1, 1))
russell_X_ti_seq_flatten = sequential_reshape(russell_X_ti_seq, (len(dji_X_seq), sequence_length, -1, 1))
sp_X_ti_seq_flatten = sequential_reshape(sp_X_ti_seq, (len(dji_X_seq), sequence_length, -1, 1))

### Split into training, validation, and testing (80/10/10)

In [13]:
## Full features
dji_X_train_full, dji_X_test_full, dji_y_train_full, dji_y_test_full = train_test_split(dji_X_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1,
                                                                                        shuffle=False)
dji_X_train_full, dji_X_valid_full, dji_y_train_full, dji_y_valid_full = train_test_split(dji_X_train_full,
                                                                                        dji_y_train_full,
                                                                                        stratify=None,
                                                                                        test_size=0.1,
                                                                                        shuffle=False)
nasdaq_X_train_full, nasdaq_X_test_full, nasdaq_y_train_full, nasdaq_y_test_full = train_test_split(nasdaq_X_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1,
                                                                                        shuffle=False)
nasdaq_X_train_full, nasdaq_X_valid_full, nasdaq_y_train_full, nasdaq_y_valid_full = train_test_split(nasdaq_X_train_full,
                                                                                        nasdaq_y_train_full,
                                                                                        stratify=None,
                                                                                        test_size=0.1,
                                                                                        shuffle=False)
nyse_X_train_full, nyse_X_test_full, nyse_y_train_full, nyse_y_test_full = train_test_split(nyse_X_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nyse_X_train_full, nyse_X_valid_full, nyse_y_train_full, nyse_y_valid_full = train_test_split(nyse_X_train_full,
                                                                                        nyse_y_train_full,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)

russell_X_train_full, russell_X_test_full, russell_y_train_full, russell_y_test_full = train_test_split(russell_X_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
russell_X_train_full, russell_X_valid_full, russell_y_train_full, russell_y_valid_full = train_test_split(russell_X_train_full,
                                                                                        russell_y_train_full,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_full, sp_X_test_full, sp_y_train_full, sp_y_test_full = train_test_split(sp_X_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_full, sp_X_valid_full, sp_y_train_full, sp_y_valid_full = train_test_split(sp_X_train_full,
                                                                                        sp_y_train_full,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)

In [14]:
dji_X_train_full.shape

(1395, 60, 82, 1)

In [15]:
dji_X_valid_full.shape

(156, 60, 82, 1)

In [16]:
dji_X_test_full.shape

(173, 60, 82, 1)

In [14]:
## pca features
dji_X_train_pca, dji_X_test_pca, dji_y_train_pca, dji_y_test_pca = train_test_split(dji_X_pca_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
dji_X_train_pca, dji_X_valid_pca, dji_y_train_pca, dji_y_valid_pca = train_test_split(dji_X_train_pca,
                                                                                        dji_y_train_pca,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nasdaq_X_train_pca, nasdaq_X_test_pca, nasdaq_y_train_pca, nasdaq_y_test_pca = train_test_split(nasdaq_X_pca_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nasdaq_X_train_pca, nasdaq_X_valid_pca, nasdaq_y_train_pca, nasdaq_y_valid_pca = train_test_split(nasdaq_X_train_pca,
                                                                                        nasdaq_y_train_pca,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nyse_X_train_pca, nyse_X_test_pca, nyse_y_train_pca, nyse_y_test_pca = train_test_split(nyse_X_pca_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nyse_X_train_pca, nyse_X_valid_pca, nyse_y_train_pca, nyse_y_valid_pca = train_test_split(nyse_X_train_pca,
                                                                                        nyse_y_train_pca,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
russell_X_train_pca, russell_X_test_pca, russell_y_train_pca, russell_y_test_pca = train_test_split(russell_X_pca_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
russell_X_train_pca, russell_X_valid_pca, russell_y_train_pca, russell_y_valid_pca = train_test_split(russell_X_train_pca,
                                                                                        russell_y_train_pca,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_pca, sp_X_test_pca, sp_y_train_pca, sp_y_test_pca = train_test_split(sp_X_pca_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_pca, sp_X_valid_pca, sp_y_train_pca, sp_y_valid_pca = train_test_split(sp_X_train_pca,
                                                                                        sp_y_train_pca,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)

In [15]:
## ti features
dji_X_train_ti, dji_X_test_ti, dji_y_train_ti, dji_y_test_ti = train_test_split(dji_X_ti_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
dji_X_train_ti, dji_X_valid_ti, dji_y_train_ti, dji_y_valid_ti = train_test_split(dji_X_train_ti,
                                                                                        dji_y_train_ti,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nasdaq_X_train_ti, nasdaq_X_test_ti, nasdaq_y_train_ti, nasdaq_y_test_ti = train_test_split(nasdaq_X_ti_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nasdaq_X_train_ti, nasdaq_X_valid_ti, nasdaq_y_train_ti, nasdaq_y_valid_ti = train_test_split(nasdaq_X_train_ti,
                                                                                        nasdaq_y_train_ti,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nyse_X_train_ti, nyse_X_test_ti, nyse_y_train_ti, nyse_y_test_ti = train_test_split(nyse_X_ti_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
nyse_X_train_ti, nyse_X_valid_ti, nyse_y_train_ti, nyse_y_valid_ti = train_test_split(nyse_X_train_ti,
                                                                                        nyse_y_train_ti,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
russell_X_train_ti, russell_X_test_ti, russell_y_train_ti, russell_y_test_ti = train_test_split(russell_X_ti_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
russell_X_train_ti, russell_X_valid_ti, russell_y_train_ti, russell_y_valid_ti = train_test_split(russell_X_train_ti,
                                                                                        russell_y_train_ti,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_ti, sp_X_test_ti, sp_y_train_ti, sp_y_test_ti = train_test_split(sp_X_ti_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)
sp_X_train_ti, sp_X_valid_ti, sp_y_train_ti, sp_y_valid_ti = train_test_split(sp_X_train_ti,
                                                                                        sp_y_train_ti,
                                                                                        stratify=None,
                                                                                        test_size=0.1, shuffle=False)

### Combine datasets

In [16]:
# full
# Train
X_train_full = np.concatenate(
    (dji_X_train_full,
     nasdaq_X_train_full,
     nyse_X_train_full,
     russell_X_train_full,
     sp_X_train_full)
)
y_train_full = np.concatenate(
    (np.array(dji_y_train_full),
     np.array(nasdaq_y_train_full),
     np.array(nyse_y_train_full),
     np.array(russell_y_train_full),
     np.array(sp_y_train_full)
    )
)

# Valid
X_valid_full = np.concatenate(
    (dji_X_valid_full,
     nasdaq_X_valid_full,
     nyse_X_valid_full,
     russell_X_valid_full,
     sp_X_valid_full)
)
y_valid_full = np.concatenate(
    (np.array(dji_y_valid_full),
     np.array(nasdaq_y_valid_full),
     np.array(nyse_y_valid_full),
     np.array(russell_y_valid_full),
     np.array(sp_y_valid_full)
    )
)

In [17]:
# pca
# Train
X_train_pca = np.concatenate(
    (dji_X_train_pca,
     nasdaq_X_train_pca,
     nyse_X_train_pca,
     russell_X_train_pca,
     sp_X_train_pca)
)
y_train_pca = np.concatenate(
    (np.array(dji_y_train_pca),
     np.array(nasdaq_y_train_pca),
     np.array(nyse_y_train_pca),
     np.array(russell_y_train_pca),
     np.array(sp_y_train_pca)
    )
)

# Valid
X_valid_pca = np.concatenate(
    (dji_X_valid_pca,
     nasdaq_X_valid_pca,
     nyse_X_valid_pca,
     russell_X_valid_pca,
     sp_X_valid_pca)
)
y_valid_pca = np.concatenate(
    (np.array(dji_y_valid_pca),
     np.array(nasdaq_y_valid_pca),
     np.array(nyse_y_valid_pca),
     np.array(russell_y_valid_pca),
     np.array(sp_y_valid_pca)
    )
)

In [18]:
# ti
# Train
X_train_ti = np.concatenate(
    (dji_X_train_ti,
     nasdaq_X_train_ti,
     nyse_X_train_ti,
     russell_X_train_ti,
     sp_X_train_ti)
)
y_train_ti = np.concatenate(
    (np.array(dji_y_train_ti),
     np.array(nasdaq_y_train_ti),
     np.array(nyse_y_train_ti),
     np.array(russell_y_train_ti),
     np.array(sp_y_train_ti)
    )
)

# Valid
X_valid_ti = np.concatenate(
    (dji_X_valid_ti,
     nasdaq_X_valid_ti,
     nyse_X_valid_ti,
     russell_X_valid_ti,
     sp_X_valid_ti)
)
y_valid_ti = np.concatenate(
    (np.array(dji_y_valid_ti),
     np.array(nasdaq_y_valid_ti),
     np.array(nyse_y_valid_ti),
     np.array(russell_y_valid_ti),
     np.array(sp_y_valid_ti)
    )
)

### Model training and evaluation

#### Full features

In [37]:
model_full = cnnpred_2d(60, 82, [8, 8, 8])
epochs = 200
batch_size=128

## Training
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model_full.compile(optimizer="Adam", loss="binary_crossentropy", 
                   metrics=["acc", f1])
model_full.fit(X_train_full, y_train_full, epochs=epochs,
              batch_size=batch_size, callbacks=[early_stopping],
              validation_data=(X_valid_full, y_valid_full))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f32bbf18450>

In [39]:
## DJI
result_dji_full = model_full.predict(dji_X_test_full)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_full, dji_y_test_full)}")
print(f"DJI F1: {f1_score(result_dji_full, dji_y_test_full, average='macro')}")

DJI Accuracy: 0.5317919075144508
DJI F1: 0.4891173575412884


In [40]:
## NASDAQ
result_nasdaq_full = model_full.predict(nasdaq_X_test_full)
result_nasdaq_full = (result_nasdaq_full > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_full, nasdaq_y_test_full)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_full, nasdaq_y_test_full, average='macro')}")

NASDAQ Accuracy: 0.5780346820809249
NASDAQ F1: 0.5246000376435158


In [41]:
## nyse
result_nyse_full = model_full.predict(nyse_X_test_full)
result_nyse_full = (result_nyse_full > 0.5).astype(int)
print(f"nyse Accuracy: {accuracy_score(result_nyse_full, nyse_y_test_full)}")
print(f"nyse F1: {f1_score(result_nyse_full, nyse_y_test_full, average='macro')}")

nyse Accuracy: 0.5202312138728323
nyse F1: 0.47650297130773994


In [42]:
## russell
result_russell_full = model_full.predict(russell_X_test_full)
result_russell_full = (result_russell_full > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_full, russell_y_test_full)}")
print(f"russell F1: {f1_score(result_russell_full, russell_y_test_full, average='macro')}")

russell Accuracy: 0.5317919075144508
russell F1: 0.4852892561983471


In [43]:
## sp
result_sp_full = model_full.predict(sp_X_test_full)
result_sp_full = (result_sp_full > 0.5).astype(int)
print(f"S&P 500 Accuracy: {accuracy_score(result_sp_full, sp_y_test_full)}")
print(f"S&P 500 F1: {f1_score(result_sp_full, sp_y_test_full, average='macro')}")

S&P 500 Accuracy: 0.5375722543352601
S&P 500 F1: 0.4896755162241888


#### PCA features

In [50]:
model_pca = cnnpred_2d(60, 30, [8, 8, 8])
epochs = 200
batch_size=128

## Training
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model_pca.compile(optimizer="Adam", loss="mae", 
                   metrics=["acc", f1])
model_pca.fit(X_train_pca, y_train_pca, epochs=epochs,
              batch_size=batch_size, callbacks=[early_stopping],
              validation_data=(X_valid_pca, y_valid_pca))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200


<tensorflow.python.keras.callbacks.History at 0x7f31f8448950>

In [51]:
## DJI
result_dji_pca = model_pca.predict(dji_X_test_pca)
result_dji_pca = (result_dji_pca > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_pca, dji_y_test_pca)}")
print(f"DJI F1: {f1_score(result_dji_pca, dji_y_test_pca, average='macro')}")

DJI Accuracy: 0.48554913294797686
DJI F1: 0.4770928850399049


In [52]:
## NASDAQ
result_nasdaq_pca = model_pca.predict(nasdaq_X_test_pca)
result_nasdaq_pca = (result_nasdaq_pca > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_pca, nasdaq_y_test_pca)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_pca, nasdaq_y_test_pca, average='macro')}")

NASDAQ Accuracy: 0.5433526011560693
NASDAQ F1: 0.5175615094073212


In [53]:
## nyse
result_nyse_pca = model_pca.predict(nyse_X_test_pca)
result_nyse_pca = (result_nyse_pca > 0.5).astype(int)
print(f"nyse Accuracy: {accuracy_score(result_nyse_pca, nyse_y_test_pca)}")
print(f"nyse F1: {f1_score(result_nyse_pca, nyse_y_test_pca, average='macro')}")

nyse Accuracy: 0.5086705202312138
nyse F1: 0.49902906006200387


In [54]:
## russell
result_russell_pca = model_pca.predict(russell_X_test_pca)
result_russell_pca = (result_russell_pca > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_pca, russell_y_test_pca)}")
print(f"russell F1: {f1_score(result_russell_pca, russell_y_test_pca, average='macro')}")

russell Accuracy: 0.4797687861271676
russell F1: 0.46678082191780823


In [55]:
## sp
result_sp_pca = model_pca.predict(sp_X_test_pca)
result_sp_pca = (result_sp_pca > 0.5).astype(int)
print(f"S&P 500 Accuracy: {accuracy_score(result_sp_pca, sp_y_test_pca)}")
print(f"S&P 500 F1: {f1_score(result_sp_pca, sp_y_test_pca, average='macro')}")

S&P 500 Accuracy: 0.5260115606936416
S&P 500 F1: 0.5189229517091698


#### Technical indicator features

In [68]:
model_ti = cnnpred_2d(60, 13, [8, 8, 8])
epochs = 200
batch_size=128

## Training
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=50, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model_ti.compile(optimizer="Adam", loss="mae", 
                   metrics=["acc", f1])
model_ti.fit(X_train_ti, y_train_ti, epochs=epochs,
              batch_size=batch_size, callbacks=[early_stopping],
              validation_data=(X_valid_ti, y_valid_ti))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200


<tensorflow.python.keras.callbacks.History at 0x7f31cdc082d0>

In [69]:
## DJI
result_dji_ti = model_ti.predict(dji_X_test_ti)
result_dji_ti = (result_dji_ti > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_ti, dji_y_test_ti)}")
print(f"DJI F1: {f1_score(result_dji_ti, dji_y_test_ti, average='macro')}")

DJI Accuracy: 0.47398843930635837
DJI F1: 0.4682317176152677


In [70]:
## NASDAQ
result_nasdaq_ti = model_ti.predict(nasdaq_X_test_ti)
result_nasdaq_ti = (result_nasdaq_ti > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_ti, nasdaq_y_test_ti)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_ti, nasdaq_y_test_ti, average='macro')}")

NASDAQ Accuracy: 0.4682080924855491
NASDAQ F1: 0.45056614194973765


In [71]:
## nyse
result_nyse_ti = model_ti.predict(nyse_X_test_ti)
result_nyse_ti = (result_nyse_ti > 0.5).astype(int)
print(f"nyse Accuracy: {accuracy_score(result_nyse_ti, nyse_y_test_ti)}")
print(f"nyse F1: {f1_score(result_nyse_ti, nyse_y_test_ti, average='macro')}")

nyse Accuracy: 0.5028901734104047
nyse F1: 0.48639878486605914


In [72]:
## russell
result_russell_ti = model_ti.predict(russell_X_test_ti)
result_russell_ti = (result_russell_ti > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_ti, russell_y_test_ti)}")
print(f"russell F1: {f1_score(result_russell_ti, russell_y_test_ti, average='macro')}")

russell Accuracy: 0.4682080924855491
russell F1: 0.46417990842984114


In [73]:
## sp
result_sp_ti = model_ti.predict(sp_X_test_ti)
result_sp_ti = (result_sp_ti > 0.5).astype(int)
print(f"S&P 500 Accuracy: {accuracy_score(result_sp_ti, sp_y_test_ti)}")
print(f"S&P 500 F1: {f1_score(result_sp_ti, sp_y_test_ti, average='macro')}")

S&P 500 Accuracy: 0.48554913294797686
S&P 500 F1: 0.4770928850399049


## Feedforward Neural Networks

### Load data

In [105]:
# Sequential flatten (full features)
dji_X_seq_flatten = sequential_reshape(dji_X_seq, (len(dji_X_seq), -1))
nasdaq_X_seq_flatten = sequential_reshape(nasdaq_X_seq, (len(nasdaq_X_seq), -1))
nyse_X_seq_flatten = sequential_reshape(nyse_X_seq, (len(nyse_X_seq), -1))
russell_X_seq_flatten = sequential_reshape(russell_X_seq, (len(russell_X_seq), -1))
sp_X_seq_flatten = sequential_reshape(sp_X_seq, (len(sp_X_seq), -1))

In [106]:
# Sequential flatten (pca)
dji_X_pca_seq_flatten = sequential_reshape(dji_X_pca_seq, (len(dji_X_pca_seq), -1))
nasdaq_X_pca_seq_flatten = sequential_reshape(nasdaq_X_pca_seq, (len(nasdaq_X_pca_seq), -1))
nyse_X_pca_seq_flatten = sequential_reshape(nyse_X_pca_seq, (len(nyse_X_pca_seq), -1))
russell_X_pca_seq_flatten = sequential_reshape(russell_X_pca_seq, (len(russell_X_pca_seq), -1))
sp_X_pca_seq_flatten = sequential_reshape(sp_X_pca_seq, (len(sp_X_pca_seq), -1))

In [107]:
# Sequential flatten (technical indicator)
dji_X_ti_seq_flatten = sequential_reshape(dji_X_ti_seq, (len(dji_X_ti_seq), -1))
nasdaq_X_ti_seq_flatten = sequential_reshape(nasdaq_X_ti_seq, (len(nasdaq_X_ti_seq), -1))
nyse_X_ti_seq_flatten = sequential_reshape(nyse_X_ti_seq, (len(nyse_X_ti_seq), -1))
russell_X_ti_seq_flatten = sequential_reshape(russell_X_ti_seq, (len(russell_X_ti_seq), -1))
sp_X_ti_seq_flatten = sequential_reshape(sp_X_ti_seq, (len(sp_X_ti_seq), -1))

### Split into training/validation/test (80/10/10)

In [108]:
## Full features
dji_X_train_full, dji_X_test_full, dji_y_train_full, dji_y_test_full = train_test_split(dji_X_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=dji_y_seq,
                                                                                        test_size=0.1,
                                                                                        shuffle=True)
dji_X_train_full, dji_X_valid_full, dji_y_train_full, dji_y_valid_full = train_test_split(dji_X_train_full,
                                                                                        dji_y_train_full,
                                                                                        stratify=dji_y_train_full,
                                                                                        test_size=0.1,
                                                                                        shuffle=True)
nasdaq_X_train_full, nasdaq_X_test_full, nasdaq_y_train_full, nasdaq_y_test_full = train_test_split(nasdaq_X_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=nasdaq_y_seq,
                                                                                        test_size=0.1,
                                                                                        shuffle=True)
nasdaq_X_train_full, nasdaq_X_valid_full, nasdaq_y_train_full, nasdaq_y_valid_full = train_test_split(nasdaq_X_train_full,
                                                                                        nasdaq_y_train_full,
                                                                                        stratify=nasdaq_y_train_full,
                                                                                        test_size=0.1,
                                                                                        shuffle=True)
nyse_X_train_full, nyse_X_test_full, nyse_y_train_full, nyse_y_test_full = train_test_split(nyse_X_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=nyse_y_seq,
                                                                                        test_size=0.1, shuffle=True)
nyse_X_train_full, nyse_X_valid_full, nyse_y_train_full, nyse_y_valid_full = train_test_split(nyse_X_train_full,
                                                                                        nyse_y_train_full,
                                                                                        stratify=nyse_y_train_full,
                                                                                        test_size=0.1, shuffle=True)

russell_X_train_full, russell_X_test_full, russell_y_train_full, russell_y_test_full = train_test_split(russell_X_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=russell_y_seq,
                                                                                        test_size=0.1, shuffle=True)
russell_X_train_full, russell_X_valid_full, russell_y_train_full, russell_y_valid_full = train_test_split(russell_X_train_full,
                                                                                        russell_y_train_full,
                                                                                        stratify=russell_y_train_full,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_full, sp_X_test_full, sp_y_train_full, sp_y_test_full = train_test_split(sp_X_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=sp_y_seq,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_full, sp_X_valid_full, sp_y_train_full, sp_y_valid_full = train_test_split(sp_X_train_full,
                                                                                        sp_y_train_full,
                                                                                        stratify=sp_y_train_full,
                                                                                        test_size=0.1, shuffle=True)

In [109]:
## pca features
dji_X_train_pca, dji_X_test_pca, dji_y_train_pca, dji_y_test_pca = train_test_split(dji_X_pca_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=dji_y_seq,
                                                                                        test_size=0.1, shuffle=True)
dji_X_train_pca, dji_X_valid_pca, dji_y_train_pca, dji_y_valid_pca = train_test_split(dji_X_train_pca,
                                                                                        dji_y_train_pca,
                                                                                        stratify=dji_y_train_pca,
                                                                                        test_size=0.1, shuffle=True)
nasdaq_X_train_pca, nasdaq_X_test_pca, nasdaq_y_train_pca, nasdaq_y_test_pca = train_test_split(nasdaq_X_pca_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=nasdaq_y_seq,
                                                                                        test_size=0.1, shuffle=True)
nasdaq_X_train_pca, nasdaq_X_valid_pca, nasdaq_y_train_pca, nasdaq_y_valid_pca = train_test_split(nasdaq_X_train_pca,
                                                                                        nasdaq_y_train_pca,
                                                                                        stratify=nasdaq_y_train_pca,
                                                                                        test_size=0.1, shuffle=True)
nyse_X_train_pca, nyse_X_test_pca, nyse_y_train_pca, nyse_y_test_pca = train_test_split(nyse_X_pca_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=nyse_y_seq,
                                                                                        test_size=0.1, shuffle=True)
nyse_X_train_pca, nyse_X_valid_pca, nyse_y_train_pca, nyse_y_valid_pca = train_test_split(nyse_X_train_pca,
                                                                                        nyse_y_train_pca,
                                                                                        stratify=nyse_y_train_pca,
                                                                                        test_size=0.1, shuffle=True)
russell_X_train_pca, russell_X_test_pca, russell_y_train_pca, russell_y_test_pca = train_test_split(russell_X_pca_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=russell_y_seq,
                                                                                        test_size=0.1, shuffle=True)
russell_X_train_pca, russell_X_valid_pca, russell_y_train_pca, russell_y_valid_pca = train_test_split(russell_X_train_pca,
                                                                                        russell_y_train_pca,
                                                                                        stratify=russell_y_train_pca,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_pca, sp_X_test_pca, sp_y_train_pca, sp_y_test_pca = train_test_split(sp_X_pca_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=sp_y_seq,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_pca, sp_X_valid_pca, sp_y_train_pca, sp_y_valid_pca = train_test_split(sp_X_train_pca,
                                                                                        sp_y_train_pca,
                                                                                        stratify=sp_y_train_pca,
                                                                                        test_size=0.1, shuffle=True)

In [110]:
## ti features
dji_X_train_ti, dji_X_test_ti, dji_y_train_ti, dji_y_test_ti = train_test_split(dji_X_ti_seq_flatten,
                                                                                        dji_y_seq,
                                                                                        stratify=dji_y_seq,
                                                                                        test_size=0.1, shuffle=True)
dji_X_train_ti, dji_X_valid_ti, dji_y_train_ti, dji_y_valid_ti = train_test_split(dji_X_train_ti,
                                                                                        dji_y_train_ti,
                                                                                        stratify=dji_y_train_ti,
                                                                                        test_size=0.1, shuffle=True)
nasdaq_X_train_ti, nasdaq_X_test_ti, nasdaq_y_train_ti, nasdaq_y_test_ti = train_test_split(nasdaq_X_ti_seq_flatten,
                                                                                        nasdaq_y_seq,
                                                                                        stratify=nasdaq_y_seq,
                                                                                        test_size=0.1, shuffle=True)
nasdaq_X_train_ti, nasdaq_X_valid_ti, nasdaq_y_train_ti, nasdaq_y_valid_ti = train_test_split(nasdaq_X_train_ti,
                                                                                        nasdaq_y_train_ti,
                                                                                        stratify=nasdaq_y_train_ti,
                                                                                        test_size=0.1, shuffle=True)
nyse_X_train_ti, nyse_X_test_ti, nyse_y_train_ti, nyse_y_test_ti = train_test_split(nyse_X_ti_seq_flatten,
                                                                                        nyse_y_seq,
                                                                                        stratify=nyse_y_seq,
                                                                                        test_size=0.1, shuffle=True)
nyse_X_train_ti, nyse_X_valid_ti, nyse_y_train_ti, nyse_y_valid_ti = train_test_split(nyse_X_train_ti,
                                                                                        nyse_y_train_ti,
                                                                                        stratify=nyse_y_train_ti,
                                                                                        test_size=0.1, shuffle=True)
russell_X_train_ti, russell_X_test_ti, russell_y_train_ti, russell_y_test_ti = train_test_split(russell_X_ti_seq_flatten,
                                                                                        russell_y_seq,
                                                                                        stratify=russell_y_seq,
                                                                                        test_size=0.1, shuffle=True)
russell_X_train_ti, russell_X_valid_ti, russell_y_train_ti, russell_y_valid_ti = train_test_split(russell_X_train_ti,
                                                                                        russell_y_train_ti,
                                                                                        stratify=russell_y_train_ti,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_ti, sp_X_test_ti, sp_y_train_ti, sp_y_test_ti = train_test_split(sp_X_ti_seq_flatten,
                                                                                        sp_y_seq,
                                                                                        stratify=sp_y_seq,
                                                                                        test_size=0.1, shuffle=True)
sp_X_train_ti, sp_X_valid_ti, sp_y_train_ti, sp_y_valid_ti = train_test_split(sp_X_train_ti,
                                                                                        sp_y_train_ti,
                                                                                        stratify=sp_y_train_ti,
                                                                                        test_size=0.1, shuffle=True)

### Model training and evaluation

#### Full Features

In [111]:
## DJI
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam(lr=1e-2)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=50, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(dji_X_train_full, np.array(dji_y_train_full), 
          epochs=200, batch_size=32,
          validation_data=(dji_X_valid_full, np.array(dji_y_valid_full)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200


Epoch 51/200
Epoch 52/200


<tensorflow.python.keras.callbacks.History at 0x7f32de8cd7d0>

In [112]:
result_dji_full = model.predict(dji_X_test_full)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_full, dji_y_test_full)}")
print(f"DJI F1: {f1_score(result_dji_full, dji_y_test_full, average='macro')}")

DJI Accuracy: 0.5317919075144508
DJI F1: 0.5277525022747953


In [113]:
## NASDAQ
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nasdaq_X_train_full, np.array(nasdaq_y_train_full), 
          epochs=200, batch_size=128,
          validation_data=(nasdaq_X_valid_full, np.array(nasdaq_y_valid_full)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f32de771490>

In [114]:
## NASDAQ
result_nasdaq_full = model.predict(nasdaq_X_test_full)
result_nasdaq_full = (result_nasdaq_full > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_full, nasdaq_y_test_full)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_full, nasdaq_y_test_full, average='macro')}")

NASDAQ Accuracy: 0.5433526011560693
NASDAQ F1: 0.539412934317393


In [119]:
## NYSE
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nyse_X_train_full, np.array(nyse_y_train_full), 
          epochs=200, batch_size=32,
          validation_data=(nyse_X_valid_full, np.array(nyse_y_valid_full)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200


Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f32cac41310>

In [120]:
## NYSE
result_nyse_full = model.predict(nyse_X_test_full)
result_nyse_full = (result_nyse_full > 0.5).astype(int)
print(f"NYSE Accuracy: {accuracy_score(result_nyse_full, nyse_y_test_full)}")
print(f"NYSE F1: {f1_score(result_nyse_full, nyse_y_test_full, average='macro')}")

NYSE Accuracy: 0.42196531791907516
NYSE F1: 0.4219460037423149


In [135]:
## Russell
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=50, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(russell_X_train_full, np.array(russell_y_train_full), 
          epochs=200, batch_size=128,
          validation_data=(russell_X_valid_full, np.array(russell_y_valid_full)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200


<tensorflow.python.keras.callbacks.History at 0x7f32ca9a8c90>

In [136]:
## russell
result_russell_full = model.predict(nyse_X_test_full)
result_russell_full = (result_russell_full > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_full, russell_y_test_full)}")
print(f"russell F1: {f1_score(result_russell_full, russell_y_test_full, average='macro')}")

russell Accuracy: 0.4393063583815029
russell F1: 0.43863111765296225


In [131]:
## S&P 500
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(sp_X_train_full, np.array(sp_y_train_full), 
          epochs=200, batch_size=128,
          validation_data=(sp_X_valid_full, np.array(sp_y_valid_full)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f32af8e70d0>

In [74]:
## russell
result_sp_full = model.predict(nyse_X_test_full)
result_sp_full = (result_sp_full > 0.5).astype(int)
print(f"NYSE Accuracy: {accuracy_score(result_sp_full, sp_y_test_full)}")
print(f"NYSE F1: {f1_score(result_sp_full, sp_y_test_full, average='macro')}")

NYSE Accuracy: 0.5086705202312138
NYSE F1: 0.5062951149907672


#### PCA Features

In [137]:
## DJI
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam(lr=1e-2)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(dji_X_train_pca, np.array(dji_y_train_pca), 
          epochs=200, batch_size=128,
          validation_data=(dji_X_valid_pca, np.array(dji_y_valid_pca)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200


Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f32a7c36a10>

In [138]:
result_dji_full = model.predict(dji_X_test_pca)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_full, dji_y_test_pca)}")
print(f"DJI F1: {f1_score(result_dji_full, dji_y_test_pca, average='macro')}")

DJI Accuracy: 0.5202312138728323
DJI F1: 0.5179117005203961


In [139]:
## NASDAQ
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nasdaq_X_train_pca, np.array(nasdaq_y_train_pca), 
          epochs=200, batch_size=128,
          validation_data=(nasdaq_X_valid_pca, np.array(nasdaq_y_valid_pca)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200
Epoch 102/200


<tensorflow.python.keras.callbacks.History at 0x7f31aa5e8690>

In [140]:
## NASDAQ
result_nasdaq_full = model.predict(nasdaq_X_test_pca)
result_nasdaq_full = (result_nasdaq_full > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_full, nasdaq_y_test_pca)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_full, nasdaq_y_test_pca, average='macro')}")

NASDAQ Accuracy: 0.5317919075144508
NASDAQ F1: 0.526667792602601


In [141]:
## NYSE
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nyse_X_train_pca, np.array(nyse_y_train_pca), 
          epochs=200, batch_size=128,
          validation_data=(nyse_X_valid_pca, np.array(nyse_y_valid_pca)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200
Epoch 102/200


<tensorflow.python.keras.callbacks.History at 0x7f31aa3ba550>

In [142]:
## NYSE
result_nyse_full = model.predict(nyse_X_test_pca)
result_nyse_full = (result_nyse_full > 0.5).astype(int)
print(f"NYSE Accuracy: {accuracy_score(result_nyse_full, nyse_y_test_pca)}")
print(f"NYSE F1: {f1_score(result_nyse_full, nyse_y_test_pca, average='macro')}")

NYSE Accuracy: 0.5028901734104047
NYSE F1: 0.4968208874458875


In [143]:
## Russell
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(russell_X_train_pca, np.array(russell_y_train_pca), 
          epochs=200, batch_size=128,
          validation_data=(russell_X_valid_pca, np.array(russell_y_valid_pca)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f31a60ffbd0>

In [144]:
## russell
result_russell_full = model.predict(russell_X_test_pca)
result_russell_full = (result_russell_full > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_full, russell_y_test_pca)}")
print(f"russell F1: {f1_score(result_russell_full, russell_y_test_pca, average='macro')}")

russell Accuracy: 0.4913294797687861
russell F1: 0.48844086021505373


In [145]:
## S&P 500
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(sp_X_train_pca, np.array(sp_y_train_pca), 
          epochs=200, batch_size=128,
          validation_data=(sp_X_valid_pca, np.array(sp_y_valid_pca)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200


<tensorflow.python.keras.callbacks.History at 0x7f31a3e9ba10>

In [146]:
## russell
result_sp_full = model.predict(sp_X_test_pca)
result_sp_full = (result_sp_full > 0.5).astype(int)
print(f"S&P 500 Accuracy: {accuracy_score(result_sp_full, sp_y_test_pca)}")
print(f"S&P 500 F1: {f1_score(result_sp_full, sp_y_test_pca, average='macro')}")

S&P 500 Accuracy: 0.4508670520231214
S&P 500 F1: 0.44485728762033444


#### Technical indicator features

In [101]:
## DJI
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam(lr=1e-2)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(dji_X_train_ti, np.array(dji_y_train_ti), 
          epochs=200, batch_size=128,
          validation_data=(dji_X_valid_ti, np.array(dji_y_valid_ti)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 101/200
Epoch 102/200


<tensorflow.python.keras.callbacks.History at 0x7f5d791ed050>

In [102]:
result_dji_full = model.predict(dji_X_test_ti)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_full, dji_y_test_ti)}")
print(f"DJI F1: {f1_score(result_dji_full, dji_y_test_ti, average='macro')}")

DJI Accuracy: 0.4913294797687861
DJI F1: 0.48636977058029696


In [103]:
## NASDAQ
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nasdaq_X_train_ti, np.array(nasdaq_y_train_ti), 
          epochs=200, batch_size=128,
          validation_data=(nasdaq_X_valid_ti, np.array(nasdaq_y_valid_ti)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 102/200


<tensorflow.python.keras.callbacks.History at 0x7f5d78f62f10>

In [104]:
## NASDAQ
result_nasdaq_full = model.predict(nasdaq_X_test_ti)
result_nasdaq_full = (result_nasdaq_full > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_nasdaq_full, nasdaq_y_test_ti)}")
print(f"NASDAQ F1: {f1_score(result_nasdaq_full, nasdaq_y_test_ti, average='macro')}")

NASDAQ Accuracy: 0.5144508670520231
NASDAQ F1: 0.5040950040950041


In [115]:
## NYSE
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(nyse_X_train_ti, np.array(nyse_y_train_ti), 
          epochs=200, batch_size=128,
          validation_data=(nyse_X_valid_ti, np.array(nyse_y_valid_ti)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f5d6b8fb310>

In [116]:
## NYSE
result_nyse_full = model.predict(nyse_X_test_ti)
result_nyse_full = (result_nyse_full > 0.5).astype(int)
print(f"NYSE Accuracy: {accuracy_score(result_nyse_full, nyse_y_test_ti)}")
print(f"NYSE F1: {f1_score(result_nyse_full, nyse_y_test_ti, average='macro')}")

NYSE Accuracy: 0.5028901734104047
NYSE F1: 0.4939455782312925


In [135]:
## Russell
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(russell_X_train_ti, np.array(russell_y_train_ti), 
          epochs=200, batch_size=128,
          validation_data=(russell_X_valid_ti, np.array(russell_y_valid_ti)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 102/200
Epoch 103/200


<tensorflow.python.keras.callbacks.History at 0x7f5d55c55210>

In [136]:
## russell
result_russell_full = model.predict(russell_X_test_ti)
result_russell_full = (result_russell_full > 0.5).astype(int)
print(f"russell Accuracy: {accuracy_score(result_russell_full, russell_y_test_ti)}")
print(f"russell F1: {f1_score(result_russell_full, russell_y_test_ti, average='macro')}")

russell Accuracy: 0.5202312138728323
russell F1: 0.5201670843776107


In [139]:
## S&P 500
model = keras.Sequential(
    [
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

optimizer = keras.optimizers.Adam()
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=100, verbose=0,
    mode='auto', baseline=None, restore_best_weights=False
)

model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit(sp_X_train_ti, np.array(sp_y_train_ti), 
          epochs=200, batch_size=128,
          validation_data=(sp_X_valid_ti, np.array(sp_y_valid_ti)), callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 102/200
Epoch 103/200


<tensorflow.python.keras.callbacks.History at 0x7f5d567d17d0>

In [142]:
## russell
result_sp_full = model.predict(sp_X_test_ti)
result_sp_full = (result_sp_full > 0.5).astype(int)
print(f"S&P Accuracy: {accuracy_score(result_sp_full, sp_y_test_ti)}")
print(f"S&P F1: {f1_score(result_sp_full, sp_y_test_ti, average='macro')}")

S&P Accuracy: 0.5202312138728323
S&P F1: 0.5108166115899567


## 3D CNN Pred

#### Full features

In [17]:
### Clean dataframe (full features)
dji_df_full = dji_df_full
nasdaq_df_full = nasdaq_df_full
nyse_df_full = nyse_df_full
russell_df_full = russell_df_full
sp_df_full = sp_df_full

## Combine
X_full = dji_df_full.drop(columns=["MOVEMENT"]).copy()
y_full = list(dji_df_full["MOVEMENT"].copy())

for df in [nasdaq_df_full, nyse_df_full, russell_df_full, sp_df_full]:
    X_full = np.concatenate((X_full, df.drop(columns=["MOVEMENT"]).copy()))
    y_full.extend(list(df["MOVEMENT"].copy()))

In [18]:
## Reshape
X_full = X_full.reshape((5, -1, 82))
y_full = np.array(y_full)

## Sequencing
X_seq, y_seq = generate_sequential_data_3d(X_full, y_full, 60)

In [19]:
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(
    X_seq, y_seq, stratify=None, test_size=0.1, shuffle=False
)
X_seq_train, X_seq_valid, y_seq_train, y_seq_valid = train_test_split(
    X_seq_train, y_seq_train, stratify=None, test_size=0.1, shuffle=False
)

In [22]:
model_full = cnnpred_3d(5, 60, 82, [8, 8, 8])
epochs = 100
batch_size=128

model_full.compile(optimizer="Adam", loss="mae", 
                   metrics=["acc", f1])
model_full.fit(X_seq_train, y_seq_train, epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_seq_valid, y_seq_valid))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fba3852e310>

In [23]:
result_full = model_full.predict(X_seq_test)
result_full = (result_full > 0.5).astype(int)
print(f"FULL Accuracy: {accuracy_score(result_full, y_seq_test)}")
print(f"FULL F1: {f1_score(result_full, y_seq_test, average='macro')}")

FULL Accuracy: 0.5953757225433526
FULL F1: 0.5534660766961652


#### PCA features

In [86]:
### Clean dataframe (full features)
dji_df_full = dji_df_pca
nasdaq_df_full = nasdaq_df_pca
nyse_df_full = nyse_df_pca
russell_df_full = russell_df_pca
sp_df_full = sp_df_pca

## Combine
X_full = dji_df_full.drop(columns=["MOVEMENT"]).copy()
y_full = list(dji_df_full["MOVEMENT"].copy())

for df in [nasdaq_df_full, nyse_df_full, russell_df_full, sp_df_full]:
    X_full = np.concatenate((X_full, df.drop(columns=["MOVEMENT"]).copy()))
    y_full.extend(list(df["MOVEMENT"].copy()))

In [87]:
## Reshape
X_full = X_full.reshape((5, -1, 30))
y_full = np.array(y_full)

## Sequencing
X_seq, y_seq = generate_sequential_data_3d(X_full, y_full, 60)

In [88]:
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(
    X_seq, y_seq, stratify=None, test_size=0.1, shuffle=False
)
X_seq_train, X_seq_valid, y_seq_train, y_seq_valid = train_test_split(
    X_seq_train, y_seq_train, stratify=None, test_size=0.1, shuffle=False
)

In [89]:
model_full = cnnpred_3d(5, 60, 30, [8, 8, 8])
epochs = 100
batch_size=128

model_full.compile(optimizer="Adam", loss="mae", 
                   metrics=["acc", f1])
model_full.fit(X_seq_train, y_seq_train, epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_seq_valid, y_seq_valid))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f31cbe4d090>

In [90]:
result_full = model_full.predict(X_seq_test)
result_full = (result_full > 0.5).astype(int)
print(f"FULL Accuracy: {accuracy_score(result_full, y_seq_test)}")
print(f"FULL F1: {f1_score(result_full, y_seq_test, average='macro')}")

FULL Accuracy: 0.6011560693641619
FULL F1: 0.45315864217325574


#### Technical indicator features

In [91]:
### Clean dataframe (full features)
dji_df_full = dji_df_ti
nasdaq_df_full = nasdaq_df_ti
nyse_df_full = nyse_df_ti
russell_df_full = russell_df_ti
sp_df_full = sp_df_ti

## Combine
X_full = dji_df_full.drop(columns=["MOVEMENT"]).copy()
y_full = list(dji_df_full["MOVEMENT"].copy())

for df in [nasdaq_df_full, nyse_df_full, russell_df_full, sp_df_full]:
    X_full = np.concatenate((X_full, df.drop(columns=["MOVEMENT"]).copy()))
    y_full.extend(list(df["MOVEMENT"].copy()))

In [92]:
## Reshape
X_full = X_full.reshape((5, -1, 13))
y_full = np.array(y_full)

## Sequencing
X_seq, y_seq = generate_sequential_data_3d(X_full, y_full, 60)

In [93]:
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(
    X_seq, y_seq, stratify=None, test_size=0.1, shuffle=False
)
X_seq_train, X_seq_valid, y_seq_train, y_seq_valid = train_test_split(
    X_seq_train, y_seq_train, stratify=None, test_size=0.1, shuffle=False
)

In [94]:
model_full = cnnpred_3d(5, 60, 13, [8, 8, 8])
epochs = 100
batch_size=128

model_full.compile(optimizer="Adam", loss="mae", 
                   metrics=["acc", f1])
model_full.fit(X_seq_train, y_seq_train, epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_seq_valid, y_seq_valid))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f31cbbfd690>

In [95]:
result_full = model_full.predict(X_seq_test)
result_full = (result_full > 0.5).astype(int)
print(f"FULL Accuracy: {accuracy_score(result_full, y_seq_test)}")
print(f"FULL F1: {f1_score(result_full, y_seq_test, average='macro')}")

FULL Accuracy: 0.47398843930635837
FULL F1: 0.46366640547814536


## LSTM

#### Full features

In [7]:
### DJI
# Split
X = np.array(dji_df_full.drop(columns=["MOVEMENT"]).copy())
y = np.array(dji_df_full["MOVEMENT"].copy())

In [8]:
# Split train test, should not shuffle as the data is time series
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=False
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

In [9]:
# Get input ready for lstm
win_length = 60
batch_size = 32
num_features = 82
train_generator = TimeseriesGenerator(np.array(X_train), np.array(y_train),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

test_generator = TimeseriesGenerator(np.array(X_test), np.array(y_test),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

valid_generator = TimeseriesGenerator(np.array(X_valid), np.array(y_valid),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

In [10]:
model = lstm(win_length, num_features)

optimizer = keras.optimizers.Adam()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                 patience=20,
                                                 mode="min")
model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit_generator(train_generator,
                   epochs=100,
                   validation_data=valid_generator,
                   shuffle=False, callbacks=[early_stopping])

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50


<tensorflow.python.keras.callbacks.History at 0x7fbb0c819110>

In [11]:
result_dji_full = model.predict(test_generator)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"DJI Accuracy: {accuracy_score(result_dji_full, y_test[win_length:])}")
print(f"DJI F1: {f1_score(result_dji_full, y_test[win_length:], average='macro')}")

DJI Accuracy: 0.6386554621848739
DJI F1: 0.42998774646318366


In [12]:
### NASDAQ
# Split
X = np.array(nasdaq_df_full.drop(columns=["MOVEMENT"]).copy())
y = np.array(nasdaq_df_full["MOVEMENT"].copy())

In [13]:
# Split train test, should not shuffle as the data is time series
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=False
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

In [14]:
# Get input ready for lstm
win_length = 60
batch_size = 32
num_features = 82
train_generator = TimeseriesGenerator(np.array(X_train), np.array(y_train),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

test_generator = TimeseriesGenerator(np.array(X_test), np.array(y_test),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

valid_generator = TimeseriesGenerator(np.array(X_valid), np.array(y_valid),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

In [15]:
model = lstm(win_length, num_features)

optimizer = keras.optimizers.Adam()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                 patience=20,
                                                 mode="min")
model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit_generator(train_generator,
                   epochs=100,
                   validation_data=valid_generator,
                   shuffle=False, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


<tensorflow.python.keras.callbacks.History at 0x7fbabc25e4d0>

In [16]:
result_dji_full = model.predict(test_generator)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"NASDAQ Accuracy: {accuracy_score(result_dji_full, y_test[win_length:])}")
print(f"NASDAQ F1: {f1_score(result_dji_full, y_test[win_length:], average='macro')}")

NASDAQ Accuracy: 0.5294117647058824
NASDAQ F1: 0.48484848484848486


In [24]:
### NYSE
# Split
X = np.array(nyse_df_full.drop(columns=["MOVEMENT"]).copy())
y = np.array(nyse_df_full["MOVEMENT"].copy())

In [25]:
# Split train test, should not shuffle as the data is time series
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=False
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

In [26]:
# Get input ready for lstm
win_length = 60
batch_size = 32
num_features = 82
train_generator = TimeseriesGenerator(np.array(X_train), np.array(y_train),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

test_generator = TimeseriesGenerator(np.array(X_test), np.array(y_test),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

valid_generator = TimeseriesGenerator(np.array(X_valid), np.array(y_valid),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

In [27]:
model = lstm(win_length, num_features)

optimizer = keras.optimizers.Adam()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                 patience=20,
                                                 mode="min")
model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit_generator(train_generator,
                   epochs=100,
                   validation_data=valid_generator,
                   shuffle=False, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


<tensorflow.python.keras.callbacks.History at 0x7fba38173f10>

In [29]:
result_dji_full = model.predict(test_generator)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"NYSE Accuracy: {accuracy_score(result_dji_full, y_test[win_length:])}")
print(f"Nyse F1: {f1_score(result_dji_full, y_test[win_length:], average='macro')}")

NYSE Accuracy: 0.5294117647058824
Nyse F1: 0.4015804597701149


In [30]:
### russell
# Split
X = np.array(russell_df_full.drop(columns=["MOVEMENT"]).copy())
y = np.array(russell_df_full["MOVEMENT"].copy())

In [31]:
# Split train test, should not shuffle as the data is time series
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=False
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

In [32]:
# Get input ready for lstm
win_length = 60
batch_size = 32
num_features = 82
train_generator = TimeseriesGenerator(np.array(X_train), np.array(y_train),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

test_generator = TimeseriesGenerator(np.array(X_test), np.array(y_test),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

valid_generator = TimeseriesGenerator(np.array(X_valid), np.array(y_valid),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

In [35]:
model = lstm(win_length, num_features)

optimizer = keras.optimizers.Adam()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                 patience=20,
                                                 mode="min")
model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit_generator(train_generator,
                   epochs=100,
                   validation_data=valid_generator,
                   shuffle=False, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


<tensorflow.python.keras.callbacks.History at 0x7fb9fd48e290>

In [36]:
result_dji_full = model.predict(test_generator)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"RUSSELL Accuracy: {accuracy_score(result_dji_full, y_test[win_length:])}")
print(f"RUSSELL F1: {f1_score(result_dji_full, y_test[win_length:], average='macro')}")

RUSSELL Accuracy: 0.4957983193277311
RUSSELL F1: 0.3928571428571429


In [37]:
### sp
# Split
X = np.array(sp_df_full.drop(columns=["MOVEMENT"]).copy())
y = np.array(sp_df_full["MOVEMENT"].copy())

In [38]:
# Split train test, should not shuffle as the data is time series
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=False
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

In [39]:
# Get input ready for lstm
win_length = 60
batch_size = 32
num_features = 82
train_generator = TimeseriesGenerator(np.array(X_train), np.array(y_train),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

test_generator = TimeseriesGenerator(np.array(X_test), np.array(y_test),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

valid_generator = TimeseriesGenerator(np.array(X_valid), np.array(y_valid),
                                     length=win_length,
                                     sampling_rate=1,
                                     batch_size=batch_size)

In [40]:
model = lstm(win_length, num_features)

optimizer = keras.optimizers.Adam()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                 patience=20,
                                                 mode="min")
model.compile(optimizer=optimizer, loss="binary_crossentropy", 
                   metrics=["acc", f1])
model.fit_generator(train_generator,
                   epochs=100,
                   validation_data=valid_generator,
                   shuffle=False, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


<tensorflow.python.keras.callbacks.History at 0x7fb9f6e8c450>

In [41]:
result_dji_full = model.predict(test_generator)
result_dji_full = (result_dji_full > 0.5).astype(int)
print(f"SP Accuracy: {accuracy_score(result_dji_full, y_test[win_length:])}")
print(f"SP F1: {f1_score(result_dji_full, y_test[win_length:], average='macro')}")

SP Accuracy: 0.5630252100840336
SP F1: 0.39285714285714285
