# **Predicting Stock Returns with Elliott Waves**

## *Neural Network and Random Forest Classifiers*

### *Michele Orlandi ISYE6767 Fall 2022*

# 1. **Setup**

## 1.1 **Packages and Classes**

In [1]:
import os
import sys
sys.path.append(os.getcwd())
import gc
import warnings
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from feat_eng import Engineer
from reader import Reader
from processor import Processor

In [2]:
# Silence every warning for the rest of the session and make sure the cyclic
# garbage collector is switched on (later cells call gc.collect() explicitly).
warnings.filterwarnings('ignore')
gc.enable()

## 1.2 **Import Stocks Data**

In [3]:
# Download window (ISO dates) and the exchange code that selects the
# large-universe listing file.
start_date, end_date = '2000-01-01', '2021-11-12'
exchange = 'nasd'

In [4]:
# Load both ticker lists from CSV: the small universe from the 'Ticker'
# column of tickers.csv, the large universe from the 'Symbol' column of the
# per-exchange listing file.
small_universe_path = './tickers.csv'
large_universe_path = './tickers_{}.csv'.format(exchange)

tickers = pd.read_csv(small_universe_path)['Ticker'].to_list()
large_universe = pd.read_csv(large_universe_path)['Symbol'].to_list()

In [5]:
# One Reader per universe; both share the same download window.
date_window = dict(start=start_date, end=end_date)
reader_1 = Reader(tickers=tickers, **date_window)
reader_2 = Reader(tickers=large_universe, **date_window)

In [6]:
# Retrieve the small-universe data frames. The result is used below as a
# mapping keyed by ticker (it is iterated with .keys() and indexed by key).
dfs = reader_1.read_yahoo()

In [None]:
# Retrieve the large-universe data frames. The result is used below as a
# mapping keyed by ticker (it is iterated with .keys() and indexed by key).
large_dfs = reader_2.read_yahoo()

## 1.3 **Helper Functions**

In [None]:
def get_scatter(xval: pd.Series, yval: pd.Series, yname: str, mode: str = 'markers'):
    """Build a single Plotly scatter trace.

    Args:
        xval: Values for the x axis.
        yval: Values for the y axis.
        yname: Legend name of the trace.
        mode: Plotly drawing mode; defaults to ``'markers'``.

    Returns:
        The ``go.Scatter`` trace (a trace, not a complete figure).
    """
    return go.Scatter(x=xval, y=yval, name=yname, mode=mode)

In [None]:
def plot_data(df: pd.DataFrame, title: str, acc_bench: float, prec_bench: float):
    """Plot every column of ``df`` against its index with two benchmark lines.

    Args:
        df: Data to plot; each column becomes one scatter trace named after
            the column, with ``df.index`` on the x axis.
        title: Figure title.
        acc_bench: y position of the horizontal 'Accuracy Benchmark' line.
        prec_bench: y position of the horizontal 'Precision Benchmark' line.

    Returns:
        The assembled ``go.Figure``.
    """
    fig = go.Figure()

    # One trace per column, in column order.
    for column_label in df.columns:
        trace = get_scatter(df.index, df[column_label], column_label)
        fig.add_trace(trace)

    # Horizontal reference lines: accuracy first, then precision.
    benchmark_lines = (
        (acc_bench, 'Accuracy Benchmark'),
        (prec_bench, 'Precision Benchmark'),
    )
    for level, label in benchmark_lines:
        fig.add_hline(y=level, annotation_text=label)

    layout_options = {
        'title': title,
        'xaxis_title': 'Tickers',
        'yaxis_title': 'Value',
    }
    fig.update_layout(**layout_options)
    return fig

## 1.4 **Preprocess Data**

In [None]:
# Shared helper objects: `processor` is used below for cleaning, scaling,
# splitting and model scoring; `engineer` builds the indicator features.
processor, engineer = Processor(), Engineer()

In [None]:
# Settings forwarded to Engineer.engineer_features.
fractal_period = 9
holding_period = 24
fast = 5
slow = 34
signal = 5
tide = 5
wave = 13
ripple = 34

# Columns kept for modelling. 'prediction' must stay last: the model cells
# take every column except the last one as predictors.
feats = [
    'macd', 'signal', 'tide', 'wave', 'ripple',
    'tops', 'bottoms', 'mfi_change', 'volume_change',
    'prediction',
]

### 1.4.1 **Small Universe Stocks**

In [None]:
# Build the model-ready feature table for every small-universe ticker.
# The target column is shifted up by the holding period so each row pairs
# today's indicators with the outcome `holding_period` rows later.
indicator_kwargs = dict(
    period=fractal_period,
    fast=fast,
    slow=slow,
    signal=signal,
    tide=tide,
    wave=wave,
    ripple=ripple,
    holding_period=holding_period,
)

feature_dict = {}
for t in dfs.keys():
    # Return value is discarded, so this presumably cleans in place.
    processor.clean_data(dfs[t])
    new_df = engineer.engineer_features(dfs[t], **indicator_kwargs)
    new_df['prediction'] = new_df['prediction'].shift(-holding_period)

    # Keep only rows in which every column is finite; this also drops the
    # trailing rows that the shift filled with NaN.
    finite_rows = np.isfinite(new_df).all(axis=1)
    new_df = new_df[finite_rows].copy()

    # Return value is discarded, so this presumably scales in place.
    processor.scale_data(new_df)
    feature_dict[t] = new_df[feats].copy()
    gc.collect()

### 1.4.2 **Large Universe Stocks**

In [None]:
# Build the model-ready feature table for every large-universe ticker.
# The target column is shifted up by the holding period so each row pairs
# today's indicators with the outcome `holding_period` rows later.
large_indicator_kwargs = dict(
    period=fractal_period,
    fast=fast,
    slow=slow,
    signal=signal,
    tide=tide,
    wave=wave,
    ripple=ripple,
    holding_period=holding_period,
)

large_feature_dict = {}
for t in large_dfs.keys():
    # Return value is discarded, so this presumably cleans in place.
    processor.clean_data(large_dfs[t])
    new_df = engineer.engineer_features(large_dfs[t], **large_indicator_kwargs)
    new_df['prediction'] = new_df['prediction'].shift(-holding_period)

    # Keep only rows in which every column is finite; this also drops the
    # trailing rows that the shift filled with NaN.
    large_finite_rows = np.isfinite(new_df).all(axis=1)
    new_df = new_df[large_finite_rows].copy()

    # Return value is discarded, so this presumably scales in place.
    processor.scale_data(new_df)
    large_feature_dict[t] = new_df[feats].copy()
    gc.collect()

In [None]:
# Drop the raw price frames and the last loop temporary; none of these names
# is referenced again in the remaining visible cells.
del dfs, new_df, large_dfs

# 2. **Neural Network**

## 2.1 **Small Universe Stocks**

In [None]:
# Column-oriented accumulator for the small-universe MLP scores; the two
# metric lists are filled by the scoring loop.
results = dict(ticker=tickers, accuracy=[], precision=[])

In [None]:
# Train and score the MLP classifier ticker by ticker, collecting the
# rounded accuracy and precision in `results`.
for t, ticker_frame in feature_dict.items():
    predictor_cols = ticker_frame.columns[:-1]  # every column except the last
    x_train, x_test, y_train, y_test = processor.split_data(
        ticker_frame.loc[:, predictor_cols].copy(),
        ticker_frame['prediction'].copy(),
    )
    accuracy, precision = processor.fit_and_score(x_train, x_test, y_train, y_test, 'mlp')
    for metric_name, metric_value in (('accuracy', accuracy), ('precision', precision)):
        results[metric_name].append(round(metric_value, 5))

In [None]:
# Turn the accumulator into a DataFrame indexed by ticker.
results = (
    pd.DataFrame.from_dict(results)  # orient defaults to 'columns'
    .set_index('ticker')
)
gc.collect()

## 2.2 **Large Universe Stocks**

In [None]:
# Column-oriented accumulator for the large-universe MLP scores; the two
# metric lists are filled by the scoring loop.
large_results = dict(ticker=large_universe, accuracy=[], precision=[])

In [None]:
# Train and score the MLP classifier on every large-universe ticker.
# For each ticker: every column except the last is a predictor, the
# 'prediction' column is the target, and the rounded accuracy/precision are
# appended to `large_results`. The scores must go to `large_results`, not
# `results`, which holds the small-universe scores.
for t in large_feature_dict.keys():
    x_train, x_test, y_train, y_test = processor.split_data(
        large_feature_dict[t].loc[:, large_feature_dict[t].columns[:-1]].copy(),
        large_feature_dict[t].loc[:, 'prediction'].copy()
        )
    accuracy, precision = processor.fit_and_score(x_train, x_test, y_train, y_test, 'mlp')
    large_results['accuracy'].append(round(accuracy, 5))
    large_results['precision'].append(round(precision, 5))

In [None]:
# Turn the accumulator into a DataFrame indexed by ticker.
large_results = (
    pd.DataFrame.from_dict(large_results)  # orient defaults to 'columns'
    .set_index('ticker')
)
gc.collect()

## 2.3 **Plot Results**

In [None]:
# Small-universe MLP scores with the accuracy and precision benchmark lines.
fig = plot_data(
    df=results,
    title='Small Universe Neural Network Results',
    acc_bench=0.5014,
    prec_bench=0.5141,
)
fig.show(renderer='notebook')

In [None]:
# Large-universe MLP scores with the accuracy and precision benchmark lines.
large_fig = plot_data(
    df=large_results,
    title='Large Universe Neural Network Results',
    acc_bench=0.5014,
    prec_bench=0.5141,
)
large_fig.show(renderer='notebook')

In [None]:
# Drop the last train/test split left behind by the scoring loops.
del x_train, x_test, y_train, y_test

# 3. **Support Vector Machine**

## 3.1 **Small Universe Stocks**

In [None]:
# Hyper-parameter grid handed to processor.fit_and_score for the SVM
# (RBF kernel only), plus the accumulator for the small-universe SVM scores.
params = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)
svm_results = dict(ticker=tickers, accuracy=[], precision=[])

In [None]:
# Tune and score the SVM ticker by ticker on the small universe, collecting
# the rounded accuracy and precision in `svm_results`.
for t, ticker_frame in feature_dict.items():
    predictor_cols = ticker_frame.columns[:-1]  # every column except the last
    x_train, x_test, y_train, y_test = processor.split_data(
        ticker_frame.loc[:, predictor_cols].copy(),
        ticker_frame['prediction'].copy(),
    )
    accuracy, precision = processor.fit_and_score(x_train, x_test, y_train, y_test, 'svm', params)
    for metric_name, metric_value in (('accuracy', accuracy), ('precision', precision)):
        svm_results[metric_name].append(round(metric_value, 5))

In [None]:
# Turn the accumulator into a DataFrame indexed by ticker.
svm_results = (
    pd.DataFrame.from_dict(svm_results)  # orient defaults to 'columns'
    .set_index('ticker')
)
gc.collect()

## 3.2 **Large Universe Stocks**

In [None]:
# Accumulator for the large-universe SVM scores. The ticker column must be
# the large-universe list so that labels and lengths match the scores
# produced from `large_feature_dict`.
# NOTE(review): this assumes every symbol in `large_universe` yields exactly
# one entry in `large_feature_dict`, in the same order — confirm against Reader.
large_svm_results = {
    'ticker': large_universe,
    'accuracy': [],
    'precision': []
}

In [None]:
# Tune and score the SVM ticker by ticker on the large universe, collecting
# the rounded accuracy and precision in `large_svm_results`.
for t, ticker_frame in large_feature_dict.items():
    predictor_cols = ticker_frame.columns[:-1]  # every column except the last
    x_train, x_test, y_train, y_test = processor.split_data(
        ticker_frame.loc[:, predictor_cols].copy(),
        ticker_frame['prediction'].copy(),
    )
    accuracy, precision = processor.fit_and_score(x_train, x_test, y_train, y_test, 'svm', params)
    for metric_name, metric_value in (('accuracy', accuracy), ('precision', precision)):
        large_svm_results[metric_name].append(round(metric_value, 5))

In [None]:
# Turn the accumulator into a DataFrame indexed by ticker.
large_svm_results = (
    pd.DataFrame.from_dict(large_svm_results)  # orient defaults to 'columns'
    .set_index('ticker')
)
gc.collect()

## 3.3 **Plot Results**

In [None]:
# Small-universe SVM scores with the accuracy and precision benchmark lines.
svm_fig = plot_data(
    df=svm_results,
    title='Small Universe Support Vector Machine Results',
    acc_bench=0.5014,
    prec_bench=0.5141,
)
svm_fig.show(renderer='notebook')

In [None]:
# Large-universe SVM scores with the accuracy and precision benchmark lines.
large_svm_fig = plot_data(
    df=large_svm_results,
    title='Large Universe Support Vector Machine Results',
    acc_bench=0.5014,
    prec_bench=0.5141,
)
large_svm_fig.show(renderer='notebook')