In [8]:
# Importing libraries
import math
import os
import warnings
from datetime import datetime, timedelta

# Installed before the third-party imports so import-time warnings stay silenced
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np 
import pandas as pd
import scipy
import yfinance as yf
from scipy.signal import argrelextrema

from neurotrader.directional_change import directional_change
from neurotrader.download_data import download_df
from neurotrader.head_shoulders import find_hs_patterns, get_pattern_return, load_attributes, HSPattern
from neurotrader.perceptually_important import find_pips

# Reversal Pattern Detection Algorithm
- Look at the 1 day intervals for now
- Want to look at min by min at some point
- Goal: If a pattern is detected, how much weight should we give the buying/selling power of that pattern
- AKA HOW ACCURATE THAT PATTERN IS FOR a certain stock (Profit factor)
- See which patterns are most common
- Look for a way to add an "Early Detection" of a pattern

## TODO:
- Run each program to see output, see how to generate data we can add to neural network
- 

## How to apply to a neural network
- For a stock, determine what error threshold to use by calculating the profit factor on known data
-   HAVE TO OPTIMIZE HIS CODE FOR THIS APPROACH
- Determine most common pattern
- Determine most profitable pattern
- Determine most accurate pattern
- Find a way to detect patterns before they occur (test to see if the market finishes the pattern)
- Mess with different exit methods. Those are the hyperparameters
- Run this on different stock

### Idea:
Train a NN to detect different patterns, testing different exit strategies on each pattern. Have it assign a buy/sell confidence value to these patterns. Then test this on unseen new data. Try to implement flag detection here too.

We can further test this by training the network on each stock individually, trying to make predictions off of early identification

# Testing Different Algorithms for finding local extrema
- Rolling Window
- Directional Change
- Perceptually Important Points

In [ ]:
# Trendline parameter
lookback = 30
# Pass the arguments by keyword, like the other download_df calls in this
# notebook, so '5y' is unambiguously the history period and not the interval.
data = download_df(ticker='AAPL', period='5y')
data.head(1)

In [ ]:
# ROLLING WINDOW ALGORITHM
# Use SciPy to get local extrema. BE CAREFUL NOT TO CHEAT WITH FUTURE DATA:
# with order=3 a point is compared against 3 neighbours on EACH side, so an
# extremum is only known once the 3 following bars exist.
# Use close data because it is more stable than adj high and adj low
arr = data['close'].to_numpy()
# argrelextrema is imported explicitly from scipy.signal in the import cell;
# a bare `import scipy` does not guarantee that the signal submodule is loaded.
bottoms = argrelextrema(arr, np.less, order=3)
tops = argrelextrema(arr, np.greater, order=3)
# argrelextrema returns a tuple of index arrays; show the first 11 indices
print(bottoms[0][0:11])
print(tops[0][0:11])

In [ ]:
# Perceptually Important Points
# PIP is presumably the number of points to extract, and the trailing 1 a
# distance-measure selector -- TODO confirm both against find_pips.
PIP = 10
pips_x, pips_y = find_pips(arr, PIP, 1)
# Show the two returned sequences, x first and then y
for pip_values in (pips_x, pips_y):
    print(pip_values)

# Building an ML model to detect patterns in a rolling window.
Abstract: We want to use mathematical equations to detect patterns in our stock data. We will look at those patterns and tie a value to them. We will then use a rolling window technique to make calculations on unseen data???
- Use a 72-hour trend rolling window
- LOOK FOR FEATURES that could show a trend
- The Head and Shoulders is not a common occurrence. Look to supplement this data with other patterns?
- A lot of papers and programs create a bunch of synthetic data to train models on
- This might be the best option here.

### 1) Head and Shoulders
- Possible option: Loop through every single one of the stock market top 500 and find the patterns. See how many there are?

In [ ]:
# Ticker under study; request period='max' at a 1-day interval
ticker = 'AAPL'
data = download_df(
    ticker=ticker,
    period='max',
    interval='1d',
)

In [ ]:
# Detect HS patterns in our dataset.
# Log-transform the whole frame, then keep only the "close" column as an array
data_norm = np.log(data)
dat_slice = data_norm['close'].to_numpy()

# Search the log-close series for HS and inverted HS patterns
hs_patterns, ihs_patterns = find_hs_patterns(dat_slice, 6, early_find=False)

# Pass each pattern list to load_attributes together with a fresh, empty frame
hs_df = load_attributes(dat_slice, pd.DataFrame(), hs_patterns, len(data))
ihs_df = load_attributes(dat_slice, pd.DataFrame(), ihs_patterns, len(data))

# Report how many patterns were found and the date range that was searched
print(
    f"We detected {len(hs_patterns)} hs and {len(ihs_patterns)}"
    f" inverted hs out of {len(data)} data points"
)
print(f"Data time range: {data.index[0]} to {data.index[-1]}")

In [ ]:
# Display the first detected head-and-shoulders pattern as the cell output.
# NOTE(review): indexing [0] presumably fails when no pattern was detected -- confirm
# that hs_patterns is non-empty for this ticker.
hs_patterns[0]

In [ ]:
# Plot the first detected pattern on the log-transformed data.
# NOTE(review): `plot_hs` is neither imported nor defined in the cells visible here
# (only `plot_simple_hs` is defined, further down) -- confirm where it comes from,
# otherwise this cell raises NameError on a fresh kernel.
plot_hs(data_norm, ticker, hs_patterns[0], pad=6)

### Goal Here
1) Test to see how the model performs when trained on hourly, minute, and daily data
2) Train the model on both synthetic and real data. See how that affects pattern recognition
    - NEED TO LEARN HOW TO GENERATE SYNTHETIC STOCK DATA
3) TEST ALL THREE HS CALCULATIONS TO SEE WHICH ONE IS MORE ROBUST/ACCURATE!!!!
    - Neurotraders
    - CodeTrading
    - Medium's
4) Investigate the approach given by the paper from Seoul National University
    - This may not be useful because they use a ton of different patterns. Interesting read about what type of model to use
5) Have Neural Network add a buy/sell weight to each pattern after seeing if the stock went up or down after. Need to add manually?


BREAK DATA INTO TRAINING AND TEST DATA FOR REAL DATA
FOR SYNTHETIC DATA, TEST ON SAME TESTING DATA AS OTHER MODEL.

### 1: Neurotraders Algorithm
This code focuses on capturing time periods of code instead of images of code. Graphs can still be generated as needed, however, and there may be a way to do this automatically.

In [9]:
# Read the S&P 500 constituents: the first table parsed from the Wikipedia page
sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tickers = pd.read_html(sp500_url)[0]

# Get the data for these tickers from yahoo finance
# yf_top500 = yf.download(tickers.Symbol.to_list(), interval='60m', period='2y', auto_adjust=True)['Close']

In [ ]:
# Get the data for these tickers from yahoo finance
# Downloading this data because it is very time-consuming and I don't want to make 
#   repeated calls to yfinance
# COMMENTING OUT TO AVOID DOING THIS AGAIN
# for ticker in tickers.Symbol.to_list():
#     df = download_df(ticker=ticker, period='2y', interval='60m')
#     ticker_name = 'data/' + ticker + '_data.csv'
#     df.to_csv(ticker_name, index=True)

In [11]:
# Results keyed by ticker name: [log-transformed frame, hs patterns, ihs patterns]
l_yf_top500_norm = {}
# Only the first two tickers of the list are processed here
for ticker in tickers.Symbol.to_list()[:2]:
    try:
        # Read this ticker's data from the local CSV files
        data = pd.read_csv(f'data/{ticker}_data.csv', index_col=None, header=0)
        # Set the datetime column aside; the numeric coercion below is applied
        # to every column and would turn non-numeric timestamps into NaN
        temp = data['Datetime']
        # Coerce all columns to numeric, then apply the logarithmic transformation
        data = np.log(data.apply(pd.to_numeric, errors='coerce'))
        # Put the saved datetime column back and index the frame by it
        data['Datetime'] = temp
        data.index = pd.DatetimeIndex(data['Datetime'])
        # Find the HS and IHS patterns in the close series
        dat_slice = data['close'].to_numpy()
        hs_patterns, ihs_patterns = find_hs_patterns(dat_slice, 6, early_find=False)
        # # hs_df = pd.DataFrame()
        # # ihs_df = pd.DataFrame()
        # # hs_df = load_attributes(dat_slice, hs_df, hs_patterns, len(data))
        # # ihs_df = load_attributes(dat_slice, ihs_df, ihs_patterns, len(data))
        # Store the data, HS and IHS under the ticker name
        l_yf_top500_norm[ticker] = [data, hs_patterns, ihs_patterns]
    except Exception as e:
        # Best effort: report the failing ticker and move on to the next one
        print(f"Failed on ticker {ticker} with exception {e}")

In [12]:
# Summarise the pattern counts per ticker (entry layout: [data, hs, ihs])
for i, entry in l_yf_top500_norm.items():
    hs_count, ihs_count = len(entry[1]), len(entry[2])
    print(f"For ticker {i} there are {hs_count} hs patterns and "
          f"{ihs_count} inverted hs patterns")

For ticker MMM there are 1 hs patterns and 3 inverted hs patterns
For ticker AOS there are 1 hs patterns and 3 inverted hs patterns


In [52]:
def plot_simple_hs(candle_data: pd.DataFrame, ticker: str, pat: HSPattern, pad: int = 2,
                   out_dir: str = "images/hs"):
    """Save a bare candlestick image of one detected pattern.

    The rows of `candle_data` from position `pat.start_i` through `pat.break_i`,
    plus `pad` extra rows after the break, are drawn with mplfinance (axes off,
    'classic' style) and written to `<out_dir>/<ticker>_<start_i>_<break_i>.png`.

    Args:
        candle_data: Price frame passed (after slicing) to `mpf.plot`.
        ticker: Symbol used as the file-name prefix.
        pat: Pattern providing the `start_i` and `break_i` row positions.
        pad: Number of rows to include after `break_i`; negative values are
            treated as 0.
        out_dir: Directory the image is written to. It is created when missing.
            Defaults to the previously hard-coded "images/hs".
    """
    pad = max(pad, 0)

    # savefig does not create missing folders, so make sure the target exists
    os.makedirs(out_dir, exist_ok=True)
    fname = os.path.join(out_dir, f"{ticker}_{pat.start_i}_{pat.break_i}.png")

    data = candle_data.iloc[pat.start_i:pat.break_i + 1 + pad]
    mpf.plot(data, type='candle', axisoff=True, style='classic', savefig=dict(fname=fname, dpi=144))

In [53]:
# Save one image per detected pattern: all hs patterns first, then all inverted ones
for ticker, (candle_data, hs_found, ihs_found) in l_yf_top500_norm.items():
    num_hs, num_ihs = len(hs_found), len(ihs_found)

    print(f"For ticker {ticker} there are {num_hs} hs patterns and "
          f"{num_ihs} inverted hs patterns")

    # Both kinds go through the same plotting helper with the same padding
    for pat in [*hs_found, *ihs_found]:
        plot_simple_hs(candle_data, ticker, pat, 2)

For ticker MMM there are 1 hs patterns and 3 inverted hs patterns
For ticker AOS there are 1 hs patterns and 3 inverted hs patterns


In [51]:
# Look up MMM's stored entry by ticker name; index 0 is its frame and
# index 2 its inverted hs patterns, of which the second one is plotted
mmm_entry = l_yf_top500_norm['MMM']
candle_data, pat = mmm_entry[0], mmm_entry[2][1]
plot_simple_hs(candle_data, 'MMM', pat, 2)

KeyError: 'Unrecognized kwarg="background"'