In [86]:
import os
import warnings
warnings.filterwarnings('ignore')

import gc
import linecache
import matplotlib as plt
import mplfinance as mpf
import numpy as np
import pandas as pd
import tracemalloc 

from analysis import detect_and_rename
from tradingpatterns_basic import detect_multiple_tops_bottoms, detect_triangle_pattern, detect_wedge, detect_channel, detect_double_top_bottom

This notebook iterates over the locally downloaded price data and runs pattern-detection scripts to generate candlestick chart images, which can then be fed into the CNN.

In [39]:
# Pin NumPy's global RNG so that the ticker shuffle performed later is repeatable
np.random.seed(42)

In [70]:
# Allocation reporter adapted from the tracemalloc section of the Python documentation
def display_top(snapshot, key_type='lineno', limit=10):
    """Print the biggest allocation sites recorded in a tracemalloc snapshot.

    Parameters
    ----------
    snapshot : tracemalloc.Snapshot
        Snapshot to summarise.
    key_type : str
        Grouping key forwarded to ``snapshot.statistics`` (default ``'lineno'``).
    limit : int
        Number of entries listed individually; the rest are folded into a
        single "other" line.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # Drop traces attributed to the import machinery and to unknown sources
    ignored = (
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    )
    stats = snapshot.filter_traces(ignored).statistics(key_type)

    print("Top %s lines" % limit)
    rank = 0
    for entry in stats[:limit]:
        rank += 1
        origin = entry.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (rank, origin.filename, origin.lineno, entry.size / 1024))
        # Show the source text of the allocating line when it can be read
        source_text = linecache.getline(origin.filename, origin.lineno).strip()
        if source_text:
            print('    %s' % source_text)

    remainder = stats[limit:]
    if remainder:
        remainder_bytes = sum(entry.size for entry in remainder)
        print("%s other: %.1f KiB" % (len(remainder), remainder_bytes / 1024))
    total_bytes = sum(entry.size for entry in stats)
    print("Total allocated size: %.1f KiB" % (total_bytes / 1024))

In [42]:
# Load the S&P 500 constituents table from Wikipedia and keep only the ticker symbols.
# read_html returns a list of the tables found on the page; the first one is used here.
tickers = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
)[0]['Symbol'].tolist()

In [73]:
# Plot one candlestick window and save it as a PNG named after the pattern, ticker and id.
# A matplotlib memory leak was suspected here (figures being kept in memory on every call),
# so closefig=True is passed to have mplfinance close each figure after saving it.
def plot_simple_df(candle_data: pd.DataFrame, stock_type: str, ticker: str,  count: int, pad: int = 2):
    """Render ``candle_data`` as a candlestick chart and write it to disk.

    The image is saved to ``images/<stock_type>/<stock_type>_<ticker>_<count>.png``;
    the target directory is created when it does not exist yet.

    Parameters
    ----------
    candle_data : pd.DataFrame
        Price rows to plot. Must contain a ``'Datetime'`` column, which becomes
        the chart's DatetimeIndex. The frame passed in is left untouched: the
        re-indexing and column drop are done on a copy.
    stock_type : str
        Pattern label; used as both sub-directory and file-name prefix.
    ticker : str
        Ticker symbol embedded in the file name.
    count : int
        Identifier appended to the file name.
    pad : int
        Accepted for backward compatibility; it has no effect on the output.

    Returns
    -------
    None
    """
    # Make sure the destination folder exists so the save cannot fail on a fresh checkout
    os.makedirs(os.path.join("images", stock_type), exist_ok=True)
    fname = "images/" + stock_type + "/" + stock_type + '_' + ticker + '_' + str(count) + ".png"

    # drop() returns a new frame, so the caller's data (often a slice of a
    # larger frame) is never modified
    frame = candle_data.drop(columns=['Datetime'])
    frame.index = pd.DatetimeIndex(candle_data['Datetime'])

    mpf.plot(frame, type='candle', axisoff=True, style='classic',
             savefig=dict(fname=fname, dpi=60), closefig=True)

In [44]:
def shuffle_list(ticker_list: list):
    """Shuffle ``ticker_list`` in place with NumPy's global RNG and return it.

    Shuffling lets each pattern run draw from a different set of stocks, while
    the seeded RNG keeps the resulting order (and so the generated images)
    repeatable between runs.

    Parameters
    ----------
    ticker_list : list
        List to shuffle. It is reordered in place.

    Returns
    -------
    list
        The same list object that was passed in, now shuffled.
    """
    # Operate on the argument itself rather than on a notebook-level variable,
    # so the function works for any list handed to it.
    np.random.shuffle(ticker_list)
    return ticker_list

In [96]:
# Generate chart images for every detected occurrence of one pattern type
def generate_patterns(pat: str, ticker_list=None):
    """Detect one chart pattern in each ticker's CSV and save an image per hit.

    For every ticker, ``data/<ticker>_data.csv`` is read, the price columns are
    renamed to capitalised names, all values are coerced to numbers and
    log-transformed, and the detector selected by ``pat`` is run. Rows where
    the detector's output column is non-null are candidate hits; a hit at row
    ``i`` is plotted (rows ``i - 15`` to ``i + 15``) through ``plot_simple_df``.

    Parameters
    ----------
    pat : str
        Pattern key: ``'wavelet'``, ``'mtb'``, ``'tri'`` or ``'wed'``. Any other
        value prints ``'There is no pattern'`` and returns before any file is
        read.
    ticker_list : iterable of str, optional
        Tickers to process. Defaults to ``tickers[301:350]`` from the notebook
        namespace.

    Returns
    -------
    None

    Notes
    -----
    Any exception raised while processing a ticker is reported with a
    printed message and the loop moves on to the next ticker.
    """
    # Pattern key -> (detector call, column the detector fills in).
    # The lambdas defer looking up each detector until it is actually called.
    detectors = {
        # Accurately detects head and shoulders by using wavelet noise reduction
        'wavelet': (lambda frame: detect_and_rename(frame, 'wavelet', 3),
                    'head_shoulder_pattern_wavelet'),
        # This does not generate anything?
        'mtb': (lambda frame: detect_multiple_tops_bottoms(frame, window=10),
                'multiple_top_bottom_pattern'),
        # Only detects ascending and descending triangles
        'tri': (lambda frame: detect_triangle_pattern(frame, window=3),
                'triangle_pattern'),
        'wed': (lambda frame: detect_wedge(frame, window=3),
                'wedge_pattern'),
    }

    # Reject unknown keys up front instead of after the first CSV has been loaded
    if pat not in detectors:
        print('There is no pattern')
        return
    run_detector, pattern_col = detectors[pat]

    if ticker_list is None:
        ticker_list = tickers[301:350]

    half_window = 15   # rows plotted on each side of a hit
    id_step = 50       # increment of the running image id / throttle

    for ticker in ticker_list:
        try:
            data = pd.read_csv('data/' + ticker + '_data.csv', index_col=None, header=0)
            # Set the timestamps aside: the numeric coercion below turns
            # non-numeric values into NaN
            timestamps = data['Datetime']
            data.rename(columns={"open": "Open", "close": "Close", "high": "High",
                                 "low": "Low", 'volume': 'Volume'}, inplace=True)
            # Apply numeric and log to the data
            data = data.apply(pd.to_numeric, errors='coerce')
            data = np.log(data)
            # Add the datetime we saved earlier back in
            data['Datetime'] = timestamps
            data.drop(columns=['adj_close', 'adj_high', 'adj_low'], inplace=True)

            data = run_detector(data)
            data1 = data[data[pattern_col].notnull()]

            # `loc` doubles as the image id and as a throttle: a hit is only
            # plotted when its row label exceeds the current `loc`
            loc = 0
            for i in data1.index:
                if i + half_window < len(data) and loc < i and i > half_window:
                    loc += id_step
                    # Hand over a copy so the plotting helper can never alter `data`
                    window = data.loc[i - half_window : i + half_window].copy()
                    plot_simple_df(window, pat, ticker, loc, 2)
        except Exception as e:
            print(f"Failed on ticker {ticker} with exception {e}")
            continue

In [ ]:
# Shuffle the ticker list using the RNG seeded earlier in the notebook.
# NOTE(review): shuffle_list shuffles in place and returns the same list, so `tickers`
# itself is reordered here and `ticks` refers to that same list object.
ticks = shuffle_list(tickers)

In [ ]:
# Memory-profiling cell for the 'wavelet' pattern. The generation call is currently
# commented out, so the report below does not include any work done by generate_patterns
# in this cell.
tracemalloc.start()  # begin tracing Python memory allocations
# generate_patterns('wavelet')

# Capture the traced allocations and print the largest allocation sites
snapshot = tracemalloc.take_snapshot()
display_top(snapshot)

In [ ]:
# Generate images for the 'mtb' (multiple tops/bottoms) pattern while tracing memory,
# then report where the most memory was allocated.
tracemalloc.start()  # begin tracing Python memory allocations
generate_patterns('mtb')

# Capture the traced allocations and print the largest allocation sites
snapshot = tracemalloc.take_snapshot()
display_top(snapshot)

In [None]:
# Generate images for the 'tri' (triangle) pattern. The memory-profiling calls are
# left commented out; uncomment them to trace allocations for this run.
# tracemalloc.start()
generate_patterns('tri')

# snapshot = tracemalloc.take_snapshot()
# display_top(snapshot)

In [ ]:
# TODO: Start here with wedge detection
# Generate images for the 'wed' (wedge) pattern. The memory-profiling calls are
# left commented out; uncomment them to trace allocations for this run.
# tracemalloc.start()
generate_patterns('wed')

# snapshot = tracemalloc.take_snapshot()
# display_top(snapshot)

In [98]:
# Force a full garbage-collection pass after the image-generation runs; the value
# displayed as the cell output is the number of unreachable objects the collector found.
gc.collect()

10943480