In [1]:
import os
import bisect
import itertools
import math
import numpy as np
import pandas as pd
import requests
import pickle
from datetime import datetime as dt
from datetime import timedelta
import pytz
from itertools import zip_longest
from pytz import timezone
from pprint import pprint
import pandas_datareader.data as web
from collections import namedtuple, Counter
from operator import itemgetter

import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
import fundamentalanalysis as fa
# import streamlit as st

import holidays
import pandas_datareader.data as web
import yahoo_fin.stock_info as si
from urllib.request import Request, urlopen
from html_table_parser.parser import HTMLTableParser 
import streamlit as st

from load_data import *
from get_data import get_tickers
from info import TA_PERIODS

pd.options.plotting.backend = "plotly"
# pd.set_option('display.max_rows', None)

In [2]:
rf_rates = get_rf_data()
SPY_df = get_SPY_data()
SPY_info_df = get_SPY_info()
ticker_info = get_ticker_info()
ticker_list = SPY_info_df.index.to_list()
sector_list = SPY_info_df['Sector'].unique()
first_date = SPY_df.iloc[0].name
last_date = SPY_df.iloc[-1].name
yr_ago = last_date - timedelta(days=365)
# TTM_ratios, ratios_data_report = load_TTM_ratios()

In [66]:
def date_format(date):
    if date.hour == 0:
        date = date.strftime('%d-%m-%y')
    else:
        date = date.strftime('%d-%m-%y %H:%M')
    
    return date

## Resample Data

In [75]:
# Need to add ability to switch to any timeframe within the bounds of the data
def resample_data(ticker, timeframe):
    '''Load ticker's market data'''

    path = 'data/market_data'  
        
    if timeframe.endswith('m') or timeframe.startswith('H'):
        s = [x for x in list(timeframe) if x.isnumeric()]
        x = int(''.join(s))
        # print(x)
        # x = int(timeframe.split('m')[0])
        folder = '1m' if x < 5 and timeframe.endswith('m') else '5m'
        freq = f'{x}T' if timeframe.endswith('m') else f'{x}H'
        
        if ticker == '^GSPC':
            fpath = os.path.join(path, 'spy_data', folder)
            fname = os.path.join(fpath, os.listdir(fpath)[-1])
        else:
            fname = os.path.join(path, folder, f'{ticker}.csv')
        
        df = pd.read_csv(fname)
        col = df.columns[0]
        fmt = ':00-0'
        df.index = pd.to_datetime(df[col].apply(lambda x: x.split(fmt)[0]))
        df.drop(columns=col, index=df.index[-1], inplace=True)
        df.index.name = 'Date'

    else:
        freq = 'W-FRI' if timeframe == 'W1' else 'BM'
        if ticker == '^GSPC':
            df = get_SPY_data()
        else:
            df = get_ticker_data(ticker)

    if timeframe not in ('1m', '5m'):
        resampled_df = pd.DataFrame()
        # df0 = df.asfreq(freq)
        # print(df0.index[-1], df0.index[0])
        # print(df0.index[-1].hour - df0.index[0].hour)
        offset = timedelta(minutes=30)
        resampled_df['Open'] = df['Open'].resample(freq, offset=offset).first()
        resampled_df['High'] = df['High'].resample(freq, offset=offset).max()
        resampled_df['Low'] = df['Low'].resample(freq, offset=offset).min()
        resampled_df['Close'] = df['Close'].resample(freq, offset=offset).last()
        resampled_df['Adj Close'] = df['Adj Close'].resample(freq, offset=offset).last()
        resampled_df['Volume'] = df['Volume'].resample(freq, offset=offset).sum()
        df = resampled_df.dropna()

    return df

In [79]:
df = resample_data('a', 'M1')
# df.index[-1].hour - df.index[0].hour
# offset = timedelta(hours=1, minutes=30)
# df = df['Open'].resample('3H', offset=offset).first()
df.head(20)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-11-30,32.546494,35.765381,28.478184,30.177038,25.781992,105291489
1999-12-31,30.177038,57.224606,29.327612,55.302216,47.247883,66908419
2000-01-31,56.330471,56.464592,41.577251,47.344421,40.449085,44880832
2000-02-29,47.389126,82.573318,47.389126,74.302574,63.480961,32938000
2000-03-31,74.749641,115.879829,64.377686,74.391991,63.557388,57679666
2000-04-28,73.676682,89.413445,52.262161,63.394135,54.161289,46065078
2000-05-31,63.394135,74.4814,38.62661,52.66452,44.994343,108033384
2000-06-30,53.245708,59.907009,44.349072,52.753933,45.070751,179393038
2000-07-31,52.932762,57.895206,28.254648,29.148785,24.903496,160953696
2000-08-31,29.104076,45.779686,27.315807,43.633762,37.278847,127761822


# Technical Analysis

#### To Do:

- Trend Analysis
- MAs - find appropriate MAs for all timeframes
- Volume Analysis
- **Need to add ability to switch to any timeframe within the bounds of the data**
- **Add an option to enable a slider that allows one to view any points along the dataset**
- Calculate Piotroski F-Scores as they could be useful in identifying value
  in securities in the accumulation stage.
- Identify seasonality in prices, which can be caused by factors such as 
  "window dressing" by institutions with upcoming earnings reports.

## Trend Analysis

In [4]:
from scipy.signal import find_peaks, peak_prominences, argrelextrema, argrelmax, argrelmin
from sklearn import preprocessing

pd.options.display.float_format = "{:,.4f}".format

### The Definition & Interaction of Trends

A trend is a period in which a price moves in an irregular but persistent direction.
The three most widely followed
trends are primary, intermediate, and short-term.

__Primary__

The primary trend generally lasts between 9 months and 2 years, and is
a reflection of investors’ attitudes toward unfolding fundamentals in the
business cycle. The business cycle extends statistically from trough to
trough for approximately 3.6 years, so it follows that rising and falling
primary trends (bull and bear markets) last for 1 to 2 years. Since building 
up takes longer than tearing down, bull markets generally last longer
than bear markets. The direction of the secular or very long-term trend
will also affect the magnitude and duration of a primary trend. Those that
move in the direction of the secular trend will generally experience greater
magnitude and duration than those that move in the opposite direction.

__Intermediate__

Anyone who has looked at prices on a chart will notice that they do not move
in a straight line. A primary upswing is interrupted by several re actions
along the way. These countercyclical trends within the confines of a primary
bull market are known as intermediate price movements. They last anywhere
from 6 weeks to as long as 9 months, sometimes even longer, but rarely
shorter. Countercyclical intermediate trends are typically very deceptive,
often being founded on very believable but false assumptions. For example, 
an intermediate rally during a bear market in equities may very well be
founded on a couple of unexpectedly positive economic numbers, which
make it appear that the economy will avoid that much-feared recession.
When subsequent numbers are reported and found to be wanting, the bear
market resumes.
It is important to have an idea of the direction and maturity of the
primary trend, but an analysis of intermediate trends is also helpful for
improving success rates in trading, as well as for determining when the
primary movement may have run its course.

__Short-Term Trends__

Short-term trends typically last 3 to 6 weeks, sometimes shorter and sometimes 
longer. They interrupt the course of the intermediate cycle, just as
the intermediate-term trend in terrupts primary price movements. Short-
term trends are shown in the market cycle model (Figure 1.1) as a dashed
line. They are usually influenced by random news events and are far more
difficult to identify than their intermediate or primary counterparts.



__Major Technical Principles__
- The direction of the primary trend will affect the 
character of intermediate and short-term trends.
- Surprises occur in the direction of the main trend, i.e., on the upside in a 
bull market and on the downside in a bear market.
- Never go for perfection; always shoot for consistency.
- The significance of a peak-and-trough
reversal is determined by the du ration and magnitude of the rallies and
reactions in question.


Maybe use linreg gradient to determine primary and secondary trend

In [5]:
# dt.strftime(end, '%d/%m/%y %H:%M')

In [6]:
def peaks_valleys_analysis(df):
    df = df.copy().drop(columns='Adj Close')
    close = df['Close']
    peaks = argrelextrema(close.to_numpy(), np.greater)[0]
    valleys = argrelextrema(close.to_numpy(), np.less)[0]
    df['isPeak'] = 0
    df['isValley'] = 0
    df['PV Trend'] = 0
    PV = sorted(list(peaks) + list(valleys))
    first = 'Peak' if min(peaks[0], valleys[0]) == peaks[0] else 'Valley'
    second = 'Peak' if first == 'Valley' else 'Valley'
    i, j = (0, 1)
    d0 = abs(close[PV[0]] - close[0])
    dist = [d0]   
    valid_PV = [PV[0]]
    print(f'i: {i}, j: {j}, {close[PV[0]]:.2f} - {close[0]:.2f} = {d0:.2f}\n')
    
    while i < len(PV):
        col = first if i % 2 == 0 else second
        ix = df.iloc[PV[i]].name
        # df.loc[ix, f'is{col}'] = 1
        
        if i == 0:
            d1 = abs(close[PV[0]] - close[0])
            dist.append(d1)
            d2 = abs(close[PV[j]] - close[PV[i]])
            print(f'{close[PV[j]]:.2f} - {close[PV[i]]:.2f} = {d2:.2f}',
                f'\n{close[PV[0]]:.2f} - {close[0]:.2f} = {d1:.2f}',
                f'\n{d2:.2f} / {d1:.2f} = {d2 / d1:.2f}')
        else:
            d2 = abs(close[PV[j]] - close[PV[i]])
            print(f'{close[PV[j]]:.2f} - {close[PV[i]]:.2f} = {d2:.2f}',
                #   f'\n{close[PV[j]]:.2f} - {close[valid_PV[-1]]:.2f} = {d1:.2f}',
                f'\n{d2:.2f} / {d1:.2f} = {d2 / d1:.2f}')
            
        
        
        retracement = d2 / d1
        if retracement >= 1/3:
            d1 = abs(close[PV[j]] - close[valid_PV[-1]])
            dist.append(d1)
            if j > 1:
                valid_PV.append(PV[j])
                
            df.loc[ix, f'is{col}'] = 1
            df.loc[ix, col] = close[PV[j]]
            if i < len(PV) and j < len(PV):
                j += 1      
                i = j - 1
            print(f'i: {i}, j: {j}, {col}\n{valid_PV}\n')
        else:
            if i < len(PV) and j < len(PV):
                i = PV.index(valid_PV[-1])
                j += 1
                print(f'i: {i}, j: {j}, {col}\n{valid_PV}\n')
                # if len(valid_PV) > 1:
                    # j = valid_PV[-1]
                valid_PV.append(PV[i])
                invalid = valid_PV.pop()
                
                df.loc[ix, f'is{col}'] = 0
                df.loc[ix, col] = 0
                print(f'{ix.date()}: {col} - {close[invalid]:.2f} removed \n') 
            
        # d1 = abs(close[PV[i]] - close[valid_PV[-1]])
            # a, b = (i, i - 1) if i % 2 != 0 else (i, i)
            
    
            # print(f'{i}: {close[PVs[b][n]]:.2f} - {close[PVs[a][m]]:.2f} = {d2:.2f},\
            #        \nRetraced: {d2:.2f} / ({close[pv]:.2f} - {close[p1]:.2f})  = {d2/d1:.2f}\n')
            
            # m, n = n, m
            # d_ix1 = df.iloc[max(PVs[i - 1])].name
            # d_ix2 = df.iloc[min(PVs[i])].name
            # df.loc[d_ix1, 'PV Dist'] = d1
            # df.loc[d_ix2, 'PV Dist'] = d2
            # df.loc[d_ix2, 'Retraced'] = d2 / d1
            # if close[p] - close[p1] >= 0 and close[v] - close[pv] >= 0:
            #     trend = 1 # uptrend
            # elif close[p] - close[p1] < 0 and close[v] - close[pv] < 0:
            #     trend = -1 # downtrend
            # else:
            #     trend = 0
            # start = df.iloc[min(p1, pv)].name
            # end = df.iloc[max(p, v)].name
            # df.loc[start:end, 'PV Trend'] = trend
            # print(f"Peak:   {dt.strftime(df.iloc[p1].name, '%d-%m-%y')} - {dt.strftime(ix, '%d-%m-%y')} " \
            #   f"|| {close[p1]:,.2f} - {close[p]:,.2f},\n" \
            #   f"Valley: {dt.strftime(df.iloc[pv].name, '%d-%m-%y')} - {dt.strftime(ix, '%d-%m-%y')} " \
            #   f"|| {close[pv]:,.2f} - {close[v]:,.2f},\n" \
            #   f"Trend: {trend}\n")

    # CAN USE CHANGES IN TREND TO IDENTIFY POINTS TO DRAW TRENDLINES BTWN
    # df['PV Changepoint'] = df['PV Trend'].diff()
    # ixs = df[df['PV Changepoint'] != 0].index.to_list()
    trendlines = []
    
    # for i, ix in enumerate(ixs[1:], 1):
    #     start = ixs[i - 1]
    #     end = ix
    #     npeaks = df.loc[start:end, 'isPeak'].sum()
    #     nvalleys = df.loc[start:end, 'isValley'].sum()
    #     indices = []
    #     if df.loc[ix, 'PV Changepoint'] < 0 and npeaks > 2:
    #         indices = df[df['isPeak'] != 0][start:end].index.to_list()
    #     if df.loc[ix, 'PV Changepoint'] > 0 and nvalleys > 2:
    #         indices = df[df['isValley'] != 0][start:end].index.to_list()
    #     if indices:    
    #         x1, x2 = indices[0], indices[-1]
    #         y1, y2 = close[x1], close[x2]
    #         trendlines.append(((x1, x2), (y1, y2)))
    #     # print(f"{i}.".ljust(3), f"{dt.strftime(start, '%d-%m-%y')} - {dt.strftime(end, '%d-%m-%y')}, peaks: {peaks.sum()}, valleys: {valleys.sum()}")
    
    # cols = ['isPeak', 'isValley', 'Peak', 'Valley', 'PV Dist', 'Retraced', 'PV Changepoint']
    # df.drop(columns=cols, inplace=True)
    
    return df, peaks, valleys, trendlines

In [50]:
period = 'D1'
days = TA_PERIODS[period]['days']
end = last_date
start = end - timedelta(days)
df = make_dataframe('a', period)[start:end]
pv_df, peaks, valleys, trendlines = peaks_valleys_analysis(df)
# print(trendlines)

i: 0, j: 1, 121.61 - 130.72 = 9.11

123.72 - 121.61 = 2.11 
121.61 - 130.72 = 9.11 
2.11 / 9.11 = 0.23
i: 0, j: 2, Valley
[6]

2022-09-27: Valley - 121.61 removed 

121.55 - 121.61 = 0.06 
121.61 - 130.72 = 9.11 
0.06 / 9.11 = 0.01
i: 0, j: 3, Valley
[6]

2022-09-27: Valley - 121.61 removed 

132.64 - 121.61 = 11.03 
121.61 - 130.72 = 9.11 
11.03 / 9.11 = 1.21
i: 3, j: 4, Valley
[6, 12]

125.64 - 132.64 = 7.00 
7.00 / 11.03 = 0.63
i: 4, j: 5, Peak
[6, 12, 16]

127.90 - 125.64 = 2.26 
2.26 / 7.00 = 0.32
i: 4, j: 6, Valley
[6, 12, 16]

2022-10-11: Valley - 125.64 removed 

125.70 - 125.64 = 0.06 
0.06 / 7.00 = 0.01
i: 4, j: 7, Valley
[6, 12, 16]

2022-10-11: Valley - 125.64 removed 

132.30 - 125.64 = 6.66 
6.66 / 7.00 = 0.95
i: 7, j: 8, Valley
[6, 12, 16, 21]

125.94 - 132.30 = 6.36 
6.36 / 6.66 = 0.95
i: 8, j: 9, Peak
[6, 12, 16, 21, 23]

137.69 - 125.94 = 11.75 
11.75 / 6.36 = 1.85
i: 9, j: 10, Valley
[6, 12, 16, 21, 23, 27]

136.49 - 137.69 = 1.20 
1.20 / 11.75 = 0.10
i: 9, j: 11, Pe

IndexError: list index out of range

### Trend Changepoints

In [5]:
def order_peaks_valleys_debug(df):
    '''Remove smallest peak if peaks not followed by valleys & vice versa'''
    
    P = argrelextrema(df.Close.to_numpy(), np.greater)[0].tolist() # peaks
    V = argrelextrema(df.Close.to_numpy(), np.less)[0].tolist() # valleys
    first = 'P' if P[0] < V[0] else 'V'
    pv = {'P', 'V'}
    second = list(pv - {first})[0]
    lst = P if first == 'P' else V
    olst = P if first != 'P' else V
    fn = min if first == 'P' else max
    removed_vals = []

    print(f'first: {first}, peaks: {len(P)}, valleys: {len(V)}')
    print(f'{first}: {lst}')
    print(f'{second}: {olst}')
    
    for i, (p, v) in enumerate(zip(P, V)):
        a = p if first == 'P' else v
        b = v if first == 'P' else p

        if i < len(lst) - 1:
            c = P[i + 1] if first == 'P' else V[i + 1]
            if c < b:
                val = fn((a, df.Close[a]), (c, df.Close[c]), key=itemgetter(1))[0]
                lst.remove(val)
                removed_vals.append(val)
                print(f'\ni: {i}, ({a}, {df.Close[a]:.2f}), ({c}, {df.Close[c]:.2f})')
                print(f'Removed {first}: {df.index[val]} ({val}, {df.Close[val]:.2f})')

    print(f'\nfirst: {first}, peaks: {len(P)}, valleys: {len(V)}')
    print(f'{first}: {lst}')
    print(f'{second}: {olst}')
    print(f'removed: {removed_vals}')
    
    return P, V, removed_vals

In [6]:
def order_peaks_valleys(df, column='Close'):
    '''Remove smallest peak if peaks not followed by valleys & vice versa'''
    
    X = df[column]
    P = argrelextrema(X.to_numpy(), np.greater)[0].tolist() # peaks
    V = argrelextrema(X.to_numpy(), np.less)[0].tolist() # valleys
    first = 'P' if P[0] < V[0] else 'V'
    lst = P if first == 'P' else V
    fn = min if first == 'P' else max
    removed_vals = []
    
    for i, (p, v) in enumerate(zip(P, V)):
        a = p if first == 'P' else v
        b = v if first == 'P' else p
        if i < len(lst) - 1:
            c = P[i + 1] if first == 'P' else V[i + 1]
            if c < b:
                val = fn((a, X[a]), (c, X[c]), key=itemgetter(1))[0]
                lst.remove(val)
                removed_vals.append(val)
    
    return P, V, removed_vals

In [7]:
period = '10m'
days = TA_PERIODS[period]['days']
end = last_date
start = end - timedelta(days)

df = make_dataframe('aal', period)[start:end]

# P, V , r_PV = order_peaks_valleys_debug(df)
P, V , r_PV = order_peaks_valleys(df)
PV = sorted(P + V)
PV_vals = [df.Close[x] for x in PV]
PV_dates = [df.index[x] for x in PV]
pv_df = pd.DataFrame({'Date': PV_dates, 'PV': PV_vals})
pv_df['PV rtn'] = pv_df['PV'].pct_change()
pv_df['PV Var'] = abs(pv_df['PV rtn'].diff())
variance = 0.005
pv_df['Ranging'] = np.where(abs(pv_df['PV Var']) < variance, 1, 0)
# pv_df['Diff'] = pv_df['PV'].diff()
# pv_df['Diff %'] = pv_df['Diff'].pct_change()
idx = pv_df['PV Var'].idxmin()
print(pv_df.iloc[idx - 5: idx + 1])
print(pv_df.shape)
# print(pv_df.iloc[pv_df['PV Var'].idxmin()])
pv_df[pv_df['Ranging'] == 1]
# pv_df

                  Date      PV  PV rtn  PV Var  Ranging
57 2023-03-14 15:50:00 14.6700  0.0086  0.0198        0
58 2023-03-15 09:30:00 14.2401 -0.0293  0.0379        0
59 2023-03-15 09:40:00 14.2600  0.0014  0.0307        0
60 2023-03-15 09:50:00 14.1000 -0.0112  0.0126        0
61 2023-03-15 10:00:00 14.1050  0.0004  0.0116        0
62 2023-03-15 10:10:00 14.0900 -0.0011  0.0014        1
(101, 5)


Unnamed: 0,Date,PV,PV rtn,PV Var,Ranging
26,2023-03-13 11:00:00,15.0716,-0.0021,0.0043,1
32,2023-03-13 13:20:00,14.8,-0.002,0.0027,1
36,2023-03-13 14:00:00,14.855,-0.0003,0.0037,1
50,2023-03-14 12:00:00,14.925,-0.0013,0.004,1
62,2023-03-15 10:10:00,14.09,-0.0011,0.0014,1
63,2023-03-15 10:20:00,14.11,0.0014,0.0025,1
82,2023-03-15 15:50:00,13.87,0.0022,0.0036,1
94,2023-03-16 13:40:00,14.155,0.0007,0.0035,1


In [129]:
def valid_peaks_valleys_debug(df):
    '''
    Remove peaks/valleys with retracement < 1/3

    Parameters
    ----------
    df: DataFrame of a security's market data
    '''
    
    print('S: valid_peaks_valleys'.upper())
    close = df.Close
    P, V, _ = order_peaks_valleys(df)
    PV = sorted(P + V)
    n = len(PV)
    a, b, c = PV[0], PV[1], PV[2]
    d1 = close[b] - close[a]
    d2 = close[c] - close[b]
    first = 'P' if P[0] < V[0] else 'V'
    print(f'first: {first}, peaks: {len(P)}, valleys: {len(V)}\n')
    lst = P if first == 'P' else V
    pv = {'P', 'V'}
    second = list(pv - {first})[0]
    third = first if c in lst else list(pv - {first})[0]
    valid = [(a, first), (b, second), (c, third)]
    removed_vals = []
    retracement = 1/3 * (1 - 0.005)
    i = 2
    
    while i < n - 1:   
        x = first if c in lst else second
        y = list(pv - {x})[0] 
        r = abs(d2 / d1)
        
        print(f'{i}. {date_format(df.index[b])} - {date_format(df.index[c])} \nc: ({x}, {close[c]:.2f})')
        print(f'd1: {close[b]:.2f} - {close[a]:.2f} = {d1:.2f}')
        print(f'd2: {close[c]:.2f} - {close[b]:.2f} = {d2:.2f}')
        print(f'r: |{d2:.2f} / {d1:.2f}| = {r:.2f}\n')

        td1 = pd.Timedelta(df.index[b] - df.index[a])
        td2 = pd.Timedelta(df.index[c] - df.index[b])
        rtd = td2 / td1
        
        rm, px = valid[-1]
        if r < retracement and i > 2:
            # NEED TO DEAL WITH RANGING PEAKS/VALLEYS
            # if rtd < retracement:
            # rm, px = valid[-1]
            print(f'td1: {td1} \ntd2: {td2} \nrtd: {rtd:.2f}')

            removed_vals.append((c, x))
            
            # makes first value in list lowest peak / highest valley
            reverse = True if y == 'V' else False
            val = sorted([(b, close[b]), (d, close[d])],
                        key=itemgetter(1), reverse=reverse)
            removed, kept = val[0][0], val[1][0]
            
            print(f'REMOVED: \nvalid[-1]: {px} {date_format(df.index[rm])}, ({rm}, {close[rm]:.2f})')
            print(f'c - {x}, {date_format(df.index[c])} ({c}, {close[c]:.2f})' \
                f'\nremoved - {y}, {df.index[removed].date()} ({removed}, {close[removed]:.2f})' \
                f'\nkept - {y}, {date_format(df.index[kept])} ({kept}, {close[kept]:.2f})')
            removed_vals.append((removed, y))
            # print(val)
            # valid.append((kept, y))
            if kept == d:
                i = PV.index(kept)
                if len(valid) > 2:
                    valid.pop()
                if i == n - 1:
                    valid.append((kept, y))
                print(f'Skip \ni: {i} c: {kept} {date_format(df.index[kept])} ({y}, {close[kept]:.2f})')
                # l1 = [PV[i - 2], PV[i - 1], PV[i], PV[i + 1]]
                # l2 = [valid[-2][0], valid[-1][0], PV[i], PV[i + 1]]
                # print(f'valid: {l2}\norign: {l1}\nvalid {close[l2]}\norig {close[l1]}\n')

            # Moves to next peak/valley by skipping next value in PV   
            elif kept == rm and px == y:
                print(f'Unchanged {(kept, y)}')
                i += 2
                print(f'Skip \ni: {i} c: {kept} {date_format(df.index[kept])} ({y}, {close[kept]:.2f})')
            
            print()
            # else:
            #     print(f'Time Retracement > {retracement:.4f}')
            #     print(f'td1: {td1} \ntd2: {td2} \nrtd: {rtd:.2f}')
            #     if val != rm and px != x:
            #         valid.append((c, x))
            #     i += 1
        else:
            # val, px = valid[-1]
            if rm != c and px != x:
                valid.append((c, x))
            i += 1    
        
        valid_vals = [(x, round(close[x], 2), y) for x,y in valid]
        print(f'valid: {valid_vals[-3:]}\n')
        
        try:
            a, b, c, d = valid[-2][0], valid[-1][0], PV[i], PV[i + 1]
            d1 = abs(close[b] - close[a])
            d2 = abs(close[c] - close[b])
        # Add last peak/valley if it's valid
        except:
            if i == n - 1:
                a, b, c = valid[-2][0], valid[-1][0], PV[i]
                d1 = abs(close[b] - close[a])
                d2 = abs(close[c] - close[b])
                r = d2 / d1
                if r >= retracement:
                    valid.append((c,y))
                print(f'LAST {i} {df.index[c]} \nc: ({y}, {close[c]:.2f})')

    P, V = [], []

    for val, x in sorted(set(valid)):
        lst = P if x == 'P' else V
        lst.append(val)

    print(f'first: {first}, peaks: {len(P)}, valleys: {len(V)}\n')
    print('E: valid_peaks_valleys'.upper())

    return P, V

In [37]:
period = 'W1'
days = TA_PERIODS[period]['days']
end = last_date
start = end - timedelta(days)
df = make_dataframe('a', period)[start:end]

P, V = valid_peaks_valleys_debug(df)
PV1 = sorted(P + V)
P, V = valid_peaks_valleys(df)
PV2 = sorted(P + V)
print(f'PV1: {len(PV1)}, PV2: {len(PV2)}')
diff = list(set(PV1) - set(PV2))
print(df.index[diff])

S: VALID_PEAKS_VALLEYS
first: P, peaks: 27, valleys: 27

2. 2021-04-30 - 2021-05-07 
c: (P, 133.90)
d1: 133.64 - 136.68 = -3.04
d2: 133.90 - 133.64 = 0.26
r: |0.26 / -3.04| = 0.09

valid: [(5, 136.68, 'P'), (6, 133.64, 'V'), (7, 133.9, 'P')]

3. 2021-05-07 - 2021-05-14 
c: (V, 131.15)
d1: 133.90 - 133.64 = 0.26
d2: 131.15 - 133.90 = 2.75
r: |2.75 / 0.26| = 10.58

valid: [(6, 133.64, 'V'), (7, 133.9, 'P'), (8, 131.15, 'V')]

4. 2021-05-14 - 2021-05-28 
c: (P, 138.13)
d1: 131.15 - 133.90 = 2.75
d2: 138.13 - 131.15 = 6.98
r: |6.98 / 2.75| = 2.54

valid: [(7, 133.9, 'P'), (8, 131.15, 'V'), (10, 138.13, 'P')]

5. 2021-05-28 - 2021-06-04 
c: (V, 137.90)
d1: 138.13 - 131.15 = 6.98
d2: 137.90 - 138.13 = 0.23
r: |0.23 / 6.98| = 0.03

Time Retracement > 0.3317
td1: 14 days 00:00:00 
td2: 7 days 00:00:00 
rtd: 0.50
valid: [(8, 131.15, 'V'), (10, 138.13, 'P'), (11, 137.9, 'V')]

6. 2021-06-04 - 2021-07-09 
c: (P, 150.03)
d1: 137.90 - 138.13 = 0.23
d2: 150.03 - 137.90 = 12.13
r: |12.13 / 0.23| = 52

In [128]:
def valid_peaks_valleys(df, column='Close'):
    '''
    Remove peaks/valleys with retracement < 1/3

    Parameters
    ----------
    df: DataFrame of a security's market data
    '''

    X = df[column]
    P, V, _ = order_peaks_valleys(df)
    PV = sorted(P + V)
    n = len(PV)
    a, b, c = PV[0], PV[1], PV[2]
    d1 = abs(X[b] - X[a])
    d2 = abs(X[c] - X[b])
    first = 'P' if P[0] < V[0] else 'V'
    lst = P if first == 'P' else V
    pv = {'P', 'V'}
    second = list(pv - {first})[0]
    third = first if c in lst else second
    valid = [(a, first), (b, second), (c, third)]
    removed_vals = []
    retracement = 1 / 3 * 0.95
    i = 2
    
    while i < n - 1:   
        x = first if c in lst else second
        y = list(pv - {x})[0] 
        r = d2 / d1
        td1 = pd.Timedelta(df.index[b] - df.index[a])
        td2 = pd.Timedelta(df.index[c] - df.index[b])
        rtd = td2 / td1
        rm, px = valid[-1]
        
        if r < retracement and i > 2:
            removed_vals.append((c, x))
            reverse = True if y == 'V' else False
            val = sorted([(b, X[b]), (d, X[d])],
                         key=itemgetter(1), reverse=reverse)
            removed, kept = val[0][0], val[1][0]
            removed_vals.append((removed, y))
            if kept == d:
                i = PV.index(kept)
                if len(valid) > 2:
                    valid.pop()
                if i == n - 1:
                    valid.append((kept, y))
            elif kept == rm and px == y:
                i += 2
        else:
            # val, px = valid[-1]
            if rm != c and px != x:
                valid.append((c, x))
            i += 1    
        try:
            a, b = valid[-2][0], valid[-1][0]
            c, d = PV[i], PV[i + 1]
            d1 = abs(X[b] - X[a])
            d2 = abs(X[c] - X[b])
        except:
            if i == n - 1:
                a, b, c = valid[-2][0], valid[-1][0], PV[i]
                d1 = abs(X[b] - X[a])
                d2 = abs(X[c] - X[b])
                r = d2 / d1
                if r >= retracement:
                    valid.append((c,y))
              
    P, V = [], []

    for val, x in sorted(set(valid)):
        lst = P if x == 'P' else V
        lst.append(val)

    return P, V

In [132]:
def trend_changepoints_debug(df):
    close = df.Close
    chg = {k: {'start': [], 'end': []} for k in ['up', 'down', 'ranging']} # trend changepoints
    t = None # trend
    P, V = valid_peaks_valleys(df)
    pv = 'P' if P[0] < V[0] else 'V'
    lst = P if pv == 'P' else V
    olst = P if pv != 'P' else V
    up, down, ranging = [], [], []
    print(f'first: {pv} peaks: {len(P)}, valleys: {len(V)}\n')

    for p, v in zip(P[2:], V[2:]):
        ix = p if pv == 'P' else v
        i = lst.index(ix)
        g, h = i - 2, i - 1
        try:
            j, a, b, c = P[g - 1], P[g], P[h], p
            k, d, e, f = V[g - 1], V[g], V[h], v
            p00, p0, p1, p2, v00, v0, v1, v2 = close[[j,a,b,c,k,d,e,f]]
            curr_p, curr_v = [p00, p0, p1, p2], [v00, v0, v1, v2]
            p_dates = df.index[[j,a,b,c]].to_list()
            v_dates = df.index[[k,d,e,f]].to_list()
        except:
            a, b, c = P[g], P[h], p
            d, e, f = V[g], V[h], v
            p0, p1, p2, v0, v1, v2 = close[[a,b,c,d,e,f]]
            curr_p, curr_v = [p0, p1, p2], [v0, v1, v2]
            p_dates = df.index[[a,b,c]].to_list()
            v_dates = df.index[[d,e,f]].to_list()
        
        print(f'p0: {p0:.2f}, p1: {p1:.2f}, p2: {p2:.2f}, v0:{v0:.2f}, v1: {v1:.2f}, v2: {v2:.2f}')
        p_data = {'Peaks': curr_p}
        v_data = {'Valleys': curr_v}
        p_df = pd.DataFrame(p_data, index=p_dates)
        v_df = pd.DataFrame(v_data, index=v_dates)
        pv_df = pd.concat([p_df, v_df]).sort_index().fillna('-')
        pv_df.index = [date_format(x) for x in pv_df.index]
        dates = sorted([df.index[p], df.index[v]])
        dates = [date_format(x) for x in dates]
        s = f'\n{i}. {dates} '
        s = s + t if t else s
        print(s)
        print(f'ix: {ix} - {date_format(df.index[ix])}')
        st_txt = '-' * 30 + '\nStart'

        # NEEDS TO BE FIXED FOR CONSISTENCY - has to do with incomplete conditions
        if p2 > p1 > p0 and v2 > v1 > v0: # Uptrend
            print(f'Uptrend')
            if t and t != 'up':
                print(f'End {t} - {d} - {date_format(df.index[d])}')
                print(f'{st_txt} {date_format(df.index[d])}\n')
                if chg[t]['start']:
                    chg[t]['end'].append(d)
                chg['up']['start'].append(d)
                up.append(d)
                pv_df['Trend'] = ['up'] * pv_df.shape[0]
                print(f'{pv_df}\n')
                pprint(chg)
            if not t:
                chg['up']['start'].append(d)
                up.append(d)
            t = 'up'
        elif v2 < v1 < v0 and p2 < p1 < p0: # Downtrend
            print(f'Downtrend')
            if t and t != 'down':
                print(f'End {t} - {a} - {date_format(df.index[a])}')
                print(f'{st_txt} {date_format(df.index[a])}\n')
                if chg[t]['start']:
                    chg[t]['end'].append(a)
                chg['down']['start'].append(a)
                down.append(a)
                pv_df['Trend'] = ['down'] * pv_df.shape[0]
                print(f'{pv_df}\n')
                pprint(chg)
            if not t:
                chg['down']['start'].append(a)
                down.append(a)
            t = 'down'
        else:
            print(f'Ranging')
            if t in ('up', 'down'): # Ranging
                # NEEDS TO BE FIXED FOR CONSISTENCY
                ix = [a, b] if t == 'up' else [d, e]
                ix = ix[0] if pv == 'P' else ix[1]
                if chg[t]['start']:
                    chg[t]['end'].append(ix)
                chg['ranging']['start'].append(ix)
                ranging.append(ix)
                pv_df['Trend'] = ['ranging'] * pv_df.shape[0]
                print(f'{pv_df}\n')
                print(f'End {t.title()} - {ix} - {date_format(df.index[ix])}\n')
                pprint(chg)
            if not t:
                chg['ranging']['start'].append(a)
                ranging.append(a)
            t = 'ranging'
    
    print('\nChangepoints:')
    pprint(chg)
    
    dr = chg['ranging']
    du = chg['up']
    dd = chg['down']
    remove = {'up': [], 'down': []}

    if dr['end'] and dr['start'][0] >= dr['end'][0]:
        dr['start'].pop(0)
        dr['end'].pop(0)
        print(f'\nRanging end fixed')
        pprint(chg)
        
    # Add last value in data to appropriate changepoint
    print(f'\nup: {up}, down: {down}, ranging: {ranging}')
    x = sorted(up + down + ranging)[-1]
    print(f'Last Value: {x}\n')
    for k, v in chg.items():
        print(f'{k}:')
        if x in v['start']:
            # print(k1, v1)
            print(f"{k}['end']: {chg[k]['end']}")
            chg[k]['end'].append(df.shape[0] - 1)
            print(f'\nLast val - {df.shape[0] - 1} in dict("{k}")\n')
            pprint(chg)
            break
    
    # Fixing first value
    try:
        rs, re = dr['start'][0], dr['end'][0]
        keys = ('up', 'down')
        for k in keys:
            s = chg[k]['start'][0]
            if re == s:
                print(f'Fixing first value in {k}: {s} - {rs}\n')
                chg[k]['start'][0] = rs
                dr['start'].pop(0)
                dr['end'].pop(0)
                break
    except: pass

    # NEEDS DEBUGGING
    # fix if 'up' is broken by ranging and followed by 'up' or vice versa
    for j, d in enumerate((du, dd)):
        k = 'up' if j == 0 else 'down'
        for i in range(len(dr['start']) - 1):
            try:
                if dr['start'][i] == d['end'][i] and \
                    dr['end'][i] == d['start'][i + 1]:
                    print(f'{k} remove', i)
                    remove[k].append([dr['start'][i], dr['end'][i]])
                    print(dr['start'][i], dr['end'][i])
            except Exception as e: 
                print(j, e)
                
            print(f'remove: {remove}\n')

    # fix if 'up' is broken by ranging and followed by 'up' or vice versa        
    for k, v in remove.items():
        d = du if k == 'up' else dd
        for s, e in v:
            print(f'Removing {k}, s: {s}, e: {e}')
            d['start'].remove(e)
            d['end'].remove(s)
            dr['start'].remove(s)
            dr['end'].remove(e)
       
    return chg, P, V

In [142]:
ticker = 'aal'
print('Ticker:', ticker.upper())
period = 'D1'
days = TA_PERIODS[period]['days']
end = last_date
start = last_date - timedelta(days)
end = end + timedelta(1) if period.endswith('m') else end
df = make_dataframe(ticker, period=period)[start:end]
chg, peaks, valleys = trend_changepoints_debug(df)
pprint(chg)

Ticker: AAL
first: V peaks: 13, valleys: 14

p0: 12.75, p1: 12.95, p2: 14.29, v0:11.86, v1: 11.92, v2: 12.05

2. ['10-10-22', '25-10-22'] 
ix: 15 - 10-10-22
Uptrend
p0: 12.95, p1: 14.29, p2: 14.32, v0:11.92, v1: 12.05, v2: 13.58

3. ['02-11-22', '07-11-22'] up
ix: 32 - 02-11-22
Uptrend
p0: 14.29, p1: 14.32, p2: 14.93, v0:12.05, v1: 13.58, v2: 13.98

4. ['09-11-22', '10-11-22'] up
ix: 37 - 09-11-22
Uptrend
p0: 14.32, p1: 14.93, p2: 14.50, v0:13.58, v1: 13.98, v2: 13.85

5. ['21-11-22', '25-11-22'] up
ix: 45 - 21-11-22
Ranging
           Peaks Valleys    Trend
10-10-22       - 12.0500  ranging
25-10-22 14.2900       -  ranging
02-11-22       - 13.5800  ranging
07-11-22 14.3200       -  ranging
09-11-22       - 13.9800  ranging
10-11-22 14.9300       -  ranging
21-11-22       - 13.8500  ranging
25-11-22 14.5000       -  ranging

End Up - 38 - 10-11-22

{'down': {'end': [], 'start': []},
 'ranging': {'end': [], 'start': [38]},
 'up': {'end': [38], 'start': [5]}}
p0: 14.93, p1: 14.50, p2: 1

In [86]:
def trend_changepoints(df, column='Close'):
    X = df[column]
    chg = {k: {'start': [], 'end': []} 
           for k in ['up', 'down', 'ranging']} # trend changepoints
    t = None # trend
    P, V = valid_peaks_valleys(df)
    first = 'P' if P[0] < V[0] else 'V'
    lst = P if first == 'P' else V
    up, down, ranging = [], [], []
    
    # Identify intermediate trend changepoints comprising 5 moves,
    # i.e., 3 trending, 2 retracements 
    for p, v in zip(P[2:], V[2:]):
        ix = p if first == 'P' else v
        i = lst.index(ix)
        g, h = i - 2, i - 1
        a, b, c = P[g], P[h], p
        d, e, f = V[g], V[h], v
        p0, p1, p2, v0, v1, v2 = X[[a,b,c,d,e,f]]

        if p2 > p1 > p0 and v2 > v1 > v0: # Uptrend
            if t and t != 'up':
                if chg[t]['start']:
                    chg[t]['end'].append(d)
                chg['up']['start'].append(d)
                up.append(d)
            if not t:
                chg['up']['start'].append(d)
                up.append(d)
            t = 'up'
        elif v2 < v1 < v0 and p2 < p1 < p0: # Downtrend
            if t and t != 'down':
                if chg[t]['start']:
                    chg[t]['end'].append(a)
                chg['down']['start'].append(a)
                down.append(a)
            if not t:
                chg['down']['start'].append(a)
                down.append(a)
            t = 'down'
        else:
            # NEEDS TO BE FIXED FOR CONSISTENCY
            if t in ('up', 'down'): # Ranging
                ix = [a, b] if t == 'up' else [d, e]
                ix = ix[0] if first == 'P' else ix[1]
                if chg[t]['start']:
                    chg[t]['end'].append(ix)
                chg['ranging']['start'].append(ix)
                ranging.append(ix)
            if not t:
                chg['ranging']['start'].append(a)
                ranging.append(a)
            t = 'ranging'
    
    dr = chg['ranging']
    du = chg['up']
    dd = chg['down']
    remove = {'up': [], 'down': []}

    if dr['end'] and dr['start'][0] >= dr['end'][0]:
        dr['start'].pop(0)
        dr['end'].pop(0)

    # Add last value in data to appropriate changepoint
    x = sorted(up + down + ranging)[-1]
    for k, v in chg.items():
        if x in v['start']:
            chg[k]['end'].append(df.shape[0] - 1)
            break
        
    # Fix first value in chg['ranging']
    try:
        rs, re = dr['start'][0], dr['end'][0]
        for d in (du, dd):
            s = d['start'][0]
            if re == s:
                d['start'][0] = rs
                dr['start'].pop(0)
                dr['end'].pop(0)
                break
    except: pass

    # Find overlapping changepoints if 'up' is broken by ranging 
    # and followed by 'up' or vice versa
    for j, d in enumerate((du, dd)):
        k = 'up' if j == 0 else 'down'
        try:
            for i in range(len(dr['start']) - 1):
                if dr['start'][i] == d['end'][i] and \
                    dr['end'][i] == d['start'][i + 1]:
                    remove[k].append([dr['start'][i], dr['end'][i]])
        except: pass        
    
    # Fix changepoints if 'up' is broken by ranging 
    # and followed by 'up' or vice versa
    for k, v in remove.items():
        d = du if k == 'up' else dd
        for s, e in v:
            d['start'].remove(e)
            d['end'].remove(s)
            dr['start'].remove(s)
            dr['end'].remove(e)
    
    return chg, P, V

In [88]:
ticker = 'a'
print(ticker.upper())
period = 'W1'
days = TA_PERIODS[period]['days']
end = last_date
start = last_date - timedelta(days)
end = end + timedelta(1) if period.endswith('m') else end
df = make_dataframe(ticker, period=period)[start:end]
chg, P, V = trend_changepoints(df)
pprint(chg)

A
{'down': {'end': [], 'start': []},
 'ranging': {'end': [104], 'start': [5]},
 'up': {'end': [], 'start': []}}


### Plot Trends

In [39]:
def plot_trends(ticker, period='D1', graph='line', bgcolor='slategray'):
    period_d = TA_PERIODS[period]
    days = period_d['days']
    end = last_date
    start = end - timedelta(days)
    end = end + timedelta(1) if period.endswith('m') else end
    df = make_dataframe(ticker, period)[start:end]
    
    data = []
    nrows = 1
    fig = make_subplots(rows=nrows, cols=1,
                        shared_xaxes=True, 
                        vertical_spacing=0.05,
                        subplot_titles=[''] * nrows, 
                        )

    if graph == 'candlestick':
        cs = go.Candlestick(x=df.index, 
                            open=df['Open'], 
                            high=df['High'],
                            low=df['Low'], 
                            close=df['Close'],
                            name=ticker)
        cs.increasing.fillcolor = 'green'
        cs.increasing.line.color = 'darkgreen'
        cs.decreasing.fillcolor = 'red'
        cs.decreasing.line.color = 'indianred'
        data.append(cs)
    else:   
        line = go.Scatter(x=df.index, 
                          y=df.Close, 
                          name='Close', 
                          mode='lines',
                          line_color='blue', 
                          opacity=0.5, 
                          connectgaps=True)
        data.append(line)

    pos = [1] * len(data) # position to add rows, cols in subplot 
    fig.add_traces(data=data, rows=pos, cols=pos)  
    
    # show all peaks/valleys & removed peaks/valleys
    vals = order_peaks_valleys(df)
    keys = ['peaks', 'valleys', 'removed']
    colors = ['orange', 'red', 'red']
    symbols = ['x', 'x', 'circle-x']
    line_widths = [0.2, 0.2, 2.5]
    sizes = [7, 7, 10]
    d = dict(val=vals, color=colors, symbol=symbols, 
             lw=line_widths, size=sizes)
    pv_d = {k: {k: v[i] for k, v in d.items()} 
            for i, k in enumerate(keys)}
   
    for k, v in pv_d.items():
        X = v['val']
        fig.add_scatter(x=df.Close[X].index,
                        y=df.Close[X],
                        name=k,
                        mode='markers',
                        marker=dict(symbol=v['symbol'], 
                                    line=dict(color=v['color'],
                                                width=v['lw']),
                                    color=v['color'],
                                    size=v['size']),
                        opacity=0.75,
                        )
    
   # show valid peaks & valleys        
    chg, peaks, valleys = trend_changepoints_debug(df)
    X = sorted(peaks + valleys)

    fig.add_scatter(x=df.Close[X].index,
                    y=df.Close[X],
                    name='valid peaks/valleys',
                    mode='markers',
                    marker=dict(symbol='circle-open', 
                            color='darkgreen', 
                            size=12, 
                            line_width=2.5), 
                    opacity=0.75,
                    showlegend=False)
    
    # show trend changepoints
    for k, v in chg.items():
        try:
            # print(k)
            x = max(v['start'], v['end'])[-1]
            for x0, x1 in zip_longest(v['start'], v['end'], fillvalue=x):                # print(x0, x1)
                if k == 'up':
                    txt = 'U'
                    color = 'green'
                elif k == 'down':
                    txt = 'D'
                    color = 'red'
                else:
                    txt = 'R'
                    color = 'violet'
                fig.add_vrect(x0=df.index[x0], 
                              x1=df.index[x1], 
                              line_width=0, 
                              fillcolor=color, 
                              opacity=0.2,
                              annotation_text=txt, 
                              annotation_position="top left")
        except:
            # print(f'next {k}')
            pass
            
    us_holidays = pd.to_datetime(list(holidays.US(range(start.year, end.year + 1)).keys()))
    rangebreaks = []
    rangeselector = []

    if period == 'M1':
        rangeselector = dict(buttons=[
                                dict(count=1, label="YTD", step="year", stepmode="todate"),
                                dict(count=6, label="6m", step="month", stepmode="backward"),
                                dict(count=1, label="1y", step="year", stepmode="backward"),
                                dict(count=2, label="2y", step="year", stepmode="backward"),
                                dict(count=3, label="3y", step="year", stepmode="backward"),
                                dict(step="all")
                                ])
    elif period == 'W1':
        rangeselector = dict(buttons=[
                                dict(count=1, label="YTD", step="year", stepmode="todate"),
                                dict(count=6, label="6m", step="month", stepmode="backward"),
                                dict(count=1, label="1y", step="year", stepmode="backward"),
                                dict(step="all")
                                ])  
    elif period == 'D1':
        rangebreaks = [dict(bounds=["sat", "mon"])]
    else:
        us_holidays += pd.offsets.Hour(9) + pd.offsets.Minute(30)
        rangebreaks = [dict(bounds=[16, 9.5], pattern="hour"), 
                       dict(bounds=["sat", "mon"])]

    if rangebreaks:
        us_holidays = pd.to_datetime(sorted(list(set(us_holidays) - set(df.index))))
        rangebreaks.append(dict(values=us_holidays))
        fig.update_xaxes(rangebreaks=rangebreaks)

    if rangeselector:
        fig.update_layout(xaxis1=dict(rangeselector=rangeselector))

    # pprint(fig.layout)

    cname = SPY_info_df.loc[ticker.upper(), 'Security']
    title = f'{cname} ({ticker.upper()}) - {period}'

    fig.update_layout(title=dict(text=title, xanchor='left'), paper_bgcolor=bgcolor, plot_bgcolor=bgcolor)
    fig.layout.xaxis.rangeslider.visible = False
    
    return fig

In [143]:
bg = ['black', 'slategray', 'beige']
plot_trends(ticker, period, bgcolor=bg[1])

first: V peaks: 13, valleys: 14

p0: 12.75, p1: 12.95, p2: 14.29, v0:11.86, v1: 11.92, v2: 12.05

2. ['10-10-22', '25-10-22'] 
ix: 15 - 10-10-22
Uptrend
p0: 12.95, p1: 14.29, p2: 14.32, v0:11.92, v1: 12.05, v2: 13.58

3. ['02-11-22', '07-11-22'] up
ix: 32 - 02-11-22
Uptrend
p0: 14.29, p1: 14.32, p2: 14.93, v0:12.05, v1: 13.58, v2: 13.98

4. ['09-11-22', '10-11-22'] up
ix: 37 - 09-11-22
Uptrend
p0: 14.32, p1: 14.93, p2: 14.50, v0:13.58, v1: 13.98, v2: 13.85

5. ['21-11-22', '25-11-22'] up
ix: 45 - 21-11-22
Ranging
           Peaks Valleys    Trend
10-10-22       - 12.0500  ranging
25-10-22 14.2900       -  ranging
02-11-22       - 13.5800  ranging
07-11-22 14.3200       -  ranging
09-11-22       - 13.9800  ranging
10-11-22 14.9300       -  ranging
21-11-22       - 13.8500  ranging
25-11-22 14.5000       -  ranging

End Up - 38 - 10-11-22

{'down': {'end': [], 'start': []},
 'ranging': {'end': [], 'start': [38]},
 'up': {'end': [38], 'start': [5]}}
p0: 14.93, p1: 14.50, p2: 14.43, v0:13.

## S/R Levels

In [15]:
from sklearn import preprocessing
import time
# pd.set_option('display.max_rows', None)
pd.options.display.float_format = "{:,.2f}".format

In [16]:
end = last_date
years = 1
days = 180
start = end - timedelta(days * years)
ticker = 'aapl'
df = get_ticker_data(ticker)[start:end]

In [17]:
def fibonacci_retracement_levels(df):
    highest_swing = -1
    lowest_swing = -1
    high = df['High']
    low = df['Low']
    nr = df.shape[0]

    for i in range(1, nr - 1):
        if high[i] > high[i - 1] and high[i] > high[i + 1] \
            and (highest_swing == -1 or high[i] > high[highest_swing]):
            highest_swing = i
        if low[i] < low[i - 1] and low[i] < low[i + 1] \
            and (lowest_swing == -1 or low[i] < low[lowest_swing]):
            lowest_swing = i

    ratios = [0, 0.236, 0.382, 0.5, 0.618, 0.786, 1]
    levels = []
    max_level = high[highest_swing]
    min_level = low[lowest_swing]

    for ratio in ratios:
        # Uptrend
        if highest_swing > lowest_swing:
            level = max_level - (max_level - min_level) * ratio
        # Downtrend
        else:
            level = min_level + (max_level - min_level) * ratio
        levels.append(level)

    frl_data = {}
    unit = 'minutes' if df.iloc[0].name.minute != 0 else 'days'  
    prev_date = df.iloc[0].name

    for i in range(nr):
        date = df.iloc[i].name
        s_date = date.strftime('%d-%m-%y')
        l = bisect.bisect_left(levels, low[i])
        h = bisect.bisect_left(levels, high[i])
        h = h - 1 if h > 0 else h
        frls = [levels[h], levels[l]] if h != l else [levels[h]]
        # Deals with multiple levels being tested by one bar
        for frl in frls:
            if high[i] > frl and low[i] < frl:
                cum_vol = df.loc[prev_date:date, 'Volume'].sum()
                delta = date - prev_date # time it takes level to form
                delta = delta.days if unit == 'days' else delta.total_seconds() / 60
                prev_date = date
                d = {'Date': [], 'Timedelta': [], 'Volume': [], 'Tested': 0}
                frl_data.setdefault(frl, d)
                frl_data[frl]['Date'].append(s_date)
                frl_data[frl]['Timedelta'].append(delta)
                frl_data[frl]['Volume'].append(cum_vol)
                frl_data[frl]['Tested'] += 1             

    # Calculate significance of levels       
    d = {'FRL': [], 'Date': [], 'Timedelta': [], 'Volume': [], 'Tested': []}

    for k, v in frl_data.items():
        d['FRL'].append(k)
        d['Volume'].append(sum(v['Volume']))
        d['Timedelta'].append(sum(v['Timedelta']))
        d['Tested'].append(v['Tested'])
        d['Date'].append(v['Date'][-1])
        # print(v['Date'][-1])

    # NOT YET DECIDED HOW TO CALCULATE FRL SIGNAL

    return ratios, levels, frl_data


In [18]:
r, l, d = fibonacci_retracement_levels(df)
d

{151.34202392578126: {'Date': ['19-09-22',
   '22-09-22',
   '23-09-22',
   '26-09-22',
   '27-09-22',
   '25-10-22',
   '26-10-22',
   '28-10-22',
   '01-11-22',
   '02-11-22',
   '15-11-22',
   '17-11-22',
   '18-11-22',
   '23-11-22',
   '03-02-23',
   '06-02-23',
   '07-02-23',
   '08-02-23',
   '09-02-23',
   '13-02-23',
   '14-02-23',
   '17-02-23',
   '07-03-23',
   '09-03-23',
   '13-03-23',
   '14-03-23',
   '15-03-23'],
  'Timedelta': [0,
   3,
   1,
   3,
   1,
   4,
   1,
   1,
   4,
   1,
   4,
   2,
   1,
   5,
   2,
   3,
   1,
   1,
   1,
   4,
   1,
   3,
   5,
   2,
   4,
   1,
   1],
  'Volume': [81474200,
   377513300,
   182682400,
   189369300,
   177782100,
   237262800,
   162926600,
   273942600,
   343084900,
   173983900,
   257222100,
   234476000,
   155219000,
   243659200,
   350359900,
   224215600,
   153180900,
   147442700,
   120127200,
   175656800,
   123906600,
   254593400,
   266646600,
   157220400,
   206815100,
   158153000,
   150863800],
  

In [19]:
def isSupport(df, i):
    '''Returns True if value is a price support level'''

    X = df['Low']
    support = X[i] < X[i - 1] \
                and X[i] < X[i + 1] \
                and X[i + 1] < X[i + 2] \
                and X[i - 1] < X[i - 2]

    return support


def isResistance(df, i):
    '''Returns True if value is a price resistance level'''

    X = df['High']
    resistance = X[i] > X[i - 1] \
                    and X[i] > X[i + 1] \
                    and X[i + 1] > X[i + 2] \
                    and X[i - 1] > X[i - 2] 

    return resistance
    

In [20]:
def convert_to_timestamp(x):
    """Convert date objects to integers"""
    
    return time.mktime(x.timetuple())

In [21]:
def sr_levels(df):
    '''Returns key support/resistance levels for a security'''

    df = df.copy()
    df['SR Signal'] = 0
    prev_date = df.iloc[0].name
    unit = 'minutes' if prev_date.minute != 0 else 'days'  
    spt, rst = (0, 0)
    levels = []
    s_levels = []
    sr_data = {}
    many_tests = {} # dict of bars that test more than 1 level
    s = (df['High'] - df['Low']).mean()
    nr, nc = df.shape

    def isFarFromLevel(l):
        '''Returns True if price is not near a previously discovered support or resistance'''
        
        return np.sum([abs(l - x[1]) < s for x in levels]) == 0

    for i in range(2, nr):
        date = df.iloc[i].name
        s_date = date.strftime('%d-%m-%y')
        high = df['High'][i]
        low = df['Low'][i]
        close = df['Close'][i]
        new_spt = False
        new_rst = False
        sr_switch = False

        if i < nr - 2:
            if isSupport(df, i):
                if isFarFromLevel(low):
                    new_spt = True
                    spt = low
                    df.loc[date, 'Support'] = spt
                    levels.append((i, spt))
                    s_levels = sorted([x[1] for x in levels])
                    print('NS'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
            
            if isResistance(df, i):
                if isFarFromLevel(high):
                    new_rst = True
                    rst = high
                    df.loc[date, 'Resistance'] = rst
                    levels.append((i, rst))
                    s_levels = sorted([x[1] for x in levels])
                    print('NR'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f},')    

        # Switch support to resistance & vice versa
        if len(levels) > 1:
            if new_spt:
                ix = bisect.bisect(s_levels, spt)
                rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
            if new_rst:
                ix = bisect.bisect_left(s_levels, rst)
                spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]
            if low > rst: # When resistance broken 
                sr_switch = True
                spt = rst
                ix = bisect.bisect(s_levels, low)
                rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
                print('R-S'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
            if high < spt: # When support broken 
                sr_switch = True
                rst = spt
                ix = bisect.bisect_left(s_levels, high)
                spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]
                print('S-R'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
        
        if new_rst or new_spt or sr_switch:
            cum_vol = df.loc[prev_date:date, 'Volume'].sum()
            delta = date - prev_date # time it takes level to form
            delta = delta.days if unit == 'days' else delta.total_seconds() / 60
            prev_date = date
            d = {'Date': [], 'Timedelta': [], 'Volume': [], 'SR': [], 'Tested': 0, 'Tested Date': []}
            sr_data.setdefault(spt, d)
            sr_data[spt]['Date'].append(s_date)
            sr_data[spt]['Timedelta'].append(delta)
            sr_data[spt]['Volume'].append(cum_vol)
            sr_data[spt]['SR'].append('S')
            # Prevents double-counting when support == resistance
            if spt != rst:
                sr_data.setdefault(rst, d)
                sr_data[rst]['Date'].append(s_date)
                sr_data[rst]['Timedelta'].append(delta)
                sr_data[rst]['Volume'].append(cum_vol)
                sr_data[rst]['SR'].append('R')
                                  
        if spt:
            if close < spt:
                df.loc[date, 'SR Signal'] = 1 # Generate signal
            # Check if S/R levels are tested       
            if high > spt and low < spt:
                sr_data[spt]['Tested'] += 1
                sr_data[spt]['Tested Date'].append(s_date)
                print('ST'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                ix = bisect.bisect_left(s_levels, spt)
                n_spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]    
                while low < n_spt and spt != rst and spt != n_spt:
                    print(f'SH-SL - {date.date()} - NS: {n_spt:.2f}, S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                    rst = spt
                    spt = n_spt
                    many_tests.setdefault(i, set()).union([spt, rst])
                    if ix > 0:
                        ix -= 1
                        n_spt = s_levels[ix]
                        sr_data[n_spt]['Date'].append(s_date)
                        sr_data[n_spt]['Timedelta'].append(delta)
                        sr_data[n_spt]['Volume'].append(cum_vol)
                        sr_data[n_spt]['SR'].append('S')            
   
        if rst:
            if close > rst:
                df.loc[date, 'SR Signal'] = 1 # Generate signal
            # Check if S/R levels are tested       
            if high > rst and low < rst:
                if spt != rst: # Prevents double-counting
                    sr_data[rst]['Tested'] += 1
                    sr_data[rst]['Tested Date'].append(s_date)
                    print('RT'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                    ix = bisect.bisect(s_levels, rst)
                    n_rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
                    while high > n_rst and spt != rst and rst != n_rst:
                        print(f'RL-RH - {date.date()} - NR: {n_rst:.2f}, R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                        spt = rst
                        rst = n_rst
                        many_tests.setdefault(i, set()).union([spt, rst])
                        if ix < len(s_levels) - 1:
                            ix += 1
                            # print(f'ix: {ix}, {s_levels}')
                            n_rst = s_levels[ix]
                            sr_data[n_rst]['Date'].append(s_date)
                            sr_data[n_rst]['Timedelta'].append(delta)
                            sr_data[n_rst]['Volume'].append(cum_vol)
                            sr_data[n_rst]['SR'].append('R')        

        if spt and rst: 
            df.loc[date:, 'Support'] = spt
            df.loc[date:, 'Resistance'] = rst

    del sr_data[0]     
    d = {'SR Level': [], 'Volume': [], 'Timedelta': [], 'Tested': [], 'Date': []}

    for k, v in sr_data.items():
        d['SR Level'].append(k)
        d['Volume'].append(sum(v['Volume']))
        d['Timedelta'].append(sum(v['Timedelta']))
        d['Tested'].append(v['Tested'])
        d['Date'].append(v['Date'][-1])

    ix = 'SR Level'
    sr_df = pd.DataFrame(d, index=d[ix]).drop(columns=ix)
    sr_df['Date'] = pd.to_datetime(sr_df['Date'])
    sr_df['Date'] = sr_df['Date'].apply(convert_to_timestamp)
    scaler = preprocessing.MinMaxScaler(feature_range=(1, 5))
    sd = scaler.fit_transform(sr_df)
    scaled_df = pd.DataFrame(sd, columns=sr_df.columns, index=d[ix])
    scaled_df['Signal'] = scaled_df.mean(axis=1)
    # print(sr_df)
    # print(scaled_df)
    
    # Make 'SR Signal' last column
    cols = list(df.columns)
    cols.append(cols.pop(cols.index('SR Signal')))
    df = df[cols]
    nr, nc = df.shape
    j = nc - 1 # 'SR Signal' column num

    # Assign significance to signals by level
    for i in range(nr):
        if df['SR Signal'][i]:
            if i in many_tests:
                signal = 0
                for l in many_tests[i]:
                    signal += scaled_df.loc[l, 'Signal']
            else:
                if df['Close'][i] > df['Resistance'][i]:
                    l = df['Resistance'][i]
                if df['Close'][i] < df['Support'][i]:
                    l = df['Support'][i]
                signal = scaled_df.loc[l, 'Signal']
 
            df.iloc[i, j] = signal        
    
    return levels, df, sr_data

In [22]:
# sr_data.clear()
levels, ta_df, sr_data = sr_levels(df)
# levels

NR    - 2022-09-21 - R: 158.74, S: 0.00, hi: 158.74, lo: 153.60,
NS    - 2022-09-23 - S: 148.56, R: 158.74, hi: 151.47, lo: 148.56
NR    - 2022-09-27 - R: 154.72, S: 148.56, hi: 154.72, lo: 149.95,
ST    - 2022-09-28 - S: 148.56, R: 154.72, hi: 150.64, lo: 144.84
S-R   - 2022-09-29 - R: 148.56, S: 148.56, hi: 146.72, lo: 140.68
S-R   - 2022-09-30 - R: 148.56, S: 148.56, hi: 143.10, lo: 138.00
S-R   - 2022-10-03 - R: 148.56, S: 148.56, hi: 143.07, lo: 137.69
S-R   - 2022-10-04 - R: 148.56, S: 148.56, hi: 146.22, lo: 144.26
S-R   - 2022-10-05 - R: 148.56, S: 148.56, hi: 147.38, lo: 143.01
S-R   - 2022-10-06 - R: 148.56, S: 148.56, hi: 147.54, lo: 145.22
S-R   - 2022-10-07 - R: 148.56, S: 148.56, hi: 143.10, lo: 139.45
S-R   - 2022-10-10 - R: 148.56, S: 148.56, hi: 141.89, lo: 138.57
S-R   - 2022-10-11 - R: 148.56, S: 148.56, hi: 141.35, lo: 138.22
S-R   - 2022-10-12 - R: 148.56, S: 148.56, hi: 140.36, lo: 138.16
NS    - 2022-10-13 - S: 134.37, R: 148.56, hi: 143.59, lo: 134.37
RT    - 20

In [23]:
pprint(sr_data)

{134.3699951171875: {'Date': ['13-10-22',
                              '03-11-22',
                              '07-12-22',
                              '15-12-22',
                              '20-12-22',
                              '23-12-22',
                              '27-12-22',
                              '28-12-22',
                              '29-12-22',
                              '30-12-22',
                              '03-01-23',
                              '04-01-23',
                              '05-01-23',
                              '06-01-23',
                              '09-01-23',
                              '10-01-23',
                              '11-01-23',
                              '12-01-23',
                              '18-01-23'],
                     'SR': ['S',
                            'S',
                            'S',
                            'S',
                            'S',
                            'S',
   

In [24]:
d = {'SR Level': [], 'Volume': [], 'Timedelta': [], 'Tested': []}

for k, v in sr_data.items():
    d['SR Level'].append(k)
    d['Volume'].append(sum(v['Volume']))
    d['Timedelta'].append(sum(v['Timedelta']))
    d['Tested'].append(v['Tested'])

sr_df = pd.DataFrame(d, index=d['SR Level']).drop(columns='SR Level')
scaler = preprocessing.MinMaxScaler(feature_range=(1, 5))
sd = scaler.fit_transform(sr_df)
cols = ['Volume', 'Timedelta', 'Tested']
scaled_df = pd.DataFrame(sd, columns=sr_df.columns, index=d['SR Level'])
scaled_df['Signal'] = scaled_df.mean(axis=1)
print(sr_df.head(), '\n')
print(scaled_df)

            Volume  Timedelta  Tested
158.74  2010932400         23       0
148.56  8617455200        121      27
154.72  2522970900         41       6
134.37  4588320600         53       8
144.13  4141199000         72      13 

        Volume  Timedelta  Tested  Signal
158.74    1.00       1.00    1.00    1.00
148.56    5.00       5.00    5.00    5.00
154.72    1.31       1.73    1.89    1.64
134.37    2.56       2.22    2.19    2.32
144.13    2.29       3.00    2.93    2.74


## MA Signals

- MA Crossovers, i.e. MA5/MA25, etc...
- Identify when moving averages converge

In [25]:
def calc_slope(x):
    slope = np.polyfit(range(len(x)), x, 1)[0] 
    
    return slope

In [26]:
def calculate_ma_signals(df, MAs):
    for i, ma in enumerate(MAs):
        ix = df.iloc[ma].name
        df[f'MA{ma}'] = df['Close'].rolling(ma).mean()
        df[f'MA{ma} Slope'] = df[f'Close'].rolling(ma).apply(calc_slope)
        # df[f'MA{ma} Slope Diff'] = df[f'MA{ma} Slope'].diff()
        df.loc[ix:, f'MA{ma} Signal'] = np.where(df['Close'][ma:] > df[f'MA{ma}'][ma:], 1, 0)
        df[f'MA{ma} Crossover'] = df[f'MA{ma} Signal'].diff()
        df.drop(columns=f'MA{ma} Signal', inplace=True)

    df.loc[:, 'MA Std. Dev'] = df.loc[:, [f'MA{ma}' for ma in MAs]].std(axis=1)
    
    return df

In [27]:
MAs = [10, 20, 50]
ma_df = calculate_ma_signals(ta_df, MAs)
# ma_df.loc['2022-09-12':'2022-11-12', ['MA10 Slope',	'MA10 Slope Diff', 'MA20 Slope', 'MA50 Slope']]
ma_df.loc['2022-09-12':'2022-11-12', ['MA10', 'MA20', 'MA50', 'MA Std. Dev']]

Unnamed: 0_level_0,MA10,MA20,MA50,MA Std. Dev
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-09-19,,,,
2022-09-20,,,,
2022-09-21,,,,
2022-09-22,,,,
2022-09-23,,,,
2022-09-26,,,,
2022-09-27,,,,
2022-09-28,,,,
2022-09-29,,,,
2022-09-30,150.13,,,


In [28]:
math.atan2(ma_df['MA10'].values, ma_df.index.values)

TypeError: only size-1 arrays can be converted to Python scalars

In [None]:
ma_df.loc['2022-10-03':'2022-10-04', ['MA10', 'MA10 Slope', 'MA20', 'MA20 Slope']]

Unnamed: 0,MA10,MA10 Slope,MA20,MA20 Slope
2022-10-03,148.93,-0.73,152.09,
2022-10-04,147.85,-0.83,151.67,


In [None]:
x = ma_df.loc['2022-10-03':'2022-10-04', 'MA10']
slope = np.polyfit(range(len(x)), x, 1)[0] 
slope


-1.0799987792968864

### Bollinger Bands

In [29]:
from talib import BBANDS, MACD, RSI

In [30]:
up, mid, down = BBANDS(df['Close'], timeperiod=21, nbdevup=2, nbdevdn=2, matype=0)
bands = {'BB Up': up, 'BB Mid': mid, 'BB Down': down}
bb_df = pd.DataFrame(bands)
bb_df

Unnamed: 0_level_0,BB Up,BB Mid,BB Down
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-09-19,,,
2022-09-20,,,
2022-09-21,,,
2022-09-22,,,
2022-09-23,,,
...,...,...,...
2023-03-10,156.04,150.43,144.82
2023-03-13,156.02,150.41,144.80
2023-03-14,156.16,150.48,144.81
2023-03-15,156.04,150.44,144.85


In [31]:
open.rolling(5, center=True).mean()

AttributeError: 'builtin_function_or_method' object has no attribute 'rolling'

In [32]:
from talib import BBANDS, MACD, RSI

In [33]:
up, mid, low = BBANDS(df.Close, timeperiod=21, nbdevup=2, nbdevdn=2, matype=0)

In [34]:
# df[['BB Up', 'BB Mid', 'BB Down']] = up, mid, low
df['BB Up'] = up
df['BB Mid'] = mid
df['BB Down'] = low
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,BB Up,BB Mid,BB Down
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-09-19,149.31,154.56,149.10,154.48,153.99,81474200,,,
2022-09-20,153.40,158.08,153.08,156.90,156.40,107689800,,,
2022-09-21,157.34,158.74,153.60,153.72,153.23,101696800,,,
2022-09-22,152.38,154.47,150.91,152.74,152.25,86652500,,,
2022-09-23,151.19,151.47,148.56,150.43,149.95,96029900,,,
...,...,...,...,...,...,...,...,...,...
2023-03-10,150.21,150.94,147.61,148.50,148.50,68524400,156.04,150.43,144.82
2023-03-13,147.81,153.14,147.70,150.47,150.47,84457100,156.02,150.41,144.80
2023-03-14,151.28,153.40,150.10,152.59,152.59,73695900,156.16,150.48,144.81
2023-03-15,151.19,153.25,149.92,152.99,152.99,77167900,156.04,150.44,144.85


In [35]:
rsi = RSI(df.Close, timeperiod=14)
rsi.index

DatetimeIndex(['2022-09-19', '2022-09-20', '2022-09-21', '2022-09-22',
               '2022-09-23', '2022-09-26', '2022-09-27', '2022-09-28',
               '2022-09-29', '2022-09-30',
               ...
               '2023-03-03', '2023-03-06', '2023-03-07', '2023-03-08',
               '2023-03-09', '2023-03-10', '2023-03-13', '2023-03-14',
               '2023-03-15', '2023-03-16'],
              dtype='datetime64[ns]', name='Date', length=124, freq=None)

In [36]:
macd, macdsignal, macdhist = MACD(df.Close, fastperiod=12, slowperiod=26, signalperiod=9)

## Volume Analysis

In [37]:
TA_PERIODS

{'M1': {'MA': [3, 6, 9], 'days': 2920},
 'W1': {'MA': [10, 20, 40], 'days': 730},
 'D1': {'MA': [10, 20, 50, 200], 'days': 180},
 '30m': {'MA': [7, 17, 33, 65], 'days': 20},
 '10m': {'MA': [20, 50, 100, 195], 'days': 7},
 '5m': {'MA': [40, 100, 200], 'days': 3},
 '2m': {'MA': [20, 50, 100], 'days': 1},
 '1m': {'MA': [50, 100, 200], 'days': 0}}

In [38]:
days = TA_PERIODS[period]['days']
end = last_date
start = end - timedelta(days)

In [53]:
def volume_analysis(ticker, period, start, end):
    MAs = TA_PERIODS[period]['MA']
    short_ma, inter_ma, primary_ma, *_ = MAs
    df = make_dataframe(ticker, period)[start:end]
    nrows = df.shape[0]
    df = df[['Close', 'Volume']]
    df.rename(columns={'Close': 'Price'}, inplace=True)
    df['Price %'] = df['Price'].pct_change()
    df['Volume %'] = df['Volume'].pct_change()
    p = np.sign(df['Price %'])
    v = np.sign(df['Volume %'])
    df['Price-Volume'] = np.where(p == v, 0, 1)
    nrows1 = df[df['Price-Volume'] == 1].shape[0]
    print(f'{nrows1 / nrows:.0%} of Price-Volume == 1')

    for ma in MAs[:3]:
        comp = []
        for col in ('Price', 'Volume'):
            s1 = f'{col} MA{ma}'
            s2 = f'{s1} %'
            df[s1] = df[col].rolling(ma).mean()
            df[s2] = df[s1].pct_change()
            X = np.sign(df[s2])
            comp.append(X)
        p, v = comp
        s = f'Price-Volume MA{ma}'
        df[s] = np.where(p == v, 0, 1)
        nrows1 = df[df[s] == 1].shape[0]
        print(f'{nrows1 / nrows:.0%} of {s} == 1')

    # df.dropna(inplace=True)

    # cols = ['Price %', 'Volume %', 
    #         # 'Close Trend', 'Volume Trend',
    #         'Price-Volume']
    # df = df[cols]

    return df

In [56]:
vol_df = volume_analysis(ticker, period, start, end)
vol_df.tail(20)

49% of Price-Volume == 1
53% of Price-Volume MA10 == 1
68% of Price-Volume MA20 == 1
73% of Price-Volume MA50 == 1


Unnamed: 0_level_0,Price,Volume,Price %,Volume %,Price-Volume,Price MA10,Price MA10 %,Volume MA10,Volume MA10 %,Price-Volume MA10,Price MA20,Price MA20 %,Volume MA20,Volume MA20 %,Price-Volume MA20,Price MA50,Price MA50 %,Volume MA50,Volume MA50 %,Price-Volume MA50
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-02-16,153.71,68167900,-0.01,0.04,1,153.08,0.0,74276440.0,-0.06,1,148.38,0.01,74376800.0,0.01,0,139.91,0.0,77052530.0,-0.0,1
2023-02-17,152.55,59144100,-0.01,-0.13,0,152.88,-0.0,64755120.0,-0.13,0,149.11,0.0,73322825.0,-0.01,1,140.1,0.0,76940868.0,-0.0,1
2023-02-21,148.48,58867200,-0.03,-0.0,0,152.56,-0.0,63656010.0,-0.02,0,149.48,0.0,72178170.0,-0.02,1,140.25,0.0,76723790.0,-0.0,1
2023-02-22,148.91,51011300,0.0,-0.13,1,151.98,-0.0,60424880.0,-0.05,0,149.8,0.0,71406980.0,-0.01,1,140.37,0.0,76501450.0,-0.0,1
2023-02-23,149.4,48394200,0.0,-0.05,1,151.73,-0.0,58852290.0,-0.03,0,150.18,0.0,70536725.0,-0.01,1,140.52,0.0,75947394.0,-0.01,1
2023-02-24,146.71,55469600,-0.02,0.15,1,151.32,-0.0,58798540.0,-0.0,0,150.31,0.0,70604950.0,0.0,0,140.56,0.0,75647532.0,-0.0,1
2023-02-27,147.92,44998500,0.01,-0.19,1,151.01,-0.0,57553320.0,-0.02,0,150.41,0.0,69327085.0,-0.02,1,140.61,0.0,74669778.0,-0.01,1
2023-02-28,147.41,50547000,-0.0,0.12,1,150.36,-0.0,56388120.0,-0.02,0,150.63,0.0,68653670.0,-0.01,1,140.7,0.0,74034894.0,-0.01,1
2023-03-01,145.31,55479000,-0.01,0.1,1,149.57,-0.01,55765260.0,-0.01,0,150.69,0.0,68133895.0,-0.01,1,140.87,0.0,73165836.0,-0.01,1
2023-03-02,145.91,52238100,0.0,-0.06,1,148.63,-0.01,54431690.0,-0.02,0,150.71,0.0,66862620.0,-0.02,1,141.1,0.0,71007460.0,-0.03,1


In [42]:
def plot_signals(graph, ticker, start, end, period, plot_data,
                 show_vol, show_rsi, show_macd, show_sr, show_fr, 
                 show_bb, show_MAs, show_adv_MAs, show_trend_analysis, 
                 show_trendlines_c, show_trendlines_hl):
    '''
    Returns plot figure

    Parameters
    ----------
    df: DataFrame of a security's market data
    start: Start Date
    end: End Date
    period: user-input
    minor_ma: moving average window
    secondary_ma : moving average window
    primary_ma: moving average window
    show_prices: display candlestick data on crowded charts
    '''

    ticker = ticker.split(' - ')[0]
    end = end + timedelta(1) if period.endswith('m') else end
    df = make_dataframe(ticker, period)[start:end]
    MAs = plot_data['MAs']
    nrows = 1 + show_vol + show_rsi + show_macd
    r1 = 1 - 0.1 * nrows
    r2 = (1 - r1) / (nrows - 1)
    row_heights = [r1] + [r2] * (nrows - 1)
    fig_row = 2
    data = []
    name = ticker

    fig = make_subplots(rows=nrows, cols=1,
                        shared_xaxes=True, 
                        vertical_spacing=0.05,
                        subplot_titles=[''] * nrows, 
                        row_heights=row_heights)
    fig.update_xaxes(showgrid=True)          
    fig.update_yaxes(showgrid=False, type='log')

    if graph == 'Candlesticks' and not show_trend_analysis and not show_trendlines_c:
        cs = go.Candlestick(x=df.index, 
                            open=df['Open'], 
                            high=df['High'],
                            low=df['Low'], 
                            close=df['Close'],
                            name=name)  
        cs.increasing.fillcolor = 'green'
        cs.increasing.line.color = 'darkgreen'
        cs.decreasing.fillcolor = 'red'
        cs.decreasing.line.color = 'indianred'
        data.append(cs)
    else:
        xy = go.Scatter(x=df.index,
                        y=df['Close'],
                        name=name,
                        line_width=1.5,
                        connectgaps=True)
        data.append(xy)

    # Plot MAs & advanced MAs
    if show_MAs or show_adv_MAs:
        adv_MAs = plot_data['Adv MAs']
        colors = ['red', 'cyan', 'gold']
        dash = 'dot' if show_MAs and show_adv_MAs else 'solid'
        for ma, adv_ma, color in zip(MAs, adv_MAs, colors): 
            if show_MAs:
                y = df['Close'].rolling(ma).mean()
                sma = go.Scatter(x=df.index,
                                 y=y,
                                 name=f'MA{ma}',
                                 line_width=1,
                                 line_color=color,
                                 connectgaps=True)
                data.append(sma)
            if show_adv_MAs:
                y = df['Close'].rolling(ma).mean().shift(adv_ma)
                advanced_sma = go.Scatter(x=df.index,
                                          y=y,
                                          name=f'MA{ma}+{adv_ma}',
                                          line_width=1.25,
                                          line_color=color,
                                          line_dash=dash,
                                          connectgaps=True)
                data.append(advanced_sma)
    
    pos = [1] * len(data) # position to add rows, cols in subplot 
    fig.add_traces(data=data, rows=pos, cols=pos)  

    # Support & resistance lines
    if show_sr:
        levels, _ = sr_levels(df)
        for i, l in levels:
            n = df.shape[0] - i
            fig.add_scatter(x=df.index[i:],
                            y=[l] * n,
                            name='SR',
                            line_width=0.5,
                            line_color='orange',
                            mode='lines',
                            showlegend=False,
                            connectgaps=True)   

    # Fibonacci retracements
    if show_fr:
        colors = ["darkgray", "indianred", "green", "blue", "cyan", "magenta", "gold"]
        ratios, levels = fibonacci_retracements(df)
        for i in range(len(ratios)):
            fig.add_scatter(x=df.index,
                            y=[levels[i]] * df.shape[0],
                            name=f'FR {ratios[i]:,.2%}',
                            line_color=colors[i],
                            line_width=0.75,
                            line_dash='dot',
                            connectgaps=True)
    
    # Bollinger bands
    if show_bb:
        up, mid, down = BBANDS(df['Close'], timeperiod=21, nbdevup=2, nbdevdn=2, matype=0)
        bands = {'BB Up': up, 'BB Mid': mid, 'BB Down': down}
        for k, v in bands.items():
            dash = 'dot' if k == 'BB Mid' else 'solid'
            fig.add_scatter(x=v.index,
                            y=v.values,
                            name=k,
                            line_width=1,
                            line_dash=dash,
                            mode='lines',
                            connectgaps=True)

    # Trend Analysis
    if show_trend_analysis:
        fig.update_xaxes(showgrid=False)
        fig.update_yaxes(showgrid=False)
        # P, V, r_PV = order_peaks_valleys(df)
        vals = order_peaks_valleys(df)
        keys = ['peaks', 'valleys', 'removed']
        colors = ['orange', 'red', 'red']
        symbols = ['x', 'x', 'circle-x']
        line_widths = [0.2, 0.2, 2.5]
        sizes = [7, 7, 10]
        d = dict(val=vals, color=colors, symbol=symbols, 
                 lw=line_widths, size=sizes)
        pv_d = {k: {k: v[i] for k, v in d.items()} 
                for i, k in enumerate(keys)}

        # show all peaks/valleys & removed peaks/valleys    
        for k, v in pv_d.items():
            X = v['val']
            fig.add_scatter(x=df.Close[X].index,
                            y=df.Close[X],
                            name=k,
                            mode='markers',
                            marker=dict(symbol=v['symbol'], 
                                        line=dict(color=v['color'],
                                                  width=v['lw']),
                                        color=v['color'],
                                        size=v['size']),
                            opacity=0.75,
                            )

        # show valid peaks & valleys        
        chg, peaks, valleys = trend_changepoints(df)
        X = sorted(peaks + valleys)

        fig.add_scatter(x=df.Close[X].index,
                        y=df.Close[X],
                        name='valid peaks/valleys',
                        mode='markers',
                        marker=dict(symbol='circle-open', 
                                color='limegreen', 
                                size=12, 
                                line_width=2.5), 
                        opacity=0.5,
                        showlegend=False)
        
        # show trend changepoints
        for k, v in chg.items():
            try:
                x = max(v['start'], v['end'])[-1]
                for x0, x1 in zip_longest(v['start'], v['end'], fillvalue=x):
                    if k == 'up':
                        txt = 'U'
                        color = 'green'
                    elif k == 'down':
                        txt = 'D'
                        color = 'red'
                    else:
                        txt = 'R'
                        color = 'gray'
                    fig.add_vrect(x0=df.index[x0], 
                                  x1=df.index[x1], 
                                  line_width=0, 
                                  fillcolor=color, 
                                  opacity=0.2,
                                  annotation_text=txt, 
                                  annotation_position="top left")
            except: pass      

    # Trendlines
    if show_trendlines_c or show_trendlines_hl:
        pv_df, peaks, valleys, PV, trendlines_c, trendlines_hl = peaks_valleys_trendlines(df)
        fig.add_scatter(x=df.Close[peaks].index,
                        y=df.Close[peaks],
                        name='Peaks',
                        mode='markers',
                        marker=dict(symbol='x', color='yellow', size=5))
        fig.add_scatter(x=df.Close[valleys].index,
                        y=df.Close[valleys],
                        name='Valleys',
                        mode='markers',
                        marker=dict(symbol='x', color='red', size=5))
        fig.add_scatter(x=df.Close[PV].index,
                        y=df.Close[PV],
                        name='Valid Peaks / Valleys',
                        mode='markers',
                        marker=dict(symbol='circle-open', color='limegreen', size=8))
        if show_trendlines_c:      
            for x, y in trendlines_c:
                fig.add_scatter(x=x,
                                y=y,
                                name='Trendline (Close)',
                                mode='lines',
                                line_color='magenta',
                                opacity=0.5,
                                showlegend=False)
        if show_trendlines_hl:      
            for x, y in trendlines_hl:
                fig.add_scatter(x=x,
                                y=y,
                                name='Trendline (HL)',
                                mode='lines',
                                line_color='cyan',
                                opacity=0.5,
                                showlegend=False)

    # Volume subplot
    if show_vol:
        name = 'Volume'
        fig.add_bar(x=df.index,
                    y=df[name],
                    name=name,
                    marker={'color': 'steelblue'},
                    row=fig_row, col=1)
        # fig.update_layout({f'yaxis{fig_row}': {'title': name}})
        fig_row += 1

    # RSI subplot
    if show_rsi:
        rsi = RSI(df['Close'], timeperiod=14)
        name = 'RSI'
        fig.add_scatter(x=rsi.index,
                        y=rsi.values,
                        name=name,
                        line_width=1,
                        mode='lines',
                        connectgaps=True,
                        showlegend=False,
                        row=fig_row, col=1)
        fig.add_hline(70, line_width=0.5, line_dash='dot', line_color='red', 
                      row=fig_row, col=1)
        fig.add_hline(30, line_width=0.5, line_dash='dot', line_color='red', 
                      row=fig_row, col=1)
        fig.update_layout({f'yaxis{fig_row}': {'type': 'linear', 'title': name}})
        fig_row += 1
    
    # MACD subplot
    if show_macd:
        macd, *_ = MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        name = 'MACD'
        fig.add_scatter(x=macd.index,
                        y=macd.values,
                        name=name,
                        line_width=1,
                        mode='lines',
                        connectgaps=True,
                        showlegend=False,
                        row=fig_row, col=1)
        fig.update_layout({f'yaxis{fig_row}': {'type': 'linear', 'title': name}})
    
    us_holidays = pd.to_datetime(list(holidays.US(range(start.year, end.year + 1)).keys()))
    rangebreaks = []
    rangeselector = []

    if period == 'M1':
        rangeselector = dict(buttons=[
                                dict(count=1, label="YTD", step="year", stepmode="todate"),
                                dict(count=6, label="6m", step="month", stepmode="backward"),
                                dict(count=1, label="1y", step="year", stepmode="backward"),
                                dict(count=2, label="2y", step="year", stepmode="backward"),
                                dict(count=3, label="3y", step="year", stepmode="backward"),
                                dict(step="all")
                                ])
    elif period == 'W1':
        rangeselector = dict(buttons=[
                                dict(count=1, label="YTD", step="year", stepmode="todate"),
                                dict(count=6, label="6m", step="month", stepmode="backward"),
                                dict(count=1, label="1y", step="year", stepmode="backward"),
                                dict(step="all")
                                ])  
    elif period == 'D1':
        rangebreaks = [dict(bounds=["sat", "mon"])]
    else:
        us_holidays += pd.offsets.Hour(9) + pd.offsets.Minute(30)
        rangebreaks = [dict(bounds=[16, 9.5], pattern="hour"), 
                       dict(bounds=["sat", "mon"])]

    if rangebreaks:
        us_holidays = pd.to_datetime(sorted(list(set(us_holidays) - set(df.index))))
        rangebreaks.append(dict(values=us_holidays))
        fig.update_xaxes(rangebreaks=rangebreaks)

    if rangeselector:
        fig.update_layout(xaxis1=dict(rangeselector=rangeselector))

    cname = SPY_info_df.loc[ticker, 'Security']
    title = f'{cname} ({ticker}) - {period}'

    fig.update_layout(title=dict(text=title, xanchor='center'))
    fig.layout.xaxis.rangeslider.visible = False

    return fig

In [43]:
ticker = 'A'
period = 'D1'
df = make_dataframe(ticker, period)
graph = 'line'
plot_MAs = TA_PERIODS[period]['MA']
plot_data = {'MAs': plot_MAs,
             'Adv MAs': [int(ma**(1/2)) for ma in plot_MAs]}
end = df.index[-1]
days = TA_PERIODS[period]['days']
start = end - timedelta(days=days)
show_vol = True 
show_rsi = False
show_macd = False
show_sr = False
show_fr = False
show_bb = False
show_MAs = False
show_adv_MAs = False
show_trend_analysis = False 
show_trendlines_c = False
show_trendlines_hl = False

In [48]:
plot_signals(graph, ticker, start, end, period, plot_data,
             show_vol, show_rsi, show_macd, show_sr, show_fr, 
             show_bb, show_MAs, show_adv_MAs, show_trend_analysis, 
             show_trendlines_c, show_trendlines_hl)