In [1]:
import os
import bisect
import itertools
import math
import numpy as np
import pandas as pd
import requests
import pickle
from datetime import timedelta, datetime as dt
import pytz
from pytz import timezone
from pprint import pprint
import pandas_datareader.data as web
from collections import namedtuple
from operator import itemgetter
from tqdm import tqdm

import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
import fundamentalanalysis as fa
import holidays
import pandas_datareader.data as web
import yahoo_fin.stock_info as si
from urllib.request import Request, urlopen
from html_table_parser.parser import HTMLTableParser 
import streamlit as st

from load_data import *
from get_data import get_tickers

pd.options.plotting.backend = "plotly"
# pd.set_option('display.max_rows', None)


In [2]:
rf_rates = get_rf_data()
SPY_df = get_SPY_data()
SPY_info_df = get_SPY_info()
ticker_info = get_ticker_info()
ticker_list = SPY_info_df.index.to_list()
sector_list = SPY_info_df['Sector'].unique()
first_date = SPY_df.iloc[0].name
last_date = SPY_df.iloc[-1].name
yr_ago = last_date - timedelta(days=365)
# TTM_ratios, ratios_data_report = load_TTM_ratios()

## Technical Analysis

To Do:
- Peak-Valleys / Trendlines
- MAs
- Volume Analysis

In [3]:
end = dt.now()
days = end.weekday() - 5 if end.weekday() >= 5 else 0
days = days + 2 if end.weekday() == 0 & end.hour < 24 else 0
end -= timedelta(days)
start = end - timedelta(60 - days)

print(start, end)

2023-01-16 23:04:11.659624 2023-03-17 23:04:11.659624


In [4]:
resampled_df = resample_data('acgl', '60m')
# resampled_df.loc['2023-02-15': '2023-02-21', :]
resampled_df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-16 11:30:00,64.650002,65.260002,64.489998,65.214996,65.214996,320830
2023-03-16 12:30:00,65.220001,66.514999,65.199997,66.404999,66.404999,476671
2023-03-16 13:30:00,66.425003,66.699997,66.169998,66.364998,66.364998,325049
2023-03-16 14:30:00,66.370003,66.644997,66.279999,66.404999,66.404999,472719
2023-03-16 15:30:00,66.410004,66.455002,65.970001,66.080002,66.080002,784967


### Peak-Troughs / Trendlines

In [19]:
from scipy.signal import find_peaks, peak_prominences, argrelextrema
from sklearn import preprocessing
from scipy.stats import linregress

pd.options.display.float_format = "{:,.2f}".format

In [20]:
end = last_date
years = 1
days = 180
start = end - timedelta(days * years)
ticker = 'a'
df = get_ticker_data(ticker)[start:end]
df_5m = resample_data(ticker, '5m')
open = df['Open'] 
high = df['High']
low = df['Low']
close = df['Close']
volume = df['Volume']

print(f'start: {start.date()}, end: {end.date()}')

start: 2022-09-10, end: 2023-03-09


- The significance of a trendline is a function of its length, the number of times it has been touched, and the
angle of ascent or descent

In [20]:
def previous_current_next(iterable):
    """
    Make an iterator that yields an (previous, current, next) tuple per element.
    Returns None if the value does not make sense (i.e. previous before
    first and next after last).
    """
    
    iterable = iter(iterable)
    prv = None
    cur = next(iterable)
    try:
        while True:
            nxt = next(iterable)
            yield (prv, cur, nxt)
            prv = cur
            cur = nxt
    except StopIteration:
        yield (prv, cur, None)

In [9]:
x = [x for x in range(5)]
ix = iter(x)
iy = iter(reversed(x))

In [10]:
# Examples
# print( list(previous_current_next([])))
# print( list(previous_current_next([1])))
# print( list(previous_current_next([1,2])))
print( list(previous_current_next(x)))

[(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, None)]


In [21]:
def peaks_valleys_trendlines(df):
    '''
    Analyses peaks and valleys to identify valid trendlines

    Parameters
    ----------
    df: DataFrame
    '''
    df = df.copy().drop(columns='Adj Close')
    close = df['Close']
    peaks = argrelextrema(close.to_numpy(), np.greater)[0]
    valleys = argrelextrema(close.to_numpy(), np.less)[0]
    PV = sorted(list(peaks) + list(valleys))
    first = 'Peak' if min(peaks[0], valleys[0]) == peaks[0] else 'Valley'
    second = 'Peak' if first == 'Valley' else 'Valley'
    first_vals = set(peaks) if first == 'Peak' else set(valleys)
    second_vals = set(peaks) if second == 'Peak' else set(valleys)
    ix = PV[0]
    d0 = abs(close[ix] - close[0])
    dist = [d0]   
    valid_PV = [ix]
    last_valid = first
    df.loc[df.index[peaks], 'isPeak'] = 1
    df.loc[df.index[valleys], 'isValley'] = 1
    df['Peak'] = close[peaks]
    df['Valley'] = close[valleys]
    trend = -1 if first == 'Valley' else 1
    df['PV Trend'] = trend
    trendlines_c = []
    trendlines_hl = []
    i, j = 0, 1
    cnt = 0
    print(f'nPeaks: {len(peaks)}, nValleys: {len(valleys)}, nPV: {len(PV)}' \
          f'\nfirst peak, valley: {peaks[0], valleys[0]}' \
          f'\nlast peak, valley: {peaks[-1], valleys[-1]}\n')
    
    while j < len(PV):
        col = first if cnt % 2 else second
        o_col = first if not cnt % 2 else second
        cnt += 1
        ix = df.index[PV[j]]
        d1 = dist[-1]
        d2 = close[PV[j]] - close[PV[i]]
        retracement = abs(d2) / d1
        print(f'{cnt}. i: {i}, j: {j} \nPV[{i}]: {PV[i]}, PV[{j}]: {PV[j]}')
        try:
            print(f'PV[{i + 1}]: {PV[i + 1]}, PV[{j + 1}]: {PV[j + 1]}')
        except: pass
        print(f'd2: {close[PV[j]]:.2f} - {close[PV[i]]:.2f} = {d2:.2f}',
              f'\nRetraced: {abs(d2):.2f} / {d1:.2f} = {retracement:.2f}')
        
        # Consider peak/valley valid 
        if retracement >= 1/3:
            print(f'Add {col} {close[PV[j]]:.2f} on {ix.date()}')
            d1 = abs(close[PV[j]] - close[valid_PV[-1]]) # distance between peak & valley
            dist.append(d1)
            valid_PV.append(PV[j])
            last_valid = col
            if j < len(PV):
                j += 1      
                i = j - 1
        elif j < len(PV):
            print(f'Skip {col} {close[PV[j]]:.2f} on {ix.date()}')
            # P = col == 'Peak' and j < len(PV) - 2
            # V = col == 'Valley' and j < len(PV) - 2
            # print(f'i: {i}, j: {j}, P: {P}, V: {V}')
            # if P or V:
                # VV = close[PV[j + 1]] >= close[PV[j - 1]]
                # PP = close[PV[j + 1]] <= close[PV[j - 1]]
                # if (V and VV) or (P and PP):                    
            df.loc[ix, f'is{col}'] = 0
            if len(dist) > 1:
                dist.pop() # Remove distance of invalid peak/valley
            if len(valid_PV) > 1:
                invalid = valid_PV.pop() # Remove invalid peak/valley
                last_valid = first if last_valid == second else first
                ix = df.index[invalid]
                # df.loc[ix, f'is{o_col}'] = 0
                print(f'Remove {o_col} {close[invalid]:.2f} on {ix.date()}') 
                # else:
                #     j += 1
                #     cnt += 1
                #     print(f'Skip {o_col} {close[PV[j]]:.2f} on {df.index[PV[j]].date()}')
            i = PV.index(valid_PV[-1])
            j += 1
        print(f'i: {i}, j: {j}, \n{[round(x, 2) for x in dist]}\n{valid_PV}\n')
            
    # Add last highest peak & last lowest valley if not in valid_PV
    print(f'Last {first} is {close[valid_PV[-2]]:.2f} on {df.index[valid_PV[-2]].date()}\n')
    print(f'Last {second} is {close[valid_PV[-1]]:.2f} on {df.index[valid_PV[-1]].date()}')
    df_ix_list = df.index.to_list()
    nr = df.shape[0]

    if valid_PV[-1] < nr - 1:
        ix = df.index[valid_PV[-1] + 1]
        val = df.loc[ix:, first].dropna() 
        if not val.empty:
            f_ix = val.idxmax() if first == 'Peak' else val.idxmin() # last higest/lowest index
            f_ix_pos = df_ix_list.index(f_ix) # index position    
            if f_ix_pos not in valid_PV and f_ix_pos < nr - 1:
                valid_PV.append(f_ix_pos)
                ix = df.index[f_ix_pos + 1]
                val = df.loc[ix:, second].dropna()
                if not val.empty:
                    s_ix = val.idxmax() if second == 'Peak' else val.idxmin()
                    s_ix_pos = df_ix_list.index(s_ix)
                    valid_PV.append(s_ix_pos)
                    print(f'Updated: \nLast {first} is {close[f_ix_pos]:.2f} on {f_ix.date()} iloc[{f_ix_pos}]\n' \
                          f'Last {second} is {close[s_ix_pos]:.2f} on {s_ix.date()} iloc[{s_ix_pos}]\n')
    
    first_vals &= set(valid_PV[::2])
    second_vals &= set(valid_PV[1::2])
    first_vals = sorted(list(first_vals))
    second_vals = sorted(list(second_vals))
    print(f'n{first}: {len(first_vals)}, n{second}: {len(second_vals)}\n')
    n = len(min(first_vals, second_vals, key=len))
    
    # Identify trends
    for i in range(1, n):
        a, pa = first_vals[i], first_vals[i - 1]
        b, pb = second_vals[i], second_vals[i - 1]
        start = df.index[min(pa, pb)] if i == 1 else end
        end = df.index[max(a, b)]
        if close[a] - close[pa] < 0 and close[b] - close[pb] < 0:
            trend = -1
        elif close[a] - close[pa] > 0 and close[b] - close[pb] > 0:
            trend = 1
        else:
            trend = 0
        df.loc[start:, 'PV Trend'] = trend
        print(f"{i}. start: {start.date()}, end: {end.date()}" \
              f"\np.{first}: {dt.strftime(df.index[pa], '%d.%m.%y')} - {close[pa]:.2f}, " \
              f"{first}: {dt.strftime(df.index[a], '%d.%m.%y')} - {close[a]:.2f}" \
              f"\np.{second}: {dt.strftime(df.index[pb], '%d.%m.%y')} - {close[pb]:.2f}, " \
              f"{second}: {dt.strftime(df.index[b], '%d.%m.%y')} - {close[b]:.2f}" \
              f"\ntrend: {trend}\n")
    
    # Identify potential trendline ranges  
    df['PV Changepoint'] = df['PV Trend'].diff()
    df['Row'] = np.arange(nr)
    mask = (df['PV Changepoint'] != 0) & (df['PV Changepoint'].notna())
    c_points = df[mask].index
    # print(c_points)
    
    for i in range(1, len(c_points)):
        start = df.index[0] if i == 1 else end
        end = c_points[i]
        npeaks = df.loc[start:end, 'isPeak'].sum()
        nvalleys = df.loc[start:end, 'isValley'].sum()
        uptrend = False
        downtrend = False
        if df.loc[start, 'PV Trend'] < 0 and npeaks >= 2:
            downtrend = True
            indices = df[df['isPeak'] == 1][start:end].index
        if df.loc[start, 'PV Trend'] > 0 and nvalleys >= 2:
            uptrend = True
            indices = df[df['isValley'] == 1][start:end].index
        print(f"{i}.".ljust(3), 
              f"{dt.strftime(start, '%d.%m.%y')} - {dt.strftime(end, '%d.%m.%y')}, " \
              f"peaks: {npeaks:.0f}, valleys: {nvalleys:.0f}, " \
              f"trend: {df.loc[start, 'PV Trend']:.0f}, c: {df.loc[start, 'PV Changepoint']:.0f}")
    
        if uptrend or downtrend:    
            xs = np.array(df['Row'][indices])
            ys = np.array(close[indices])
            m, c, r, *_ = linregress(xs, ys)
            x0, x2 = indices[0], indices[-1]
            xn = df_ix_list.index(x2) + 4
            xn = xn if xn < nr else nr - 1
            y0 = close[x0]
            yn = m * xn + c
            trendlines_c.append(((x0, df.index[xn]), (y0, yn)))
            r2 = r**2
            slope_angle = math.atan(m)  # slope angle in radians
            slope_angle_degrees = math.degrees(slope_angle)  # slope angle in degrees
            print('Close Trendline')
            print(linregress(xs, ys))
            print(f'R2: {r2:.2f} \nangle_radians: {slope_angle:.2f} \nangle_deg: {slope_angle_degrees:.2f}')
            print(f'y = mx + c \n{yn:.2f} = {m:.2f} x {xn:.0f} + {c:.2f}')
            print(f'xs: {xs}, \nys: {ys}\n')
            if uptrend:
                ys = np.array(df.Low[indices])
                m, c, r, *_ = linregress(xs, ys)
                y0 = df.Low[x0]
            if downtrend:
                ys = np.array(df.High[indices])
                m, c, r, *_ = linregress(xs, ys)
                y0 = df.High[x0]
                            
            yn = m * xn + c
            trendlines_hl.append(((x0, df.index[xn]), (y0, yn)))
            r2 = r**2
            slope_angle = math.atan(m)  # slope angle in radians
            slope_angle_degrees = math.degrees(slope_angle)  # slope angle in degrees
            print('HL Trendline')
            print(linregress(xs, ys))
            print(f'R2: {r2:.2f} \nangle_radians: {slope_angle:.2f} \nangle_deg: {slope_angle_degrees:.2f}')
            print(f'y = mx + c \n{yn:.2f} = {m:.2f} x {xn:.0f} + {c:.2f}')
            print(f'xs: {xs}, \nys: {ys}\n')
        
    # cols = ['isPeak', 'isValley', 'Peak', 'Valley', 'Row', 'PV Changepoint']
    cols = df.columns[:3].tolist() + ['Volume', 'Row']
    df.drop(columns=cols, inplace=True)
    
    return df, peaks, valleys, valid_PV, trendlines_c, trendlines_hl

In [22]:
pv_df, peaks, valleys, peak_valleys, trendlines_c, trendlines_hl = peaks_valleys_trendlines(df)

nPeaks: 32, nValleys: 32, nPV: 64
first peak, valley: (1, 5)
last peak, valley: (120, 122)

1. i: 0, j: 1 
PV[0]: 1, PV[1]: 5
PV[1]: 5, PV[2]: 6
d2: 129.89 - 139.87 = -9.98 
Retraced: 9.98 / 2.24 = 4.46
Add Valley 129.89 on 2022-09-16
i: 1, j: 2, 
[2.24, 9.98]
[1, 5]

2. i: 1, j: 2 
PV[1]: 5, PV[2]: 6
PV[2]: 6, PV[3]: 12
d2: 130.72 - 129.89 = 0.83 
Retraced: 0.83 / 9.98 = 0.08
Skip Peak 130.72 on 2022-09-19
Remove Valley 129.89 on 2022-09-16
i: 0, j: 3, 
[2.24]
[1]

3. i: 0, j: 3 
PV[0]: 1, PV[3]: 12
PV[1]: 5, PV[4]: 13
d2: 121.61 - 139.87 = -18.26 
Retraced: 18.26 / 2.24 = 8.15
Add Valley 121.61 on 2022-09-27
i: 3, j: 4, 
[2.24, 18.26]
[1, 12]

4. i: 3, j: 4 
PV[3]: 12, PV[4]: 13
PV[4]: 13, PV[5]: 15
d2: 123.72 - 121.61 = 2.11 
Retraced: 2.11 / 18.26 = 0.12
Skip Peak 123.72 on 2022-09-28
Remove Valley 121.61 on 2022-09-27
i: 0, j: 5, 
[2.24]
[1]

5. i: 0, j: 5 
PV[0]: 1, PV[5]: 15
PV[1]: 5, PV[6]: 18
d2: 121.55 - 139.87 = -18.32 
Retraced: 18.32 / 2.24 = 8.18
Add Valley 121.55 on 2022

In [23]:
pv_df.head()

Unnamed: 0,Close,isPeak,isValley,Peak,Valley,PV Trend,PV Changepoint
2022-09-09,137.63,,,,,1,
2022-09-12,139.87,1.0,,139.87,,0,-1.0
2022-09-13,133.54,,,,,0,0.0
2022-09-14,133.25,,,,,0,0.0
2022-09-15,133.16,,,,,0,0.0


In [24]:
trendlines_c

[((Timestamp('2022-09-16 00:00:00'), Timestamp('2022-10-26 00:00:00')),
  (129.88999938964844, 124.48950023651123)),
 ((Timestamp('2022-10-20 00:00:00'), Timestamp('2022-11-09 00:00:00')),
  (125.94000244140624, 137.86800842285157)),
 ((Timestamp('2022-11-09 00:00:00'), Timestamp('2023-01-13 00:00:00')),
  (137.0, 151.47813966391635))]

In [25]:
pv_df[(pv_df['Valley'].notna()) | (pv_df['Peak'].notna())].tail()

Unnamed: 0,Close,isPeak,isValley,Peak,Valley,PV Trend,PV Changepoint
2023-02-24,141.2,,1.0,,141.2,0,0.0
2023-02-27,142.18,0.0,,142.18,,0,0.0
2023-03-01,137.51,,1.0,,137.51,0,0.0
2023-03-03,143.93,1.0,,143.93,,0,0.0
2023-03-07,140.33,,1.0,,140.33,0,0.0


In [26]:
pv_df[(pv_df['isValley'] == 1) | (pv_df['isPeak'] == 1)][['Peak', 'Valley']]

Unnamed: 0,Peak,Valley
2022-09-12,139.87,
2022-09-16,,129.89
2022-09-27,,121.61
2022-09-30,,121.55
2022-10-05,132.64,
2022-10-11,,125.64
2022-10-14,,125.7
2022-10-18,132.3,
2022-10-20,,125.94
2022-10-26,137.69,


In [43]:
fig

In [28]:
ticker = ticker.upper()
cs = go.Candlestick(x=df.index, 
                    open=df['Open'], 
                    high=df['High'],
                    low=df['Low'], 
                    close=df['Close'],
                    name=ticker)
cs.increasing.fillcolor = 'green'
cs.increasing.line.color = 'darkgreen'
cs.decreasing.fillcolor = 'red'
cs.decreasing.line.color = 'indianred'
fig = go.Figure()
    
fig.add_scatter(x=df.index, 
                y=df.Close, 
                name='Close', 
                mode='lines',
                line_color='blue', 
                opacity=0.5, 
                connectgaps=True)
fig.add_scatter(x=df.Close[peaks].index,
                y=df.Close[peaks],
                name='Peaks',
                mode='markers',
                marker=dict(symbol='x', color='orange'), 
                opacity=0.5)
fig.add_scatter(x=df.Close[valleys].index,
                y=df.Close[valleys],
                name='Valleys',
                mode='markers',
                marker=dict(symbol='x', color='red'), 
                opacity=0.5)

for x, y in trendlines_c:
    fig.add_scatter(x=x,
                    y=y,
                    name='Close Trendlines',
                    mode='lines', 
                    opacity=0.5,
                    line_color='hotpink',
                    showlegend=False)
# for x, y in trendlines_hl:
#     fig.add_scatter(x=x,
#                     y=y,
#                     name='HL Trendlines',
#                     mode='lines', 
#                     opacity=0.5,
#                     line_color='magenta',
#                     showlegend=False)

fig.add_scatter(x=close[peak_valleys].index,
                y=close[peak_valleys],
                name='peak_valleys',
                mode='markers',
                marker=dict(symbol='circle-open', color='green', size=8))
# fig.add_vline(x=close[peaks].index,
#                 # y=heights,
#                 # mode='lines',
#                 )
us_holidays = list(holidays.US(range(start.year, end.year + 1)).keys())
rangebreaks = [dict(bounds=["sat", "mon"]), dict(values=us_holidays)]
# rangebreaks.extend([dict(bounds=[16, 9.5], pattern="hour")])
fig.update_xaxes(rangebreaks=rangebreaks)

color = 'beige'
title = SPY_info_df.loc[ticker, 'Security']

fig.update_layout(title=title, paper_bgcolor=color, plot_bgcolor=color) 
fig.layout.xaxis.rangeslider.visible = False
fig

In [29]:
margins = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
margins

TypeError: list() takes no keyword arguments

### S/R Levels

In [24]:
from sklearn import preprocessing
import time
# pd.set_option('display.max_rows', None)
pd.options.display.float_format = "{:,.2f}".format

In [4]:
end = last_date
years = 1
days = 180
start = end - timedelta(days * years)
ticker = 'aapl'
df = get_ticker_data(ticker)[start:end]

In [5]:
def fibonacci_retracement_levels(df):
    highest_swing = -1
    lowest_swing = -1
    high = df['High']
    low = df['Low']
    nr = df.shape[0]

    for i in range(1, nr - 1):
        if high[i] > high[i - 1] and high[i] > high[i + 1] \
            and (highest_swing == -1 or high[i] > high[highest_swing]):
            highest_swing = i
        if low[i] < low[i - 1] and low[i] < low[i + 1] \
            and (lowest_swing == -1 or low[i] < low[lowest_swing]):
            lowest_swing = i

    ratios = [0, 0.236, 0.382, 0.5, 0.618, 0.786, 1]
    levels = []
    max_level = high[highest_swing]
    min_level = low[lowest_swing]

    for ratio in ratios:
        # Uptrend
        if highest_swing > lowest_swing:
            level = max_level - (max_level - min_level) * ratio
        # Downtrend
        else:
            level = min_level + (max_level - min_level) * ratio
        levels.append(level)

    frl_data = {}
    unit = 'minutes' if df.index[0].minute != 0 else 'days'  
    prev_date = df.iloc[0].name

    for i in range(nr):
        date = df.iloc[i].name
        s_date = date.strftime('%d.%m.%y')
        l = bisect.bisect_left(levels, low[i])
        h = bisect.bisect_left(levels, high[i])
        h = h - 1 if h > 0 else h
        frls = [levels[h], levels[l]] if h != l else [levels[h]]
        # Deals with multiple levels being tested by one bar
        for frl in frls:
            if high[i] > frl and low[i] < frl:
                cum_vol = df.loc[prev_date:date, 'Volume'].sum()
                delta = date - prev_date # time it takes level to form
                delta = delta.days if unit == 'days' else delta.total_seconds() / 60
                prev_date = date
                d = {'Date': [], 'Timedelta': [], 'Volume': [], 'Tested': 0}
                frl_data.setdefault(frl, d)
                frl_data[frl]['Date'].append(s_date)
                frl_data[frl]['Timedelta'].append(delta)
                frl_data[frl]['Volume'].append(cum_vol)
                frl_data[frl]['Tested'] += 1             

    # Calculate significance of levels       
    d = {'FRL': [], 'Date': [], 'Timedelta': [], 'Volume': [], 'Tested': []}

    for k, v in frl_data.items():
        d['FRL'].append(k)
        d['Volume'].append(sum(v['Volume']))
        d['Timedelta'].append(sum(v['Timedelta']))
        d['Tested'].append(v['Tested'])
        d['Date'].append(v['Date'][-1])
        # print(v['Date'][-1])

    # NOT YET DECIDED HOW TO CALCULATE FRL SIGNAL

    return ratios, levels, frl_data


In [6]:
r, l, d = fibonacci_retracement_levels(df)
d

{155.68073529052734: {'Date': ['02-09-22',
   '06-09-22',
   '07-09-22',
   '08-09-22',
   '09-09-22',
   '13-09-22',
   '14-09-22',
   '20-09-22',
   '21-09-22',
   '28-10-22',
   '03-02-23',
   '16-02-23'],
  'Timedelta': [0, 4, 1, 1, 1, 4, 1, 4, 1, 1, 1, 13],
  'Volume': [76957800,
   150672600,
   161164400,
   172373400,
   152952600,
   295641400,
   210622000,
   351442800,
   209386600,
   273942600,
   272618900,
   742740900],
  'Tested': 12},
 148.9456159057617: {'Date': ['16-09-22',
   '23-09-22',
   '28-09-22',
   '24-10-22',
   '26-10-22',
   '27-10-22',
   '28-10-22',
   '02-11-22',
   '11-11-22',
   '14-11-22',
   '15-11-22',
   '16-11-22',
   '17-11-22',
   '21-11-22',
   '22-11-22',
   '01-12-22',
   '05-12-22',
   '13-12-22',
   '02-02-23',
   '03-02-23',
   '21-02-23',
   '22-02-23',
   '23-02-23',
   '27-02-23',
   '28-02-23'],
  'Timedelta': [2,
   2,
   5,
   3,
   2,
   1,
   0,
   5,
   1,
   3,
   1,
   1,
   1,
   4,
   1,
   1,
   4,
   1,
   1,
   0,
   5,


In [7]:
def isSupport(df, i):
    '''Returns True if value is a price support level'''

    X = df['Low']
    support = X[i] < X[i - 1] \
                and X[i] < X[i + 1] \
                and X[i + 1] < X[i + 2] \
                and X[i - 1] < X[i - 2]

    return support


def isResistance(df, i):
    '''Returns True if value is a price resistance level'''

    X = df['High']
    resistance = X[i] > X[i - 1] \
                    and X[i] > X[i + 1] \
                    and X[i + 1] > X[i + 2] \
                    and X[i - 1] > X[i - 2] 

    return resistance
    

In [8]:
def convert_to_timestamp(x):
    """Convert date objects to integers"""
    
    return time.mktime(x.timetuple())

In [14]:
def sr_levels(df):
    '''Returns key support/resistance levels for a security'''

    df = df.copy()
    df['SR Signal'] = 0
    prev_date = df.index[0]
    unit = 'minutes' if prev_date.minute != 0 else 'days'  
    spt, rst = (0, 0)
    levels = []
    s_levels = []
    sr_data = {}
    many_tests = {} # dict of bars that test more than 1 level
    s = (df['High'] - df['Low']).mean()
    nr, nc = df.shape

    def isFarFromLevel(l):
        '''Returns True if price is not near a previously discovered support or resistance'''
        
        return np.sum([abs(l - x[1]) < s for x in levels]) == 0

    for i in range(2, nr):
        date = df.iloc[i].name
        s_date = date.strftime('%d.%m.%y')
        high = df['High'][i]
        low = df['Low'][i]
        close = df['Close'][i]
        new_spt = False
        new_rst = False
        sr_switch = False

        if i < nr - 2:
            if isSupport(df, i):
                if isFarFromLevel(low):
                    new_spt = True
                    spt = low
                    df.loc[date, 'Support'] = spt
                    levels.append((i, spt))
                    s_levels = sorted([x[1] for x in levels])
                    print('NS'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
            
            if isResistance(df, i):
                if isFarFromLevel(high):
                    new_rst = True
                    rst = high
                    df.loc[date, 'Resistance'] = rst
                    levels.append((i, rst))
                    s_levels = sorted([x[1] for x in levels])
                    print('NR'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f},')    

        # Switch support to resistance & vice versa
        if len(levels) > 1:
            if new_spt:
                ix = bisect.bisect(s_levels, spt)
                rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
            if new_rst:
                ix = bisect.bisect_left(s_levels, rst)
                spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]
            if low > rst: # When resistance broken 
                sr_switch = True
                spt = rst
                ix = bisect.bisect(s_levels, low)
                rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
                print('R-S'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
            if high < spt: # When support broken 
                sr_switch = True
                rst = spt
                ix = bisect.bisect_left(s_levels, high)
                spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]
                print('S-R'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
        
        if new_rst or new_spt or sr_switch:
            cum_vol = df.loc[prev_date:date, 'Volume'].sum()
            delta = date - prev_date # time it takes level to form
            delta = delta.days if unit == 'days' else delta.total_seconds() / 60
            prev_date = date
            d = {'Date': [], 'Timedelta': [], 'Volume': [], 'SR': [], 'Tested': 0, 'Tested Date': []}
            sr_data.setdefault(spt, d)
            sr_data[spt]['Date'].append(s_date)
            sr_data[spt]['Timedelta'].append(delta)
            sr_data[spt]['Volume'].append(cum_vol)
            sr_data[spt]['SR'].append('S')
            # Prevents double-counting when support == resistance
            if spt != rst:
                sr_data.setdefault(rst, d)
                sr_data[rst]['Date'].append(s_date)
                sr_data[rst]['Timedelta'].append(delta)
                sr_data[rst]['Volume'].append(cum_vol)
                sr_data[rst]['SR'].append('R')
                                  
        if spt:
            if close < spt:
                df.loc[date, 'SR Signal'] = 1 # Generate signal
            # Check if S/R levels are tested       
            if high > spt and low < spt:
                sr_data[spt]['Tested'] += 1
                sr_data[spt]['Tested Date'].append(s_date)
                print('ST'.ljust(5), f'- {date.date()} - S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                ix = bisect.bisect_left(s_levels, spt)
                n_spt = s_levels[ix - 1] if ix > 0 else s_levels[ix]    
                while low < n_spt and spt != rst and spt != n_spt:
                    print(f'SH-SL - {date.date()} - NS: {n_spt:.2f}, S: {spt:.2f}, R: {rst:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                    rst = spt
                    spt = n_spt
                    many_tests.setdefault(i, set()).union([spt, rst])
                    if ix > 0:
                        ix -= 1
                        n_spt = s_levels[ix]
                        sr_data[n_spt]['Date'].append(s_date)
                        sr_data[n_spt]['Timedelta'].append(delta)
                        sr_data[n_spt]['Volume'].append(cum_vol)
                        sr_data[n_spt]['SR'].append('S')            
   
        if rst:
            if close > rst:
                df.loc[date, 'SR Signal'] = 1 # Generate signal
            # Check if S/R levels are tested       
            if high > rst and low < rst:
                if spt != rst: # Prevents double-counting
                    sr_data[rst]['Tested'] += 1
                    sr_data[rst]['Tested Date'].append(s_date)
                    print('RT'.ljust(5), f'- {date.date()} - R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                    ix = bisect.bisect(s_levels, rst)
                    n_rst = s_levels[ix] if ix < len(s_levels) else s_levels[ix - 1]
                    while high > n_rst and spt != rst and rst != n_rst:
                        print(f'RL-RH - {date.date()} - NR: {n_rst:.2f}, R: {rst:.2f}, S: {spt:.2f}, hi: {high:.2f}, lo: {low:.2f}')
                        spt = rst
                        rst = n_rst
                        many_tests.setdefault(i, set()).union([spt, rst])
                        if ix < len(s_levels) - 1:
                            ix += 1
                            # print(f'ix: {ix}, {s_levels}')
                            n_rst = s_levels[ix]
                            sr_data[n_rst]['Date'].append(s_date)
                            sr_data[n_rst]['Timedelta'].append(delta)
                            sr_data[n_rst]['Volume'].append(cum_vol)
                            sr_data[n_rst]['SR'].append('R')        

        if spt and rst: 
            df.loc[date:, 'Support'] = spt
            df.loc[date:, 'Resistance'] = rst

    del sr_data[0]     
    d = {'SR Level': [], 'Volume': [], 'Timedelta': [], 'Tested': [], 'Date': []}

    for k, v in sr_data.items():
        d['SR Level'].append(k)
        d['Volume'].append(sum(v['Volume']))
        d['Timedelta'].append(sum(v['Timedelta']))
        d['Tested'].append(v['Tested'])
        d['Date'].append(v['Date'][-1])

    ix = 'SR Level'
    sr_df = pd.DataFrame(d, index=d[ix]).drop(columns=ix)
    sr_df['Date'] = pd.to_datetime(sr_df['Date'])
    sr_df['Date'] = sr_df['Date'].apply(convert_to_timestamp)
    scaler = preprocessing.MinMaxScaler(feature_range=(1, 5))
    sd = scaler.fit_transform(sr_df)
    scaled_df = pd.DataFrame(sd, columns=sr_df.columns, index=d[ix])
    scaled_df['Signal'] = scaled_df.mean(axis=1)
    # print(sr_df)
    # print(scaled_df)
    
    # Make 'SR Signal' last column
    cols = list(df.columns)
    cols.append(cols.pop(cols.index('SR Signal')))
    df = df[cols]
    nr, nc = df.shape
    j = nc - 1 # 'SR Signal' column num

    # Assign significance to signals by level
    for i in range(nr):
        if df['SR Signal'][i]:
            if i in many_tests:
                signal = 0
                for l in many_tests[i]:
                    signal += scaled_df.loc[l, 'Signal']
            else:
                if df['Close'][i] > df['Resistance'][i]:
                    l = df['Resistance'][i]
                if df['Close'][i] < df['Support'][i]:
                    l = df['Support'][i]
                signal = scaled_df.loc[l, 'Signal']
 
            df.iloc[i, j] = signal        
    
    return levels, df, sr_data

In [15]:
# sr_data.clear()
levels, ta_df, sr_data = sr_levels(df)
# levels

NS    - 2022-09-08 - S: 152.68, R: 0.00, hi: 156.36, lo: 152.68
NR    - 2022-09-12 - R: 164.26, S: 152.68, hi: 164.26, lo: 159.30,
ST    - 2022-09-15 - S: 152.68, R: 164.26, hi: 155.24, lo: 151.38
NS    - 2022-09-16 - S: 148.37, R: 164.26, hi: 151.35, lo: 148.37
RT    - 2022-09-19 - R: 152.68, S: 148.37, hi: 154.56, lo: 149.10
R-S   - 2022-09-20 - S: 152.68, R: 164.26, hi: 158.08, lo: 153.08
NR    - 2022-09-21 - R: 158.74, S: 152.68, hi: 158.74, lo: 153.60,
ST    - 2022-09-22 - S: 152.68, R: 158.74, hi: 154.47, lo: 150.91
S-R   - 2022-09-23 - R: 152.68, S: 148.37, hi: 151.47, lo: 148.56
RT    - 2022-09-26 - R: 152.68, S: 148.37, hi: 153.77, lo: 149.64
RT    - 2022-09-27 - R: 152.68, S: 148.37, hi: 154.72, lo: 149.95
ST    - 2022-09-28 - S: 148.37, R: 152.68, hi: 150.64, lo: 144.84
S-R   - 2022-09-29 - R: 148.37, S: 148.37, hi: 146.72, lo: 140.68
S-R   - 2022-09-30 - R: 148.37, S: 148.37, hi: 143.10, lo: 138.00
S-R   - 2022-10-03 - R: 148.37, S: 148.37, hi: 143.07, lo: 137.69
S-R   - 20

In [16]:
pprint(sr_data)

{134.3699951171875: {'Date': ['13-10-22',
                              '03-11-22',
                              '07-12-22',
                              '15-12-22',
                              '20-12-22',
                              '23-12-22',
                              '27-12-22',
                              '28-12-22',
                              '29-12-22',
                              '30-12-22',
                              '03-01-23',
                              '04-01-23',
                              '05-01-23',
                              '06-01-23',
                              '09-01-23',
                              '10-01-23',
                              '11-01-23',
                              '12-01-23',
                              '18-01-23'],
                     'SR': ['S',
                            'S',
                            'S',
                            'S',
                            'S',
                            'S',
   

In [26]:
d = {'SR Level': [], 'Volume': [], 'Timedelta': [], 'Tested': []}

for k, v in sr_data.items():
    d['SR Level'].append(k)
    d['Volume'].append(sum(v['Volume']))
    d['Timedelta'].append(sum(v['Timedelta']))
    d['Tested'].append(v['Tested'])

sr_df = pd.DataFrame(d, index=d['SR Level']).drop(columns='SR Level')
scaler = preprocessing.MinMaxScaler(feature_range=(1, 5))
sd = scaler.fit_transform(sr_df)
cols = ['Volume', 'Timedelta', 'Tested']
scaled_df = pd.DataFrame(sd, columns=sr_df.columns, index=d['SR Level'])
scaled_df['Signal'] = scaled_df.mean(axis=1)
print(sr_df.head(), '\n')
print(scaled_df)

            Volume  Timedelta  Tested
152.68  4975022300         70      16
164.26   609351400          8       0
148.37  8030617400        102      22
158.74  2146815800         31       0
134.37  4587985500         53       8 

        Volume  Timedelta  Tested  Signal
152.68    3.35       3.64    3.91    3.63
164.26    1.00       1.00    1.00    1.00
148.37    5.00       5.00    5.00    5.00
158.74    1.83       1.98    1.00    1.60
134.37    3.14       2.91    2.45    2.84
144.13    2.53       3.04    3.18    2.92


## MA Signals

- MA Crossovers, i.e. MA5/MA25, etc...
- Identify when moving averages converge

In [58]:
def calc_slope(x):
    slope = np.polyfit(range(len(x)), x, 1)[0] 
    
    return slope

In [59]:
def calculate_ma_signals(df, MAs):
    for i, ma in enumerate(MAs):
        ix = df.iloc[ma].name
        df[f'MA{ma}'] = df['Close'].rolling(ma).mean()
        df[f'MA{ma} Slope'] = df[f'Close'].rolling(ma).apply(calc_slope)
        # df[f'MA{ma} Slope Diff'] = df[f'MA{ma} Slope'].diff()
        df.loc[ix:, f'MA{ma} Signal'] = np.where(df['Close'][ma:] > df[f'MA{ma}'][ma:], 1, 0)
        df[f'MA{ma} Crossover'] = df[f'MA{ma} Signal'].diff()
        df.drop(columns=f'MA{ma} Signal', inplace=True)

    df.loc[:, 'MA Std. Dev'] = df.loc[:, [f'MA{ma}' for ma in MAs]].std(axis=1)
    
    return df

In [63]:
MAs = [10, 20, 50]
ma_df = calculate_ma_signals(df, MAs)
# ma_df.loc['2022-09-12':'2022-11-12', ['MA10 Slope',	'MA10 Slope Diff', 'MA20 Slope', 'MA50 Slope']]
ma_df.loc['2022-09-12':'2022-11-12']

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,MA10,MA10 Slope,MA10 Crossover,MA20,MA20 Slope,MA20 Crossover,MA50,MA50 Slope,MA50 Crossover,MA Std. Dev
2022-09-12,137.53,140.0,137.23,139.87,139.42,1851100,,,,,,,,,,
2022-09-13,136.32,137.41,133.44,133.54,133.11,1587000,,,,,,,,,,
2022-09-14,133.8,135.36,132.1,133.25,132.82,1577000,,,,,,,,,,
2022-09-15,133.81,135.51,132.66,133.16,132.73,1446500,,,,,,,,,,
2022-09-16,132.12,132.13,128.28,129.89,129.47,2300600,,,,,,,,,,
2022-09-19,128.79,130.79,128.36,130.72,130.3,1059600,,,,,,,,,,
2022-09-20,129.1,129.6,126.82,128.14,127.73,1274700,,,,,,,,,,
2022-09-21,128.73,130.57,126.33,126.48,126.07,1105700,,,,,,,,,,
2022-09-22,125.55,126.04,123.76,124.39,123.99,1378600,131.71,-1.52,,,,,,,,
2022-09-23,123.73,124.43,121.44,123.48,123.08,1481900,130.29,-1.57,,,,,,,,


In [58]:
math.atan2(ma_df['MA10'].values, ma_df.index.values)

TypeError: only size-1 arrays can be converted to Python scalars

In [74]:
ma_df.loc['2022-10-03':'2022-10-04', ['MA10', 'MA10 Slope', 'MA20', 'MA20 Slope']]

Unnamed: 0,MA10,MA10 Slope,MA20,MA20 Slope
2022-10-03,148.93,-0.73,152.09,
2022-10-04,147.85,-0.83,151.67,


In [75]:
x = ma_df.loc['2022-10-03':'2022-10-04', 'MA10']
slope = np.polyfit(range(len(x)), x, 1)[0] 
slope


-1.0799987792968864

### Bollinger Bands

In [17]:
from talib import BBANDS, MACD, RSI

In [18]:
up, mid, down = BBANDS(df['Close'], timeperiod=21, nbdevup=2, nbdevdn=2, matype=0)
bands = {'BB Up': up, 'BB Mid': mid, 'BB Down': down}
bb_df = pd.DataFrame(bands)
bb_df

Unnamed: 0,BB Up,BB Mid,BB Down
2022-08-29,,,
2022-08-30,,,
2022-08-31,,,
2022-09-01,,,
2022-09-02,,,
...,...,...,...
2023-02-17,159.14,148.58,138.01
2023-02-21,158.50,149.08,139.66
2023-02-22,158.18,149.45,140.73
2023-02-23,157.94,149.78,141.62


In [131]:
open.rolling(5, center=True).mean()

Datetime
2023-02-13 09:30:00-05:00      NaN
2023-02-13 09:35:00-05:00      NaN
2023-02-13 09:40:00-05:00   158.76
2023-02-13 09:45:00-05:00   158.66
2023-02-13 09:50:00-05:00   158.61
                             ...  
2023-02-16 15:40:00-05:00   160.52
2023-02-16 15:45:00-05:00   160.51
2023-02-16 15:50:00-05:00   160.51
2023-02-16 15:55:00-05:00      NaN
2023-02-16 16:00:00-05:00      NaN
Name: Open, Length: 313, dtype: float64

In [16]:
from talib import BBANDS, MACD, RSI

In [17]:
up, mid, low = BBANDS(df.Close, timeperiod=21, nbdevup=2, nbdevdn=2, matype=0)

In [21]:
# df[['BB Up', 'BB Mid', 'BB Down']] = up, mid, low
df['BB Up'] = up
df['BB Mid'] = mid
df['BB Down'] = low
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,BB Up,BB Mid,BB Down
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-02-13 09:30:00-05:00,158.89,159.91,158.64,158.72,158.72,19034,,,
2023-02-13 09:35:00-05:00,158.80,159.34,158.80,159.11,159.11,5547,,,
2023-02-13 09:40:00-05:00,158.95,159.05,158.58,158.78,158.78,9484,,,
2023-02-13 09:45:00-05:00,158.70,158.78,158.23,158.43,158.43,51620,,,
2023-02-13 09:50:00-05:00,158.44,158.70,158.37,158.51,158.51,11596,,,
...,...,...,...,...,...,...,...,...,...
2023-02-16 15:40:00-05:00,160.51,160.70,160.24,160.67,160.67,32917,160.70,160.33,159.96
2023-02-16 15:45:00-05:00,160.66,160.69,160.16,160.31,160.31,52181,160.67,160.35,160.02
2023-02-16 15:50:00-05:00,160.35,160.85,160.34,160.59,160.59,79573,160.68,160.37,160.06
2023-02-16 15:55:00-05:00,160.56,160.61,160.27,160.53,160.53,185843,160.69,160.39,160.10


In [23]:
rsi = RSI(df.Close, timeperiod=14)
rsi.index

DatetimeIndex(['2023-02-13 09:30:00-05:00', '2023-02-13 09:35:00-05:00',
               '2023-02-13 09:40:00-05:00', '2023-02-13 09:45:00-05:00',
               '2023-02-13 09:50:00-05:00', '2023-02-13 09:55:00-05:00',
               '2023-02-13 10:00:00-05:00', '2023-02-13 10:05:00-05:00',
               '2023-02-13 10:10:00-05:00', '2023-02-13 10:15:00-05:00',
               ...
               '2023-02-16 15:15:00-05:00', '2023-02-16 15:20:00-05:00',
               '2023-02-16 15:25:00-05:00', '2023-02-16 15:30:00-05:00',
               '2023-02-16 15:35:00-05:00', '2023-02-16 15:40:00-05:00',
               '2023-02-16 15:45:00-05:00', '2023-02-16 15:50:00-05:00',
               '2023-02-16 15:55:00-05:00', '2023-02-16 16:00:00-05:00'],
              dtype='datetime64[ns, pytz.FixedOffset(-300)]', name='Datetime', length=313, freq=None)

In [123]:
macd, macdsignal, macdhist = MACD(df.Close, fastperiod=12, slowperiod=26, signalperiod=9)