Technical Analysis

In [1]:
import os
import pandas as pd
import numpy as np
import talib as ta
from pathlib import Path
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
%matplotlib inline

In [4]:

tickers = ["AAPL", "MSFT", "AMZN", "FB", "TSLA"]
#START = "today"
#YEARS = 5

Securities Data

In [5]:
load_dotenv()

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

# Format current date as ISO format
today = pd.Timestamp("2020-12-31", tz="America/New_York").isoformat()

# Set timeframe to one day ('1D') for the Alpaca API
timeframe = "1D"
start = pd.Timestamp("2020-01-14", tz="America/New_York").isoformat()
end = pd.Timestamp("2021-01-14", tz="America/New_York").isoformat()

df_portfolio = alpaca.get_barset(
    tickers,
    timeframe,
    start = start,
    end = end
).df

df_portfolio.index = df_portfolio.index.date
df_portfolio = df_portfolio.dropna()

In [9]:

# Create and empty DataFrame for each ticker
df_aapl = pd.DataFrame()
df_msf = pd.DataFrame()
df_amzn = pd.DataFrame()
df_fb = pd.DataFrame()
df_tsla = pd.DataFrame()

# Fetch the prices for each ticker
df_aapl = df_portfolio["AAPL"]
df_msft = df_portfolio["MSFT"]
df_amzn = df_portfolio['AMZN']
df_fb = df_portfolio["FB"]
df_tsla = df_portfolio["TSLA"]

Tech Analysis

In [10]:
def applyTA (input_df):
    """
        Add columns of technical analysis in the dataframe
        Parameter:
            input_df - dataframe with date index and columns of OHLCV data
        Returns:
            df_ta - dataframe with date index and technical analysis signals
    """
    
    df = input_df.copy()
    
    # Construct EMA Singals
    df['8D_EMA'] = ta.EMA(df['close'], timeperiod = 8)
    df['21D_EMA'] = ta.EMA(df['close'], timeperiod = 21)
    df['50D_EMA'] = ta.EMA(df['close'], timeperiod = 50)
    df['8_21_x_long'] = np.where(df['8D_EMA'] > df['21D_EMA'], 1.0, 0.0)
    df['8_21_x_short'] = np.where(df['8D_EMA'] < df['21D_EMA'], -1.0, 0.0)
    df['8_21_x_signal'] = df['8_21_x_long'] + df['8_21_x_short'] 
    df['21_50_x_long'] = np.where(df['21D_EMA'] > df['50D_EMA'], 1.0, 0.0)
    df['21_50_x_short'] = np.where(df['21D_EMA'] < df['50D_EMA'], -1.0, 0.0)
    df['21_50_x_signal'] = df['21_50_x_long'] + df['21_50_x_short']
    df['above_8D'] = np.where(df['close'] > df['8D_EMA'], 1.0, 0.0)
    df['above_21D'] = np.where(df['close'] > df['21D_EMA'], 1.0, 0.0)
    df['above_50D'] = np.where(df['close'] > df['50D_EMA'], 1.0, 0.0)
    
    # Bollinger Bands
    df['bb_upper_band'], df['bb_middle_band'], df['bb_lower_band'] = ta.BBANDS(df['close'], timeperiod =20)
    df['bb_long'] = np.where(df['close'] < df['bb_lower_band'], 1.0, 0.0)
    df['bb_short'] = np.where(df['close'] > df['bb_upper_band'], -1.0, 0.0)
    df['bb_signal'] = df['bb_long'] + df['bb_short']
    
    # Calculate RSI
    df['RSI'] = ta.RSI(df['close'],14)
    df['rsi_short'] = np.where(df['RSI'] > 70, -1.0, 0.0)
    df['rsi_long'] = np.where(df['RSI'] < 30, 1.0, 0.0)
    df['rsi_signal'] = df['rsi_long'] + df['rsi_short']
    
    # Calculate Stochastic Oscillators trading signal
    df['slowk'], df['slowd'] = ta.STOCH(df['high'], df['low'], df['close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)   
    df['slowk_x_long'] = np.where(df['slowk'] > df['slowd'], 1.0, 0.0)
    df['slowk_x_short'] = np.where(df['slowk'] < df['slowd'], -1.0, 0.0)
    df['slowk_x_signal'] = df['slowk_x_long'] + df['slowk_x_short']
    
     # Calculate ADX
    df['adx'] = ta.ADX(df['high'], df['low'], df['close'], timeperiod=14)
    df['adx_strong'] = np.where(df['adx'] > 50, 1.0, 0.0)
    df['adx_weak'] = np.where(df['adx'] < 20, 1.0, 0.0)

    # Calculate OBV signal
    df['obv'] = ta.OBV(df['close'], df['volume'])
    df['obv_change']=df['obv'].pct_change()
    df['obv_increase'] = np.where(df['obv_change'] > 0, 1.0, 0.0)

    # Calculate Chaikin A/D Oscillator signal
    df['ADOSC'] = ta.ADOSC(df['high'], df['low'], df['close'], df['volume'], fastperiod=3, slowperiod=10)    
    df['adosc_positive'] = np.where(df['ADOSC'] > 0, 1.0, 0.0)

    # Drop non-TA signals columns and NAs
    df_ta = df[{'8_21_x_signal', '21_50_x_signal', 'above_8D', 'above_21D', 'above_50D', 'bb_signal', 'rsi_signal', 'slowk_x_signal', 'adx_strong', 'adx_weak', 'obv_increase', 'adosc_positive'}]
    df_ta = df_ta.dropna()
    
    return df_ta

In [11]:
applyTA(df_aapl)


Unnamed: 0,above_21D,adosc_positive,above_8D,rsi_signal,slowk_x_signal,adx_strong,adx_weak,obv_increase,above_50D,21_50_x_signal,bb_signal,8_21_x_signal
2020-08-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-08,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,-1.0,0.0,1.0
2021-01-11,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,1.0
2021-01-12,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,1.0
2021-01-13,1.0,0.0,1.0,0.0,-1.0,0.0,1.0,0.0,1.0,-1.0,0.0,1.0


In [12]:
applyTA(df_msft)


Unnamed: 0,above_21D,adosc_positive,above_8D,rsi_signal,slowk_x_signal,adx_strong,adx_weak,obv_increase,above_50D,21_50_x_signal,bb_signal,8_21_x_signal
2020-08-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-08,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0
2021-01-11,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0
2021-01-12,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.0
2021-01-13,0.0,1.0,0.0,0.0,-1.0,0.0,1.0,1.0,0.0,1.0,0.0,-1.0


In [13]:
applyTA(df_amzn)

Unnamed: 0,above_21D,adosc_positive,above_8D,rsi_signal,slowk_x_signal,adx_strong,adx_weak,obv_increase,above_50D,21_50_x_signal,bb_signal,8_21_x_signal
2020-08-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-08-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-08-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-08,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,-1.0
2021-01-11,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.0
2021-01-12,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0
2021-01-13,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0


Candlesticks

In [None]:

import os
import talib as ta
import pandas as pd
import numpy as np
import datetime as dt
import alpaca_trade_api as tradeapi
import hvplot.pandas
from dotenv import load_dotenv

load_dotenv()
TICKERS = ["AAPL", "MSFT", "AMZN", "FB", "TSLA"]
START = "today"
YEARS = 5

In [None]:
def get_workday(date):
    """ Convert Date to its latest workday
        Parameter:
            date - Any format usagle with pd.to_datetime
        Return:
            timestamp object of the nearest workday earlier than the given date
    """
    date = pd.to_datetime(date)
    offset = max(1, (date.weekday() + 6) % 7 - 3)
    timedelta = dt.timedelta(offset)
    return pd.Timestamp((date - timedelta).date(),tz="America/New_York").isoformat()

def get_alpaca_data():
    """ Get 2 years OHLCV data from Alpaca API for the global set of tickers
        Parameter:
            None
        Return:
            Pandas Dataframe of 2 years of OHLC data for the global tickers
    """
    # Set Keys for Alpaca Trade API
    alpaca_api_key = os.getenv("ALPACA_API_KEY")
    alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")
    
    # Initiate REST API
    api = tradeapi.REST(
        alpaca_api_key,
        alpaca_secret_key,
        api_version = "v2"
    )
    
    # Set start and end date
    end_date = get_workday(START)
    two_yrs = dt.timedelta(days=(365*YEARS))
    start_date = get_workday((pd.to_datetime("today") - two_yrs).date())
    # Set timeframe to '1D' for Alpaca API
    timeframe = "1D"
    # Get OHLCV Data
    portfolio = api.get_barset(
        TICKERS,
        timeframe,
        start = start_date,
        end = end_date
    ).df
    
    return portfolio

In [None]:
    """
        Add columns of candlestick patterns found in the dataframe
        Parameter:
            input_df - dataframe with date index and columns of OHLCV data
        Returns:
            df - the same dataframe received with additional columns of the candlestick patterns found
    """
    
    df = input_df.copy()
    # Bullish
    df["CDLHAMMER"] =  ta.CDLHAMMER(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLINVERTEDHAMMER "] =  ta.CDLINVERTEDHAMMER(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLPIERCING"] =  ta.CDLPIERCING(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLMORNINGSTAR"] =  ta.CDLMORNINGSTAR(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    df["CDL3WHITESOLDIERS"] =  ta.CDL3WHITESOLDIERS(df["open"], df["high"], df["low"], df["close"])/100
    # Bearish
    df["CDLHANGINGMAN"] =  ta.CDLHANGINGMAN(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLSHOOTINGSTAR"] =  ta.CDLSHOOTINGSTAR(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLEVENINGSTAR"] =  ta.CDLEVENINGSTAR(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    df["CDL3BLACKCROWS"] =  ta.CDL3BLACKCROWS(df["open"], df["high"], df["low"], df["close"])/100
    df["CDLDARKCLOUDCOVER"] =  ta.CDLDARKCLOUDCOVER(df["open"], df["high"], df["low"], df["close"], penetration = 0)/100
    # Both
    df["CDLENGULFING"] =  ta.CDLENGULFING(df["open"], df["high"], df["low"], df["close"])/100

    return df

In [None]:
port_df = get_alpaca_data()
aapl_df = applyCDL(port_df["AAPL"])
aapl_df["pct_change"] = aapl_df["close"].pct_change()
aapl_df["Return"] = np.where(aapl_df["pct_change"] >= 0, 1.0, -1.0)
FEATURES = ['CDLHAMMER', 'CDLINVERTEDHAMMER ', 'CDLPIERCING', 'CDLMORNINGSTAR',
            'CDL3WHITESOLDIERS', 'CDLHANGINGMAN', 'CDLSHOOTINGSTAR','CDLEVENINGSTAR', 
            'CDL3BLACKCROWS', 'CDLDARKCLOUDCOVER', 'CDLENGULFING']
aapl_df[FEATURES] = aapl_df[FEATURES].shift(1)
aapl_df.drop(columns = ["open", "high", "low", "close", "volume", "pct_change"], inplace = True)
aapl_df.dropna(inplace = True)
aapl_df.tail()