## Requirements

In [4]:
import requests
import pandas as pd
import datetime
import os
import numpy as np
import matplotlib.pyplot as plt 
# API token for EOD Historical Data, read from the environment
# (None when the 'EOD_Historical_Data_API_Key' variable is not set).
api_key = os.environ.get('EOD_Historical_Data_API_Key')

## ETL Functions

In [3]:
def getDailyStockPrices(symbol: str, start_date: str, end_date: str = None):
    """Download end-of-day prices for one symbol from EOD Historical Data.

    Args:
        symbol: Ticker without exchange suffix; ".US" is appended to it.
        start_date: One end of the date range, formatted "YYYY-MM-DD".
        end_date: Other end of the date range, formatted "YYYY-MM-DD".
            Defaults to today's date. If it falls before ``start_date``
            the two are swapped.

    Returns:
        DataFrame with 'symbol' as its first column. When the response
        contains a 'date' column it is parsed to datetime and also used as
        the index; when the response has no rows the frame is returned
        without a date index.

    Raises:
        ValueError: If a date string does not match "YYYY-MM-DD".
        requests.HTTPError: If the API answers with an error status.
    """
    date_format = "%Y-%m-%d"

    # Set last date of dataset to today unless assigned during function call
    if end_date is None:
        end_date = datetime.datetime.now().strftime(date_format)

    # Parse both bounds (this also validates the format) and order them
    first_day = datetime.datetime.strptime(start_date, date_format)
    last_day = datetime.datetime.strptime(end_date, date_format)
    if first_day > last_day:
        first_day, last_day = last_day, first_day

    # Send the bounds back as plain dates: interpolating the datetime objects
    # directly would put "YYYY-MM-DD 00:00:00" into the query string.
    api_url = f'https://eodhistoricaldata.com/api/eod/{symbol}.US'
    query = {
        'from': first_day.strftime(date_format),
        'to': last_day.strftime(date_format),
        'api_token': api_key,
        'fmt': 'json',
    }
    response = requests.get(api_url, params=query, timeout=30)
    response.raise_for_status()
    df = pd.DataFrame(response.json())

    # Add column to DataFrame containing Symbol
    df.insert(loc=0, column='symbol', value=symbol)

    # Nothing to index when the API returned no rows for the range
    if 'date' not in df.columns:
        return df

    # Format date column as datetime, and set as DataFrame index
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index("date", drop=False)

    return df

In [None]:
def getIntradayStockPrices(symbol: str, interval: int = 2):
    """Download 1-minute intraday bars and aggregate them per trading day.

    Only bars from 09:30 (inclusive) to 16:00 (exclusive) US/Eastern are
    kept. Each day's 390-minute window is then divided into ``interval``
    equal slices, and the bars in each slice are combined into one OHLCV row.

    Args:
        symbol: Ticker without exchange suffix; ".US" is appended to it.
        interval: Number of slices per day (2 gives two 195-minute slices).

    Returns:
        DataFrame with columns 'symbol', 'date_est', 'interval_group',
        'open', 'high', 'low', 'close', 'volume'.

    Raises:
        ValueError: If ``interval`` is smaller than 1.
        requests.HTTPError: If the API answers with an error status.
    """
    if interval < 1:
        raise ValueError("interval must be a positive integer")

    # API Request to return Pandas DataFrame
    api_url = f'https://eodhistoricaldata.com/api/intraday/{symbol}.US'
    query = {'api_token': api_key, 'interval': '1m', 'fmt': 'json'}
    response = requests.get(api_url, params=query, timeout=30)
    response.raise_for_status()
    df = pd.DataFrame(response.json())

    # Convert UNIX datetime from UTC to Eastern (stored without tz info)
    unixUTC = pd.to_datetime(df['timestamp'], unit='s')
    df['datetime_est'] = unixUTC.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.tz_localize(None)

    # Add column to DataFrame, in 1st position, containing truncated Date (EST)
    df.insert(loc=0, column='date_est', value=df['datetime_est'].dt.date)

    # Vectorized trading-session test on seconds since midnight
    clock = df['datetime_est'].dt
    seconds_of_day = clock.hour * 3600 + clock.minute * 60 + clock.second
    market_open = 9 * 3600 + 30 * 60
    market_close = 16 * 3600
    in_market = (seconds_of_day >= market_open) & (seconds_of_day < market_close)

    # Filter for Market hours; copy so the column added below does not write
    # into a view of the unfiltered frame.
    df = df[in_market].copy()

    # Define Interval Groups. Minutes are counted from the 09:30 open (the
    # opening bar is minute 1) rather than from the first row, so data that
    # starts mid-session still lands in the correct slice.
    minutes_elapsed = (seconds_of_day[in_market] - market_open) / 60 + 1
    # Upside-down floor division to convert floor to ceiling with negation.
    df['interval_group'] = -(-minutes_elapsed // (390 / interval))

    # Aggregate Data using Interval Groups
    agg_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    df = df.groupby(['date_est', 'interval_group']).agg(agg_dict).reset_index()

    # Add column to DataFrame, in 1st position, containing Symbol
    df.insert(loc=0, column='symbol', value=symbol)

    return df

## Technical Indicators

In [None]:
def sma(df, column: str, period: int):
    """Simple moving average of ``column`` over ``period`` rows.

    Writes the result into ``df`` (in place) as "sma_<period>" and returns it
    as a one-column DataFrame that keeps the source column's name. Rows
    without a full window behind them are NaN.
    """
    col_header = f"sma_{period}"

    # Full windows only, so the leading period-1 rows stay NaN
    windows = df[[column]].rolling(window=period, min_periods=period)
    averages = windows.mean()

    df[col_header] = averages
    return averages

In [None]:
def ema(df, column: str, period: int):
    """Exponential moving average of ``column`` with span ``period``.

    Uses the recursive (``adjust=False``) form. Writes the result into ``df``
    (in place) as "ema_<period>" and returns it as a one-column DataFrame
    that keeps the source column's name. The first period-1 rows are NaN.
    """
    col_header = f"ema_{period}"

    weighted = df[[column]].ewm(span=period, adjust=False, min_periods=period)
    averages = weighted.mean()

    df[col_header] = averages
    return averages

### Arnaud Legoux Moving Average
Offset: A decimal value between 0 and 1. A setting of 0.99 makes the ALMA extremely responsive, while a value of 0.01 makes it very smooth.

Sigma: A setting of 6 makes the filter rather large, while a smaller sigma setting makes it more focused. According to Mr. Legoux, a sigma value of 6 offers good performance.

In [1]:
def alma(df, column: str, period: int, offset: float = 0.85, sigma: float = 6.0):
    """Arnaud Legoux moving average of ``column``.

    Every window of ``period`` consecutive values is averaged with Gaussian
    weights centred at index ``floor(offset * (period - 1))`` of the window,
    using a width of ``period / sigma``.

    Args:
        df: DataFrame holding the data; gains an "alma_<period>" column.
        column: Name of the column to smooth.
        period: Window length in rows; must be at least 1.
        offset: Position of the weight peak inside the window (0 to 1).
        sigma: Divisor applied to ``period`` to obtain the Gaussian width.

    Returns:
        NumPy array of shape (len(df), 1). The first ``period - 1`` rows are
        NaN (all rows when ``df`` is shorter than ``period``).

    Raises:
        ValueError: If ``period`` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be a positive integer")

    # Convert column to Numpy Array
    values = df[column].to_numpy(dtype=float)

    # The weights depend only on the window position, so build them once
    # instead of once per output row.
    m = np.floor(offset * (period - 1))
    s = period / sigma
    position = np.arange(period)
    weights = np.exp(-(position - m) * (position - m) / (2 * s * s))

    # Rows without a full window behind them stay NaN
    result = np.full((len(values), 1), np.nan)
    if len(values) >= period:
        # 'valid' correlation yields one weighted sum per full window
        weighted_sums = np.correlate(values, weights, mode='valid')
        result[period - 1:, 0] = weighted_sums / weights.sum()

    # Add column to DataFrame with ALMA period in header
    col_header = "alma_" + str(period)
    df[col_header] = result[:, 0]

    return result

### Bollinger Bands
Short Term: 10 day MA, 1.5 std dev

Medium Term (most commonly used): 20 day MA, 2 std dev

Long Term: 50 day MA, 2.5 std dev

In [None]:
def bollingerBands(df, column: str, ma_type: str = 'alma', period: int = 20, std: float = 2, plot: bool = True):
    """Add Bollinger Bands for ``column`` to ``df`` and optionally plot them.

    Args:
        df: DataFrame to analyse. It is modified in place: the chosen moving
            average plus "bb_upper_<ma>_<period>" and "bb_lower_<ma>_<period>"
            columns are added.
        column: Name of the price column.
        ma_type: 'alma', 'ema', or anything else for a simple moving average
            (case-insensitive).
        period: Window length for both the moving average and the rolling
            standard deviation.
        std: Number of standard deviations between the average and each band.
        plot: When True, draw the price, the moving average and both bands.

    Returns:
        The same ``df`` with the new columns.
    """
    data = df[[column]]
    ma_type = ma_type.lower()

    # Calculate moving average
    if ma_type == 'alma':
        # Use Arnaud Legoux moving average
        ma = alma(df, column, period)
        col_header = 'alma_' + str(period)
    elif ma_type == 'ema':
        # Use exponential moving average
        ma = ema(df, column, period)
        col_header = 'ema_' + str(period)
    else:
        # Use simple moving average
        ma = sma(df, column, period)
        col_header = 'sma_' + str(period)

    # Calculate rolling Standard Deviation
    rstd = data.rolling(window=period, min_periods=period).std()

    # Add columns to DataFrame with Bollinger Bands moving average & period in header
    upper_header = 'bb_upper_' + col_header
    lower_header = 'bb_lower_' + col_header
    df[upper_header] = ma + (rstd * std)
    df[lower_header] = ma - (rstd * std)

    # Show Visual (skipped for an empty frame: there is nothing to draw and
    # the first/last price lookup below would fail)
    if plot and not df.empty:
        # Scope the style to this figure so the caller's matplotlib settings
        # are left untouched afterwards.
        with plt.style.context('fivethirtyeight'):
            # Colour of the price line follows the dataset trend: red when the
            # last value is below the first, green otherwise.
            falling = df[column].iloc[0] > df[column].iloc[-1]
            price_color = '#E45756' if falling else '#54A24B'
            data.plot(label=column, color=price_color, linewidth=2, figsize=(20, 10))
            df[upper_header].plot(label=upper_header, linestyle='--', linewidth=1, color='black')
            df[col_header].plot(label=col_header, linestyle='--', linewidth=1.2, color='grey')
            df[lower_header].plot(label=lower_header, linestyle='--', linewidth=1, color='black')
            plt.legend(loc='upper left')
            # Title with the symbol only when the frame actually carries one
            if 'symbol' in df.columns:
                plt.title(df['symbol'].iloc[0])
            plt.show()

    return df


In [None]:
def rsi(df, column: str, period: int = 14, ma_type: str = 'alma'):
    """Add a Relative Strength Index column for ``column`` to ``df``.

    Row-to-row changes are split into gains and losses, each is smoothed with
    the chosen moving average ('alma', 'ema', or anything else for a simple
    moving average; case-insensitive), and the result is stored in ``df`` as
    "rsi_<ma>_<period>". Returns the same ``df``.
    """
    change = df[[column]].diff()
    ma_type = ma_type.lower()

    # Gains keep the positive moves, losses the magnitude of the negative ones
    gains = change.clip(lower=0)
    losses = -1 * change.clip(upper=0)

    # Pick the smoothing function and the matching column-name prefix
    if ma_type == 'alma':
        smoother, prefix = alma, 'alma_'
    elif ma_type == 'ema':
        smoother, prefix = ema, "ema_"
    else:
        smoother, prefix = sma, "sma_"

    avg_gain = smoother(gains, column, period)
    avg_loss = smoother(losses, column, period)
    col_header = prefix + str(period)

    # RSI = 100 - 100 / (1 + average gain / average loss)
    relative_strength = avg_gain / avg_loss
    index_values = 100 - (100 / (1 + relative_strength))

    df['rsi_' + col_header] = index_values

    return df

## Misc.

In [8]:
def sp500():
    """Return the S&P 500 ticker symbols listed on Wikipedia.

    Reads the first table of the "List of S&P 500 companies" page, sorts it
    by its 'Symbol' column, and returns the symbols as a list with every "."
    replaced by "-".
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    constituents = pd.read_html(url)[0]

    # Sort on the raw symbol first, then pull the column out as a plain list
    tickers = constituents.sort_values(by='Symbol')['Symbol'].to_list()

    # Swap the dot separator for a dash
    return [ticker.replace('.', '-') for ticker in tickers]