## Requirements

In [2]:
import requests
import pandas as pd
import datetime
import os
import numpy as np
# EOD Historical Data API token, read from the environment; None when the
# variable is not set.
api_key = os.getenv('EOD_Historical_Data_API_Key')

## ETL Functions

In [3]:
def getDailyStockPrices(symbol: str, start_date: str, end_date: str = None):
    """Download end-of-day price rows for a symbol from EOD Historical Data.

    Args:
        symbol: Ticker without exchange suffix; ".US" is appended in the URL.
        start_date: First date of the range, formatted "YYYY-MM-DD".
        end_date: Last date of the range, formatted "YYYY-MM-DD".
            Defaults to today's date.

    Returns:
        A DataFrame built from the JSON rows, with a leading `symbol` column
        and the parsed `date` column used as the index (and kept as a
        column). If the API returns no rows, an empty DataFrame is returned.

    Raises:
        ValueError: If either date string does not match "YYYY-MM-DD".
        requests.HTTPError: If the API answers with an error status.
    """
    date_format = "%Y-%m-%d"

    # Set last date of dataset to today unless assigned during function call
    if end_date is None:
        end_date = datetime.datetime.now().strftime(date_format)

    # Parse both bounds (this also validates their format) and swap them if
    # they were supplied in reverse order.
    range_start = datetime.datetime.strptime(start_date, date_format)
    range_end = datetime.datetime.strptime(end_date, date_format)
    if range_end < range_start:
        range_start, range_end = range_end, range_start

    # The bounds are formatted back to plain dates: interpolating the
    # datetime objects directly would put "YYYY-MM-DD 00:00:00" in the query.
    api_url = f'https://eodhistoricaldata.com/api/eod/{symbol}.US'
    query = {
        'from': range_start.strftime(date_format),
        'to': range_end.strftime(date_format),
        'api_token': api_key,
        'fmt': 'json',
    }
    response = requests.get(api_url, params=query, timeout=30)
    response.raise_for_status()
    df = pd.DataFrame(response.json())

    # Nothing to label or index when the range holds no rows.
    if df.empty:
        return df

    # Add column to DataFrame containing Symbol
    df.insert(loc=0, column='symbol', value=symbol)

    # Format date column as datetime, and set as DataFrame index
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index("date", drop=False)

    return df

In [None]:
def getIntradayStockPrices(symbol: str, interval: int = 2):
    """Download 1-minute intraday bars and aggregate them into session buckets.

    Only bars inside the regular session (09:30 inclusive to 16:00 exclusive,
    US/Eastern) are kept. Each trading day is then split into `interval`
    equal buckets of the 390-minute session and the bars in each bucket are
    combined into one OHLCV row.

    Args:
        symbol: Ticker without exchange suffix; ".US" is appended in the URL.
        interval: Number of buckets per session (default 2, i.e. two
            195-minute halves).

    Returns:
        A DataFrame with columns `symbol`, `date_est`, `interval_group`,
        `open`, `high`, `low`, `close`, `volume`; empty (same columns) when
        the API returns no rows or none fall inside market hours.

    Raises:
        ValueError: If `interval` is not positive.
        requests.HTTPError: If the API answers with an error status.
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of buckets per session")

    output_columns = ['symbol', 'date_est', 'interval_group',
                      'open', 'high', 'low', 'close', 'volume']
    market_open_minute = 9 * 60 + 30   # 09:30
    market_close_minute = 16 * 60      # 16:00
    session_minutes = 390

    # API Request to return Pandas DataFrame
    api_url = f'https://eodhistoricaldata.com/api/intraday/{symbol}.US'
    query = {'api_token': api_key, 'interval': '1m', 'fmt': 'json'}
    response = requests.get(api_url, params=query, timeout=30)
    response.raise_for_status()
    df = pd.DataFrame(response.json())
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    # Convert UNIX datetime from UTC to Eastern (stored as naive local time)
    unix_utc = pd.to_datetime(df['timestamp'], unit='s')
    df['datetime_est'] = (
        unix_utc.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.tz_localize(None)
    )

    # Add column to DataFrame, in 1st position, containing truncated Date (EST)
    df.insert(loc=0, column='date_est', value=df['datetime_est'].dt.date)

    # Vectorized trading-session test: minutes since local midnight per bar.
    clock = df['datetime_est'].dt
    minute_of_day = clock.hour * 60 + clock.minute + clock.second / 60
    in_market = (minute_of_day >= market_open_minute) & (minute_of_day < market_close_minute)

    # Copy so the column assignment below does not write into a slice.
    df = df.loc[in_market].copy()
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    # Define Interval Groups. Minutes are counted from the 09:30 open of each
    # bar's own day (1-based), so buckets stay aligned even when the first
    # downloaded bar is not exactly at the open.
    minutes_elapsed = minute_of_day[in_market] - market_open_minute + 1
    # Upside-down floor division to convert floor to ceiling with negation.
    df['interval_group'] = -(-minutes_elapsed // (session_minutes / interval))

    # Aggregate Data using Interval Groups
    agg_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    df = df.groupby(['date_est', 'interval_group']).agg(agg_dict).reset_index()

    # Add column to DataFrame, in 1st position, containing Symbol
    df.insert(loc=0, column='symbol', value=symbol)

    return df

## Technical Indicators

### Arnaud Legoux Moving Average
Offset: A decimal between 0 and 1 that sets where the weighting window peaks. Values near 1 (e.g. 0.99) put the most weight on the latest prices, making the ALMA extremely responsive, while values near 0 (e.g. 0.01) make it very smooth.

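Sigma: Controls the width of the Gaussian weighting window (its standard deviation is period / sigma). A larger sigma concentrates the weights around the offset point, making the filter more focused, while a smaller sigma spreads them out, making it wider. According to Mr. Legoux, a sigma value of 6 offers good performance.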

In [None]:
def alma(df, period: int, offset: float = 0.85, sigma: float = 6.0):
    """Compute the Arnaud Legoux Moving Average of `adjusted_close`.

    Each output value is a Gaussian-weighted average of the last `period`
    prices. The weight peak sits at index floor(offset * (period - 1)) of the
    window (index period - 1 being the latest price) and the Gaussian's
    standard deviation is period / sigma.

    Args:
        df: DataFrame with an `adjusted_close` column.
        period: Window length; must be at least 1.
        offset: Position of the weight peak within the window.
        sigma: Divisor of `period` giving the Gaussian's standard deviation.

    Returns:
        A 1-D float array as long as `df`; the first period - 1 entries are
        NaN because no full window is available there.

    Raises:
        ValueError: If `period` is less than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1")

    prices = df['adjusted_close'].to_numpy(dtype=float)
    smoothed = np.full(prices.shape, np.nan)
    if len(prices) < period:
        return smoothed

    # The weights depend only on the parameters, so build them once and
    # normalise them up front instead of per row.
    peak = np.floor(offset * (period - 1))
    width = period / sigma
    window_index = np.arange(period)
    weights = np.exp(-(window_index - peak) ** 2 / (2 * width * width))
    weights /= weights.sum()

    # 'valid' correlation yields one weighted sum per full window, the first
    # of which ends at row period - 1.
    smoothed[period - 1:] = np.correlate(prices, weights, mode='valid')

    return smoothed

In [None]:
def rsi(df, periods: int = 14, ema = True):
    """Compute the Relative Strength Index of the `close` column.

    Args:
        df: DataFrame with a `close` column.
        periods: Look-back length for the averages of gains and losses.
        ema: When truthy, average with an exponentially weighted mean
            (com = periods - 1); otherwise use a simple rolling mean.

    Returns:
        A Series aligned with `df` holding 100 - 100 / (1 + RS), where RS is
        average gain divided by average loss. Leading entries are NaN until
        `periods` observations are available.
    """
    close_delta = df['close'].diff()

    # Make two series: one for lower closes and one for higher closes
    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)

    if ema:
        # Use exponential moving average
        ma_up = up.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
        ma_down = down.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    else:
        # Use simple moving average. rolling() takes no `adjust` argument;
        # passing one raised TypeError and made this branch unusable.
        ma_up = up.rolling(window=periods).mean()
        ma_down = down.rolling(window=periods).mean()

    rs = ma_up / ma_down
    return 100 - (100 / (1 + rs))

## Misc.

In [1]:
def sp500():
    """Return the S&P 500 ticker symbols listed on Wikipedia.

    Reads the first table on the Wikipedia constituents page, orders it by
    its `Symbol` column and returns the symbols as a list of strings with
    every '.' replaced by '-'.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

    # First table on the page holds the current constituents.
    constituents = pd.read_html(wiki_url)[0]

    # Order by ticker and renumber the rows.
    ordered = constituents.sort_values(by='Symbol').reset_index(drop=True)
    tickers = ordered.Symbol.to_list()

    # Convert Non-Class A Stocks to readable format
    return [ticker.replace('.', '-') for ticker in tickers]