In [2]:
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from dateutil.relativedelta import relativedelta
import alpaca_trade_api as alp

# Load the Alpaca credentials from a local file so no secret is hard-coded in
# the notebook. The file is read as comma-separated KEY=VALUE pairs; newlines
# are discarded before splitting.
with open('paper_api_keys.txt') as api_file:
    api_keys = api_file.read().replace('\n', '').split(',')
    # Split each pair on the FIRST '=' only, so a value that itself contains '='
    # is kept whole, and skip fragments without '=' (e.g. the empty string left
    # behind by a trailing comma) instead of failing on them.
    alpaca_api = {
        key.strip(): value.strip()
        for key, sep, value in (pair.partition('=') for pair in api_keys)
        if sep
    }

# REST client shared by the cells below; a missing entry raises KeyError here.
api = alp.REST(key_id=alpaca_api['APCA_API_KEY_ID'], secret_key = alpaca_api['APCA_API_SECRET_KEY'], base_url=alpaca_api['APCA_API_BASE_URL'])

from Functions import *

# Date range settings — presumably the backtest window; neither name is read by
# the code visible in this notebook (TODO confirm where they are consumed).
start_date = '2020-01-01'
end_date = datetime.now()

# Starting cash balance for the simulated account.
buying_power = 100_000
# Fraction of buying power committed to a single buy order; TradingDay reads
# this as a module-level global when sizing buys.
max_per_trade = .01

#Get Alpaca APIs

### Get portfolio
###
#portfolio = pd.DataFrame([{'Symbol': p.symbol, 'Quantity': float(p.qty), 'Value': float(p.market_value)} for p in api.list_positions()])

# Empty portfolio table with explicit column dtypes, built in one expression:
# text for the ticker, floats for the share count and the position value.
portfolio = pd.DataFrame(columns = ['Symbol', 'Quantity', 'Value']).astype(
    {'Symbol': str, 'Quantity': float, 'Value': float}
)

### Define one day of trading behavior: takes a date and returns the orders to place

def TradingDay(current_day, portfolio, buying_power, api, epochs):
    """Decide which orders to place for one trading day.

    A prediction is requested for every symbol returned by
    ``IdentifyStocksOfInterest`` plus every symbol already in ``portfolio``.
    A positive prediction yields a buy order sized from ``buying_power`` and the
    module-level ``max_per_trade``; a negative prediction on a held symbol
    yields a sell order for the whole position. Everything else is ignored.

    Parameters
    ----------
    current_day : forwarded to ``GetHistoricalData`` as ``up_until``.
    portfolio : pandas.DataFrame with at least 'Symbol' and 'Quantity' columns.
        It is only read, never modified.
    buying_power : number used to size buy orders.
    api : forwarded to ``GetHistoricalData``.
    epochs : forwarded to ``Predict7DayHigh``.

    Returns
    -------
    dict
        ``{symbol: {'Side': 'buy' | 'sell', 'Quantity': quantity}}``. Buy orders
        whose computed quantity is not positive (price above the per-trade
        budget, or a missing price) are omitted.
    """
    ###Get stocks of Interest
    soi = IdentifyStocksOfInterest()
    ###combine with symbols from portfolio
    # Keep the held symbols as a set of VALUES: `x in series` tests the index
    # labels of a pandas Series, not its contents.
    held_symbols = set(portfolio['Symbol'].tolist())
    stocks_to_predict = list(held_symbols | set(soi))

    ### Make predictions
    preds = []
    current_prices = dict([])
    for symbol in stocks_to_predict:
        print('Working on: ' + symbol)

        # NOTE(review): this call shape (symbol, symbol list, up_until=...) differs from
        # the GetHistoricalData(symbols, api, end, ...) defined later in this notebook —
        # confirm which definition is expected to be in scope.
        data = GetHistoricalData(symbol, stocks_to_predict, up_until = current_day, api = api)

        if data is None:
            print('Skipping ' + symbol + ', insufficient data.')
            continue

        # The collected results are turned into a dict below, so each one is
        # expected to be a (symbol, prediction) pair.
        preds.append(Predict7DayHigh(symbol, data, epochs))
        # Last row of the history is used as the current price.
        current_prices[symbol] = data.iloc[-1][symbol]
    preds = dict(preds)
    predicted_stocks = list(preds.keys())

    ### Determine which to buy and which to sell
    orders = dict([])
    for symbol in predicted_stocks:

        if preds[symbol] > 0:
            side = 'buy'
            quantity = (buying_power * max_per_trade) // current_prices[symbol]
            # Written as `not > 0` so that a NaN quantity is rejected as well.
            if not quantity > 0:
                continue

        elif (preds[symbol] < 0) and (symbol in held_symbols):
            side = 'sell'
            # Positional access: the row's index label is not guaranteed to be 0.
            quantity = portfolio.loc[portfolio['Symbol'] == symbol, 'Quantity'].astype(int).iloc[0]

        else:
            continue

        orders[symbol] = {'Side': side, 'Quantity': quantity}

    return orders

def Execution(day_of_order, orders, portfolio, buying_power):
    """Simulate filling ``orders`` on the trading day after ``day_of_order``.

    Uses the module-level ``api`` client: the market calendar gives the next
    session, and the first daily bar of that session supplies the fill price
    (its ``o`` field — presumably the open).

    Parameters
    ----------
    day_of_order : pandas.Timestamp
        Must equal the first calendar date returned for the requested range.
    orders : dict
        ``{symbol: {'Side': 'buy' | 'sell', 'Quantity': quantity}}``.
    portfolio : pandas.DataFrame with 'Symbol', 'Quantity' and 'Value' columns.
        The caller's frame is not modified; an updated copy is returned.
    buying_power : number, cash available before the orders are applied.

    Returns
    -------
    (portfolio, buying_power)
        The updated holdings and remaining cash.

    Raises
    ------
    AssertionError
        If ``day_of_order`` is not a Timestamp or is not the first calendar date.
    """

    assert isinstance(day_of_order, pd.Timestamp)

    ###First, ensure your dates are valid
    calendar = api.get_calendar(start = day_of_order, end = day_of_order + relativedelta(days = 5))

    assert calendar[0].date == day_of_order

    ### Now determine which day these trades will be placed
    day_of_execution = calendar[1].date

    # Nothing to fill: skip the price request entirely.
    if not orders:
        return portfolio, buying_power

    # Work on a copy so every branch leaves the caller's DataFrame untouched.
    portfolio = portfolio.copy()

    barset = api.get_barset(symbols = orders.keys(), timeframe = 'day', start = day_of_execution, limit = 1)
    # Symbols that came back without any bar get no price and are skipped below.
    next_day_prices = {symbol: barset[symbol][0].o for symbol in barset if len(barset[symbol]) > 0}

    for symbol in orders.keys():

        if symbol not in next_day_prices:
            print('Skipping {}, no price available for {}.'.format(symbol, day_of_execution))
            continue

        side = orders[symbol]['Side']
        quantity = orders[symbol]['Quantity']
        price = next_day_prices[symbol]
        cost = quantity * price

        # Compare against the column VALUES; `symbol in portfolio['Symbol']`
        # would test the index labels instead.
        held_rows = portfolio['Symbol'] == symbol
        is_held = bool(held_rows.any())

        if side == 'buy':
            print('Trying to buy {} shares of {} at {} per share for a total cost of {}'.format(quantity, symbol, np.round(price, 5), np.round(cost,5)))
            if cost > buying_power:
                print('Did not buy, total cost is {} and we only have {}'.format(cost, buying_power))
                continue
            buying_power -= cost
            if is_held:
                portfolio.loc[held_rows, 'Quantity'] += quantity
                portfolio.loc[held_rows, 'Value'] = portfolio.loc[held_rows, 'Quantity'] * price
            else:
                # A new position is worth quantity * price, i.e. `cost`.
                new_row = pd.DataFrame([{'Symbol': symbol, 'Quantity': quantity, 'Value': cost}])
                # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
                portfolio = pd.concat([portfolio, new_row])
            print('Successfully bought {} shares of {} at {} per share for a total cost of {}'.format(quantity, symbol, np.round(price, 5), np.round(cost,5)))
            print('New buying power: ' + str(buying_power))
        elif side == 'sell':
            if not is_held:
                print('Did not sell {}, it is not in the portfolio.'.format(symbol))
                continue
            print('Selling {} shares of {} at {} per share for a total sale of {}.'.format(quantity, symbol, np.round(price,5), np.round(cost,5)))
            buying_power += cost
            # A sell closes the whole position: drop the symbol's row.
            portfolio = portfolio[portfolio['Symbol'] != symbol]
            print('New buying power: ' + str(buying_power))

    return portfolio, buying_power

In [93]:
# Scratch value holding the current local time.
# NOTE(review): Backtester.one_day further down reads a global named `day` —
# confirm whether it depends on this cell having been run.
day = datetime.now()

In [91]:
# Scratch request: daily aggregates for AAPL between the two hard-coded dates.
# `ok` is not read by any other cell visible in this notebook.
ok = api.polygon.historic_agg_v2(symbol = 'AAPL', multiplier=1, timespan = 'day', _from='2015-09-12', to='2020-10-02')

In [103]:
def GetHistoricalData(symbols, api, end, start = None, open_or_close = 'open'):
    """Build a table of daily prices with one column per symbol.

    Parameters
    ----------
    symbols : str or iterable of str
        Tickers to pull; duplicates are removed (and the result sorted) by
        ``np.unique``.
    api : object
        Must expose ``api.polygon.historic_agg_v2(...)`` whose result has a
        ``.df`` DataFrame.
    end : str, datetime-like or None
        Last day requested. Strings are parsed with ``pd.to_datetime``; ``None``
        means "now".
    start : optional
        First day requested. Defaults to ``end`` minus 1265 days.
    open_or_close : str
        Name of the column kept from each quote frame.

    Returns
    -------
    pandas.DataFrame
        One column per symbol, named after the symbol. Columns containing any
        missing value are dropped (and reported with a printed message). An
        empty DataFrame is returned when ``symbols`` is empty.
    """
    ###For now, we will explicitly say what the relevant stocks are,
    ###but in future this should be automated
    symbols_to_pull = np.unique(symbols)

    if end is None:
        end = datetime.now()
    elif isinstance(end, str):
        end = pd.to_datetime(end)

    ### We will use 5 years of historical daily data, meaning we need ~ 1,265 trading days of data,
    # NOTE(review): relativedelta(days=...) subtracts CALENDAR days, so this window
    # spans roughly 3.5 years rather than 1,265 trading days — confirm the intent.
    n_days = 1265
    if start is None:
        start = end - relativedelta(days = n_days)

    all_quotes = []
    for sym in symbols_to_pull:
        quotes = api.polygon.historic_agg_v2(symbol = sym, multiplier = 1, timespan = 'day', _from = start, to = end).df
        # rename() returns a new frame, avoiding an in-place edit of a slice.
        all_quotes.append(quotes[[open_or_close]].rename(columns={open_or_close: sym}))

    # pd.concat raises on an empty list, so handle "no symbols" explicitly.
    if not all_quotes:
        return pd.DataFrame()

    data = pd.concat(all_quotes, axis = 1)

    bad_cols = data.columns[data.isna().sum() > 0]
    if len(bad_cols) > 0:
        data = data.drop(bad_cols, axis = 1)
        print('Skipping {}, had missing values for the period.'.format(list(bad_cols)))

    # Return unconditionally: previously this sat inside the `if` above, so the
    # function returned None whenever no column had to be dropped.
    return data

In [181]:
# Scratch check of the order-date logic: find the calendar entry that follows
# `day_of_order` and look up quotes for that day.
day_of_order = pd.to_datetime('2020-05-01')

# NOTE(review): pd.to_datetime on a date string already yields a pd.Timestamp,
# so this branch looks unreachable here — confirm whether the timezone was
# meant to be applied.
if not isinstance(day_of_order, pd.Timestamp):
    day_of_order = pd.Timestamp(day_of_order, tz = 'America/New_York')

### First, ensure your dates are valid: the first calendar entry must be the order day itself
calendar = api.get_calendar(start = day_of_order, end = day_of_order + relativedelta(days = 5))

assert calendar[0].date == day_of_order

### Now determine which day these trades will be placed (the second calendar entry)
day_of_execution = calendar[1].date
print(day_of_execution)

# GetDayQuotes must already be defined when this runs; in this notebook its
# definition appears in a later cell.
next_day_prices = dict(GetDayQuotes(["AAPL", 'FIT'], api,  pd.Timestamp(day_of_execution, tz = 'America/New_York'), 'open'))
print(next_day_prices)

2020-05-04 00:00:00
{'AAPL': timestamp
2020-05-04 00:00:00-04:00    72.2925
Name: AAPL, dtype: float64, 'FIT': timestamp
2020-05-04 00:00:00-04:00    6.81
Name: FIT, dtype: float64}


In [184]:
def GetDayQuotes(symbols, api, date, open_or_close = 'open'):
    """Return one price per symbol for a single day.

    Parameters
    ----------
    symbols : iterable of str
        Tickers to look up.
    api : object
        Must expose ``api.polygon.historic_agg_v2(...)`` whose result has a
        ``.df`` DataFrame.
    date : str, datetime-like or None
        Day to query (used as both ends of the requested range). Strings are
        parsed with ``pd.to_datetime``; ``None`` means "now".
    open_or_close : str
        Name of the column kept from each quote frame.

    Returns
    -------
    dict
        ``{symbol: price}`` taken from the first row returned. Symbols with a
        missing value in that row are dropped (and reported with a printed
        message). Empty when ``symbols`` is empty or no row came back.
    """

    if date is None:
        date = datetime.now()
    elif isinstance(date, str):
        # Parse the argument itself; this used to reference an undefined `end`.
        date = pd.to_datetime(date)

    all_quotes = []
    for sym in symbols:
        quotes = api.polygon.historic_agg_v2(symbol = sym, multiplier = 1, timespan = 'day', _from = date, to = date).df
        # rename() returns a new frame, avoiding an in-place edit of a slice.
        all_quotes.append(quotes[[open_or_close]].rename(columns={open_or_close: sym}))

    # pd.concat raises on an empty list, so handle "no symbols" explicitly.
    if not all_quotes:
        return {}

    data = pd.concat(all_quotes, axis = 1).head(1)
    # No rows at all: there is nothing for iloc[0] to read.
    if data.empty:
        return {}

    bad_cols = data.columns[data.isna().sum() > 0]
    if len(bad_cols) > 0:
        data = data.drop(bad_cols, axis = 1)
        print('Skipping {}, had missing values for the period.'.format(list(bad_cols)))
    return dict(data.iloc[0])

{'AAPL': timestamp
2020-05-04 00:00:00-04:00    72.2925
Name: AAPL, dtype: float64, 'FIT': timestamp
2020-05-04 00:00:00-04:00    6.81
Name: FIT, dtype: float64}


In [267]:
GetLongReturns(['SPY'], api, pd.to_datetime('2019-11-01'), pd.to_datetime('2020-11-01'))

Top Fund: SPY
Top Fund Return: 1.082906991997901
Top 5 Funds: ['SPY']
Top 5 Fund Return: 1.082906991997901
Overall weighted Return: 1.082906991997901


Unnamed: 0,Start,End,Return,ReturnRank
SPY,304.92,330.2,1.082907,1.0


In [272]:
pd.DataFrame([{'A': 2, 'B': 4}]) * 2

Unnamed: 0,A,B
0,4,8


In [277]:
import tensorflow as tf

In [3]:
# Scratch run of the prediction step for a single hard-coded day; leaves
# `preds` and `current_prices` as notebook globals.
current_day = pd.to_datetime('2020-11-01')
###Get stocks of Interest
soi = IdentifyStocksOfInterest()
###combine with symbols from portfolio
stocks_to_predict = list(set(portfolio['Symbol'].tolist() + soi))
# Passed through to Predict7DayHigh for every symbol.
epochs = 5
### Make predictions
preds = []
current_prices = dict([])

### For now we are using same data for all of them, but in future data grab
### should be inside for loop

data = GetHistoricalData(stocks_to_predict, end = current_day, api = api)

# Keep only the symbols that are still present as columns of `data`.
stocks_to_predict = [col for col in data if col in stocks_to_predict]

for symbol in stocks_to_predict:
    # Results are turned into a dict below, so each one is presumably a
    # (symbol, prediction) pair — verify against Predict7DayHigh.
    preds.append(Predict7DayHigh(symbol, data, epochs))
    # The last row of `data` is taken as the symbol's current price.
    current_prices[symbol] = data.iloc[-1][symbol]
preds = dict(preds)


Skipping ['MRNA'], had missing values for the period.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float6

{'AAPL': -0.0657152853391731,
 'AMD': 0.12877959523654206,
 'BA': -0.14831663220920283,
 'FIT': -1.7619703678374596,
 'GPRO': 1.0103357908053276,
 'MSFT': 0.0004303977957635827,
 'PLUG': 1.2414764225213235,
 'SNAP': -0.877734141245751}

In [8]:
preds

{'AAPL': -0.071478516,
 'AMD': 0.09619836,
 'BA': -0.22039852,
 'FIT': -0.12421891,
 'GPRO': 0.059104644,
 'MSFT': 0.00087082386,
 'PLUG': 0.1920564,
 'SNAP': -0.35802776}

In [9]:
current_prices

{'AAPL': 108.77,
 'AMD': 74.7,
 'BA': 148.6,
 'FIT': 7.05,
 'GPRO': 5.85,
 'MSFT': 202.33,
 'PLUG': 15.47,
 'SNAP': 40.79}

In [242]:
top_fund_return

1.4732649203348636

Unnamed: 0_level_0,AAPL,FIT
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-12-26 00:00:00-05:00,72.4775,6.64
2019-12-27 00:00:00-05:00,72.45,6.48
2019-12-30 00:00:00-05:00,72.88,6.44
2019-12-31 00:00:00-05:00,73.4125,6.57
2020-01-02 00:00:00-05:00,75.0875,6.46


In [141]:
quotes

Unnamed: 0_level_0,open,high,low,close,volume,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-20 00:00:00-05:00,26.9600,27.1950,26.6250,27.1800,199573736.0,26.9486
2015-01-21 00:00:00-05:00,27.2375,27.7650,27.0675,27.3875,194303588.0,27.4588
2015-01-22 00:00:00-05:00,27.5650,28.1175,27.4300,28.1000,215185636.0,27.8581
2015-01-23 00:00:00-05:00,28.0750,28.4375,27.8825,28.2450,185859312.0,28.1870
2015-01-26 00:00:00-05:00,28.4350,28.5907,28.2000,28.2750,222459916.0,28.3717
...,...,...,...,...,...,...
2019-12-26 00:00:00-05:00,71.2050,72.4950,71.1750,72.4775,93336016.0,72.0360
2019-12-27 00:00:00-05:00,72.7800,73.4925,72.0300,72.4500,146371744.0,72.6935
2019-12-30 00:00:00-05:00,72.3650,73.1725,71.3050,72.8800,144238456.0,72.4813
2019-12-31 00:00:00-05:00,72.4825,73.4200,72.3800,73.4125,100990500.0,73.0982


pandas._libs.tslibs.timestamps.Timestamp

In [None]:
class Backtester():
    def __init__(self, trading_strategy, execution_strategy):
        self.trader = trading_strategy
        self.executor = execution_strategy
    
    def one_day(date):
        assert isinstance(date, pd.Timestamp)
        
        #Check market open
        day_ts = pd.Timestamp(day, tz = 'America/New_York')
        calendar = api.get_calendar(start = day_ts.isoformat(), end = (day_ts + relativedelta(days = 1)).isoformat())
        if calendar[0].date != day:
            print('Skipping {}, the market is closed.'.format(day.strftime('%Y-%m-%d')))
            return None, None, None
        
        
    def run(start_date, end_date):