We will dissect each part of the backtester to see how they work.
# 1. The DataHandler

In [87]:
from data_handler import HistoricalPolygonDataHandler
from polygon.tickers import get_id
from datetime import datetime, date
from event import MarketEvent
import queue
import pandas as pd
import numpy as np

In [88]:
events = queue.Queue()

In [89]:
data_handler = HistoricalPolygonDataHandler(events)
data_handler.load_data("AAPL", start=date(2023, 8, 1), end=date(2023, 9, 1))
data_handler.load_data("AA", start=date(2023, 7, 1), end=date(2023, 9, 1))

In [90]:
data_handler.get_loaded_symbols()

['AAPL', 'AA']

In [91]:
for i in range(10):  
    data_handler.update_bars(datetime(2023, 8, 1, hour=9, minute=30+i))

In [92]:
len(data_handler._latest_bars["AAPL"])

10

In [93]:
data_handler.get_latest_bars("AAPL", N=2)

Unnamed: 0,open,high,low,close,close_original,volume,tradeable,halted
2023-08-01 09:38:00,196.395,196.49,196.3871,196.43,196.43,128663,True,False
2023-08-01 09:39:00,196.4205,196.59,196.37,196.55,196.55,235643,True,False


# 2. Broker

In [94]:
from broker import SimulatedBroker
from event import OrderEvent
broker = SimulatedBroker(events, data_handler)
order = OrderEvent(datetime(2023, 8, 1, hour=9, minute=39), "AAPL", side="BUY", quantity=10)

2023-08-01T09:39:00 | ORDER BUY 10 of AAPL


In [95]:
broker.execute_order(order)

In [96]:
for i in range(10):
    events.get() # Remove the MarketEvents
event = events.get()
event

<event.FillEvent at 0x1eada2d2490>

In [97]:
event.total_fill

1965.5

# 3. Portfolio

In [98]:
from portfolio import StandardPortfolio
from event import FillEvent
from data_handler import HistoricalPolygonDataHandler
from polygon.tickers import get_id
from datetime import datetime, date
import queue
events = queue.Queue()

In [99]:
data_handler = HistoricalPolygonDataHandler(events)
data_handler.load_data("AAPL", start=date(2023, 8, 1), end=date(2023, 9, 1))
data_handler.load_data("AA", start=date(2023, 7, 1), end=date(2023, 9, 1))

portfolio = StandardPortfolio(events, data_handler, start_date=datetime(2023, 8, 1, hour=9, minute=30))

In [100]:
portfolio.current_equity

10000.0

In [101]:
portfolio.current_positions_value

0

In [102]:
for i in range(1, 5):  
    data_handler.update_bars(datetime(2023, 8, 1, hour=9, minute=30+i))
    portfolio.append_portfolio_log(dt=datetime(2023, 8, 1, hour=9, minute=30+i))
data_handler.get_latest_bars("AAPL", N=2)

Unnamed: 0,open,high,low,close,close_original,volume,tradeable,halted
2023-08-01 09:33:00,196.05,196.19,195.95,195.9658,195.9658,124321,True,False
2023-08-01 09:34:00,195.95,196.3961,195.925,196.39,196.39,161667,True,False


In [103]:
fill = FillEvent(dt=datetime(2023, 8, 1, hour=9, minute=34), symbol="AAPL", side='BUY', quantity=25, fill_price=196.39, fees=100)
portfolio.update_from_fill(fill)

In [104]:
fill = FillEvent(dt=datetime(2023, 8, 1, hour=9, minute=34), symbol="AAPL", side='BUY', quantity=25, fill_price=196.39, fees=100)
portfolio.update_from_fill(fill)

In [105]:
portfolio.current_positions

{'AAPL': 50}

In [106]:
portfolio.current_cash

-19.5

In [107]:
portfolio.current_positions_value

9819.5

In [108]:
portfolio.current_equity

9800.0

In [109]:
for i in range(5):  
    data_handler.update_bars(datetime(2023, 8, 1, hour=9, minute=35+i))
    portfolio.append_portfolio_log(dt=datetime(2023, 8, 1, hour=9, minute=35+i))
data_handler.get_latest_bars("AAPL", N=2)

Unnamed: 0,open,high,low,close,close_original,volume,tradeable,halted
2023-08-01 09:38:00,196.395,196.49,196.3871,196.43,196.43,128663,True,False
2023-08-01 09:39:00,196.4205,196.59,196.37,196.55,196.55,235643,True,False


In [110]:
fill = FillEvent(dt=datetime(2023, 8, 1, hour=9, minute=39), symbol="AAPL", side='SELL', quantity=60, fill_price=196.55, fees=100)
portfolio.update_from_fill(fill)

In [111]:
for i in range(5):  
    data_handler.update_bars(datetime(2023, 8, 1, hour=9, minute=40+i))
    portfolio.append_portfolio_log(dt=datetime(2023, 8, 1, hour=9, minute=40+i))
data_handler.get_latest_bars("AAPL", N=2)

Unnamed: 0,open,high,low,close,close_original,volume,tradeable,halted
2023-08-01 09:43:00,196.4814,196.62,196.45,196.535,196.535,93577,True,False
2023-08-01 09:44:00,196.53,196.56,196.42,196.4798,196.4798,83679,True,False


In [112]:
data_handler.update_bars(datetime(2023, 8, 1, hour=9, minute=45))
fill = FillEvent(dt=datetime(2023, 8, 1, hour=9, minute=45), symbol="AAPL", side='BUY', quantity=10, fill_price=196.50, fees=100)
portfolio.update_from_fill(fill)
portfolio.append_portfolio_log(dt=datetime(2023, 8, 1, hour=9, minute=45))

In [113]:
# I prefer only logging the performance each day instead of every bar, else this list gets unnecessarily long
portfolio.create_df_from_holdings_log()

Unnamed: 0_level_0,equity,cash,positions_value,positions,returns,returns_cum
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-08-01 09:30:00,10000.0,10000.0,0.0,{},0.0,0.0
2023-08-01 09:31:00,10000.0,10000.0,0.0,{},0.0,0.0
2023-08-01 09:32:00,10000.0,10000.0,0.0,{},0.0,0.0
2023-08-01 09:33:00,10000.0,10000.0,0.0,{},0.0,0.0
2023-08-01 09:34:00,10000.0,10000.0,0.0,{},0.0,0.0
2023-08-01 09:35:00,9792.5,-19.5,9812.0,{'AAPL': 50},-0.02075,-0.02075
2023-08-01 09:36:00,9799.5,-19.5,9819.0,{'AAPL': 50},0.000715,-0.02005
2023-08-01 09:37:00,9800.5,-19.5,9820.0,{'AAPL': 50},0.000102,-0.01995
2023-08-01 09:38:00,9802.0,-19.5,9821.5,{'AAPL': 50},0.000153,-0.0198
2023-08-01 09:39:00,9808.0,-19.5,9827.5,{'AAPL': 50},0.000612,-0.0192


In [114]:
fills_log = pd.DataFrame(portfolio.fills_log)
fills_log.set_index('datetime', inplace=True)
fills_log

Unnamed: 0_level_0,symbol,side,quantity,fill_price,fees
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-01 09:34:00,AAPL,BUY,25,196.39,100
2023-08-01 09:34:00,AAPL,BUY,25,196.39,100
2023-08-01 09:39:00,AAPL,SELL,60,196.55,100
2023-08-01 09:45:00,AAPL,BUY,10,196.5,100


# 4. Portfolio - Trade log
The trade log is the list of profit/losses from every trade. All information we need to calculate that are in the fills log. However this is not straightforward. If you look at the fills log above, what are the trades? Should the BUY 25 from the first 2 trades be grouped? If the short trade was 40 shares instead, how would be assign them to the first 2 trades? Do we see that as closing the the first and partially closing the second? What about positions that are still open?

When constructing the trade log, we use the following rules:
1. We never group opening trades. So we see the first 2 trades as seperate trades.
2. Trades in the opposing direction are assigned as FIFO. If the opposing direction is larger than the entire posiiton, we see this as a new position. So the SELL -60 means we exit the 1st and 2nd trade, and create a new trade that is short 10 shares. If the trade was SELL -40 instead, we would assign -25 to the first trade and -15 to the second trade. Because we use FIFO.
3. For open positions, we can calculate the current P/L.

(All P/L are realized. I will not bother with unrealized P/L in the trade log.)

In [115]:
def fills_to_trades(fills):
    trade_log = pd.DataFrame(columns=['datetime_in', 'symbol', 'side', 'quantity', 'entry', 'exit', 'datetime_out', 'fees', 'net P/L %', 'net P/L $', 'remaining_qty'])
    for dt, trade in fills_log.iterrows():
        symbol = trade['symbol']
        side = trade['side']
        opposite_side = 'SELL' if side == 'BUY' else 'BUY'
        quantity = trade['quantity']
        fill_price = trade['fill_price']
        fees = trade['fees']

        current_position_in_symbol_opposite = trade_log[(trade_log['symbol'] == symbol) & (trade_log['side'] == opposite_side) & (trade_log['remaining_qty'] > 0)]
        if len(current_position_in_symbol_opposite) == 0:
            # If no open trades in this symbol in the opposite direction, create new trade
            trade_log.loc[len(trade_log)] = [dt, symbol, side, quantity, fill_price, np.nan, np.nan, fees, np.nan, np.nan, quantity]
        else:
            # Else we (partially) close the trade(s) and create a new trade if a net position remains. Using FIFO.
            for index, open_trade in current_position_in_symbol_opposite.iterrows():
                remaining_qty_open_trade = open_trade['remaining_qty']
                already_filled_qty_open_trade = open_trade['quantity'] - open_trade['remaining_qty']
                current_average_fill = open_trade['exit']

                # Partial close of open_trade
                if quantity < remaining_qty_open_trade:
                    if np.isnan(current_average_fill):
                        trade_log.loc[index, "exit"] = fill_price
                    else:
                        average_fill_exit = ((current_average_fill * already_filled_qty_open_trade) + (fill_price * quantity))/(already_filled_qty_open_trade + quantity) # Calculate new average fill
                        trade_log.loc[index, "exit"] = average_fill_exit

                    trade_log.loc[index, "remaining_qty"] -= quantity
                    trade_log.loc[index, "fees"] += fees
                    break # We don't have to look at the next trade

                # Full close of open_trade
                elif quantity >= remaining_qty_open_trade:
                    if np.isnan(current_average_fill):
                        trade_log.loc[index, "exit"] = fill_price
                    else:
                        average_fill_exit = ((current_average_fill * already_filled_qty_open_trade) + (fill_price * quantity))/(already_filled_qty_open_trade + quantity) # Calculate new average fill
                        trade_log.loc[index, "exit"] = average_fill_exit

                    trade_log.loc[index, "remaining_qty"] = 0
                    trade_log.loc[index, "fees"] += fees
                    trade_log.loc[index, "datetime_out"] = dt

                    if quantity == remaining_qty_open_trade:
                        break # We don't have to look at the next trade
                    else:
                        quantity = quantity - remaining_qty_open_trade # Calculate remaining quantity

                        # If we are at the end and there is still a remaining quantity, that is a new position
                        if index == len(current_position_in_symbol_opposite)-1:
                            trade_log.loc[len(trade_log)] = [dt, symbol, side, quantity, fill_price, np.nan, np.nan, fees, np.nan, np.nan, quantity]
    return trade_log
            

In [None]:
def calculate_PNL_trade_log(trade_log):
    trade_log["direction"] = np.where(trade_log["side"] == "BUY", 1, -1)
    trade_log["filled_qty"] = trade_log["quantity"] - trade_log["remaining_qty"]

    trade_log["net P/L %"] = (
        (
            trade_log["filled_qty"]
            * trade_log["direction"]
            * (trade_log["exit"] - trade_log["entry"])
        )
        - trade_log["fees"]
    ) / trade_log["entry"]

    trade_log["net P/L $"] = (
        trade_log["filled_qty"] * trade_log["direction"]
    ) * (trade_log["exit"] - trade_log["entry"]) - trade_log["fees"]
    return trade_log.drop(columns=["direction", "filled_qty"])

Using the above example:

In [137]:
fills_log

Unnamed: 0_level_0,symbol,side,quantity,fill_price,fees
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-01 10:00:00,AAPL,BUY,100,10,1
2023-08-01 10:01:00,AAPL,SELL,100,15,1
2023-08-01 10:02:00,AAPL,SELL,100,20,1
2023-08-01 10:03:00,AAPL,BUY,100,15,1


In [138]:
calculate_PNL_trade_log(fills_to_trades(fills_log))

Unnamed: 0,datetime_in,symbol,side,quantity,entry,exit,datetime_out,fees,net P/L %,net P/L $,remaining_qty
0,2023-08-01 10:00:00,AAPL,BUY,100,10,15.0,2023-08-01 10:01:00,2,49.8,498.0,0
1,2023-08-01 10:02:00,AAPL,SELL,100,20,15.0,2023-08-01 10:03:00,2,24.9,498.0,0


Closing order bigger than opening order:

In [139]:
fills_log = pd.DataFrame([[datetime(2023, 8, 1, hour=10, minute=0), "AAPL", "BUY", 100, 10, 1], 
                          [datetime(2023, 8, 1, hour=10, minute=1), "AAPL", "SELL", 200, 15, 1],
                          ], columns=['datetime', 'symbol', 'side', 'quantity', 'fill_price', 'fees'])
fills_log.set_index('datetime', inplace=True)
fills_log

Unnamed: 0_level_0,symbol,side,quantity,fill_price,fees
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-01 10:00:00,AAPL,BUY,100,10,1
2023-08-01 10:01:00,AAPL,SELL,200,15,1


In [140]:
calculate_PNL_trade_log(fills_to_trades(fills_log))

Unnamed: 0,datetime_in,symbol,side,quantity,entry,exit,datetime_out,fees,net P/L %,net P/L $,remaining_qty
0,2023-08-01 10:00:00,AAPL,BUY,100,10,15.0,2023-08-01 10:01:00,2,49.8,498.0,0
1,2023-08-01 10:01:00,AAPL,SELL,100,15,,,1,,,100


Closing order smaller than opening order:

In [141]:
fills_log = pd.DataFrame([[datetime(2023, 8, 1, hour=10, minute=0), "AAPL", "BUY", 100, 10, 1], 
                          [datetime(2023, 8, 1, hour=10, minute=1), "AAPL", "SELL", 50, 15, 1],
                          ], columns=['datetime', 'symbol', 'side', 'quantity', 'fill_price', 'fees'])
fills_log.set_index('datetime', inplace=True)
fills_log

Unnamed: 0_level_0,symbol,side,quantity,fill_price,fees
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-01 10:00:00,AAPL,BUY,100,10,1
2023-08-01 10:01:00,AAPL,SELL,50,15,1


Two fully closed orders:

In [142]:
calculate_PNL_trade_log(fills_to_trades(fills_log))

Unnamed: 0,datetime_in,symbol,side,quantity,entry,exit,datetime_out,fees,net P/L %,net P/L $,remaining_qty
0,2023-08-01 10:00:00,AAPL,BUY,100,10,15.0,,2,24.8,248.0,50


In [143]:
fills_log = pd.DataFrame([[datetime(2023, 8, 1, hour=10, minute=0), "AAPL", "BUY", 100, 10, 1], 
                          [datetime(2023, 8, 1, hour=10, minute=1), "AAPL", "SELL", 100, 15, 1],
                          [datetime(2023, 8, 1, hour=10, minute=2), "AAPL", "SELL", 100, 20, 1],
                          [datetime(2023, 8, 1, hour=10, minute=3), "AAPL", "BUY", 100, 15, 1],
                          ], columns=['datetime', 'symbol', 'side', 'quantity', 'fill_price', 'fees'])
fills_log.set_index('datetime', inplace=True)
fills_log

Unnamed: 0_level_0,symbol,side,quantity,fill_price,fees
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-01 10:00:00,AAPL,BUY,100,10,1
2023-08-01 10:01:00,AAPL,SELL,100,15,1
2023-08-01 10:02:00,AAPL,SELL,100,20,1
2023-08-01 10:03:00,AAPL,BUY,100,15,1


In [144]:
calculate_PNL_trade_log(fills_to_trades(fills_log))

Unnamed: 0,datetime_in,symbol,side,quantity,entry,exit,datetime_out,fees,net P/L %,net P/L $,remaining_qty
0,2023-08-01 10:00:00,AAPL,BUY,100,10,15.0,2023-08-01 10:01:00,2,49.8,498.0,0
1,2023-08-01 10:02:00,AAPL,SELL,100,20,15.0,2023-08-01 10:03:00,2,24.9,498.0,0
